websockify/utils/websocket.py

#!/usr/bin/python

'''
Python WebSocket library with support for "wss://" encryption.

You can make a cert/key with openssl using:
openssl req -new -x509 -days 365 -nodes -out self.pem -keyout self.pem
as taken from http://docs.python.org/dev/library/ssl.html#certificates

'''

import sys, socket, ssl, traceback
from base64 import b64encode, b64decode

# Per-connection options parsed from the query string of the client's
# request path; rebuilt by do_handshake() and read by encode()/decode().
client_settings = {}
# Sequence number of the next outgoing frame; reset by do_handshake() and
# advanced by encode() when the "seq_num" setting is enabled.
send_seq = 0

# Handshake reply template; do_handshake() fills in, in order:
# origin, scheme ("ws" or "wss"), host and request path.
server_handshake = """HTTP/1.1 101 Web Socket Protocol Handshake\r
Upgrade: WebSocket\r
Connection: Upgrade\r
WebSocket-Origin: %s\r
WebSocket-Location: %s://%s%s\r
WebSocket-Protocol: sample\r
\r
"""

# Reply sent by do_handshake() when the peer opens with
# "<policy-file-request/>".
policy_response = """<cross-domain-policy><allow-access-from domain="*" to-ports="*" /></cross-domain-policy>\n"""

def traffic(token="."):
    """ Write token to stdout (no newline added) and flush right away. """
    out = sys.stdout
    out.write(token)
    out.flush()

def decode(buf):
    """ Parse out WebSocket packets.

    buf holds one or more frames, each consisting of a leading byte, the
    payload and a trailing 0xff. Returns the list of decoded payloads:
    base64-decoded when client_settings["b64encode"] is set, otherwise
    decoded from the modified UTF-8 form (0x00 transmitted as
    "\\xc4\\x80") back to raw latin-1 bytes.

    When buf contains more than one 0xff it is split on that byte, so the
    piece following the final 0xff is decoded too (an empty entry when buf
    ends with 0xff).
    """
    # Step 1: isolate the raw payloads, dropping each frame's leading byte.
    if buf.count('\xff') > 1:
        payloads = [frame[1:] for frame in buf.split('\xff')]
    else:
        # Single frame: strip the leading byte and the trailing terminator.
        payloads = [buf[1:-1]]

    # Step 2: undo the transfer encoding selected by the client.
    if client_settings["b64encode"]:
        return [b64decode(raw) for raw in payloads]

    # Modified UTF-8 decode
    return [raw.replace("\xc4\x80", "\x00").decode('utf-8').encode('latin-1')
            for raw in payloads]

def encode(buf):
    """ Wrap buf in a WebSocket frame ("\\x00" + payload + "\\xff").

    The payload is base64-encoded when client_settings["b64encode"] is
    set; otherwise it is converted from latin-1 bytes to the modified
    UTF-8 form in which 0x00 is transmitted as "\\xc4\\x80".

    When client_settings["seq_num"] is set the payload is prefixed with
    "<n>:" where n is the current value of the global send_seq, which is
    then advanced by one.
    """
    global send_seq

    if client_settings["b64encode"]:
        payload = b64encode(buf)
    else:
        # Modified UTF-8 encode
        as_utf8 = buf.decode('latin-1').encode('utf-8')
        payload = as_utf8.replace("\x00", "\xc4\x80")

    if not client_settings["seq_num"]:
        return "\x00%s\xff" % payload

    # Claim the current sequence number, then bump the counter.
    seq = send_seq
    send_seq = seq + 1
    return "\x00%d:%s\xff" % (seq, payload)


def do_handshake(sock):
    """ Perform the server side of the WebSocket handshake on sock.

    Peeks at the first bytes sent by the client to choose one of three
    paths: a flash policy request (answered, after which sock is closed),
    an SSL/TLS session (sock is wrapped using the certificate in
    'self.pem'), or a plain-text session. For the latter two the request
    is read, the per-connection options are parsed from the query string
    of the request path into the global client_settings, and the server
    handshake is sent back. The global send_seq is reset to 0 on entry.

    Returns False after serving a flash policy request; otherwise the
    socket to use for the session (the SSL wrapper for "wss").

    Raises ValueError if the request line is malformed or the Host or
    Origin header is missing.
    """
    global client_settings, send_seq
    send_seq = 0
    # Peek, but don't read the data
    handshake = sock.recv(1024, socket.MSG_PEEK)
    #print "Handshake [%s]" % repr(handshake)
    if handshake.startswith("<policy-file-request/>"):
        # Consume the request that was only peeked at above.
        sock.recv(1024)
        print("Sending flash policy response")
        sock.sendall(policy_response)
        sock.close()
        return False
    elif handshake.startswith("\x16"):
        # A leading 0x16 byte is treated as the start of an SSL/TLS session.
        retsock = ssl.wrap_socket(
                sock,
                server_side=True,
                certfile='self.pem',
                ssl_version=ssl.PROTOCOL_TLSv1)
        scheme = "wss"
        print("Using SSL/TLS")
    else:
        retsock = sock
        scheme = "ws"
        print("Using plain (not SSL) socket")

    # NOTE: the whole request is assumed to arrive in this single recv().
    request = retsock.recv(4096)
    req_lines = request.split("\r\n")

    request_line = req_lines[0].split(" ")
    if len(request_line) != 3:
        raise ValueError("Malformed request line: %r" % req_lines[0])
    path = request_line[1]

    # Look headers up by name rather than by line position, so the order
    # in which the client sends them does not matter.
    headers = {}
    for line in req_lines[1:]:
        if not line:
            # A blank line terminates the header section.
            break
        name, sep, value = line.partition(":")
        if sep:
            headers[name.strip().lower()] = value.strip()
    for required in ("host", "origin"):
        if required not in headers:
            raise ValueError("Missing %s header in handshake" % required)
    origin = headers["origin"]
    host = headers["host"]

    # Parse settings from the path
    cvars = path.partition('?')[2].partition('#')[0].split('&')
    client_settings = {'b64encode': None, 'seq_num': None}
    for cvar in cvars:
        if not cvar:
            continue
        name, _, value = cvar.partition('=')
        # A bare "name" (no "=value") just switches the option on.
        client_settings[name] = value or True

    print("client_settings: %s" % (client_settings,))

    # sendall() keeps writing until the complete reply has gone out.
    retsock.sendall(server_handshake % (origin, scheme, host, path))
    return retsock

def start_server(listen_port, handler):
    """ Accept WebSocket clients on listen_port forever, one at a time.

    Binds a listening TCP socket on all interfaces, then loops: accept a
    connection, run do_handshake() on it and, unless the handshake
    returned a false value (flash policy request), call handler(csock)
    with the resulting socket. Any Exception raised along the way is
    printed with its traceback and the client's socket is closed before
    waiting for the next connection. This function does not return.
    """
    lsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    lsock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    lsock.bind(('', listen_port))
    lsock.listen(100)
    while True:
        startsock = None
        csock = None
        try:
            print('waiting for connection on port %s' % listen_port)
            startsock, address = lsock.accept()
            print('Got client connection from %s' % address[0])
            csock = do_handshake(startsock)
            if not csock: continue

            # NOTE(review): the socket is not closed here after a normal
            # return; presumably the handler owns it -- confirm.
            handler(csock)

        except Exception:
            print("Ignoring exception:")
            print(traceback.format_exc())
            if csock:
                csock.close()
            elif startsock:
                # The handshake itself failed, so csock was never set;
                # close the accepted socket to avoid leaking it.
                startsock.close()
websocket.py: generic code from wsproxy and wstest. 2010-05-06 16:32:07 +01:00			`#!/usr/bin/python`

			`'''`
			`Python WebSocket library with support for "wss://" encryption.`

			`You can make a cert/key with openssl using:`
			`openssl req -new -x509 -days 365 -nodes -out self.pem -keyout self.pem`
			`as taken from http://docs.python.org/dev/library/ssl.html#certificates`

			`'''`

			`import sys, socket, ssl, traceback`
			`from base64 import b64encode, b64decode`

			`client_settings = {}`
			`send_seq = 0`

			`server_handshake = """HTTP/1.1 101 Web Socket Protocol Handshake\r`
			`Upgrade: WebSocket\r`
			`Connection: Upgrade\r`
			`WebSocket-Origin: %s\r`
			`WebSocket-Location: %s://%s%s\r`
			`WebSocket-Protocol: sample\r`
			`\r`
			`"""`

			`policy_response = """<cross-domain-policy><allow-access-from domain="" to-ports="" /></cross-domain-policy>\n"""`

			`def traffic(token="."):`
			`sys.stdout.write(token)`
			`sys.stdout.flush()`

			`def decode(buf):`
			`""" Parse out WebSocket packets. """`
			`if buf.count('\xff') > 1:`
Test non-base64 (straight UTF-8) encoding. Also add a wsencoding test client/server program to test send a set of values between client and server and vice-versa to test encodings. Not turned on by default. Add support for encode/decode of UTF-8 in the proxy. This leverages the browser for decoding the WebSocket stream directly instead of doing base64 decode in the browser itself. Unfortunately, in Chrome this has negligible impact (round-trip time is increased slightly likely due to extra python processing). In firefox, due to the use of the flash WebSocket emulator the performance is even worse. This is because it's really annoying to get the flash WebSocket emulator to properly decode a UTF-8 bytestream. The problem is that the readUTFBytes and readMultiByte methods of an ActionScript ByteArray don't treat 0x00 correctly. They return a string that ends at the first 0x00, but the index into the ByteArray has been advanced by however much you requested. This is very silly for two reasons: ActionScript (and Javascript) strings can contain 0x00 (they are not null terminated) and second, UTF-8 can legitimately contain 0x00 values. Since UTF-8 is not constant width there isn't a great way to determine if those methods in fact did encounter a 0x00 or they just read the number of bytes requested. Doing manual decoding using readUTFByte one character at a time slows things down quite a bit. And to top it all off, those methods don't support the alternate UTF-8 encoding for 0x00 ("\xc0\x80"). They also just treat that encoding as the end of string too. So to get around this, for now I'm encoding zero as 256 ("\xc4\x80") and then doing mod 256 in Javascript. Still doesn't result in much benefit in firefox. But, it's an interesting approach that could use some more exploration so I'm leaving in the code in both places. 2010-05-28 21:39:38 +01:00			`if client_settings["b64encode"]:`
			`return [b64decode(d[1:]) for d in buf.split('\xff')]`
			`else:`
			`# Modified UTF-8 decode`
			`return [d[1:].replace("\xc4\x80", "\x00").decode('utf-8').encode('latin-1') for d in buf.split('\xff')]`
websocket.py: generic code from wsproxy and wstest. 2010-05-06 16:32:07 +01:00			`else:`
Test non-base64 (straight UTF-8) encoding. Also add a wsencoding test client/server program to test send a set of values between client and server and vice-versa to test encodings. Not turned on by default. Add support for encode/decode of UTF-8 in the proxy. This leverages the browser for decoding the WebSocket stream directly instead of doing base64 decode in the browser itself. Unfortunately, in Chrome this has negligible impact (round-trip time is increased slightly likely due to extra python processing). In firefox, due to the use of the flash WebSocket emulator the performance is even worse. This is because it's really annoying to get the flash WebSocket emulator to properly decode a UTF-8 bytestream. The problem is that the readUTFBytes and readMultiByte methods of an ActionScript ByteArray don't treat 0x00 correctly. They return a string that ends at the first 0x00, but the index into the ByteArray has been advanced by however much you requested. This is very silly for two reasons: ActionScript (and Javascript) strings can contain 0x00 (they are not null terminated) and second, UTF-8 can legitimately contain 0x00 values. Since UTF-8 is not constant width there isn't a great way to determine if those methods in fact did encounter a 0x00 or they just read the number of bytes requested. Doing manual decoding using readUTFByte one character at a time slows things down quite a bit. And to top it all off, those methods don't support the alternate UTF-8 encoding for 0x00 ("\xc0\x80"). They also just treat that encoding as the end of string too. So to get around this, for now I'm encoding zero as 256 ("\xc4\x80") and then doing mod 256 in Javascript. Still doesn't result in much benefit in firefox. But, it's an interesting approach that could use some more exploration so I'm leaving in the code in both places. 2010-05-28 21:39:38 +01:00			`if client_settings["b64encode"]:`
			`return [b64decode(buf[1:-1])]`
			`else:`
			`return [buf[1:-1].replace("\xc4\x80", "\x00").decode('utf-8').encode('latin-1')]`
websocket.py: generic code from wsproxy and wstest. 2010-05-06 16:32:07 +01:00
			`def encode(buf):`
			`global send_seq`
Test non-base64 (straight UTF-8) encoding. Also add a wsencoding test client/server program to test send a set of values between client and server and vice-versa to test encodings. Not turned on by default. Add support for encode/decode of UTF-8 in the proxy. This leverages the browser for decoding the WebSocket stream directly instead of doing base64 decode in the browser itself. Unfortunately, in Chrome this has negligible impact (round-trip time is increased slightly likely due to extra python processing). In firefox, due to the use of the flash WebSocket emulator the performance is even worse. This is because it's really annoying to get the flash WebSocket emulator to properly decode a UTF-8 bytestream. The problem is that the readUTFBytes and readMultiByte methods of an ActionScript ByteArray don't treat 0x00 correctly. They return a string that ends at the first 0x00, but the index into the ByteArray has been advanced by however much you requested. This is very silly for two reasons: ActionScript (and Javascript) strings can contain 0x00 (they are not null terminated) and second, UTF-8 can legitimately contain 0x00 values. Since UTF-8 is not constant width there isn't a great way to determine if those methods in fact did encounter a 0x00 or they just read the number of bytes requested. Doing manual decoding using readUTFByte one character at a time slows things down quite a bit. And to top it all off, those methods don't support the alternate UTF-8 encoding for 0x00 ("\xc0\x80"). They also just treat that encoding as the end of string too. So to get around this, for now I'm encoding zero as 256 ("\xc4\x80") and then doing mod 256 in Javascript. Still doesn't result in much benefit in firefox. But, it's an interesting approach that could use some more exploration so I'm leaving in the code in both places. 2010-05-28 21:39:38 +01:00			`if client_settings["b64encode"]:`
websocket.py: generic code from wsproxy and wstest. 2010-05-06 16:32:07 +01:00			`buf = b64encode(buf)`
Test non-base64 (straight UTF-8) encoding. Also add a wsencoding test client/server program to test send a set of values between client and server and vice-versa to test encodings. Not turned on by default. Add support for encode/decode of UTF-8 in the proxy. This leverages the browser for decoding the WebSocket stream directly instead of doing base64 decode in the browser itself. Unfortunately, in Chrome this has negligible impact (round-trip time is increased slightly likely due to extra python processing). In firefox, due to the use of the flash WebSocket emulator the performance is even worse. This is because it's really annoying to get the flash WebSocket emulator to properly decode a UTF-8 bytestream. The problem is that the readUTFBytes and readMultiByte methods of an ActionScript ByteArray don't treat 0x00 correctly. They return a string that ends at the first 0x00, but the index into the ByteArray has been advanced by however much you requested. This is very silly for two reasons: ActionScript (and Javascript) strings can contain 0x00 (they are not null terminated) and second, UTF-8 can legitimately contain 0x00 values. Since UTF-8 is not constant width there isn't a great way to determine if those methods in fact did encounter a 0x00 or they just read the number of bytes requested. Doing manual decoding using readUTFByte one character at a time slows things down quite a bit. And to top it all off, those methods don't support the alternate UTF-8 encoding for 0x00 ("\xc0\x80"). They also just treat that encoding as the end of string too. So to get around this, for now I'm encoding zero as 256 ("\xc4\x80") and then doing mod 256 in Javascript. Still doesn't result in much benefit in firefox. But, it's an interesting approach that could use some more exploration so I'm leaving in the code in both places. 2010-05-28 21:39:38 +01:00			`else:`
			`# Modified UTF-8 encode`
			`buf = buf.decode('latin-1').encode('utf-8').replace("\x00", "\xc4\x80")`
websocket.py: generic code from wsproxy and wstest. 2010-05-06 16:32:07 +01:00
Test non-base64 (straight UTF-8) encoding. Also add a wsencoding test client/server program to test send a set of values between client and server and vice-versa to test encodings. Not turned on by default. Add support for encode/decode of UTF-8 in the proxy. This leverages the browser for decoding the WebSocket stream directly instead of doing base64 decode in the browser itself. Unfortunately, in Chrome this has negligible impact (round-trip time is increased slightly likely due to extra python processing). In firefox, due to the use of the flash WebSocket emulator the performance is even worse. This is because it's really annoying to get the flash WebSocket emulator to properly decode a UTF-8 bytestream. The problem is that the readUTFBytes and readMultiByte methods of an ActionScript ByteArray don't treat 0x00 correctly. They return a string that ends at the first 0x00, but the index into the ByteArray has been advanced by however much you requested. This is very silly for two reasons: ActionScript (and Javascript) strings can contain 0x00 (they are not null terminated) and second, UTF-8 can legitimately contain 0x00 values. Since UTF-8 is not constant width there isn't a great way to determine if those methods in fact did encounter a 0x00 or they just read the number of bytes requested. Doing manual decoding using readUTFByte one character at a time slows things down quite a bit. And to top it all off, those methods don't support the alternate UTF-8 encoding for 0x00 ("\xc0\x80"). They also just treat that encoding as the end of string too. So to get around this, for now I'm encoding zero as 256 ("\xc4\x80") and then doing mod 256 in Javascript. Still doesn't result in much benefit in firefox. But, it's an interesting approach that could use some more exploration so I'm leaving in the code in both places. 2010-05-28 21:39:38 +01:00			`if client_settings["seq_num"]:`
websocket.py: generic code from wsproxy and wstest. 2010-05-06 16:32:07 +01:00			`send_seq += 1`
			`return "\x00%d:%s\xff" % (send_seq-1, buf)`
			`else:`
			`return "\x00%s\xff" % buf`


			`def do_handshake(sock):`
			`global client_settings, send_seq`
			`send_seq = 0`
			`# Peek, but don't read the data`
			`handshake = sock.recv(1024, socket.MSG_PEEK)`
			`#print "Handshake [%s]" % repr(handshake)`
			`if handshake.startswith("<policy-file-request/>"):`
			`handshake = sock.recv(1024)`
			`print "Sending flash policy response"`
			`sock.send(policy_response)`
			`sock.close()`
			`return False`
			`elif handshake.startswith("\x16"):`
			`retsock = ssl.wrap_socket(`
			`sock,`
			`server_side=True,`
			`certfile='self.pem',`
			`ssl_version=ssl.PROTOCOL_TLSv1)`
			`scheme = "wss"`
			`print "Using SSL/TLS"`
			`else:`
			`retsock = sock`
			`scheme = "ws"`
			`print "Using plain (not SSL) socket"`
			`handshake = retsock.recv(4096)`
			`req_lines = handshake.split("\r\n")`
			`_, path, _ = req_lines[0].split(" ")`
			`_, origin = req_lines[4].split(" ")`
			`_, host = req_lines[3].split(" ")`

			`# Parse settings from the path`
			`cvars = path.partition('?')[2].partition('#')[0].split('&')`
Test non-base64 (straight UTF-8) encoding. Also add a wsencoding test client/server program to test send a set of values between client and server and vice-versa to test encodings. Not turned on by default. Add support for encode/decode of UTF-8 in the proxy. This leverages the browser for decoding the WebSocket stream directly instead of doing base64 decode in the browser itself. Unfortunately, in Chrome this has negligible impact (round-trip time is increased slightly likely due to extra python processing). In firefox, due to the use of the flash WebSocket emulator the performance is even worse. This is because it's really annoying to get the flash WebSocket emulator to properly decode a UTF-8 bytestream. The problem is that the readUTFBytes and readMultiByte methods of an ActionScript ByteArray don't treat 0x00 correctly. They return a string that ends at the first 0x00, but the index into the ByteArray has been advanced by however much you requested. This is very silly for two reasons: ActionScript (and Javascript) strings can contain 0x00 (they are not null terminated) and second, UTF-8 can legitimately contain 0x00 values. Since UTF-8 is not constant width there isn't a great way to determine if those methods in fact did encounter a 0x00 or they just read the number of bytes requested. Doing manual decoding using readUTFByte one character at a time slows things down quite a bit. And to top it all off, those methods don't support the alternate UTF-8 encoding for 0x00 ("\xc0\x80"). They also just treat that encoding as the end of string too. So to get around this, for now I'm encoding zero as 256 ("\xc4\x80") and then doing mod 256 in Javascript. Still doesn't result in much benefit in firefox. But, it's an interesting approach that could use some more exploration so I'm leaving in the code in both places. 2010-05-28 21:39:38 +01:00			`client_settings = {'b64encode': None, 'seq_num': None}`
websocket.py: generic code from wsproxy and wstest. 2010-05-06 16:32:07 +01:00			`for cvar in [c for c in cvars if c]:`
			`name, _, value = cvar.partition('=')`
			`client_settings[name] = value and value or True`

			`print "client_settings:", client_settings`

			`retsock.send(server_handshake % (origin, scheme, host, path))`
			`return retsock`

			`def start_server(listen_port, handler):`
			`lsock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)`
			`lsock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)`
			`lsock.bind(('', listen_port))`
			`lsock.listen(100)`
			`while True:`
			`try:`
			`csock = None`
			`print 'waiting for connection on port %s' % listen_port`
			`startsock, address = lsock.accept()`
			`print 'Got client connection from %s' % address[0]`
			`csock = do_handshake(startsock)`
			`if not csock: continue`

			`handler(csock)`

			`except Exception:`
			`print "Ignoring exception:"`
			`print traceback.format_exc()`
			`if csock: csock.close()`