Test non-base64 (straight UTF-8) encoding.

Also add a wsencoding test client/server program that sends a set of
values from client to server and vice-versa to test encodings.

Not turned on by default.

Add support for encode/decode of UTF-8 in the proxy. This leverages
the browser for decoding the WebSocket stream directly instead of
doing base64 decode in the browser itself.

Unfortunately, in Chrome this has negligible impact (round-trip time
is increased slightly, likely due to extra Python processing).

In firefox, due to the use of the flash WebSocket emulator the
performance is even worse. This is because it's really annoying to get
the flash WebSocket emulator to properly decode a UTF-8 bytestream.
The problem is that the readUTFBytes and readMultiByte methods of an
ActionScript ByteArray don't treat 0x00 correctly. They return
a string that ends at the first 0x00, but the index into the ByteArray
has been advanced by however much you requested.

This is very silly for two reasons: ActionScript (and Javascript)
strings can contain 0x00 (they are not null terminated) and second,
UTF-8 can legitimately contain 0x00 values. Since UTF-8 is not
constant width there isn't a great way to determine if those methods
in fact did encounter a 0x00 or they just read the number of bytes
requested.

Doing manual decoding using readUTFBytes one character at a time slows
things down quite a bit. And to top it all off, those methods don't
support the alternate UTF-8 encoding for 0x00 ("\xc0\x80"). They also
just treat that encoding as the end of string too.

So to get around this, for now I'm encoding zero as 256 ("\xc4\x80")
and then doing mod 256 in Javascript. Still doesn't result in much
benefit in firefox.

But, it's an interesting approach that could use some more exploration
so I'm leaving in the code in both places.
This commit is contained in:
Joel Martin 2010-05-28 15:39:38 -05:00
parent c3996e24f5
commit 507b473a2e
6 changed files with 311 additions and 15 deletions

View File

@ -219,10 +219,32 @@ public class WebSocket extends EventDispatcher {
}
} else {
if (buffer[pos] == 0xff) {
//if (buffer.bytesAvailable > 1) {
if (buffer.readByte() != 0x00) {
close();
main.fatal("data must start with \\x00");
}
/*
var data:String = "", byte:uint;
while (buffer.bytesAvailable > 1) {
byte = buffer[buffer.position];
if (byte === 0x00) {
// readUTFBytes mishandles 0x00
data = data + "\x00";
buffer.position++;
} else if (byte === 0xff) {
// End of WebSocket frame
//ExternalInterface.call("console.log", "[WebSocket] early 0xff found");
break;
} else if ((byte & 0x80) === 0x00) {
// One UTF-8 input byte to one output byte
data = data + buffer.readUTFBytes(1);
} else {
// Assume two UTF-8 input bytes to one output byte
data = data + buffer.readUTFBytes(2);
}
}
*/
var data:String = buffer.readUTFBytes(pos - 1);
main.log("received: " + data);
dispatchEvent(new WebSocketMessageEvent("message", encodeURIComponent(data)));

155
tests/wsencoding.html Normal file
View File

@ -0,0 +1,155 @@
<html>
<head><title>WebSockets Test</title></head>
<body>
Host: <input id='host' style='width:100'>&nbsp;
Port: <input id='port' style='width:50'>&nbsp;
Encrypt: <input id='encrypt' type='checkbox'>&nbsp;
<input id='connectButton' type='button' value='Start' style='width:100px'
onclick="connect();">&nbsp;
<br>
Messages:<br>
<textarea id="messages" style="font-size: 9;" cols=80 rows=25></textarea>
</body>
<!-- Uncomment to activate firebug lite -->
<!--
<script type='text/javascript'
src='http://getfirebug.com/releases/lite/1.2/firebug-lite-compressed.js'></script>
-->
<script src="include/mootools.js"></script>
<script src="include/base64.js"></script>
<script src="include/util.js"></script>
<script>
// Connection parameters; connect() fills these in from the form fields.
var host = null, port = null;
// The active WebSocket object, or null until init_ws() creates one.
var ws = null;
// Set to false further down when window.WebSocket is missing and the
// web-socket-js flash bridge is loaded in its place.
var VNC_native_ws = true;
/*
 * Log a line to the browser console and append it to the on-page
 * "messages" textarea, scrolling so the newest line stays visible.
 *
 * str: text to record (a newline is appended for the textarea).
 */
function message(str) {
    console.log(str);
    // `var` keeps the element reference local instead of creating an
    // implicit global named `cell`.
    var cell = $('messages');
    // A textarea's live content is its `value`; appending through
    // innerHTML re-parses the text as markup and stops being reflected
    // once the user has edited the field.
    cell.value += str + "\n";
    cell.scrollTop = cell.scrollHeight;
}
/*
 * Dump a received string through message(): first its length, then one
 * line per character giving the index and the character code modulo 256.
 *
 * The modulo maps code point 256 back to 0, matching the proxy's
 * "zero is sent as 256" workaround for the non-base64 encoding.
 */
function print_response(str) {
    // Declared locally so the loop counter does not become an implicit
    // global shared with other code on the page.
    var i;
    message("str.length: " + str.length);
    for (i = 0; i < str.length; i++) {
        message(i + ": " + (str.charCodeAt(i) % 256));
    }
}
/*
 * Send the fixed encoding test vector over the open WebSocket `ws`:
 * 0x81, 0xff, every fourth value from 0 to 252, then 0, 0x40, 0x41,
 * 0xff, 0x81 -- one character per value.
 */
function send() {
    var codes = [0x81, 0xff], value;
    for (value = 0; value < 256; value += 4) {
        codes.push(value);
    }
    codes.push(0, 0x40, 0x41, 0xff, 0x81);
    ws.send(String.fromCharCode.apply(null, codes));
}
/*
 * Open a WebSocket to host:port (wss:// when the "encrypt" checkbox is
 * ticked, ws:// otherwise), store it in the global `ws`, and install
 * handlers that trace every event to the console. The test vector is
 * sent as soon as the socket opens, and every received message is
 * dumped with print_response().
 */
function init_ws() {
    console.log(">> init_ws");

    var secure = $('encrypt').checked;
    var target = (secure ? "wss://" : "ws://") + host + ":" + port;
    // No query string is appended, so the b64encode and seq_num options
    // are not requested from the server.

    console.log("connecting to " + target);
    var sock = new WebSocket(target);
    ws = sock;

    sock.onmessage = function(evt) {
        console.log(">> WebSockets.onmessage");
        print_response(evt.data);
        console.log("<< WebSockets.onmessage");
    };

    sock.onopen = function(evt) {
        console.log(">> WebSockets.onopen");
        send();
        console.log("<< WebSockets.onopen");
    };

    sock.onclose = function(evt) {
        console.log(">> WebSockets.onclose");
        console.log("<< WebSockets.onclose");
    };

    sock.onerror = function(evt) {
        console.log(">> WebSockets.onerror");
        console.log(" " + evt);
        console.log("<< WebSockets.onerror");
    };

    console.log("<< init_ws");
}
/*
 * "Start" button handler: read host and port from the form, close any
 * existing socket, open a new one, and turn the button into "Stop".
 * Does nothing (beyond logging) when host or port is empty.
 */
function connect() {
    console.log(">> connect");

    host = $('host').value;
    port = $('port').value;
    if (!(host && port)) {
        console.log("must set host and port");
        return;
    }

    if (ws) { ws.close(); }
    init_ws();

    var button = $('connectButton');
    button.value = "Stop";
    button.onclick = disconnect;

    console.log("<< connect");
}
/*
 * "Stop" button handler: close the socket if there is one and turn the
 * button back into "Start".
 */
function disconnect() {
    console.log(">> disconnect");

    if (ws) { ws.close(); }

    var button = $('connectButton');
    button.value = "Start";
    button.onclick = connect;

    console.log("<< disconnect");
}
/* If no builtin websockets then load web_socket.js */
if (! window.WebSocket) {
    console.log("Loading web-socket-js flash bridge");
    // The closing tags are written as <\/script> so the HTML parser does
    // not treat them as the end of this inline script element.
    var extra = "<script src='include/web-socket-js/swfobject.js'><\/script>";
    extra += "<script src='include/web-socket-js/FABridge.js'><\/script>";
    extra += "<script src='include/web-socket-js/web_socket.js'><\/script>";
    // Emit the three script tags into the document while it is still
    // being parsed.
    document.write(extra);
    // Remembered so window.onload knows it must initialize the bridge.
    VNC_native_ws = false;
}
/*
 * Page load handler: initialize the flash WebSocket bridge when it was
 * loaded in place of a native WebSocket, then pre-fill the host and port
 * fields from "host=" / "port=" parameters in the page URL (empty string
 * when a parameter is absent).
 */
window.onload = function() {
    var href, hostMatch, portMatch;

    console.log("onload");
    if (!VNC_native_ws) {
        WebSocket.__swfLocation = "include/web-socket-js/WebSocketMain.swf";
        WebSocket.__initialize();
    }

    href = document.location.href;
    hostMatch = href.match(/host=([^&#]*)/);
    portMatch = href.match(/port=([^&#]*)/);
    $('host').value = hostMatch ? hostMatch[1] : '';
    $('port').value = portMatch ? portMatch[1] : '';
};
</script>
</html>

84
tests/wsencoding.py Executable file
View File

@ -0,0 +1,84 @@
#!/usr/bin/python
'''
WebSocket server-side encoding test program. Prints the byte values of
every frame received from the client and, once the first data has
arrived, sends a fixed set of byte values back to the client.
'''
import sys, os, socket, ssl, time, traceback
import random, time
from base64 import b64encode, b64decode
from codecs import utf_8_encode, utf_8_decode
from select import select
sys.path.insert(0,os.path.dirname(__file__) + "/../")
from websocket import *
buffer_size = 65536
recv_cnt = send_cnt = 0
def check(buf):
if buf[0] != '\x00' or buf[-1] != '\xff':
raise Exception("Invalid WS packet")
for decoded in decode(buf):
nums = [ord(c) for c in decoded]
print "Received nums: ", nums
return
def responder(client):
    """
    Per-connection handler passed to start_server().

    Loops forever on the client socket: every buffer that ends in '\\xff'
    is handed to check() (together with any previously buffered partial
    data); anything else is accumulated as a partial frame. After the
    first data has been received, a fixed test vector is encoded with
    encode() and sent back exactly once.

    Raises Exception on a socket error condition or when the client
    closes the connection.
    """
    cpartial = ""
    socks = [client]
    sent = False
    received = False

    while True:
        # Only ask select() about writability while the reply is still
        # pending. An idle socket is nearly always writable, so listing it
        # unconditionally makes select() return at once and the loop spin.
        wsocks = socks if (received and not sent) else []
        ins, outs, excepts = select(socks, wsocks, socks, 1)
        if excepts: raise Exception("Socket exception")

        if client in ins:
            buf = client.recv(buffer_size)
            if len(buf) == 0: raise Exception("Client closed")
            received = True
            #print "Client recv: %s (%d)" % (repr(buf[1:-1]), len(buf))
            if buf[-1] == '\xff':
                # End-of-frame marker seen: check everything buffered so far.
                if cpartial:
                    err = check(cpartial + buf)
                    cpartial = ""
                else:
                    err = check(buf)
                if err:
                    print(err)
            else:
                print("received partitial")
                cpartial = cpartial + buf

        if received and not sent and client in outs:
            sent = True
            # Alternative test vectors, kept for experimentation:
            #nums = "".join([unichr(c) for c in range(0,256)])
            #nums = "".join([chr(c) for c in range(1,128)])
            #nums = nums + chr(194) + chr(128) + chr(194) + chr(129)
            #nums = "".join([chr(c) for c in range(0,256)])
            nums = "\x81\xff"
            nums = nums + "".join([chr(c) for c in range(0,256,4)])
            nums = nums + "\x00\x40\x41\xff\x81"
            # print nums
            # sendall() keeps writing until the whole frame is out;
            # send() may transmit only part of it.
            client.sendall(encode(nums))
            # client.send("\x00" + nums + "\xff")
            # print "Sent characters 0-255"
            # #print "Client send: %s (%d)" % (repr(nums), len(nums))
if __name__ == '__main__':
    # One required argument: the TCP port to listen on. A missing argument
    # (IndexError) or a non-numeric one (ValueError) prints the usage line
    # and exits with status 1; other exceptions such as KeyboardInterrupt
    # are no longer swallowed by a bare except.
    try:
        listen_port = int(sys.argv[1])
    except (IndexError, ValueError):
        print("Usage: <listen_port>")
        sys.exit(1)

    start_server(listen_port, responder)

45
vnc.js
View File

@ -53,6 +53,7 @@ RFB = {
ws : null, // Web Socket object
sendID : null,
use_seq : false,
b64encode : true,
// Receive and send queues
RQ : [], // Receive Queue
@ -789,6 +790,27 @@ clientCutText: function (text) {
* Utility routines
*/
encode_message: function(arr) {
if (RFB.b64encode) {
RFB.SQ = RFB.SQ + Base64.encode(arr);
} else {
RFB.SQ = RFB.SQ + arr.map(function (num) {
return String.fromCharCode(num); } ).join('');
}
},
decode_message: function(data, offset) {
//console.log(">> decode_message: " + data);
if (RFB.b64encode) {
RFB.RQ = RFB.RQ.concat(Base64.decode(data, offset));
} else {
RFB.RQ = RFB.RQ.concat(data.split('').slice(offset).
map(function (chr) {
return (chr.charCodeAt(0) % 256); }));
}
//console.log(">> decode_message, RQ: " + RFB.RQ);
},
recv_message: function(e) {
//console.log(">> recv_message");
@ -796,7 +818,7 @@ recv_message: function(e) {
if (RFB.use_seq) {
RFB.recv_message_reorder(e);
} else {
RFB.RQ = RFB.RQ.concat(Base64.decode(e.data, 0));
RFB.decode_message(e.data, 0);
RFB.handle_message();
}
@ -819,7 +841,7 @@ recv_message_reorder: function(e) {
offset = e.data.indexOf(":") + 1;
seq_num = parseInt(e.data.substr(0, offset-1), 10);
if (RFB.RQ_seq_num === seq_num) {
RFB.RQ = RFB.RQ.concat(Base64.decode(e.data, offset));
RFB.decode_message(e.data, offset);
RFB.RQ_seq_num++;
} else {
console.warn("sequence number mismatch: expected " +
@ -838,9 +860,7 @@ recv_message_reorder: function(e) {
/* Remove it from reorder queue, decode it and
* add it to the receive queue */
console.log("Found re-ordered packet seq_num " + seq_num);
RFB.RQ = RFB.RQ.concat(
Base64.decode(RFB.RQ_reorder.splice(i, 1)[0],
offset));
RFB.decode_message(RFB.RQ_reorder.splice(i, 1)[0], offset);
RFB.RQ_seq_num++;
i = 0; // Start search again for next one
} else {
@ -892,8 +912,7 @@ send_string: function (str) {
send_array: function (arr) {
//console.log(">> send_array: " + arr);
//console.log(">> send_array: " + Base64.encode(arr));
RFB.SQ = RFB.SQ + Base64.encode(arr);
RFB.encode_message(arr);
if (RFB.ws.bufferedAmount === 0) {
RFB.ws.send(RFB.SQ);
RFB.SQ = "";
@ -1097,15 +1116,21 @@ updateState: function(state, statusMsg) {
init_ws: function () {
console.log(">> init_ws");
var uri = "";
var uri = "", vars = [];
if (RFB.encrypt) {
uri = "wss://";
} else {
uri = "ws://";
}
uri += RFB.host + ":" + RFB.port + "/?b64encode";
uri += RFB.host + ":" + RFB.port + "/";
if (RFB.b64encode) {
vars.push("b64encode");
}
if (RFB.use_seq) {
uri += "&seq_num";
vars.push("seq_num");
}
if (vars.length > 0) {
uri += "?" + vars.join("&");
}
console.log("connecting to " + uri);
RFB.ws = new WebSocket(uri);

View File

@ -33,16 +33,26 @@ def traffic(token="."):
def decode(buf):
""" Parse out WebSocket packets. """
if buf.count('\xff') > 1:
return [b64decode(d[1:]) for d in buf.split('\xff')]
if client_settings["b64encode"]:
return [b64decode(d[1:]) for d in buf.split('\xff')]
else:
# Modified UTF-8 decode
return [d[1:].replace("\xc4\x80", "\x00").decode('utf-8').encode('latin-1') for d in buf.split('\xff')]
else:
return [b64decode(buf[1:-1])]
if client_settings["b64encode"]:
return [b64decode(buf[1:-1])]
else:
return [buf[1:-1].replace("\xc4\x80", "\x00").decode('utf-8').encode('latin-1')]
def encode(buf):
global send_seq
if client_settings.get("b64encode"):
if client_settings["b64encode"]:
buf = b64encode(buf)
else:
# Modified UTF-8 encode
buf = buf.decode('latin-1').encode('utf-8').replace("\x00", "\xc4\x80")
if client_settings.get("seq_num"):
if client_settings["seq_num"]:
send_seq += 1
return "\x00%d:%s\xff" % (send_seq-1, buf)
else:
@ -81,7 +91,7 @@ def do_handshake(sock):
# Parse settings from the path
cvars = path.partition('?')[2].partition('#')[0].split('&')
client_settings = {}
client_settings = {'b64encode': None, 'seq_num': None}
for cvar in [c for c in cvars if c]:
name, _, value = cvar.partition('=')
client_settings[name] = value and value or True