Added a tell function to the decstream for debugging.
[pycfml.git] / classfile / binfmt.py
CommitLineData
964561d6
FT
1from struct import pack, unpack, calcsize
2
class fmterror(Exception):
    """Raised when binary input does not conform to the expected format."""
5
class eomerror(fmterror):
    """Format error raised when the input ends before the requested data."""
8
def mutf8dec(bs):
    """Decode a bytes-like object of (modified) UTF-8 into a ``str``.

    Each multi-byte sequence is decoded to a single code point; the
    two-byte form ``C0 80`` decodes to ``"\\0"``.

    Raises ``fmterror`` for an invalid start byte, a truncated sequence,
    an invalid continuation byte, a rejected zero-prefixed sequence, or a
    sequence whose value lies above U+10FFFF.
    """
    chars = []
    pos = 0
    end = len(bs)
    while pos < end:
        lead = bs[pos]
        pos += 1
        if lead & 0x80 == 0:
            # Plain 7-bit byte: maps straight to its code point.
            chars.append(chr(lead))
            continue
        # The number of 1-bits following the top bit of the lead byte is
        # the number of continuation bytes that must follow.
        ntail = 0
        while ntail < 7 and lead & (1 << (6 - ntail)):
            ntail += 1
        if ntail == 0 or ntail == 7:
            raise fmterror("invalid utf8 start-byte")
        # Payload bits carried by the lead byte itself.
        first = acc = lead & ((1 << (6 - ntail)) - 1)
        for _ in range(ntail):
            if pos >= end:
                raise fmterror("unterminated utf8 compound")
            cont = bs[pos]
            pos += 1
            if cont & 0xc0 != 0x80:
                raise fmterror("invalid utf8 continuation byte")
            acc = (acc << 6) | (cont & 0x3f)
        # A lead byte with an all-zero payload is rejected unless the
        # sequence has two continuation bytes or decodes to 0 (the latter
        # admits the C0 80 encoding of NUL).
        if first == 0 and ntail != 2 and acc != 0:
            raise fmterror("invalid utf8 compound")
        # chr() would raise a bare ValueError here; report it as a format
        # error like every other malformed-input case.
        if acc > 0x10ffff:
            raise fmterror("utf8 code point out of range")
        chars.append(chr(acc))
    return "".join(chars)
34
def mutf8enc(cs):
    """Encode the string *cs* as modified UTF-8 and return ``bytes``.

    NUL is written as the two-byte form ``C0 80``; other code points
    below 0x10000 use the usual one-, two- or three-byte forms.

    Raises ``fmterror`` for any code point of 0x10000 or above.
    """
    out = bytearray()
    for ch in cs:
        cp = ord(ch)
        if cp >= 0x10000:
            raise fmterror("non-BMP unicode not supported by Java")
        if cp == 0:
            # NUL never appears as a raw zero byte in the output.
            out += b"\xc0\x80"
        elif cp < 0x80:
            out.append(cp)
        elif cp < 0x800:
            out.append(0xc0 | (cp >> 6))
            out.append(0x80 | (cp & 0x3f))
        else:
            out.append(0xe0 | (cp >> 12))
            out.append(0x80 | ((cp >> 6) & 0x3f))
            out.append(0x80 | (cp & 0x3f))
    return bytes(out)
53
class decoder(object):
    """Big-endian primitive readers layered on a ``splice`` method.

    This class does not define ``splice`` itself; it calls
    ``self.splice(n)`` and passes the result to ``struct.unpack``.
    """

    def destruct(self, fmt):
        """Read ``calcsize(fmt)`` bytes and unpack them according to *fmt*.

        Returns the tuple produced by ``struct.unpack``.
        """
        size = calcsize(fmt)
        raw = self.splice(size)
        return unpack(fmt, raw)

    def skip(self, ln):
        """Consume *ln* bytes and discard them."""
        self.splice(ln)

    def int8(self):
        """Read a signed 8-bit integer."""
        value, = self.destruct(">b")
        return value

    def uint8(self):
        """Read an unsigned 8-bit integer."""
        value, = self.destruct(">B")
        return value

    def int16(self):
        """Read a big-endian signed 16-bit integer."""
        value, = self.destruct(">h")
        return value

    def uint16(self):
        """Read a big-endian unsigned 16-bit integer."""
        value, = self.destruct(">H")
        return value

    def int32(self):
        """Read a big-endian signed 32-bit integer."""
        value, = self.destruct(">i")
        return value

    def uint32(self):
        """Read a big-endian unsigned 32-bit integer."""
        value, = self.destruct(">I")
        return value

    def int64(self):
        """Read a big-endian signed 64-bit integer."""
        value, = self.destruct(">q")
        return value

    def uint64(self):
        """Read a big-endian unsigned 64-bit integer."""
        value, = self.destruct(">Q")
        return value

    def float32(self):
        """Read a big-endian 32-bit float."""
        value, = self.destruct(">f")
        return value

    def float64(self):
        """Read a big-endian 64-bit float."""
        value, = self.destruct(">d")
        return value
81
class decstream(decoder):
    """Decoder that reads from a file-like object through a local buffer.

    ``bk`` is the backing object; it must provide ``read`` and ``close``,
    and ``tell`` if ``tell()`` is used.  Bytes read ahead of what has been
    consumed are kept in ``self.buf``.
    """

    def __init__(self, bk):
        self.bk = bk
        self.buf = bytearray()

    def eom(self):
        """Return True if no more data is available.

        When the buffer is empty this reads up to 1024 bytes from the
        backing object into the buffer in order to find out.
        """
        if len(self.buf) > 0:
            return False
        ret = self.bk.read(1024)
        if ret == b"":
            return True
        self.buf.extend(ret)
        return False

    def tell(self):
        """Return the backing object's position minus the buffered bytes."""
        return self.bk.tell() - len(self.buf)

    def splice(self, ln=-1):
        """Consume and return *ln* bytes, or everything left if *ln* < 0.

        Raises ``eomerror`` if the backing object runs out of data before
        *ln* bytes are available.
        """
        buf = self.buf
        if ln < 0:
            # Drain the backing object completely.
            while True:
                ret = self.bk.read()
                if ret == b"":
                    self.buf = bytearray()
                    return bytes(buf)
                buf.extend(ret)
        else:
            while len(buf) < ln:
                # Always request at least 1024 bytes to limit read calls.
                rl = max(ln - len(buf), 1024)
                ret = self.bk.read(rl)
                if ret == b"":
                    raise eomerror("unexpected end-of-file")
                buf.extend(ret)
            self.buf = buf[ln:]
            return bytes(buf[:ln])

    def skip(self, ln):
        """Discard *ln* bytes, seeking in the backing object if it can.

        Raises ``eomerror`` if fewer than *ln* bytes remain.
        """
        # "<=" (not "<"): a skip that is fully satisfied by the buffer,
        # including skip(0) on an empty buffer, must not touch the backing
        # object.  Otherwise the seek(-1)/read(1) probe below would run
        # with nothing left to skip and could consume a byte or fail.
        if ln <= len(self.buf):
            self.buf = self.buf[ln:]
        else:
            ln -= len(self.buf)
            self.buf = bytearray()
            if hasattr(self.bk, "seek"):
                # Seek to one byte short of the target and read that last
                # byte, so that skipping past the end is detected.
                self.bk.seek(ln - 1, 1)
                if len(self.bk.read(1)) != 1:
                    raise eomerror("unexpected end-of-file")
            else:
                while ln > 0:
                    r = self.bk.read(ln)
                    if r == b"":
                        raise eomerror("unexpected end-of-file")
                    ln -= len(r)

    def str(self):
        """Consume a NUL-terminated UTF-8 string and return it as ``str``.

        The terminator is consumed but not included in the result.

        Raises ``eomerror`` if no data at all is available, and
        ``fmterror`` if data is available but contains no terminator.
        """
        buf = self.buf
        p = 0
        while True:
            p2 = buf.find(b'\0', p)
            # find() returns -1 when nothing is found; index 0 is a valid
            # hit (an empty string), so the test must be ">= 0".
            if p2 >= 0:
                self.buf = buf[p2 + 1:]
                return str(buf[:p2], "utf-8")
            ret = self.bk.read(1024)
            if ret == b"":
                if len(buf) == 0:
                    raise eomerror("unexpected end-of-file")
                raise fmterror("no string terminator found")
            # Only the newly appended bytes need to be searched next time.
            p = len(buf)
            buf.extend(ret)

    def close(self):
        """Close the backing object."""
        self.bk.close()

    def __enter__(self):
        return self

    def __exit__(self, *excinfo):
        self.close()
        # Never suppress an exception raised inside the with-block.
        return False
160
class decbuf(decoder):
    """Decoder over an in-memory buffer, tracking a read offset.

    ``data`` is indexed by slice and searched with ``find``; ``offset``
    is the index of the next unread byte.
    """

    def __init__(self, data):
        self.data = data
        self.offset = 0

    def __len__(self):
        """Return the number of bytes not yet consumed."""
        return len(self.data) - self.offset

    def eom(self):
        """Return True once every byte of the buffer has been consumed."""
        return self.offset >= len(self.data)

    def splice(self, ln=-1):
        """Consume and return *ln* bytes, or everything left if *ln* < 0.

        Raises ``eomerror`` if fewer than *ln* bytes remain.
        """
        start = self.offset
        total = len(self.data)
        if ln < 0:
            rest = self.data[start:]
            self.offset = total
            return rest
        stop = start + ln
        if stop > total:
            raise eomerror("out of data to decode")
        chunk = self.data[start:stop]
        self.offset = stop
        return chunk

    def str(self):
        """Consume a NUL-terminated UTF-8 string and return it as ``str``.

        The terminator is consumed but not included in the result.

        Raises ``eomerror`` if the buffer is already exhausted, and
        ``fmterror`` if data remains but contains no terminator.
        """
        start = self.offset
        nul = self.data.find(b'\0', start)
        if nul >= 0:
            text = str(self.data[start:nul], "utf-8")
            self.offset = nul + 1
            return text
        if start == len(self.data):
            raise eomerror("out of data to decode")
        raise fmterror("no string terminator found")
193
class encoder(object):
    """Big-endian primitive writers layered on an ``extend`` method.

    This class does not define ``extend`` itself; it calls
    ``self.extend(data)`` with the bytes to emit.  Every writer returns
    ``self`` so that calls can be chained.
    """

    def enstruct(self, fmt, *args):
        """Pack *args* according to the struct format *fmt* and emit them."""
        self.extend(pack(fmt, *args))
        return self

    def int8(self, val):
        """Write a signed 8-bit integer."""
        self.enstruct(">b", val)
        return self

    def uint8(self, val):
        """Write an unsigned 8-bit integer."""
        self.enstruct(">B", val)
        return self

    def int16(self, val):
        """Write a big-endian signed 16-bit integer."""
        self.enstruct(">h", val)
        return self

    def uint16(self, val):
        """Write a big-endian unsigned 16-bit integer."""
        self.enstruct(">H", val)
        return self

    def int32(self, val):
        """Write a big-endian signed 32-bit integer."""
        self.enstruct(">i", val)
        return self

    def uint32(self, val):
        """Write a big-endian unsigned 32-bit integer."""
        self.enstruct(">I", val)
        return self

    def int64(self, val):
        """Write a big-endian signed 64-bit integer."""
        self.enstruct(">q", val)
        return self

    def uint64(self, val):
        """Write a big-endian unsigned 64-bit integer."""
        self.enstruct(">Q", val)
        return self

    def float32(self, val):
        """Write a big-endian 32-bit float."""
        self.enstruct(">f", val)
        return self

    def float64(self, val):
        """Write a big-endian 64-bit float."""
        self.enstruct(">d", val)
        return self

    def str(self, val):
        """Write *val* as UTF-8 followed by a NUL terminator.

        Raises ``ValueError`` if *val* itself contains a NUL.
        """
        if '\0' in val:
            raise ValueError("encoded strings must not contain NULs")
        self.extend(val.encode("utf-8"))
        self.extend(b"\0")
        return self

    def ttol(self, val, term=False):
        """Write each object in *val* as a type tag followed by its value.

        Nested lists are written recursively with a terminating ``T_END``;
        the outer list is terminated only when *term* is true.

        Raises ``ValueError`` for an object of an unsupported type.

        NOTE(review): the ``T_*`` constants, ``utils`` and the
        ``coord``/``color`` writers used here are not defined in the
        visible part of this file -- confirm they are supplied elsewhere
        before relying on this method.
        """
        for obj in val:
            if isinstance(obj, int):
                # Use the narrowest unsigned tag that fits the value.
                if 0 <= obj < 256:
                    self.uint8(T_UINT8)
                    self.uint8(obj)
                elif 0 <= obj < 65536:
                    self.uint8(T_UINT16)
                    self.uint16(obj)
                else:
                    self.uint8(T_INT)
                    self.int32(obj)
            elif isinstance(obj, str):
                self.uint8(T_STR)
                self.str(obj)
            elif isinstance(obj, utils.coord):
                self.uint8(T_COORD)
                self.coord(obj)
            elif isinstance(obj, utils.color):
                self.uint8(T_COLOR)
                self.color(obj)
            elif isinstance(obj, list):
                self.uint8(T_TTOL)
                self.ttol(obj, True)
            elif isinstance(obj, float):
                self.uint8(T_FLOAT32)
                self.float32(obj)
            elif obj is None:
                self.uint8(T_NIL)
            elif isinstance(obj, (bytes, bytearray)):
                # The built-in byte-string types are named directly: the
                # ``collections.ByteString`` alias no longer exists in
                # current Python and ``collections`` is not imported here.
                self.uint8(T_BYTES)
                # Lengths below 128 take one byte; longer ones are
                # written as a 0x80 marker followed by a 32-bit length.
                if len(obj) < 128:
                    self.uint8(len(obj))
                else:
                    self.uint8(0x80).int32(len(obj))
                self.extend(obj)
            else:
                raise ValueError("unexpected type in tto-list: %s" % type(obj))
        if term:
            self.uint8(T_END)
        return self
278
class encstream(encoder):
    """Encoder that writes everything it emits to a file-like object.

    ``bk`` is the backing object; it must provide ``write`` and ``close``.
    """

    def __init__(self, bk):
        self.bk = bk

    def extend(self, data):
        """Write *data* to the backing object and return ``self``."""
        sink = self.bk
        sink.write(data)
        return self

    def close(self):
        """Close the backing object."""
        self.bk.close()

    def __enter__(self):
        return self

    def __exit__(self, *excinfo):
        self.close()
        # Never suppress an exception raised inside the with-block.
        return False
296
class encbuf(encoder, bytearray):
    """Encoder that is itself the ``bytearray`` collecting the output."""

    def extend(self, data):
        """Append *data* to this buffer and return ``self`` for chaining."""
        # Call bytearray's own extend explicitly and return self, so the
        # encoder's chained writers work on the buffer.
        bytearray.extend(self, data)
        return self