Added a simple XML parser as an entry point for making Next.
[jsvc.git] / src / dolda / jsvc / next / Parser.java
CommitLineData
a5e6bd24
FT
1package dolda.jsvc.next;
2
3import java.io.*;
4import java.util.*;
5import org.w3c.dom.*;
6import org.w3c.dom.bootstrap.*;
7
8public class Parser {
9 private static final DOMImplementation domimp;
10
11 static {
12 DOMImplementationRegistry reg;
13 try {
14 reg = DOMImplementationRegistry.newInstance();
15 } catch(Exception e) {
16 throw(new Error(e));
17 }
18 DOMImplementation di = reg.getDOMImplementation("");
19 if(di == null)
20 throw(new RuntimeException("Could not get a DOM implemenation"));
21 domimp = di;
22 }
23
24 private static boolean namechar(char c) {
25 return((c == ':') || (c == '_') || (c == '$') || (c == '.') || (c == '-') || ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
26 }
27
28 protected String entity(String name) {
29 if(name.equals("amp"))
30 return("&");
31 if(name.equals("lt"))
32 return("<");
33 if(name.equals("gt"))
34 return(">");
35 if(name.equals("apos"))
36 return("'");
37 if(name.equals("quot"))
38 return("\"");
39 return(null);
40 }
41
42 protected Element makenode(Document doc, String name) {
43 return(doc.createElementNS(null, name));
44 }
45
46 protected Attr makeattr(Document doc, Element el, String name) {
47 return(doc.createAttributeNS(el.getNamespaceURI(), name));
48 }
49
50 public DocumentFragment parse(Reader in) throws IOException {
51 Stack<Node> stack = new Stack<Node>();
52 Document doc = domimp.createDocument(null, "dummy", null);
53 DocumentFragment frag = doc.createDocumentFragment();
54 stack.push(frag);
55 String st = "content";
56 int c = in.read();
57 StringBuilder buf = new StringBuilder();
58 StringBuilder ebuf = new StringBuilder();
59 char atype = 0;
60 int cdashcnt = 0;
61 while(true) {
62 if(st == "content") {
63 if(c == '<') {
64 st = "tag";
65 c = in.read();
66 } else if(c < 0) {
67 if(stack.peek() == frag)
68 return(frag);
69 else
70 throw(new ParseException("Unexpected end-of-file while parsing non-root element"));
71 } else {
72 st = "text";
73 }
74 } else if(st == "tag") {
75 if(Character.isWhitespace((char)c)) {
76 c = in.read();
77 } else if(c == '!') {
78 cdashcnt = 0;
79 c = in.read();
80 st = "comment";
81 } else if(namechar((char)c)) {
82 st = "stag";
83 } else if(c == '/') {
84 c = in.read();
85 st = "etag";
86 } else if(c < 0) {
87 throw(new ParseException("Unexpected end-of-file while parsing tag"));
88 } else {
89 throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in tag name"));
90 }
91 } else if(st == "stag") {
92 boolean flush = false;
93 if(namechar((char)c)) {
94 buf.append((char)c);
95 c = in.read();
96 } else if(c == '>') {
97 flush = true;
98 } else if(Character.isWhitespace((char)c)) {
99 flush = true;
100 c = in.read();
101 } else if(c < 0) {
102 throw(new ParseException("Unexpected end-of-file while parsing tag name"));
103 } else {
104 throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in tag name"));
105 }
106 if(flush) {
107 Element n = makenode(doc, buf.toString());
108 buf = new StringBuilder();
109 stack.peek().appendChild(n);
110 stack.push(n);
111 st = "attr";
112 }
113 } else if(st == "comment") {
114 if(c == '-') {
115 cdashcnt++;
116 c = in.read();
117 } else if((c == '>') && (cdashcnt == 4)) {
118 stack.peek().appendChild(doc.createComment(buf.toString()));
119 buf = new StringBuilder();
120 st = "content";
121 c = in.read();
122 } else if(cdashcnt >= 2) {
123 if(cdashcnt > 2)
124 cdashcnt = 2;
125 buf.append((char)c);
126 c = in.read();
127 } else if(c < 0) {
128 throw(new ParseException("Unexpected end-of-file while parsing comment"));
129 } else {
130 throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in comment"));
131 }
132 } else if(st == "attr") {
133 if(namechar((char)c)) {
134 st = "aname";
135 } else if(c == '>') {
136 st = "content";
137 c = in.read();
138 } else if(c == '/') {
139 st = "stagend";
140 c = in.read();
141 } else if(Character.isWhitespace((char)c)) {
142 c = in.read();
143 } else if(c < 0) {
144 throw(new ParseException("Unexpected end-of-file while parsing attributes"));
145 } else {
146 throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered inside tag"));
147 }
148 } else if(st == "stagend") {
149 if(c == '>') {
150 stack.pop();
151 c = in.read();
152 st = "content";
153 } else if(Character.isWhitespace((char)c)) {
154 c = in.read();
155 } else if(c < 0) {
156 throw(new ParseException("Unexpected end-of-file at end of empty tag"));
157 } else {
158 throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered at and of empty tag"));
159 }
160 } else if(st == "aname") {
161 if(namechar((char)c)) {
162 buf.append((char)c);
163 c = in.read();
164 } else if(Character.isWhitespace((char)c)) {
165 c = in.read();
166 } else if(c == '=') {
167 Element el = (Element)stack.peek();
168 Attr attr = makeattr(doc, el, buf.toString());
169 el.setAttributeNodeNS(attr);
170 buf = new StringBuilder();
171 stack.push(attr);
172 st = "avalstart";
173 c = in.read();
174 } else if(c < 0) {
175 throw(new ParseException("Unexpected end-of-file while parsing attribute name"));
176 } else {
177 throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in attribute name"));
178 }
179 } else if(st == "avalstart") {
180 if((c == '\'') || (c == '"')) {
181 atype = (char)c;
182 c = in.read();
183 st = "aval";
184 } else if(Character.isWhitespace((char)c)) {
185 c = in.read();
186 } else if(c < 0) {
187 throw(new ParseException("Unexpected end-of-file while parsing attribute value"));
188 } else {
189 throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in attribute value"));
190 }
191 } else if(st == "aval") {
192 if(c == atype) {
193 c = in.read();
194 Attr a = (Attr)stack.pop();
195 a.setValue(buf.toString());
196 buf = new StringBuilder();
197 st = "attr";
198 } else if(c == '&') {
199 c = in.read();
200 st = "aent";
201 } else if(c < 0) {
202 throw(new ParseException("Unexpected end-of-file while parsing attribute value"));
203 } else {
204 buf.append((char)c);
205 c = in.read();
206 }
207 } else if(st == "etag") {
208 if(namechar((char)c)) {
209 buf.append((char)c);
210 c = in.read();
211 } else if(c == '>') {
212 String nm = buf.toString();
213 buf = new StringBuilder();
214 Node n = stack.pop();
215 if(n instanceof DocumentFragment)
216 throw(new ParseException("Unexpected end tag for `" + nm + "' while parsing root content"));
217 Element el = (Element)n;
218 if(!nm.equals(el.getTagName()))
219 throw(new ParseException("Unexpected end tag for `" + nm + "' while parsing `" + el.getTagName() + "'"));
220 c = in.read();
221 st = "content";
222 } else if(c < 0) {
223 throw(new ParseException("Unexpected end-of-file while parsing end tag"));
224 } else {
225 throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in end tag"));
226 }
227 } else if(st == "text") {
228 boolean flush = false;
229 if(c == '&') {
230 st = "ent";
231 c = in.read();
232 } else if(c == '<') {
233 flush = true;
234 st = "content";
235 } else if(c < 0) {
236 flush = true;
237 st = "content";
238 } else {
239 buf.append((char)c);
240 c = in.read();
241 }
242 if(flush) {
243 Text n = doc.createTextNode(buf.toString());
244 buf = new StringBuilder();
245 stack.peek().appendChild(n);
246 }
247 } else if(st == "ent") {
248 if(c == ';') {
249 String ename = ebuf.toString();
250 ebuf = new StringBuilder();
251 String rep = entity(ename);
252 if(rep == null)
253 throw(new ParseException("Unknown entity `" + ename + "' encountered"));
254 buf.append(rep);
255 st = "text";
256 c = in.read();
257 } else if(c < 0) {
258 throw(new ParseException("Unexpected end-of-file while parsing entity name"));
259 } else if(namechar((char)c)) {
260 ebuf.append((char)c);
261 c = in.read();
262 } else {
263 throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in entity name"));
264 }
265 } else if(st == "aent") {
266 if(c == ';') {
267 String ename = ebuf.toString();
268 ebuf = new StringBuilder();
269 String rep = entity(ename);
270 if(rep == null)
271 throw(new ParseException("Unknown entity `" + ename + "' encountered"));
272 buf.append(rep);
273 st = "aval";
274 c = in.read();
275 } else if(c < 0) {
276 throw(new ParseException("Unexpected end-of-file while parsing entity name"));
277 } else if(namechar((char)c)) {
278 ebuf.append((char)c);
279 c = in.read();
280 } else {
281 throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in entity name"));
282 }
283 } else {
284 throw(new Error("BUG: Typoed state " + st));
285 }
286 }
287 }
288
289 private static String printable(char c) {
290 if(c < 32)
291 return(String.format("\\%03o", (int)c));
292 return(Character.toString(c));
293 }
294
295 public static void main(String[] args) throws Exception {
296 Parser p = new Parser();
297 DocumentFragment f = p.parse(new FileReader(args[0]));
298 javax.xml.transform.TransformerFactory fac = javax.xml.transform.TransformerFactory.newInstance();
299 fac.setAttribute("indent-number", 2);
300 javax.xml.transform.Transformer t = fac.newTransformer();
301 t.setOutputProperty(javax.xml.transform.OutputKeys.INDENT, "yes");
302 t.transform(new javax.xml.transform.dom.DOMSource(f), new javax.xml.transform.stream.StreamResult(System.out));
303 System.out.println(t.getClass());
304 }
305}