Added a simple XML parser as an entry point for making Next.
[jsvc.git] / src / dolda / jsvc / next / Parser.java
1 package dolda.jsvc.next;
2
3 import java.io.*;
4 import java.util.*;
5 import org.w3c.dom.*;
6 import org.w3c.dom.bootstrap.*;
7
8 public class Parser {
9     private static final DOMImplementation domimp;
10     
11     static {
12         DOMImplementationRegistry reg;
13         try {
14             reg = DOMImplementationRegistry.newInstance();
15         } catch(Exception e) {
16             throw(new Error(e));
17         }
18         DOMImplementation di = reg.getDOMImplementation("");
19         if(di == null)
20             throw(new RuntimeException("Could not get a DOM implemenation"));
21         domimp = di;
22     }
23
24     private static boolean namechar(char c) {
25         return((c == ':') || (c == '_') || (c == '$') || (c == '.') || (c == '-') || ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
26     }
27
28     protected String entity(String name) {
29         if(name.equals("amp"))
30             return("&");
31         if(name.equals("lt"))
32             return("<");
33         if(name.equals("gt"))
34             return(">");
35         if(name.equals("apos"))
36             return("'");
37         if(name.equals("quot"))
38             return("\"");
39         return(null);
40     }
41     
42     protected Element makenode(Document doc, String name) {
43         return(doc.createElementNS(null, name));
44     }
45     
46     protected Attr makeattr(Document doc, Element el, String name) {
47         return(doc.createAttributeNS(el.getNamespaceURI(), name));
48     }
49
50     public DocumentFragment parse(Reader in) throws IOException {
51         Stack<Node> stack = new Stack<Node>();
52         Document doc = domimp.createDocument(null, "dummy", null);
53         DocumentFragment frag = doc.createDocumentFragment();
54         stack.push(frag);
55         String st = "content";
56         int c = in.read();
57         StringBuilder buf = new StringBuilder();
58         StringBuilder ebuf = new StringBuilder();
59         char atype = 0;
60         int cdashcnt = 0;
61         while(true) {
62             if(st == "content") {
63                 if(c == '<') {
64                     st = "tag";
65                     c = in.read();
66                 } else if(c < 0) {
67                     if(stack.peek() == frag)
68                         return(frag);
69                     else
70                         throw(new ParseException("Unexpected end-of-file while parsing non-root element"));
71                 } else {
72                     st = "text";
73                 }
74             } else if(st == "tag") {
75                 if(Character.isWhitespace((char)c)) {
76                     c = in.read();
77                 } else if(c == '!') {
78                     cdashcnt = 0;
79                     c = in.read();
80                     st = "comment";
81                 } else if(namechar((char)c)) {
82                     st = "stag";
83                 } else if(c == '/') {
84                     c = in.read();
85                     st = "etag";
86                 } else if(c < 0) {
87                     throw(new ParseException("Unexpected end-of-file while parsing tag"));
88                 } else {
89                     throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in tag name"));
90                 }
91             } else if(st == "stag") {
92                 boolean flush = false;
93                 if(namechar((char)c)) {
94                     buf.append((char)c);
95                     c = in.read();
96                 } else if(c == '>') {
97                     flush = true;
98                 } else if(Character.isWhitespace((char)c)) {
99                     flush = true;
100                     c = in.read();
101                 } else if(c < 0) {
102                     throw(new ParseException("Unexpected end-of-file while parsing tag name"));
103                 } else {
104                     throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in tag name"));
105                 }
106                 if(flush) {
107                     Element n = makenode(doc, buf.toString());
108                     buf = new StringBuilder();
109                     stack.peek().appendChild(n);
110                     stack.push(n);
111                     st = "attr";
112                 }
113             } else if(st == "comment") {
114                 if(c == '-') {
115                     cdashcnt++;
116                     c = in.read();
117                 } else if((c == '>') && (cdashcnt == 4)) {
118                     stack.peek().appendChild(doc.createComment(buf.toString()));
119                     buf = new StringBuilder();
120                     st = "content";
121                     c = in.read();
122                 } else if(cdashcnt >= 2) {
123                     if(cdashcnt > 2)
124                         cdashcnt = 2;
125                     buf.append((char)c);
126                     c = in.read();
127                 } else if(c < 0) {
128                     throw(new ParseException("Unexpected end-of-file while parsing comment"));
129                 } else {
130                     throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in comment"));
131                 }
132             } else if(st == "attr") {
133                 if(namechar((char)c)) {
134                     st = "aname";
135                 } else if(c == '>') {
136                     st = "content";
137                     c = in.read();
138                 } else if(c == '/') {
139                     st = "stagend";
140                     c = in.read();
141                 } else if(Character.isWhitespace((char)c)) {
142                     c = in.read();
143                 } else if(c < 0) {
144                     throw(new ParseException("Unexpected end-of-file while parsing attributes"));
145                 } else {
146                     throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered inside tag"));
147                 }
148             } else if(st == "stagend") {
149                 if(c == '>') {
150                     stack.pop();
151                     c = in.read();
152                     st = "content";
153                 } else if(Character.isWhitespace((char)c)) {
154                     c = in.read();
155                 } else if(c < 0) {
156                     throw(new ParseException("Unexpected end-of-file at end of empty tag"));
157                 } else {
158                     throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered at and of empty tag"));
159                 }
160             } else if(st == "aname") {
161                 if(namechar((char)c)) {
162                     buf.append((char)c);
163                     c = in.read();
164                 } else if(Character.isWhitespace((char)c)) {
165                     c = in.read();
166                 } else if(c == '=') {
167                     Element el = (Element)stack.peek();
168                     Attr attr = makeattr(doc, el, buf.toString());
169                     el.setAttributeNodeNS(attr);
170                     buf = new StringBuilder();
171                     stack.push(attr);
172                     st = "avalstart";
173                     c = in.read();
174                 } else if(c < 0) {
175                     throw(new ParseException("Unexpected end-of-file while parsing attribute name"));
176                 } else {
177                     throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in attribute name"));
178                 }
179             } else if(st == "avalstart") {
180                 if((c == '\'') || (c == '"')) {
181                     atype = (char)c;
182                     c = in.read();
183                     st = "aval";
184                 } else if(Character.isWhitespace((char)c)) {
185                     c = in.read();
186                 } else if(c < 0) {
187                     throw(new ParseException("Unexpected end-of-file while parsing attribute value"));
188                 } else {
189                     throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in attribute value"));
190                 }
191             } else if(st == "aval") {
192                 if(c == atype) {
193                     c = in.read();
194                     Attr a = (Attr)stack.pop();
195                     a.setValue(buf.toString());
196                     buf = new StringBuilder();
197                     st = "attr";
198                 } else if(c == '&') {
199                     c = in.read();
200                     st = "aent";
201                 } else if(c < 0) {
202                     throw(new ParseException("Unexpected end-of-file while parsing attribute value"));
203                 } else {
204                     buf.append((char)c);
205                     c = in.read();
206                 }
207             } else if(st == "etag") {
208                 if(namechar((char)c)) {
209                     buf.append((char)c);
210                     c = in.read();
211                 } else if(c == '>') {
212                     String nm = buf.toString();
213                     buf = new StringBuilder();
214                     Node n = stack.pop();
215                     if(n instanceof DocumentFragment)
216                         throw(new ParseException("Unexpected end tag for `" + nm + "' while parsing root content"));
217                     Element el = (Element)n;
218                     if(!nm.equals(el.getTagName()))
219                         throw(new ParseException("Unexpected end tag for `" + nm + "' while parsing `" + el.getTagName() + "'"));
220                     c = in.read();
221                     st = "content";
222                 } else if(c < 0) {
223                     throw(new ParseException("Unexpected end-of-file while parsing end tag"));
224                 } else {
225                     throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in end tag"));
226                 }
227             } else if(st == "text") {
228                 boolean flush = false;
229                 if(c == '&') {
230                     st = "ent";
231                     c = in.read();
232                 } else if(c == '<') {
233                     flush = true;
234                     st = "content";
235                 } else if(c < 0) {
236                     flush = true;
237                     st = "content";
238                 } else {
239                     buf.append((char)c);
240                     c = in.read();
241                 }
242                 if(flush) {
243                     Text n = doc.createTextNode(buf.toString());
244                     buf = new StringBuilder();
245                     stack.peek().appendChild(n);
246                 }
247             } else if(st == "ent") {
248                 if(c == ';') {
249                     String ename = ebuf.toString();
250                     ebuf = new StringBuilder();
251                     String rep = entity(ename);
252                     if(rep == null)
253                         throw(new ParseException("Unknown entity `" + ename + "' encountered"));
254                     buf.append(rep);
255                     st = "text";
256                     c = in.read();
257                 } else if(c < 0) {
258                     throw(new ParseException("Unexpected end-of-file while parsing entity name"));
259                 } else if(namechar((char)c)) {
260                     ebuf.append((char)c);
261                     c = in.read();
262                 } else {
263                     throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in entity name"));
264                 }
265             } else if(st == "aent") {
266                 if(c == ';') {
267                     String ename = ebuf.toString();
268                     ebuf = new StringBuilder();
269                     String rep = entity(ename);
270                     if(rep == null)
271                         throw(new ParseException("Unknown entity `" + ename + "' encountered"));
272                     buf.append(rep);
273                     st = "aval";
274                     c = in.read();
275                 } else if(c < 0) {
276                     throw(new ParseException("Unexpected end-of-file while parsing entity name"));
277                 } else if(namechar((char)c)) {
278                     ebuf.append((char)c);
279                     c = in.read();
280                 } else {
281                     throw(new ParseException("Unexpected character `" + printable((char)c) + "' encountered in entity name"));
282                 }
283             } else {
284                 throw(new Error("BUG: Typoed state " + st));
285             }
286         }
287     }
288     
289     private static String printable(char c) {
290         if(c < 32)
291             return(String.format("\\%03o", (int)c));
292         return(Character.toString(c));
293     }
294
295     public static void main(String[] args) throws Exception {
296         Parser p = new Parser();
297         DocumentFragment f = p.parse(new FileReader(args[0]));
298         javax.xml.transform.TransformerFactory fac = javax.xml.transform.TransformerFactory.newInstance();
299         fac.setAttribute("indent-number", 2);
300         javax.xml.transform.Transformer t = fac.newTransformer();
301         t.setOutputProperty(javax.xml.transform.OutputKeys.INDENT, "yes");
302         t.transform(new javax.xml.transform.dom.DOMSource(f), new javax.xml.transform.stream.StreamResult(System.out));
303         System.out.println(t.getClass());
304     }
305 }