Replaced the NEXT parser with a new, better, recursive-descent one.
[jsvc.git] / src / dolda / jsvc / next / Parser.java
CommitLineData
a5e6bd24
FT
1package dolda.jsvc.next;
2
3import java.io.*;
4import java.util.*;
5import org.w3c.dom.*;
6import org.w3c.dom.bootstrap.*;
7
8public class Parser {
/** DOM implementation shared by every parser state; resolved once at class-load time. */
private static final DOMImplementation domimp;

static {
    DOMImplementationRegistry reg;
    try {
        reg = DOMImplementationRegistry.newInstance();
    } catch(Exception e) {
        // Registry bootstrap failures are not recoverable; keep the cause attached.
        throw(new Error(e));
    }
    // An empty feature string places no requirements on the implementation.
    DOMImplementation di = reg.getDOMImplementation("");
    if(di == null)
        throw(new RuntimeException("Could not get a DOM implementation"));
    domimp = di;
}
23
7c0e72ac
FT
24 public class State {
25 public final Document doc = domimp.createDocument(null, "dummy", null);
26 public final PeekReader in;
27
28 private State(Reader in) {
29 this.in = new PeekReader(in);
30 }
31 }
32
a5e6bd24
FT
33 private static boolean namechar(char c) {
34 return((c == ':') || (c == '_') || (c == '$') || (c == '.') || (c == '-') || ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
35 }
36
37 protected String entity(String name) {
38 if(name.equals("amp"))
39 return("&");
40 if(name.equals("lt"))
41 return("<");
42 if(name.equals("gt"))
43 return(">");
44 if(name.equals("apos"))
45 return("'");
46 if(name.equals("quot"))
47 return("\"");
48 return(null);
49 }
50
51 protected Element makenode(Document doc, String name) {
52 return(doc.createElementNS(null, name));
53 }
54
7c0e72ac
FT
55 protected Attr makeattr(Document doc, Element el, String name, String val) {
56 Attr a = doc.createAttributeNS(el.getNamespaceURI(), name);
57 a.setValue(val);
58 return(a);
59 }
60
a5e6bd24
FT
61 protected Attr makeattr(Document doc, Element el, String name) {
62 return(doc.createAttributeNS(el.getNamespaceURI(), name));
63 }
64
7c0e72ac 65 protected String name(State s) throws IOException {
a5e6bd24 66 StringBuilder buf = new StringBuilder();
a5e6bd24 67 while(true) {
7c0e72ac
FT
68 int c = s.in.peek();
69 if(c < 0) {
70 break;
71 } else if(namechar((char)c)) {
72 buf.append((char)s.in.read());
73 } else {
74 break;
75 }
76 }
77 if(buf.length() == 0)
78 throw(new ParseException("Expected name, got `" + printable(s.in.peek()) + "'"));
79 return(buf.toString());
80 }
81
82 protected String entity(State s) throws IOException {
83 int c = s.in.read();
84 if(c != '&')
85 throw(new ParseException("Expected `&' while reading entity, got `" + printable(c) + "'"));
86 String nm = name(s);
87 c = s.in.read();
88 if(c != ';')
89 throw(new ParseException("Expected `;' while reading entity, got `" + printable(c) + "'"));
90 return(entity(nm));
91 }
92
93 protected Attr attribute(State s, Element el) throws IOException {
94 String nm = name(s);
95 s.in.peek(true);
96 int c = s.in.read();
97 if(c != '=')
98 throw(new ParseException("Expected `=' while reading attribute, got `" + printable(c) + "'"));
99 s.in.peek(true);
100 int qt = s.in.read();
101 if((qt != '"') && (qt != '\''))
102 throw(new ParseException("Expected double or single quote while reading attribute, got `" + printable(qt) + "'"));
103 StringBuilder buf = new StringBuilder();
104 while(true) {
105 c = s.in.peek();
106 if(c < 0) {
107 throw(new ParseException("Unexpected end-of-file while reading attribute value"));
108 } else if(c == qt) {
109 s.in.read();
110 break;
111 } else if(c == '&') {
112 buf.append(entity(s));
113 } else {
114 buf.append((char)s.in.read());
115 }
116 }
117 return(makeattr(s.doc, el, nm, buf.toString()));
118 }
119
120 protected Element element(State s) throws IOException {
121 Element n = makenode(s.doc, name(s));
122 while(true) {
123 int c = s.in.peek(true);
124 if(c < 0) {
125 throw(new ParseException("Unexpected end-of-file while parsing start tag"));
126 } else if(c == '>') {
127 s.in.read();
128 break;
129 } else if(c == '/') {
130 s.in.read();
131 s.in.peek(true);
132 c = s.in.read();
133 if(c != '>')
134 throw(new ParseException("Unexpected character `" + printable(c) + "' encountered in end of empty tag"));
135 return(n);
136 } else if(namechar((char)c)) {
137 n.setAttributeNodeNS(attribute(s, n));
138 } else {
139 throw(new ParseException("Unexpected character `" + printable(c) + "' encountered in start tag"));
140 }
141 }
142 while(true) {
143 int c = s.in.peek();
144 if(c < 0) {
145 break;
146 } else if(c == '<') {
147 s.in.read();
148 c = s.in.peek(true);
149 if(c == '/') {
150 s.in.read();
151 s.in.peek(true);
152 String nm = name(s);
153 if(!nm.equals(n.getTagName()))
154 throw(new ParseException("Unexpected end tag for `" + nm + "' while parsing `" + n.getTagName() + "'"));
155 if(s.in.peek(true) != '>')
156 throw(new ParseException("Expected `>' while reading end tag, got `" + printable(c) + "'"));
157 s.in.read();
158 break;
a5e6bd24 159 } else {
7c0e72ac 160 n.appendChild(stag(s));
a5e6bd24 161 }
7c0e72ac
FT
162 } else {
163 n.appendChild(text(s));
164 }
165 }
166 return(n);
167 }
168
169 protected Comment comment(State s) throws IOException {
170 if((s.in.read() != '!') ||
171 (s.in.read() != '-') ||
172 (s.in.read() != '-'))
173 throw(new ParseException("Illegal start of comment"));
174 StringBuilder buf = new StringBuilder();
175 while(true) {
176 int c = s.in.peek();
177 if(c < 0) {
178 throw(new ParseException("Unexpected end-of-file while parsing comment"));
179 } else if(c == '-') {
180 s.in.read();
181 if(s.in.peek() == '-') {
182 s.in.read();
183 if(s.in.peek() == '>') {
184 s.in.read();
185 break;
186 } else {
187 buf.append("--");
188 }
a5e6bd24 189 } else {
7c0e72ac 190 buf.append("-");
a5e6bd24
FT
191 }
192 } else {
7c0e72ac 193 buf.append((char)s.in.read());
a5e6bd24
FT
194 }
195 }
7c0e72ac 196 return(s.doc.createComment(buf.toString()));
a5e6bd24 197 }
7c0e72ac
FT
198
199 protected Node stag(State s) throws IOException {
200 int c = s.in.peek(true);
201 if(c < 0) {
202 throw(new ParseException("Unexpected end-of-file while parsing tag type"));
203 } else if(c == '!') {
204 return(comment(s));
205 } else {
206 return(element(s));
207 }
208 }
209
210 protected Text text(State s) throws IOException {
211 StringBuilder buf = new StringBuilder();
212 while(true) {
213 int c = s.in.peek();
214 if(c < 0) {
215 break;
216 } else if(c == '<') {
217 break;
218 } else if(c == '&') {
219 buf.append(entity(s));
220 } else {
221 buf.append((char)s.in.read());
222 }
223 }
224 return(s.doc.createTextNode(buf.toString()));
225 }
226
227 public DocumentFragment parse(Reader in) throws IOException {
228 State s = new State(in);
229 DocumentFragment frag = s.doc.createDocumentFragment();
230 while(true) {
231 int c = s.in.peek();
232 if(c < 0) {
233 return(frag);
234 } else if(c == '<') {
235 s.in.read();
236 frag.appendChild(stag(s));
237 } else {
238 frag.appendChild(text(s));
239 }
240 }
241 }
242
243 private static String printable(int c) {
244 if(c < 0)
245 return("EOF");
a5e6bd24
FT
246 if(c < 32)
247 return(String.format("\\%03o", (int)c));
7c0e72ac 248 return(Character.toString((char)c));
a5e6bd24
FT
249 }
250
251 public static void main(String[] args) throws Exception {
252 Parser p = new Parser();
253 DocumentFragment f = p.parse(new FileReader(args[0]));
254 javax.xml.transform.TransformerFactory fac = javax.xml.transform.TransformerFactory.newInstance();
255 fac.setAttribute("indent-number", 2);
256 javax.xml.transform.Transformer t = fac.newTransformer();
257 t.setOutputProperty(javax.xml.transform.OutputKeys.INDENT, "yes");
258 t.transform(new javax.xml.transform.dom.DOMSource(f), new javax.xml.transform.stream.StreamResult(System.out));
259 System.out.println(t.getClass());
260 }
261}