Corrected the parser's attribute handling.
[jsvc.git] / src / dolda / jsvc / next / Parser.java
CommitLineData
a5e6bd24
FT
1package dolda.jsvc.next;
2
3import java.io.*;
4import java.util.*;
5import org.w3c.dom.*;
a5e6bd24
FT
6
7public class Parser {
7c0e72ac 8 public class State {
816cbb00 9 public final Document doc = DomUtil.document(null, "dummy");
7c0e72ac
FT
10 public final PeekReader in;
11
12 private State(Reader in) {
13 this.in = new PeekReader(in);
14 }
15 }
16
a5e6bd24
FT
17 private static boolean namechar(char c) {
18 return((c == ':') || (c == '_') || (c == '$') || (c == '.') || (c == '-') || ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
19 }
20
21 protected String entity(String name) {
22 if(name.equals("amp"))
23 return("&");
24 if(name.equals("lt"))
25 return("<");
26 if(name.equals("gt"))
27 return(">");
28 if(name.equals("apos"))
29 return("'");
30 if(name.equals("quot"))
31 return("\"");
32 return(null);
33 }
34
35 protected Element makenode(Document doc, String name) {
36 return(doc.createElementNS(null, name));
37 }
38
7c0e72ac 39 protected String name(State s) throws IOException {
a5e6bd24 40 StringBuilder buf = new StringBuilder();
a5e6bd24 41 while(true) {
7c0e72ac
FT
42 int c = s.in.peek();
43 if(c < 0) {
44 break;
45 } else if(namechar((char)c)) {
46 buf.append((char)s.in.read());
47 } else {
48 break;
49 }
50 }
51 if(buf.length() == 0)
52 throw(new ParseException("Expected name, got `" + printable(s.in.peek()) + "'"));
53 return(buf.toString());
54 }
55
56 protected String entity(State s) throws IOException {
57 int c = s.in.read();
58 if(c != '&')
59 throw(new ParseException("Expected `&' while reading entity, got `" + printable(c) + "'"));
60 String nm = name(s);
61 c = s.in.read();
62 if(c != ';')
63 throw(new ParseException("Expected `;' while reading entity, got `" + printable(c) + "'"));
64 return(entity(nm));
65 }
66
67 protected Attr attribute(State s, Element el) throws IOException {
3de0fa23 68 Attr a = s.doc.createAttributeNS(null, name(s));
7c0e72ac
FT
69 s.in.peek(true);
70 int c = s.in.read();
71 if(c != '=')
72 throw(new ParseException("Expected `=' while reading attribute, got `" + printable(c) + "'"));
73 s.in.peek(true);
74 int qt = s.in.read();
75 if((qt != '"') && (qt != '\''))
76 throw(new ParseException("Expected double or single quote while reading attribute, got `" + printable(qt) + "'"));
77 StringBuilder buf = new StringBuilder();
78 while(true) {
79 c = s.in.peek();
80 if(c < 0) {
81 throw(new ParseException("Unexpected end-of-file while reading attribute value"));
82 } else if(c == qt) {
83 s.in.read();
84 break;
85 } else if(c == '&') {
86 buf.append(entity(s));
87 } else {
88 buf.append((char)s.in.read());
89 }
90 }
3de0fa23
FT
91 a.setValue(buf.toString());
92 return(a);
7c0e72ac
FT
93 }
94
95 protected Element element(State s) throws IOException {
96 Element n = makenode(s.doc, name(s));
97 while(true) {
98 int c = s.in.peek(true);
99 if(c < 0) {
100 throw(new ParseException("Unexpected end-of-file while parsing start tag"));
101 } else if(c == '>') {
102 s.in.read();
103 break;
104 } else if(c == '/') {
105 s.in.read();
106 s.in.peek(true);
107 c = s.in.read();
108 if(c != '>')
109 throw(new ParseException("Unexpected character `" + printable(c) + "' encountered in end of empty tag"));
110 return(n);
111 } else if(namechar((char)c)) {
112 n.setAttributeNodeNS(attribute(s, n));
113 } else {
114 throw(new ParseException("Unexpected character `" + printable(c) + "' encountered in start tag"));
115 }
116 }
117 while(true) {
118 int c = s.in.peek();
119 if(c < 0) {
120 break;
121 } else if(c == '<') {
122 s.in.read();
123 c = s.in.peek(true);
124 if(c == '/') {
125 s.in.read();
126 s.in.peek(true);
127 String nm = name(s);
128 if(!nm.equals(n.getTagName()))
129 throw(new ParseException("Unexpected end tag for `" + nm + "' while parsing `" + n.getTagName() + "'"));
130 if(s.in.peek(true) != '>')
131 throw(new ParseException("Expected `>' while reading end tag, got `" + printable(c) + "'"));
132 s.in.read();
133 break;
a5e6bd24 134 } else {
7c0e72ac 135 n.appendChild(stag(s));
a5e6bd24 136 }
7c0e72ac
FT
137 } else {
138 n.appendChild(text(s));
139 }
140 }
141 return(n);
142 }
143
144 protected Comment comment(State s) throws IOException {
145 if((s.in.read() != '!') ||
146 (s.in.read() != '-') ||
147 (s.in.read() != '-'))
148 throw(new ParseException("Illegal start of comment"));
149 StringBuilder buf = new StringBuilder();
150 while(true) {
151 int c = s.in.peek();
152 if(c < 0) {
153 throw(new ParseException("Unexpected end-of-file while parsing comment"));
154 } else if(c == '-') {
155 s.in.read();
156 if(s.in.peek() == '-') {
157 s.in.read();
158 if(s.in.peek() == '>') {
159 s.in.read();
160 break;
161 } else {
162 buf.append("--");
163 }
a5e6bd24 164 } else {
7c0e72ac 165 buf.append("-");
a5e6bd24
FT
166 }
167 } else {
7c0e72ac 168 buf.append((char)s.in.read());
a5e6bd24
FT
169 }
170 }
7c0e72ac 171 return(s.doc.createComment(buf.toString()));
a5e6bd24 172 }
7c0e72ac
FT
173
174 protected Node stag(State s) throws IOException {
175 int c = s.in.peek(true);
176 if(c < 0) {
177 throw(new ParseException("Unexpected end-of-file while parsing tag type"));
178 } else if(c == '!') {
179 return(comment(s));
180 } else {
181 return(element(s));
182 }
183 }
184
185 protected Text text(State s) throws IOException {
186 StringBuilder buf = new StringBuilder();
187 while(true) {
188 int c = s.in.peek();
189 if(c < 0) {
190 break;
191 } else if(c == '<') {
192 break;
193 } else if(c == '&') {
194 buf.append(entity(s));
195 } else {
196 buf.append((char)s.in.read());
197 }
198 }
199 return(s.doc.createTextNode(buf.toString()));
200 }
201
202 public DocumentFragment parse(Reader in) throws IOException {
203 State s = new State(in);
204 DocumentFragment frag = s.doc.createDocumentFragment();
205 while(true) {
206 int c = s.in.peek();
207 if(c < 0) {
208 return(frag);
209 } else if(c == '<') {
210 s.in.read();
211 frag.appendChild(stag(s));
212 } else {
213 frag.appendChild(text(s));
214 }
215 }
216 }
217
218 private static String printable(int c) {
219 if(c < 0)
220 return("EOF");
a5e6bd24
FT
221 if(c < 32)
222 return(String.format("\\%03o", (int)c));
7c0e72ac 223 return(Character.toString((char)c));
a5e6bd24
FT
224 }
225
226 public static void main(String[] args) throws Exception {
227 Parser p = new Parser();
228 DocumentFragment f = p.parse(new FileReader(args[0]));
229 javax.xml.transform.TransformerFactory fac = javax.xml.transform.TransformerFactory.newInstance();
230 fac.setAttribute("indent-number", 2);
231 javax.xml.transform.Transformer t = fac.newTransformer();
232 t.setOutputProperty(javax.xml.transform.OutputKeys.INDENT, "yes");
233 t.transform(new javax.xml.transform.dom.DOMSource(f), new javax.xml.transform.stream.StreamResult(System.out));
234 System.out.println(t.getClass());
235 }
236}