WIP: Convenient document creation utilities.
[jsvc.git] / src / dolda / jsvc / next / Parser.java
CommitLineData
a5e6bd24
FT
1package dolda.jsvc.next;
2
3import java.io.*;
4import java.util.*;
5import org.w3c.dom.*;
a5e6bd24
FT
6
7public class Parser {
7c0e72ac 8 public class State {
816cbb00 9 public final Document doc = DomUtil.document(null, "dummy");
7c0e72ac
FT
10 public final PeekReader in;
11
12 private State(Reader in) {
13 this.in = new PeekReader(in);
14 }
15 }
16
a5e6bd24
FT
17 private static boolean namechar(char c) {
18 return((c == ':') || (c == '_') || (c == '$') || (c == '.') || (c == '-') || ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
19 }
20
21 protected String entity(String name) {
22 if(name.equals("amp"))
23 return("&");
24 if(name.equals("lt"))
25 return("<");
26 if(name.equals("gt"))
27 return(">");
28 if(name.equals("apos"))
29 return("'");
30 if(name.equals("quot"))
31 return("\"");
32 return(null);
33 }
34
35 protected Element makenode(Document doc, String name) {
36 return(doc.createElementNS(null, name));
37 }
38
7c0e72ac
FT
39 protected Attr makeattr(Document doc, Element el, String name, String val) {
40 Attr a = doc.createAttributeNS(el.getNamespaceURI(), name);
41 a.setValue(val);
42 return(a);
43 }
44
a5e6bd24
FT
45 protected Attr makeattr(Document doc, Element el, String name) {
46 return(doc.createAttributeNS(el.getNamespaceURI(), name));
47 }
48
7c0e72ac 49 protected String name(State s) throws IOException {
a5e6bd24 50 StringBuilder buf = new StringBuilder();
a5e6bd24 51 while(true) {
7c0e72ac
FT
52 int c = s.in.peek();
53 if(c < 0) {
54 break;
55 } else if(namechar((char)c)) {
56 buf.append((char)s.in.read());
57 } else {
58 break;
59 }
60 }
61 if(buf.length() == 0)
62 throw(new ParseException("Expected name, got `" + printable(s.in.peek()) + "'"));
63 return(buf.toString());
64 }
65
66 protected String entity(State s) throws IOException {
67 int c = s.in.read();
68 if(c != '&')
69 throw(new ParseException("Expected `&' while reading entity, got `" + printable(c) + "'"));
70 String nm = name(s);
71 c = s.in.read();
72 if(c != ';')
73 throw(new ParseException("Expected `;' while reading entity, got `" + printable(c) + "'"));
74 return(entity(nm));
75 }
76
77 protected Attr attribute(State s, Element el) throws IOException {
78 String nm = name(s);
79 s.in.peek(true);
80 int c = s.in.read();
81 if(c != '=')
82 throw(new ParseException("Expected `=' while reading attribute, got `" + printable(c) + "'"));
83 s.in.peek(true);
84 int qt = s.in.read();
85 if((qt != '"') && (qt != '\''))
86 throw(new ParseException("Expected double or single quote while reading attribute, got `" + printable(qt) + "'"));
87 StringBuilder buf = new StringBuilder();
88 while(true) {
89 c = s.in.peek();
90 if(c < 0) {
91 throw(new ParseException("Unexpected end-of-file while reading attribute value"));
92 } else if(c == qt) {
93 s.in.read();
94 break;
95 } else if(c == '&') {
96 buf.append(entity(s));
97 } else {
98 buf.append((char)s.in.read());
99 }
100 }
101 return(makeattr(s.doc, el, nm, buf.toString()));
102 }
103
104 protected Element element(State s) throws IOException {
105 Element n = makenode(s.doc, name(s));
106 while(true) {
107 int c = s.in.peek(true);
108 if(c < 0) {
109 throw(new ParseException("Unexpected end-of-file while parsing start tag"));
110 } else if(c == '>') {
111 s.in.read();
112 break;
113 } else if(c == '/') {
114 s.in.read();
115 s.in.peek(true);
116 c = s.in.read();
117 if(c != '>')
118 throw(new ParseException("Unexpected character `" + printable(c) + "' encountered in end of empty tag"));
119 return(n);
120 } else if(namechar((char)c)) {
121 n.setAttributeNodeNS(attribute(s, n));
122 } else {
123 throw(new ParseException("Unexpected character `" + printable(c) + "' encountered in start tag"));
124 }
125 }
126 while(true) {
127 int c = s.in.peek();
128 if(c < 0) {
129 break;
130 } else if(c == '<') {
131 s.in.read();
132 c = s.in.peek(true);
133 if(c == '/') {
134 s.in.read();
135 s.in.peek(true);
136 String nm = name(s);
137 if(!nm.equals(n.getTagName()))
138 throw(new ParseException("Unexpected end tag for `" + nm + "' while parsing `" + n.getTagName() + "'"));
139 if(s.in.peek(true) != '>')
140 throw(new ParseException("Expected `>' while reading end tag, got `" + printable(c) + "'"));
141 s.in.read();
142 break;
a5e6bd24 143 } else {
7c0e72ac 144 n.appendChild(stag(s));
a5e6bd24 145 }
7c0e72ac
FT
146 } else {
147 n.appendChild(text(s));
148 }
149 }
150 return(n);
151 }
152
153 protected Comment comment(State s) throws IOException {
154 if((s.in.read() != '!') ||
155 (s.in.read() != '-') ||
156 (s.in.read() != '-'))
157 throw(new ParseException("Illegal start of comment"));
158 StringBuilder buf = new StringBuilder();
159 while(true) {
160 int c = s.in.peek();
161 if(c < 0) {
162 throw(new ParseException("Unexpected end-of-file while parsing comment"));
163 } else if(c == '-') {
164 s.in.read();
165 if(s.in.peek() == '-') {
166 s.in.read();
167 if(s.in.peek() == '>') {
168 s.in.read();
169 break;
170 } else {
171 buf.append("--");
172 }
a5e6bd24 173 } else {
7c0e72ac 174 buf.append("-");
a5e6bd24
FT
175 }
176 } else {
7c0e72ac 177 buf.append((char)s.in.read());
a5e6bd24
FT
178 }
179 }
7c0e72ac 180 return(s.doc.createComment(buf.toString()));
a5e6bd24 181 }
7c0e72ac
FT
182
183 protected Node stag(State s) throws IOException {
184 int c = s.in.peek(true);
185 if(c < 0) {
186 throw(new ParseException("Unexpected end-of-file while parsing tag type"));
187 } else if(c == '!') {
188 return(comment(s));
189 } else {
190 return(element(s));
191 }
192 }
193
194 protected Text text(State s) throws IOException {
195 StringBuilder buf = new StringBuilder();
196 while(true) {
197 int c = s.in.peek();
198 if(c < 0) {
199 break;
200 } else if(c == '<') {
201 break;
202 } else if(c == '&') {
203 buf.append(entity(s));
204 } else {
205 buf.append((char)s.in.read());
206 }
207 }
208 return(s.doc.createTextNode(buf.toString()));
209 }
210
211 public DocumentFragment parse(Reader in) throws IOException {
212 State s = new State(in);
213 DocumentFragment frag = s.doc.createDocumentFragment();
214 while(true) {
215 int c = s.in.peek();
216 if(c < 0) {
217 return(frag);
218 } else if(c == '<') {
219 s.in.read();
220 frag.appendChild(stag(s));
221 } else {
222 frag.appendChild(text(s));
223 }
224 }
225 }
226
227 private static String printable(int c) {
228 if(c < 0)
229 return("EOF");
a5e6bd24
FT
230 if(c < 32)
231 return(String.format("\\%03o", (int)c));
7c0e72ac 232 return(Character.toString((char)c));
a5e6bd24
FT
233 }
234
235 public static void main(String[] args) throws Exception {
236 Parser p = new Parser();
237 DocumentFragment f = p.parse(new FileReader(args[0]));
238 javax.xml.transform.TransformerFactory fac = javax.xml.transform.TransformerFactory.newInstance();
239 fac.setAttribute("indent-number", 2);
240 javax.xml.transform.Transformer t = fac.newTransformer();
241 t.setOutputProperty(javax.xml.transform.OutputKeys.INDENT, "yes");
242 t.transform(new javax.xml.transform.dom.DOMSource(f), new javax.xml.transform.stream.StreamResult(System.out));
243 System.out.println(t.getClass());
244 }
245}