Commit | Line | Data |
---|---|---|
a5e6bd24 FT |
1 | package dolda.jsvc.next; |
2 | ||
3 | import java.io.*; | |
4 | import java.util.*; | |
5 | import org.w3c.dom.*; | |
6 | import org.w3c.dom.bootstrap.*; | |
7 | ||
8 | public class Parser { | |
/**
 * DOM implementation shared by all parser instances. It is resolved once,
 * when the class is initialized, and is used to create the document that
 * owns the nodes produced during a parse.
 */
private static final DOMImplementation domimp;

/*
 * Resolve the DOM implementation through the standard registry. Any
 * failure to create the registry is rethrown as an Error, and a missing
 * implementation as a RuntimeException, so a broken environment is
 * reported when the class is loaded rather than in the middle of a parse.
 */
static {
    DOMImplementationRegistry reg;
    try {
        reg = DOMImplementationRegistry.newInstance();
    } catch(Exception e) {
        throw new Error(e);
    }
    /* The empty feature string requests no particular feature. */
    DOMImplementation di = reg.getDOMImplementation("");
    if(di == null) {
        throw new RuntimeException("Could not get a DOM implementation");
    }
    domimp = di;
}
23 | ||
7c0e72ac FT |
24 | public class State { |
25 | public final Document doc = domimp.createDocument(null, "dummy", null); | |
26 | public final PeekReader in; | |
27 | ||
28 | private State(Reader in) { | |
29 | this.in = new PeekReader(in); | |
30 | } | |
31 | } | |
32 | ||
a5e6bd24 FT |
33 | private static boolean namechar(char c) { |
34 | return((c == ':') || (c == '_') || (c == '$') || (c == '.') || (c == '-') || ((c >= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z'))); | |
35 | } | |
36 | ||
37 | protected String entity(String name) { | |
38 | if(name.equals("amp")) | |
39 | return("&"); | |
40 | if(name.equals("lt")) | |
41 | return("<"); | |
42 | if(name.equals("gt")) | |
43 | return(">"); | |
44 | if(name.equals("apos")) | |
45 | return("'"); | |
46 | if(name.equals("quot")) | |
47 | return("\""); | |
48 | return(null); | |
49 | } | |
50 | ||
51 | protected Element makenode(Document doc, String name) { | |
52 | return(doc.createElementNS(null, name)); | |
53 | } | |
54 | ||
7c0e72ac FT |
55 | protected Attr makeattr(Document doc, Element el, String name, String val) { |
56 | Attr a = doc.createAttributeNS(el.getNamespaceURI(), name); | |
57 | a.setValue(val); | |
58 | return(a); | |
59 | } | |
60 | ||
a5e6bd24 FT |
61 | protected Attr makeattr(Document doc, Element el, String name) { |
62 | return(doc.createAttributeNS(el.getNamespaceURI(), name)); | |
63 | } | |
64 | ||
7c0e72ac | 65 | protected String name(State s) throws IOException { |
a5e6bd24 | 66 | StringBuilder buf = new StringBuilder(); |
a5e6bd24 | 67 | while(true) { |
7c0e72ac FT |
68 | int c = s.in.peek(); |
69 | if(c < 0) { | |
70 | break; | |
71 | } else if(namechar((char)c)) { | |
72 | buf.append((char)s.in.read()); | |
73 | } else { | |
74 | break; | |
75 | } | |
76 | } | |
77 | if(buf.length() == 0) | |
78 | throw(new ParseException("Expected name, got `" + printable(s.in.peek()) + "'")); | |
79 | return(buf.toString()); | |
80 | } | |
81 | ||
82 | protected String entity(State s) throws IOException { | |
83 | int c = s.in.read(); | |
84 | if(c != '&') | |
85 | throw(new ParseException("Expected `&' while reading entity, got `" + printable(c) + "'")); | |
86 | String nm = name(s); | |
87 | c = s.in.read(); | |
88 | if(c != ';') | |
89 | throw(new ParseException("Expected `;' while reading entity, got `" + printable(c) + "'")); | |
90 | return(entity(nm)); | |
91 | } | |
92 | ||
93 | protected Attr attribute(State s, Element el) throws IOException { | |
94 | String nm = name(s); | |
95 | s.in.peek(true); | |
96 | int c = s.in.read(); | |
97 | if(c != '=') | |
98 | throw(new ParseException("Expected `=' while reading attribute, got `" + printable(c) + "'")); | |
99 | s.in.peek(true); | |
100 | int qt = s.in.read(); | |
101 | if((qt != '"') && (qt != '\'')) | |
102 | throw(new ParseException("Expected double or single quote while reading attribute, got `" + printable(qt) + "'")); | |
103 | StringBuilder buf = new StringBuilder(); | |
104 | while(true) { | |
105 | c = s.in.peek(); | |
106 | if(c < 0) { | |
107 | throw(new ParseException("Unexpected end-of-file while reading attribute value")); | |
108 | } else if(c == qt) { | |
109 | s.in.read(); | |
110 | break; | |
111 | } else if(c == '&') { | |
112 | buf.append(entity(s)); | |
113 | } else { | |
114 | buf.append((char)s.in.read()); | |
115 | } | |
116 | } | |
117 | return(makeattr(s.doc, el, nm, buf.toString())); | |
118 | } | |
119 | ||
120 | protected Element element(State s) throws IOException { | |
121 | Element n = makenode(s.doc, name(s)); | |
122 | while(true) { | |
123 | int c = s.in.peek(true); | |
124 | if(c < 0) { | |
125 | throw(new ParseException("Unexpected end-of-file while parsing start tag")); | |
126 | } else if(c == '>') { | |
127 | s.in.read(); | |
128 | break; | |
129 | } else if(c == '/') { | |
130 | s.in.read(); | |
131 | s.in.peek(true); | |
132 | c = s.in.read(); | |
133 | if(c != '>') | |
134 | throw(new ParseException("Unexpected character `" + printable(c) + "' encountered in end of empty tag")); | |
135 | return(n); | |
136 | } else if(namechar((char)c)) { | |
137 | n.setAttributeNodeNS(attribute(s, n)); | |
138 | } else { | |
139 | throw(new ParseException("Unexpected character `" + printable(c) + "' encountered in start tag")); | |
140 | } | |
141 | } | |
142 | while(true) { | |
143 | int c = s.in.peek(); | |
144 | if(c < 0) { | |
145 | break; | |
146 | } else if(c == '<') { | |
147 | s.in.read(); | |
148 | c = s.in.peek(true); | |
149 | if(c == '/') { | |
150 | s.in.read(); | |
151 | s.in.peek(true); | |
152 | String nm = name(s); | |
153 | if(!nm.equals(n.getTagName())) | |
154 | throw(new ParseException("Unexpected end tag for `" + nm + "' while parsing `" + n.getTagName() + "'")); | |
155 | if(s.in.peek(true) != '>') | |
156 | throw(new ParseException("Expected `>' while reading end tag, got `" + printable(c) + "'")); | |
157 | s.in.read(); | |
158 | break; | |
a5e6bd24 | 159 | } else { |
7c0e72ac | 160 | n.appendChild(stag(s)); |
a5e6bd24 | 161 | } |
7c0e72ac FT |
162 | } else { |
163 | n.appendChild(text(s)); | |
164 | } | |
165 | } | |
166 | return(n); | |
167 | } | |
168 | ||
169 | protected Comment comment(State s) throws IOException { | |
170 | if((s.in.read() != '!') || | |
171 | (s.in.read() != '-') || | |
172 | (s.in.read() != '-')) | |
173 | throw(new ParseException("Illegal start of comment")); | |
174 | StringBuilder buf = new StringBuilder(); | |
175 | while(true) { | |
176 | int c = s.in.peek(); | |
177 | if(c < 0) { | |
178 | throw(new ParseException("Unexpected end-of-file while parsing comment")); | |
179 | } else if(c == '-') { | |
180 | s.in.read(); | |
181 | if(s.in.peek() == '-') { | |
182 | s.in.read(); | |
183 | if(s.in.peek() == '>') { | |
184 | s.in.read(); | |
185 | break; | |
186 | } else { | |
187 | buf.append("--"); | |
188 | } | |
a5e6bd24 | 189 | } else { |
7c0e72ac | 190 | buf.append("-"); |
a5e6bd24 FT |
191 | } |
192 | } else { | |
7c0e72ac | 193 | buf.append((char)s.in.read()); |
a5e6bd24 FT |
194 | } |
195 | } | |
7c0e72ac | 196 | return(s.doc.createComment(buf.toString())); |
a5e6bd24 | 197 | } |
7c0e72ac FT |
198 | |
199 | protected Node stag(State s) throws IOException { | |
200 | int c = s.in.peek(true); | |
201 | if(c < 0) { | |
202 | throw(new ParseException("Unexpected end-of-file while parsing tag type")); | |
203 | } else if(c == '!') { | |
204 | return(comment(s)); | |
205 | } else { | |
206 | return(element(s)); | |
207 | } | |
208 | } | |
209 | ||
210 | protected Text text(State s) throws IOException { | |
211 | StringBuilder buf = new StringBuilder(); | |
212 | while(true) { | |
213 | int c = s.in.peek(); | |
214 | if(c < 0) { | |
215 | break; | |
216 | } else if(c == '<') { | |
217 | break; | |
218 | } else if(c == '&') { | |
219 | buf.append(entity(s)); | |
220 | } else { | |
221 | buf.append((char)s.in.read()); | |
222 | } | |
223 | } | |
224 | return(s.doc.createTextNode(buf.toString())); | |
225 | } | |
226 | ||
227 | public DocumentFragment parse(Reader in) throws IOException { | |
228 | State s = new State(in); | |
229 | DocumentFragment frag = s.doc.createDocumentFragment(); | |
230 | while(true) { | |
231 | int c = s.in.peek(); | |
232 | if(c < 0) { | |
233 | return(frag); | |
234 | } else if(c == '<') { | |
235 | s.in.read(); | |
236 | frag.appendChild(stag(s)); | |
237 | } else { | |
238 | frag.appendChild(text(s)); | |
239 | } | |
240 | } | |
241 | } | |
242 | ||
243 | private static String printable(int c) { | |
244 | if(c < 0) | |
245 | return("EOF"); | |
a5e6bd24 FT |
246 | if(c < 32) |
247 | return(String.format("\\%03o", (int)c)); | |
7c0e72ac | 248 | return(Character.toString((char)c)); |
a5e6bd24 FT |
249 | } |
250 | ||
251 | public static void main(String[] args) throws Exception { | |
252 | Parser p = new Parser(); | |
253 | DocumentFragment f = p.parse(new FileReader(args[0])); | |
254 | javax.xml.transform.TransformerFactory fac = javax.xml.transform.TransformerFactory.newInstance(); | |
255 | fac.setAttribute("indent-number", 2); | |
256 | javax.xml.transform.Transformer t = fac.newTransformer(); | |
257 | t.setOutputProperty(javax.xml.transform.OutputKeys.INDENT, "yes"); | |
258 | t.transform(new javax.xml.transform.dom.DOMSource(f), new javax.xml.transform.stream.StreamResult(System.out)); | |
259 | System.out.println(t.getClass()); | |
260 | } | |
261 | } |