1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46 package groovy.util;
47
48 import groovy.xml.QName;
49
50 import java.io.File;
51 import java.io.FileInputStream;
52 import java.io.IOException;
53 import java.io.InputStream;
54 import java.io.Reader;
55 import java.io.StringReader;
56 import java.security.AccessController;
57 import java.security.PrivilegedActionException;
58 import java.security.PrivilegedExceptionAction;
59 import java.util.ArrayList;
60 import java.util.HashMap;
61 import java.util.List;
62 import java.util.Map;
63
64 import javax.xml.parsers.ParserConfigurationException;
65 import javax.xml.parsers.SAXParser;
66 import javax.xml.parsers.SAXParserFactory;
67
68 import org.xml.sax.Attributes;
69 import org.xml.sax.ContentHandler;
70 import org.xml.sax.InputSource;
71 import org.xml.sax.Locator;
72 import org.xml.sax.SAXException;
73 import org.xml.sax.XMLReader;
74
75 /***
76 * A helper class for parsing XML into a tree of Node instances for
77 * a simple way of processing XML. This parser does not preserve the
78 * XML InfoSet - if thats what you need try using W3C DOM, dom4j, JDOM, XOM etc.
79 * This parser ignores comments and processing instructions and converts the
80 * XML into a Node for each element in the XML with attributes
81 * and child Nodes and Strings. This simple model is sufficient for
82 * most simple use cases of processing XML.
83 *
84 * @author <a href="mailto:james@coredevelopers.net">James Strachan</a>
85 * @version $Revision: 1.4 $
86 */
87 public class XmlParser implements ContentHandler {
88
89 private StringBuffer bodyText = new StringBuffer();
90 private List stack = new ArrayList();
91 private Locator locator;
92 private XMLReader reader;
93 private Node parent;
94 private boolean trimWhitespace = true;
95
96 public XmlParser() throws ParserConfigurationException, SAXException {
97 this(false, true);
98 }
99
100 public XmlParser(boolean validating, boolean namespaceAware) throws ParserConfigurationException, SAXException {
101 SAXParserFactory factory = null;
102 try {
103 factory = (SAXParserFactory) AccessController.doPrivileged(new PrivilegedExceptionAction() {
104 public Object run() throws ParserConfigurationException {
105 return SAXParserFactory.newInstance();
106 }
107 });
108 } catch (PrivilegedActionException pae) {
109 Exception e = pae.getException();
110 if (e instanceof ParserConfigurationException) {
111 throw (ParserConfigurationException) e;
112 } else {
113 throw new RuntimeException(e);
114 }
115 }
116 factory.setNamespaceAware(namespaceAware);
117 factory.setValidating(validating);
118
119 SAXParser parser = factory.newSAXParser();
120 reader = parser.getXMLReader();
121 }
122
123 public XmlParser(XMLReader reader) {
124 this.reader = reader;
125 }
126
127 public XmlParser(SAXParser parser) throws SAXException {
128 reader = parser.getXMLReader();
129 }
130
131
132 /***
133 * Parses the content of the given file as XML turning it into a tree
134 * of Nodes
135 */
136 public Node parse(File file) throws IOException, SAXException {
137
138 InputSource input = new InputSource(new FileInputStream(file));
139 input.setSystemId("file://" + file.getAbsolutePath());
140 getXMLReader().parse(input);
141 return parent;
142
143 }
144
145 /***
146 * Parse the content of the specified input source into a tree of Nodes.
147 */
148 public Node parse(InputSource input) throws IOException, SAXException {
149 getXMLReader().parse(input);
150 return parent;
151 }
152
153 /***
154 * Parse the content of the specified input stream into a tree of Nodes.
155 * Note that using this method will not provide the parser with any URI
156 * for which to find DTDs etc
157 */
158 public Node parse(InputStream input) throws IOException, SAXException {
159 InputSource is = new InputSource(input);
160 getXMLReader().parse(is);
161 return parent;
162 }
163
164 /***
165 * Parse the content of the specified reader into a tree of Nodes.
166 * Note that using this method will not provide the parser with any URI
167 * for which to find DTDs etc
168 */
169 public Node parse(Reader in) throws IOException, SAXException {
170 InputSource is = new InputSource(in);
171 getXMLReader().parse(is);
172 return parent;
173 }
174
175 /***
176 * Parse the content of the specified URI into a tree of Nodes
177 */
178 public Node parse(String uri) throws IOException, SAXException {
179 InputSource is = new InputSource(uri);
180 getXMLReader().parse(is);
181 return parent;
182 }
183
184 /***
185 * A helper method to parse the given text as XML
186 *
187 * @param text
188 * @return
189 */
190 public Node parseText(String text) throws IOException, SAXException {
191 return parse(new StringReader(text));
192 }
193
194
195
196
197 public void startDocument() throws SAXException {
198 parent = null;
199 }
200
201 public void endDocument() throws SAXException {
202 stack.clear();
203 }
204
205 public void startElement(String namespaceURI, String localName, String qName, Attributes list)
206 throws SAXException {
207 addTextToNode();
208
209 Object name = getElementName(namespaceURI, localName, qName);
210
211 int size = list.getLength();
212 Map attributes = new HashMap(size);
213 for (int i = 0; i < size; i++) {
214 Object attributeName = getElementName(list.getURI(i), list.getLocalName(i), list.getQName(i));
215 String value = list.getValue(i);
216 attributes.put(attributeName, value);
217 }
218 parent = new Node(parent, name, attributes, new ArrayList());
219 stack.add(parent);
220 }
221
222 public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
223 addTextToNode();
224
225 if (!stack.isEmpty()) {
226 stack.remove(stack.size() - 1);
227 if (!stack.isEmpty()) {
228 parent = (Node) stack.get(stack.size() - 1);
229 }
230 }
231 }
232
233 public void characters(char buffer[], int start, int length) throws SAXException {
234 bodyText.append(buffer, start, length);
235 }
236
237 public void startPrefixMapping(String prefix, String namespaceURI) throws SAXException {
238 }
239
240 public void endPrefixMapping(String prefix) throws SAXException {
241 }
242
243 public void ignorableWhitespace(char buffer[], int start, int len) throws SAXException {
244 }
245
246 public void processingInstruction(String target, String data) throws SAXException {
247 }
248
249 public Locator getDocumentLocator() {
250 return locator;
251 }
252
253 public void setDocumentLocator(Locator locator) {
254 this.locator = locator;
255 }
256
257 public void skippedEntity(String name) throws SAXException {
258 }
259
260
261
262 protected XMLReader getXMLReader() {
263 reader.setContentHandler(this);
264 return reader;
265 }
266
267 protected void addTextToNode() {
268 String text = bodyText.toString();
269 if (trimWhitespace) {
270 text = text.trim();
271 }
272 if (text.length() > 0) {
273 parent.children().add(text);
274 }
275 bodyText = new StringBuffer();
276 }
277
278 protected Object getElementName(String namespaceURI, String localName, String qName) throws SAXException {
279 String name = localName;
280 if ((name == null) || (name.length() < 1)) {
281 name = qName;
282 }
283 if (namespaceURI == null || namespaceURI.length() <= 0) {
284 return name;
285 }
286 else {
287 return new QName(namespaceURI, name, qName);
288 }
289 }
290 }