package org.json;

/*
Copyright (c) 2002 JSON.org

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

The Software shall be used for Good, not Evil.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

27 | /** |
---|
28 | * The XMLTokener extends the JSONTokener to provide additional methods |
---|
29 | * for the parsing of XML texts. |
---|
30 | * @author JSON.org |
---|
31 | * @version 2008-09-18 |
---|
32 | */ |
---|
33 | public class XMLTokener extends JSONTokener { |
---|
34 | |
---|
35 | |
---|
36 | /** The table of entity values. It initially contains Character values for |
---|
37 | * amp, apos, gt, lt, quot. |
---|
38 | */ |
---|
39 | public static final java.util.HashMap entity; |
---|
40 | |
---|
41 | static { |
---|
42 | entity = new java.util.HashMap(8); |
---|
43 | entity.put("amp", XML.AMP); |
---|
44 | entity.put("apos", XML.APOS); |
---|
45 | entity.put("gt", XML.GT); |
---|
46 | entity.put("lt", XML.LT); |
---|
47 | entity.put("quot", XML.QUOT); |
---|
48 | } |
---|
49 | |
---|
50 | /** |
---|
51 | * Construct an XMLTokener from a string. |
---|
52 | * @param s A source string. |
---|
53 | */ |
---|
54 | public XMLTokener(String s) { |
---|
55 | super(s); |
---|
56 | } |
---|
57 | |
---|
58 | /** |
---|
59 | * Get the text in the CDATA block. |
---|
60 | * @return The string up to the <code>]]></code>. |
---|
61 | * @throws JSONException If the <code>]]></code> is not found. |
---|
62 | */ |
---|
63 | public String nextCDATA() throws JSONException { |
---|
64 | char c; |
---|
65 | int i; |
---|
66 | StringBuffer sb = new StringBuffer(); |
---|
67 | for (;;) { |
---|
68 | c = next(); |
---|
69 | if (c == 0) { |
---|
70 | throw syntaxError("Unclosed CDATA"); |
---|
71 | } |
---|
72 | sb.append(c); |
---|
73 | i = sb.length() - 3; |
---|
74 | if (i >= 0 && sb.charAt(i) == ']' && |
---|
75 | sb.charAt(i + 1) == ']' && sb.charAt(i + 2) == '>') { |
---|
76 | sb.setLength(i); |
---|
77 | return sb.toString(); |
---|
78 | } |
---|
79 | } |
---|
80 | } |
---|
81 | |
---|
82 | |
---|
83 | /** |
---|
84 | * Get the next XML outer token, trimming whitespace. There are two kinds |
---|
85 | * of tokens: the '<' character which begins a markup tag, and the content |
---|
86 | * text between markup tags. |
---|
87 | * |
---|
88 | * @return A string, or a '<' Character, or null if there is no more |
---|
89 | * source text. |
---|
90 | * @throws JSONException |
---|
91 | */ |
---|
92 | public Object nextContent() throws JSONException { |
---|
93 | char c; |
---|
94 | StringBuffer sb; |
---|
95 | do { |
---|
96 | c = next(); |
---|
97 | } while (Character.isWhitespace(c)); |
---|
98 | if (c == 0) { |
---|
99 | return null; |
---|
100 | } |
---|
101 | if (c == '<') { |
---|
102 | return XML.LT; |
---|
103 | } |
---|
104 | sb = new StringBuffer(); |
---|
105 | for (;;) { |
---|
106 | if (c == '<' || c == 0) { |
---|
107 | back(); |
---|
108 | return sb.toString().trim(); |
---|
109 | } |
---|
110 | if (c == '&') { |
---|
111 | sb.append(nextEntity(c)); |
---|
112 | } else { |
---|
113 | sb.append(c); |
---|
114 | } |
---|
115 | c = next(); |
---|
116 | } |
---|
117 | } |
---|
118 | |
---|
119 | |
---|
120 | /** |
---|
121 | * Return the next entity. These entities are translated to Characters: |
---|
122 | * <code>& ' > < "</code>. |
---|
123 | * @param a An ampersand character. |
---|
124 | * @return A Character or an entity String if the entity is not recognized. |
---|
125 | * @throws JSONException If missing ';' in XML entity. |
---|
126 | */ |
---|
127 | public Object nextEntity(char a) throws JSONException { |
---|
128 | StringBuffer sb = new StringBuffer(); |
---|
129 | for (;;) { |
---|
130 | char c = next(); |
---|
131 | if (Character.isLetterOrDigit(c) || c == '#') { |
---|
132 | sb.append(Character.toLowerCase(c)); |
---|
133 | } else if (c == ';') { |
---|
134 | break; |
---|
135 | } else { |
---|
136 | throw syntaxError("Missing ';' in XML entity: &" + sb); |
---|
137 | } |
---|
138 | } |
---|
139 | String s = sb.toString(); |
---|
140 | Object e = entity.get(s); |
---|
141 | return e != null ? e : a + s + ";"; |
---|
142 | } |
---|
143 | |
---|
144 | |
---|
145 | /** |
---|
146 | * Returns the next XML meta token. This is used for skipping over <!...> |
---|
147 | * and <?...?> structures. |
---|
148 | * @return Syntax characters (<code>< > / = ! ?</code>) are returned as |
---|
149 | * Character, and strings and names are returned as Boolean. We don't care |
---|
150 | * what the values actually are. |
---|
151 | * @throws JSONException If a string is not properly closed or if the XML |
---|
152 | * is badly structured. |
---|
153 | */ |
---|
154 | public Object nextMeta() throws JSONException { |
---|
155 | char c; |
---|
156 | char q; |
---|
157 | do { |
---|
158 | c = next(); |
---|
159 | } while (Character.isWhitespace(c)); |
---|
160 | switch (c) { |
---|
161 | case 0: |
---|
162 | throw syntaxError("Misshaped meta tag"); |
---|
163 | case '<': |
---|
164 | return XML.LT; |
---|
165 | case '>': |
---|
166 | return XML.GT; |
---|
167 | case '/': |
---|
168 | return XML.SLASH; |
---|
169 | case '=': |
---|
170 | return XML.EQ; |
---|
171 | case '!': |
---|
172 | return XML.BANG; |
---|
173 | case '?': |
---|
174 | return XML.QUEST; |
---|
175 | case '"': |
---|
176 | case '\'': |
---|
177 | q = c; |
---|
178 | for (;;) { |
---|
179 | c = next(); |
---|
180 | if (c == 0) { |
---|
181 | throw syntaxError("Unterminated string"); |
---|
182 | } |
---|
183 | if (c == q) { |
---|
184 | return Boolean.TRUE; |
---|
185 | } |
---|
186 | } |
---|
187 | default: |
---|
188 | for (;;) { |
---|
189 | c = next(); |
---|
190 | if (Character.isWhitespace(c)) { |
---|
191 | return Boolean.TRUE; |
---|
192 | } |
---|
193 | switch (c) { |
---|
194 | case 0: |
---|
195 | case '<': |
---|
196 | case '>': |
---|
197 | case '/': |
---|
198 | case '=': |
---|
199 | case '!': |
---|
200 | case '?': |
---|
201 | case '"': |
---|
202 | case '\'': |
---|
203 | back(); |
---|
204 | return Boolean.TRUE; |
---|
205 | } |
---|
206 | } |
---|
207 | } |
---|
208 | } |
---|
209 | |
---|
210 | |
---|
211 | /** |
---|
212 | * Get the next XML Token. These tokens are found inside of angle |
---|
213 | * brackets. It may be one of these characters: <code>/ > = ! ?</code> or it |
---|
214 | * may be a string wrapped in single quotes or double quotes, or it may be a |
---|
215 | * name. |
---|
216 | * @return a String or a Character. |
---|
217 | * @throws JSONException If the XML is not well formed. |
---|
218 | */ |
---|
219 | public Object nextToken() throws JSONException { |
---|
220 | char c; |
---|
221 | char q; |
---|
222 | StringBuffer sb; |
---|
223 | do { |
---|
224 | c = next(); |
---|
225 | } while (Character.isWhitespace(c)); |
---|
226 | switch (c) { |
---|
227 | case 0: |
---|
228 | throw syntaxError("Misshaped element"); |
---|
229 | case '<': |
---|
230 | throw syntaxError("Misplaced '<'"); |
---|
231 | case '>': |
---|
232 | return XML.GT; |
---|
233 | case '/': |
---|
234 | return XML.SLASH; |
---|
235 | case '=': |
---|
236 | return XML.EQ; |
---|
237 | case '!': |
---|
238 | return XML.BANG; |
---|
239 | case '?': |
---|
240 | return XML.QUEST; |
---|
241 | |
---|
242 | // Quoted string |
---|
243 | |
---|
244 | case '"': |
---|
245 | case '\'': |
---|
246 | q = c; |
---|
247 | sb = new StringBuffer(); |
---|
248 | for (;;) { |
---|
249 | c = next(); |
---|
250 | if (c == 0) { |
---|
251 | throw syntaxError("Unterminated string"); |
---|
252 | } |
---|
253 | if (c == q) { |
---|
254 | return sb.toString(); |
---|
255 | } |
---|
256 | if (c == '&') { |
---|
257 | sb.append(nextEntity(c)); |
---|
258 | } else { |
---|
259 | sb.append(c); |
---|
260 | } |
---|
261 | } |
---|
262 | default: |
---|
263 | |
---|
264 | // Name |
---|
265 | |
---|
266 | sb = new StringBuffer(); |
---|
267 | for (;;) { |
---|
268 | sb.append(c); |
---|
269 | c = next(); |
---|
270 | if (Character.isWhitespace(c)) { |
---|
271 | return sb.toString(); |
---|
272 | } |
---|
273 | switch (c) { |
---|
274 | case 0: |
---|
275 | return sb.toString(); |
---|
276 | case '>': |
---|
277 | case '/': |
---|
278 | case '=': |
---|
279 | case '!': |
---|
280 | case '?': |
---|
281 | case '[': |
---|
282 | case ']': |
---|
283 | back(); |
---|
284 | return sb.toString(); |
---|
285 | case '<': |
---|
286 | case '"': |
---|
287 | case '\'': |
---|
288 | throw syntaxError("Bad character in a name"); |
---|
289 | } |
---|
290 | } |
---|
291 | } |
---|
292 | } |
---|
293 | |
---|
294 | |
---|
295 | /** |
---|
296 | * Skip characters until past the requested string. |
---|
297 | * If it is not found, we are left at the end of the source with a result of false. |
---|
298 | * @param to A string to skip past. |
---|
299 | * @throws JSONException |
---|
300 | */ |
---|
301 | public boolean skipPast(String to) throws JSONException { |
---|
302 | boolean b; |
---|
303 | char c; |
---|
304 | int i; |
---|
305 | int j; |
---|
306 | int offset = 0; |
---|
307 | int n = to.length(); |
---|
308 | char[] circle = new char[n]; |
---|
309 | |
---|
310 | /* |
---|
311 | * First fill the circle buffer with as many characters as are in the |
---|
312 | * to string. If we reach an early end, bail. |
---|
313 | */ |
---|
314 | |
---|
315 | for (i = 0; i < n; i += 1) { |
---|
316 | c = next(); |
---|
317 | if (c == 0) { |
---|
318 | return false; |
---|
319 | } |
---|
320 | circle[i] = c; |
---|
321 | } |
---|
322 | /* |
---|
323 | * We will loop, possibly for all of the remaining characters. |
---|
324 | */ |
---|
325 | for (;;) { |
---|
326 | j = offset; |
---|
327 | b = true; |
---|
328 | /* |
---|
329 | * Compare the circle buffer with the to string. |
---|
330 | */ |
---|
331 | for (i = 0; i < n; i += 1) { |
---|
332 | if (circle[j] != to.charAt(i)) { |
---|
333 | b = false; |
---|
334 | break; |
---|
335 | } |
---|
336 | j += 1; |
---|
337 | if (j >= n) { |
---|
338 | j -= n; |
---|
339 | } |
---|
340 | } |
---|
341 | /* |
---|
342 | * If we exit the loop with b intact, then victory is ours. |
---|
343 | */ |
---|
344 | if (b) { |
---|
345 | return true; |
---|
346 | } |
---|
347 | /* |
---|
348 | * Get the next character. If there isn't one, then defeat is ours. |
---|
349 | */ |
---|
350 | c = next(); |
---|
351 | if (c == 0) { |
---|
352 | return false; |
---|
353 | } |
---|
354 | /* |
---|
355 | * Shove the character in the circle buffer and advance the |
---|
356 | * circle offset. The offset is mod n. |
---|
357 | */ |
---|
358 | circle[offset] = c; |
---|
359 | offset += 1; |
---|
360 | if (offset >= n) { |
---|
361 | offset -= n; |
---|
362 | } |
---|
363 | } |
---|
364 | } |
---|
365 | } |
---|