25
|
/*
The contents of this file are subject to the Mozilla Public License
Version 1.1 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/

Software distributed under the License is distributed on an "AS IS"
basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific language governing rights and limitations
under the License.

The Original Code is expat.

The Initial Developer of the Original Code is James Clark.
Portions created by James Clark are Copyright (C) 1998, 1999
James Clark. All Rights Reserved.

Contributor(s):

Alternatively, the contents of this file may be used under the terms
of the GNU General Public License (the "GPL"), in which case the
provisions of the GPL are applicable instead of those above.  If you
wish to allow use of your version of this file only under the terms of
the GPL and not to allow others to use your version of this file under
the MPL, indicate your decision by deleting the provisions above and
replace them with the notice and other provisions required by the
GPL. If you do not delete the provisions above, a recipient may use
your version of this file under either the MPL or the GPL.
*/
|
30 |
|
/* Default used when the including file has not supplied its own
   IS_INVALID_CHAR: the expansion is the constant 0, so no multi-byte
   sequence is ever reported as invalid. */
#if !defined(IS_INVALID_CHAR)
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
|
34 |
|
/* Expands to the `case BT_LEAD<n>:` arm of a switch over byte types.
   `enc` and `end` are not parameters; they must be in scope wherever the
   macro is expanded.
   - Fewer than n bytes remain before end: returns XML_TOK_PARTIAL_CHAR.
   - IS_INVALID_CHAR reports the n-byte sequence as invalid: stores ptr in
     *nextTokPtr and returns XML_TOK_INVALID.
   - Otherwise: advances ptr by n and executes `break`.
   n must be a literal token (it is pasted onto BT_LEAD) and ptr must be a
   modifiable lvalue.  Every use of a parameter is parenthesized so that
   the expansion does not depend on the precedence of the argument
   expressions. */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
  case BT_LEAD ## n: \
    if (end - (ptr) < (n)) \
      return XML_TOK_PARTIAL_CHAR; \
    if (IS_INVALID_CHAR(enc, ptr, n)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    (ptr) += (n); \
    break;
|
45 |
|
/* Expands to the switch arms that reject characters which may not appear
   in the construct being scanned:
   - BT_LEAD2, BT_LEAD3, BT_LEAD4 are handled by INVALID_LEAD_CASE
     (truncated sequence, invalid sequence, or skip the whole sequence);
   - BT_NONXML, BT_MALFORM and BT_TRAIL store ptr in *nextTokPtr and
     return XML_TOK_INVALID.
   The order of the case labels carries no meaning. */
#define INVALID_CASES(ptr, nextTokPtr) \
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  case BT_TRAIL: \
  case BT_MALFORM: \
  case BT_NONXML: \
    *(nextTokPtr) = (ptr); \
    return XML_TOK_INVALID;
|
55 |
|
/* Expands to the `case BT_LEAD<n>:` arm used while scanning the interior
   of a name.
   - Fewer than n bytes remain before end: returns XML_TOK_PARTIAL_CHAR.
   - IS_NAME_CHAR rejects the n-byte character: stores ptr in *nextTokPtr
     and returns XML_TOK_INVALID.
   - Otherwise: advances ptr by n and executes `break`.
   n must be a literal token (it is pasted onto BT_LEAD) and ptr must be a
   modifiable lvalue.  Parameters are parenthesized at every use, matching
   INVALID_LEAD_CASE. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
  case BT_LEAD ## n: \
    if ((end) - (ptr) < (n)) \
      return XML_TOK_PARTIAL_CHAR; \
    if (!IS_NAME_CHAR(enc, ptr, n)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    (ptr) += (n); \
    break;
|
66 |
|
/* Expands to the switch arms that accept one name character.
   - BT_NONASCII: stores ptr in *nextTokPtr and returns XML_TOK_INVALID
     unless IS_NAME_CHAR_MINBPC accepts the character; an accepted
     character deliberately falls through to the arm below.
   - BT_NMSTRT, BT_HEX, BT_DIGIT, BT_NAME, BT_MINUS: advance ptr by
     MINBPC(enc) and execute `break`.
   - BT_LEAD2, BT_LEAD3, BT_LEAD4: handled by CHECK_NAME_CASE.
   ptr must be a modifiable lvalue.  Parameters are parenthesized where
   this macro uses them directly. */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
  case BT_DIGIT: \
  case BT_NAME: \
  case BT_MINUS: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
|
83 |
|
/* Expands to the `case BT_LEAD<n>:` arm used for the first character of a
   name.
   - Fewer than n bytes remain before end: returns XML_TOK_PARTIAL_CHAR.
   - IS_NMSTRT_CHAR rejects the n-byte character: stores ptr in
     *nextTokPtr and returns XML_TOK_INVALID.
   - Otherwise: advances ptr by n and executes `break`.
   n must be a literal token (it is pasted onto BT_LEAD) and ptr must be a
   modifiable lvalue.  Parameters are parenthesized at every use, matching
   INVALID_LEAD_CASE. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
  case BT_LEAD ## n: \
    if ((end) - (ptr) < (n)) \
      return XML_TOK_PARTIAL_CHAR; \
    if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    (ptr) += (n); \
    break;
|
94 |
|
/* Expands to the switch arms that accept one name-start character.
   - BT_NONASCII: stores ptr in *nextTokPtr and returns XML_TOK_INVALID
     unless IS_NMSTRT_CHAR_MINBPC accepts the character; an accepted
     character deliberately falls through to the arm below.
   - BT_NMSTRT, BT_HEX: advance ptr by MINBPC(enc) and execute `break`.
   - BT_LEAD2, BT_LEAD3, BT_LEAD4: handled by CHECK_NMSTRT_CASE.
   ptr must be a modifiable lvalue.  Parameters are parenthesized where
   this macro uses them directly. */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
|
108 |
|
/* Default used when the including file has not supplied its own PREFIX:
   function names in this file are used unchanged. */
#if !defined(PREFIX)
#define PREFIX(ident) ident
#endif
|
112 |
|
113 /* ptr points to character following "<!-" */ |
|
114 |
|
115 static |
|
116 int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end, |
|
117 const char **nextTokPtr) |
|
118 { |
|
119 if (ptr != end) { |
|
120 if (!CHAR_MATCHES(enc, ptr, '-')) { |
|
121 *nextTokPtr = ptr; |
|
122 return XML_TOK_INVALID; |
|
123 } |
|
124 ptr += MINBPC(enc); |
|
125 while (ptr != end) { |
|
126 switch (BYTE_TYPE(enc, ptr)) { |
|
127 INVALID_CASES(ptr, nextTokPtr) |
|
128 case BT_MINUS: |
|
129 if ((ptr += MINBPC(enc)) == end) |
|
130 return XML_TOK_PARTIAL; |
|
131 if (CHAR_MATCHES(enc, ptr, '-')) { |
|
132 if ((ptr += MINBPC(enc)) == end) |
|
133 return XML_TOK_PARTIAL; |
|
134 if (!CHAR_MATCHES(enc, ptr, '>')) { |
|
135 *nextTokPtr = ptr; |
|
136 return XML_TOK_INVALID; |
|
137 } |
|
138 *nextTokPtr = ptr + MINBPC(enc); |
|
139 return XML_TOK_COMMENT; |
|
140 } |
|
141 break; |
|
142 default: |
|
143 ptr += MINBPC(enc); |
|
144 break; |
|
145 } |
|
146 } |
|
147 } |
|
148 return XML_TOK_PARTIAL; |
|
149 } |
|
150 |
|
151 /* ptr points to character following "<!" */ |
|
152 |
|
153 static |
|
154 int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end, |
|
155 const char **nextTokPtr) |
|
156 { |
|
157 if (ptr == end) |
|
158 return XML_TOK_PARTIAL; |
|
159 switch (BYTE_TYPE(enc, ptr)) { |
|
160 case BT_MINUS: |
|
161 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
162 case BT_LSQB: |
|
163 *nextTokPtr = ptr + MINBPC(enc); |
|
164 return XML_TOK_COND_SECT_OPEN; |
|
165 case BT_NMSTRT: |
|
166 case BT_HEX: |
|
167 ptr += MINBPC(enc); |
|
168 break; |
|
169 default: |
|
170 *nextTokPtr = ptr; |
|
171 return XML_TOK_INVALID; |
|
172 } |
|
173 while (ptr != end) { |
|
174 switch (BYTE_TYPE(enc, ptr)) { |
|
175 case BT_PERCNT: |
|
176 if (ptr + MINBPC(enc) == end) |
|
177 return XML_TOK_PARTIAL; |
|
178 /* don't allow <!ENTITY% foo "whatever"> */ |
|
179 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { |
|
180 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: |
|
181 *nextTokPtr = ptr; |
|
182 return XML_TOK_INVALID; |
|
183 } |
|
184 /* fall through */ |
|
185 case BT_S: case BT_CR: case BT_LF: |
|
186 *nextTokPtr = ptr; |
|
187 return XML_TOK_DECL_OPEN; |
|
188 case BT_NMSTRT: |
|
189 case BT_HEX: |
|
190 ptr += MINBPC(enc); |
|
191 break; |
|
192 default: |
|
193 *nextTokPtr = ptr; |
|
194 return XML_TOK_INVALID; |
|
195 } |
|
196 } |
|
197 return XML_TOK_PARTIAL; |
|
198 } |
|
199 |
|
200 static |
|
201 int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr) |
|
202 { |
|
203 int upper = 0; |
|
204 *tokPtr = XML_TOK_PI; |
|
205 if (end - ptr != MINBPC(enc)*3) |
|
206 return 1; |
|
207 switch (BYTE_TO_ASCII(enc, ptr)) { |
|
208 case 'x': |
|
209 break; |
|
210 case 'X': |
|
211 upper = 1; |
|
212 break; |
|
213 default: |
|
214 return 1; |
|
215 } |
|
216 ptr += MINBPC(enc); |
|
217 switch (BYTE_TO_ASCII(enc, ptr)) { |
|
218 case 'm': |
|
219 break; |
|
220 case 'M': |
|
221 upper = 1; |
|
222 break; |
|
223 default: |
|
224 return 1; |
|
225 } |
|
226 ptr += MINBPC(enc); |
|
227 switch (BYTE_TO_ASCII(enc, ptr)) { |
|
228 case 'l': |
|
229 break; |
|
230 case 'L': |
|
231 upper = 1; |
|
232 break; |
|
233 default: |
|
234 return 1; |
|
235 } |
|
236 if (upper) |
|
237 return 0; |
|
238 *tokPtr = XML_TOK_XML_DECL; |
|
239 return 1; |
|
240 } |
|
241 |
|
242 /* ptr points to character following "<?" */ |
|
243 |
|
244 static |
|
245 int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end, |
|
246 const char **nextTokPtr) |
|
247 { |
|
248 int tok; |
|
249 const char *target = ptr; |
|
250 if (ptr == end) |
|
251 return XML_TOK_PARTIAL; |
|
252 switch (BYTE_TYPE(enc, ptr)) { |
|
253 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
254 default: |
|
255 *nextTokPtr = ptr; |
|
256 return XML_TOK_INVALID; |
|
257 } |
|
258 while (ptr != end) { |
|
259 switch (BYTE_TYPE(enc, ptr)) { |
|
260 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
261 case BT_S: case BT_CR: case BT_LF: |
|
262 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
|
263 *nextTokPtr = ptr; |
|
264 return XML_TOK_INVALID; |
|
265 } |
|
266 ptr += MINBPC(enc); |
|
267 while (ptr != end) { |
|
268 switch (BYTE_TYPE(enc, ptr)) { |
|
269 INVALID_CASES(ptr, nextTokPtr) |
|
270 case BT_QUEST: |
|
271 ptr += MINBPC(enc); |
|
272 if (ptr == end) |
|
273 return XML_TOK_PARTIAL; |
|
274 if (CHAR_MATCHES(enc, ptr, '>')) { |
|
275 *nextTokPtr = ptr + MINBPC(enc); |
|
276 return tok; |
|
277 } |
|
278 break; |
|
279 default: |
|
280 ptr += MINBPC(enc); |
|
281 break; |
|
282 } |
|
283 } |
|
284 return XML_TOK_PARTIAL; |
|
285 case BT_QUEST: |
|
286 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) { |
|
287 *nextTokPtr = ptr; |
|
288 return XML_TOK_INVALID; |
|
289 } |
|
290 ptr += MINBPC(enc); |
|
291 if (ptr == end) |
|
292 return XML_TOK_PARTIAL; |
|
293 if (CHAR_MATCHES(enc, ptr, '>')) { |
|
294 *nextTokPtr = ptr + MINBPC(enc); |
|
295 return tok; |
|
296 } |
|
297 /* fall through */ |
|
298 default: |
|
299 *nextTokPtr = ptr; |
|
300 return XML_TOK_INVALID; |
|
301 } |
|
302 } |
|
303 return XML_TOK_PARTIAL; |
|
304 } |
|
305 |
|
306 |
|
307 static |
|
308 int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end, |
|
309 const char **nextTokPtr) |
|
310 { |
|
311 int i; |
|
312 /* CDATA[ */ |
|
313 if (end - ptr < 6 * MINBPC(enc)) |
|
314 return XML_TOK_PARTIAL; |
|
315 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { |
|
316 if (!CHAR_MATCHES(enc, ptr, "CDATA["[i])) { |
|
317 *nextTokPtr = ptr; |
|
318 return XML_TOK_INVALID; |
|
319 } |
|
320 } |
|
321 *nextTokPtr = ptr; |
|
322 return XML_TOK_CDATA_SECT_OPEN; |
|
323 } |
|
324 |
|
325 static |
|
326 int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, |
|
327 const char **nextTokPtr) |
|
328 { |
|
329 if (ptr == end) |
|
330 return XML_TOK_NONE; |
|
331 if (MINBPC(enc) > 1) { |
|
332 size_t n = end - ptr; |
|
333 if (n & (MINBPC(enc) - 1)) { |
|
334 n &= ~(MINBPC(enc) - 1); |
|
335 if (n == 0) |
|
336 return XML_TOK_PARTIAL; |
|
337 end = ptr + n; |
|
338 } |
|
339 } |
|
340 switch (BYTE_TYPE(enc, ptr)) { |
|
341 case BT_RSQB: |
|
342 ptr += MINBPC(enc); |
|
343 if (ptr == end) |
|
344 return XML_TOK_PARTIAL; |
|
345 if (!CHAR_MATCHES(enc, ptr, ']')) |
|
346 break; |
|
347 ptr += MINBPC(enc); |
|
348 if (ptr == end) |
|
349 return XML_TOK_PARTIAL; |
|
350 if (!CHAR_MATCHES(enc, ptr, '>')) { |
|
351 ptr -= MINBPC(enc); |
|
352 break; |
|
353 } |
|
354 *nextTokPtr = ptr + MINBPC(enc); |
|
355 return XML_TOK_CDATA_SECT_CLOSE; |
|
356 case BT_CR: |
|
357 ptr += MINBPC(enc); |
|
358 if (ptr == end) |
|
359 return XML_TOK_PARTIAL; |
|
360 if (BYTE_TYPE(enc, ptr) == BT_LF) |
|
361 ptr += MINBPC(enc); |
|
362 *nextTokPtr = ptr; |
|
363 return XML_TOK_DATA_NEWLINE; |
|
364 case BT_LF: |
|
365 *nextTokPtr = ptr + MINBPC(enc); |
|
366 return XML_TOK_DATA_NEWLINE; |
|
367 INVALID_CASES(ptr, nextTokPtr) |
|
368 default: |
|
369 ptr += MINBPC(enc); |
|
370 break; |
|
371 } |
|
372 while (ptr != end) { |
|
373 switch (BYTE_TYPE(enc, ptr)) { |
|
374 #define LEAD_CASE(n) \ |
|
375 case BT_LEAD ## n: \ |
|
376 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
|
377 *nextTokPtr = ptr; \ |
|
378 return XML_TOK_DATA_CHARS; \ |
|
379 } \ |
|
380 ptr += n; \ |
|
381 break; |
|
382 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
383 #undef LEAD_CASE |
|
384 case BT_NONXML: |
|
385 case BT_MALFORM: |
|
386 case BT_TRAIL: |
|
387 case BT_CR: |
|
388 case BT_LF: |
|
389 case BT_RSQB: |
|
390 *nextTokPtr = ptr; |
|
391 return XML_TOK_DATA_CHARS; |
|
392 default: |
|
393 ptr += MINBPC(enc); |
|
394 break; |
|
395 } |
|
396 } |
|
397 *nextTokPtr = ptr; |
|
398 return XML_TOK_DATA_CHARS; |
|
399 } |
|
400 |
|
401 /* ptr points to character following "</" */ |
|
402 |
|
403 static |
|
404 int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, |
|
405 const char **nextTokPtr) |
|
406 { |
|
407 if (ptr == end) |
|
408 return XML_TOK_PARTIAL; |
|
409 switch (BYTE_TYPE(enc, ptr)) { |
|
410 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
411 default: |
|
412 *nextTokPtr = ptr; |
|
413 return XML_TOK_INVALID; |
|
414 } |
|
415 while (ptr != end) { |
|
416 switch (BYTE_TYPE(enc, ptr)) { |
|
417 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
418 case BT_S: case BT_CR: case BT_LF: |
|
419 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { |
|
420 switch (BYTE_TYPE(enc, ptr)) { |
|
421 case BT_S: case BT_CR: case BT_LF: |
|
422 break; |
|
423 case BT_GT: |
|
424 *nextTokPtr = ptr + MINBPC(enc); |
|
425 return XML_TOK_END_TAG; |
|
426 default: |
|
427 *nextTokPtr = ptr; |
|
428 return XML_TOK_INVALID; |
|
429 } |
|
430 } |
|
431 return XML_TOK_PARTIAL; |
|
432 #ifdef XML_NS |
|
433 case BT_COLON: |
|
434 /* no need to check qname syntax here, since end-tag must match exactly */ |
|
435 ptr += MINBPC(enc); |
|
436 break; |
|
437 #endif |
|
438 case BT_GT: |
|
439 *nextTokPtr = ptr + MINBPC(enc); |
|
440 return XML_TOK_END_TAG; |
|
441 default: |
|
442 *nextTokPtr = ptr; |
|
443 return XML_TOK_INVALID; |
|
444 } |
|
445 } |
|
446 return XML_TOK_PARTIAL; |
|
447 } |
|
448 |
|
449 /* ptr points to character following "&#X" */ |
|
450 |
|
451 static |
|
452 int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end, |
|
453 const char **nextTokPtr) |
|
454 { |
|
455 if (ptr != end) { |
|
456 switch (BYTE_TYPE(enc, ptr)) { |
|
457 case BT_DIGIT: |
|
458 case BT_HEX: |
|
459 break; |
|
460 default: |
|
461 *nextTokPtr = ptr; |
|
462 return XML_TOK_INVALID; |
|
463 } |
|
464 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { |
|
465 switch (BYTE_TYPE(enc, ptr)) { |
|
466 case BT_DIGIT: |
|
467 case BT_HEX: |
|
468 break; |
|
469 case BT_SEMI: |
|
470 *nextTokPtr = ptr + MINBPC(enc); |
|
471 return XML_TOK_CHAR_REF; |
|
472 default: |
|
473 *nextTokPtr = ptr; |
|
474 return XML_TOK_INVALID; |
|
475 } |
|
476 } |
|
477 } |
|
478 return XML_TOK_PARTIAL; |
|
479 } |
|
480 |
|
481 /* ptr points to character following "&#" */ |
|
482 |
|
483 static |
|
484 int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, |
|
485 const char **nextTokPtr) |
|
486 { |
|
487 if (ptr != end) { |
|
488 if (CHAR_MATCHES(enc, ptr, 'x')) |
|
489 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
490 switch (BYTE_TYPE(enc, ptr)) { |
|
491 case BT_DIGIT: |
|
492 break; |
|
493 default: |
|
494 *nextTokPtr = ptr; |
|
495 return XML_TOK_INVALID; |
|
496 } |
|
497 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { |
|
498 switch (BYTE_TYPE(enc, ptr)) { |
|
499 case BT_DIGIT: |
|
500 break; |
|
501 case BT_SEMI: |
|
502 *nextTokPtr = ptr + MINBPC(enc); |
|
503 return XML_TOK_CHAR_REF; |
|
504 default: |
|
505 *nextTokPtr = ptr; |
|
506 return XML_TOK_INVALID; |
|
507 } |
|
508 } |
|
509 } |
|
510 return XML_TOK_PARTIAL; |
|
511 } |
|
512 |
|
513 /* ptr points to character following "&" */ |
|
514 |
|
515 static |
|
516 int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, |
|
517 const char **nextTokPtr) |
|
518 { |
|
519 if (ptr == end) |
|
520 return XML_TOK_PARTIAL; |
|
521 switch (BYTE_TYPE(enc, ptr)) { |
|
522 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
523 case BT_NUM: |
|
524 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
525 default: |
|
526 *nextTokPtr = ptr; |
|
527 return XML_TOK_INVALID; |
|
528 } |
|
529 while (ptr != end) { |
|
530 switch (BYTE_TYPE(enc, ptr)) { |
|
531 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
532 case BT_SEMI: |
|
533 *nextTokPtr = ptr + MINBPC(enc); |
|
534 return XML_TOK_ENTITY_REF; |
|
535 default: |
|
536 *nextTokPtr = ptr; |
|
537 return XML_TOK_INVALID; |
|
538 } |
|
539 } |
|
540 return XML_TOK_PARTIAL; |
|
541 } |
|
542 |
|
543 /* ptr points to character following first character of attribute name */ |
|
544 |
|
545 static |
|
546 int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, |
|
547 const char **nextTokPtr) |
|
548 { |
|
549 #ifdef XML_NS |
|
550 int hadColon = 0; |
|
551 #endif |
|
552 while (ptr != end) { |
|
553 switch (BYTE_TYPE(enc, ptr)) { |
|
554 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
555 #ifdef XML_NS |
|
556 case BT_COLON: |
|
557 if (hadColon) { |
|
558 *nextTokPtr = ptr; |
|
559 return XML_TOK_INVALID; |
|
560 } |
|
561 hadColon = 1; |
|
562 ptr += MINBPC(enc); |
|
563 if (ptr == end) |
|
564 return XML_TOK_PARTIAL; |
|
565 switch (BYTE_TYPE(enc, ptr)) { |
|
566 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
567 default: |
|
568 *nextTokPtr = ptr; |
|
569 return XML_TOK_INVALID; |
|
570 } |
|
571 break; |
|
572 #endif |
|
573 case BT_S: case BT_CR: case BT_LF: |
|
574 for (;;) { |
|
575 int t; |
|
576 |
|
577 ptr += MINBPC(enc); |
|
578 if (ptr == end) |
|
579 return XML_TOK_PARTIAL; |
|
580 t = BYTE_TYPE(enc, ptr); |
|
581 if (t == BT_EQUALS) |
|
582 break; |
|
583 switch (t) { |
|
584 case BT_S: |
|
585 case BT_LF: |
|
586 case BT_CR: |
|
587 break; |
|
588 default: |
|
589 *nextTokPtr = ptr; |
|
590 return XML_TOK_INVALID; |
|
591 } |
|
592 } |
|
593 /* fall through */ |
|
594 case BT_EQUALS: |
|
595 { |
|
596 int open; |
|
597 #ifdef XML_NS |
|
598 hadColon = 0; |
|
599 #endif |
|
600 for (;;) { |
|
601 |
|
602 ptr += MINBPC(enc); |
|
603 if (ptr == end) |
|
604 return XML_TOK_PARTIAL; |
|
605 open = BYTE_TYPE(enc, ptr); |
|
606 if (open == BT_QUOT || open == BT_APOS) |
|
607 break; |
|
608 switch (open) { |
|
609 case BT_S: |
|
610 case BT_LF: |
|
611 case BT_CR: |
|
612 break; |
|
613 default: |
|
614 *nextTokPtr = ptr; |
|
615 return XML_TOK_INVALID; |
|
616 } |
|
617 } |
|
618 ptr += MINBPC(enc); |
|
619 /* in attribute value */ |
|
620 for (;;) { |
|
621 int t; |
|
622 if (ptr == end) |
|
623 return XML_TOK_PARTIAL; |
|
624 t = BYTE_TYPE(enc, ptr); |
|
625 if (t == open) |
|
626 break; |
|
627 switch (t) { |
|
628 INVALID_CASES(ptr, nextTokPtr) |
|
629 case BT_AMP: |
|
630 { |
|
631 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr); |
|
632 if (tok <= 0) { |
|
633 if (tok == XML_TOK_INVALID) |
|
634 *nextTokPtr = ptr; |
|
635 return tok; |
|
636 } |
|
637 break; |
|
638 } |
|
639 case BT_LT: |
|
640 *nextTokPtr = ptr; |
|
641 return XML_TOK_INVALID; |
|
642 default: |
|
643 ptr += MINBPC(enc); |
|
644 break; |
|
645 } |
|
646 } |
|
647 ptr += MINBPC(enc); |
|
648 if (ptr == end) |
|
649 return XML_TOK_PARTIAL; |
|
650 switch (BYTE_TYPE(enc, ptr)) { |
|
651 case BT_S: |
|
652 case BT_CR: |
|
653 case BT_LF: |
|
654 break; |
|
655 case BT_SOL: |
|
656 goto sol; |
|
657 case BT_GT: |
|
658 goto gt; |
|
659 default: |
|
660 *nextTokPtr = ptr; |
|
661 return XML_TOK_INVALID; |
|
662 } |
|
663 /* ptr points to closing quote */ |
|
664 for (;;) { |
|
665 ptr += MINBPC(enc); |
|
666 if (ptr == end) |
|
667 return XML_TOK_PARTIAL; |
|
668 switch (BYTE_TYPE(enc, ptr)) { |
|
669 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
670 case BT_S: case BT_CR: case BT_LF: |
|
671 continue; |
|
672 case BT_GT: |
|
673 gt: |
|
674 *nextTokPtr = ptr + MINBPC(enc); |
|
675 return XML_TOK_START_TAG_WITH_ATTS; |
|
676 case BT_SOL: |
|
677 sol: |
|
678 ptr += MINBPC(enc); |
|
679 if (ptr == end) |
|
680 return XML_TOK_PARTIAL; |
|
681 if (!CHAR_MATCHES(enc, ptr, '>')) { |
|
682 *nextTokPtr = ptr; |
|
683 return XML_TOK_INVALID; |
|
684 } |
|
685 *nextTokPtr = ptr + MINBPC(enc); |
|
686 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS; |
|
687 default: |
|
688 *nextTokPtr = ptr; |
|
689 return XML_TOK_INVALID; |
|
690 } |
|
691 break; |
|
692 } |
|
693 break; |
|
694 } |
|
695 default: |
|
696 *nextTokPtr = ptr; |
|
697 return XML_TOK_INVALID; |
|
698 } |
|
699 } |
|
700 return XML_TOK_PARTIAL; |
|
701 } |
|
702 |
|
703 /* ptr points to character following "<" */ |
|
704 |
|
705 static |
|
706 int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, |
|
707 const char **nextTokPtr) |
|
708 { |
|
709 #ifdef XML_NS |
|
710 int hadColon; |
|
711 #endif |
|
712 if (ptr == end) |
|
713 return XML_TOK_PARTIAL; |
|
714 switch (BYTE_TYPE(enc, ptr)) { |
|
715 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
716 case BT_EXCL: |
|
717 if ((ptr += MINBPC(enc)) == end) |
|
718 return XML_TOK_PARTIAL; |
|
719 switch (BYTE_TYPE(enc, ptr)) { |
|
720 case BT_MINUS: |
|
721 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
722 case BT_LSQB: |
|
723 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
724 } |
|
725 *nextTokPtr = ptr; |
|
726 return XML_TOK_INVALID; |
|
727 case BT_QUEST: |
|
728 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
729 case BT_SOL: |
|
730 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
731 default: |
|
732 *nextTokPtr = ptr; |
|
733 return XML_TOK_INVALID; |
|
734 } |
|
735 #ifdef XML_NS |
|
736 hadColon = 0; |
|
737 #endif |
|
738 /* we have a start-tag */ |
|
739 while (ptr != end) { |
|
740 switch (BYTE_TYPE(enc, ptr)) { |
|
741 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
742 #ifdef XML_NS |
|
743 case BT_COLON: |
|
744 if (hadColon) { |
|
745 *nextTokPtr = ptr; |
|
746 return XML_TOK_INVALID; |
|
747 } |
|
748 hadColon = 1; |
|
749 ptr += MINBPC(enc); |
|
750 if (ptr == end) |
|
751 return XML_TOK_PARTIAL; |
|
752 switch (BYTE_TYPE(enc, ptr)) { |
|
753 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
754 default: |
|
755 *nextTokPtr = ptr; |
|
756 return XML_TOK_INVALID; |
|
757 } |
|
758 break; |
|
759 #endif |
|
760 case BT_S: case BT_CR: case BT_LF: |
|
761 { |
|
762 ptr += MINBPC(enc); |
|
763 while (ptr != end) { |
|
764 switch (BYTE_TYPE(enc, ptr)) { |
|
765 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
766 case BT_GT: |
|
767 goto gt; |
|
768 case BT_SOL: |
|
769 goto sol; |
|
770 case BT_S: case BT_CR: case BT_LF: |
|
771 ptr += MINBPC(enc); |
|
772 continue; |
|
773 default: |
|
774 *nextTokPtr = ptr; |
|
775 return XML_TOK_INVALID; |
|
776 } |
|
777 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr); |
|
778 } |
|
779 return XML_TOK_PARTIAL; |
|
780 } |
|
781 case BT_GT: |
|
782 gt: |
|
783 *nextTokPtr = ptr + MINBPC(enc); |
|
784 return XML_TOK_START_TAG_NO_ATTS; |
|
785 case BT_SOL: |
|
786 sol: |
|
787 ptr += MINBPC(enc); |
|
788 if (ptr == end) |
|
789 return XML_TOK_PARTIAL; |
|
790 if (!CHAR_MATCHES(enc, ptr, '>')) { |
|
791 *nextTokPtr = ptr; |
|
792 return XML_TOK_INVALID; |
|
793 } |
|
794 *nextTokPtr = ptr + MINBPC(enc); |
|
795 return XML_TOK_EMPTY_ELEMENT_NO_ATTS; |
|
796 default: |
|
797 *nextTokPtr = ptr; |
|
798 return XML_TOK_INVALID; |
|
799 } |
|
800 } |
|
801 return XML_TOK_PARTIAL; |
|
802 } |
|
803 |
|
804 static |
|
805 int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, |
|
806 const char **nextTokPtr) |
|
807 { |
|
808 if (ptr == end) |
|
809 return XML_TOK_NONE; |
|
810 if (MINBPC(enc) > 1) { |
|
811 size_t n = end - ptr; |
|
812 if (n & (MINBPC(enc) - 1)) { |
|
813 n &= ~(MINBPC(enc) - 1); |
|
814 if (n == 0) |
|
815 return XML_TOK_PARTIAL; |
|
816 end = ptr + n; |
|
817 } |
|
818 } |
|
819 switch (BYTE_TYPE(enc, ptr)) { |
|
820 case BT_LT: |
|
821 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
822 case BT_AMP: |
|
823 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
824 case BT_CR: |
|
825 ptr += MINBPC(enc); |
|
826 if (ptr == end) |
|
827 return XML_TOK_TRAILING_CR; |
|
828 if (BYTE_TYPE(enc, ptr) == BT_LF) |
|
829 ptr += MINBPC(enc); |
|
830 *nextTokPtr = ptr; |
|
831 return XML_TOK_DATA_NEWLINE; |
|
832 case BT_LF: |
|
833 *nextTokPtr = ptr + MINBPC(enc); |
|
834 return XML_TOK_DATA_NEWLINE; |
|
835 case BT_RSQB: |
|
836 ptr += MINBPC(enc); |
|
837 if (ptr == end) |
|
838 return XML_TOK_TRAILING_RSQB; |
|
839 if (!CHAR_MATCHES(enc, ptr, ']')) |
|
840 break; |
|
841 ptr += MINBPC(enc); |
|
842 if (ptr == end) |
|
843 return XML_TOK_TRAILING_RSQB; |
|
844 if (!CHAR_MATCHES(enc, ptr, '>')) { |
|
845 ptr -= MINBPC(enc); |
|
846 break; |
|
847 } |
|
848 *nextTokPtr = ptr; |
|
849 return XML_TOK_INVALID; |
|
850 INVALID_CASES(ptr, nextTokPtr) |
|
851 default: |
|
852 ptr += MINBPC(enc); |
|
853 break; |
|
854 } |
|
855 while (ptr != end) { |
|
856 switch (BYTE_TYPE(enc, ptr)) { |
|
857 #define LEAD_CASE(n) \ |
|
858 case BT_LEAD ## n: \ |
|
859 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ |
|
860 *nextTokPtr = ptr; \ |
|
861 return XML_TOK_DATA_CHARS; \ |
|
862 } \ |
|
863 ptr += n; \ |
|
864 break; |
|
865 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
866 #undef LEAD_CASE |
|
867 case BT_RSQB: |
|
868 if (ptr + MINBPC(enc) != end) { |
|
869 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ']')) { |
|
870 ptr += MINBPC(enc); |
|
871 break; |
|
872 } |
|
873 if (ptr + 2*MINBPC(enc) != end) { |
|
874 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), '>')) { |
|
875 ptr += MINBPC(enc); |
|
876 break; |
|
877 } |
|
878 *nextTokPtr = ptr + 2*MINBPC(enc); |
|
879 return XML_TOK_INVALID; |
|
880 } |
|
881 } |
|
882 /* fall through */ |
|
883 case BT_AMP: |
|
884 case BT_LT: |
|
885 case BT_NONXML: |
|
886 case BT_MALFORM: |
|
887 case BT_TRAIL: |
|
888 case BT_CR: |
|
889 case BT_LF: |
|
890 *nextTokPtr = ptr; |
|
891 return XML_TOK_DATA_CHARS; |
|
892 default: |
|
893 ptr += MINBPC(enc); |
|
894 break; |
|
895 } |
|
896 } |
|
897 *nextTokPtr = ptr; |
|
898 return XML_TOK_DATA_CHARS; |
|
899 } |
|
900 |
|
901 /* ptr points to character following "%" */ |
|
902 |
|
903 static |
|
904 int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, |
|
905 const char **nextTokPtr) |
|
906 { |
|
907 if (ptr == end) |
|
908 return XML_TOK_PARTIAL; |
|
909 switch (BYTE_TYPE(enc, ptr)) { |
|
910 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
911 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: |
|
912 *nextTokPtr = ptr; |
|
913 return XML_TOK_PERCENT; |
|
914 default: |
|
915 *nextTokPtr = ptr; |
|
916 return XML_TOK_INVALID; |
|
917 } |
|
918 while (ptr != end) { |
|
919 switch (BYTE_TYPE(enc, ptr)) { |
|
920 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
921 case BT_SEMI: |
|
922 *nextTokPtr = ptr + MINBPC(enc); |
|
923 return XML_TOK_PARAM_ENTITY_REF; |
|
924 default: |
|
925 *nextTokPtr = ptr; |
|
926 return XML_TOK_INVALID; |
|
927 } |
|
928 } |
|
929 return XML_TOK_PARTIAL; |
|
930 } |
|
931 |
|
932 static |
|
933 int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, |
|
934 const char **nextTokPtr) |
|
935 { |
|
936 if (ptr == end) |
|
937 return XML_TOK_PARTIAL; |
|
938 switch (BYTE_TYPE(enc, ptr)) { |
|
939 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) |
|
940 default: |
|
941 *nextTokPtr = ptr; |
|
942 return XML_TOK_INVALID; |
|
943 } |
|
944 while (ptr != end) { |
|
945 switch (BYTE_TYPE(enc, ptr)) { |
|
946 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
947 case BT_CR: case BT_LF: case BT_S: |
|
948 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR: |
|
949 *nextTokPtr = ptr; |
|
950 return XML_TOK_POUND_NAME; |
|
951 default: |
|
952 *nextTokPtr = ptr; |
|
953 return XML_TOK_INVALID; |
|
954 } |
|
955 } |
|
956 return XML_TOK_PARTIAL; |
|
957 } |
|
958 |
|
959 static |
|
960 int PREFIX(scanLit)(int open, const ENCODING *enc, |
|
961 const char *ptr, const char *end, |
|
962 const char **nextTokPtr) |
|
963 { |
|
964 while (ptr != end) { |
|
965 int t = BYTE_TYPE(enc, ptr); |
|
966 switch (t) { |
|
967 INVALID_CASES(ptr, nextTokPtr) |
|
968 case BT_QUOT: |
|
969 case BT_APOS: |
|
970 ptr += MINBPC(enc); |
|
971 if (t != open) |
|
972 break; |
|
973 if (ptr == end) |
|
974 return XML_TOK_PARTIAL; |
|
975 *nextTokPtr = ptr; |
|
976 switch (BYTE_TYPE(enc, ptr)) { |
|
977 case BT_S: case BT_CR: case BT_LF: |
|
978 case BT_GT: case BT_PERCNT: case BT_LSQB: |
|
979 return XML_TOK_LITERAL; |
|
980 default: |
|
981 return XML_TOK_INVALID; |
|
982 } |
|
983 default: |
|
984 ptr += MINBPC(enc); |
|
985 break; |
|
986 } |
|
987 } |
|
988 return XML_TOK_PARTIAL; |
|
989 } |
|
990 |
|
991 static |
|
992 int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, |
|
993 const char **nextTokPtr) |
|
994 { |
|
995 int tok; |
|
996 if (ptr == end) |
|
997 return XML_TOK_NONE; |
|
998 if (MINBPC(enc) > 1) { |
|
999 size_t n = end - ptr; |
|
1000 if (n & (MINBPC(enc) - 1)) { |
|
1001 n &= ~(MINBPC(enc) - 1); |
|
1002 if (n == 0) |
|
1003 return XML_TOK_PARTIAL; |
|
1004 end = ptr + n; |
|
1005 } |
|
1006 } |
|
1007 switch (BYTE_TYPE(enc, ptr)) { |
|
1008 case BT_QUOT: |
|
1009 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1010 case BT_APOS: |
|
1011 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1012 case BT_LT: |
|
1013 { |
|
1014 ptr += MINBPC(enc); |
|
1015 if (ptr == end) |
|
1016 return XML_TOK_PARTIAL; |
|
1017 switch (BYTE_TYPE(enc, ptr)) { |
|
1018 case BT_EXCL: |
|
1019 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1020 case BT_QUEST: |
|
1021 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1022 case BT_NMSTRT: |
|
1023 case BT_HEX: |
|
1024 case BT_NONASCII: |
|
1025 case BT_LEAD2: |
|
1026 case BT_LEAD3: |
|
1027 case BT_LEAD4: |
|
1028 *nextTokPtr = ptr - MINBPC(enc); |
|
1029 return XML_TOK_INSTANCE_START; |
|
1030 } |
|
1031 *nextTokPtr = ptr; |
|
1032 return XML_TOK_INVALID; |
|
1033 } |
|
1034 case BT_CR: |
|
1035 if (ptr + MINBPC(enc) == end) |
|
1036 return XML_TOK_TRAILING_CR; |
|
1037 /* fall through */ |
|
1038 case BT_S: case BT_LF: |
|
1039 for (;;) { |
|
1040 ptr += MINBPC(enc); |
|
1041 if (ptr == end) |
|
1042 break; |
|
1043 switch (BYTE_TYPE(enc, ptr)) { |
|
1044 case BT_S: case BT_LF: |
|
1045 break; |
|
1046 case BT_CR: |
|
1047 /* don't split CR/LF pair */ |
|
1048 if (ptr + MINBPC(enc) != end) |
|
1049 break; |
|
1050 /* fall through */ |
|
1051 default: |
|
1052 *nextTokPtr = ptr; |
|
1053 return XML_TOK_PROLOG_S; |
|
1054 } |
|
1055 } |
|
1056 *nextTokPtr = ptr; |
|
1057 return XML_TOK_PROLOG_S; |
|
1058 case BT_PERCNT: |
|
1059 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1060 case BT_COMMA: |
|
1061 *nextTokPtr = ptr + MINBPC(enc); |
|
1062 return XML_TOK_COMMA; |
|
1063 case BT_LSQB: |
|
1064 *nextTokPtr = ptr + MINBPC(enc); |
|
1065 return XML_TOK_OPEN_BRACKET; |
|
1066 case BT_RSQB: |
|
1067 ptr += MINBPC(enc); |
|
1068 if (ptr == end) |
|
1069 return XML_TOK_PARTIAL; |
|
1070 if (CHAR_MATCHES(enc, ptr, ']')) { |
|
1071 if (ptr + MINBPC(enc) == end) |
|
1072 return XML_TOK_PARTIAL; |
|
1073 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), '>')) { |
|
1074 *nextTokPtr = ptr + 2*MINBPC(enc); |
|
1075 return XML_TOK_COND_SECT_CLOSE; |
|
1076 } |
|
1077 } |
|
1078 *nextTokPtr = ptr; |
|
1079 return XML_TOK_CLOSE_BRACKET; |
|
1080 case BT_LPAR: |
|
1081 *nextTokPtr = ptr + MINBPC(enc); |
|
1082 return XML_TOK_OPEN_PAREN; |
|
1083 case BT_RPAR: |
|
1084 ptr += MINBPC(enc); |
|
1085 if (ptr == end) |
|
1086 return XML_TOK_PARTIAL; |
|
1087 switch (BYTE_TYPE(enc, ptr)) { |
|
1088 case BT_AST: |
|
1089 *nextTokPtr = ptr + MINBPC(enc); |
|
1090 return XML_TOK_CLOSE_PAREN_ASTERISK; |
|
1091 case BT_QUEST: |
|
1092 *nextTokPtr = ptr + MINBPC(enc); |
|
1093 return XML_TOK_CLOSE_PAREN_QUESTION; |
|
1094 case BT_PLUS: |
|
1095 *nextTokPtr = ptr + MINBPC(enc); |
|
1096 return XML_TOK_CLOSE_PAREN_PLUS; |
|
1097 case BT_CR: case BT_LF: case BT_S: |
|
1098 case BT_GT: case BT_COMMA: case BT_VERBAR: |
|
1099 case BT_RPAR: |
|
1100 *nextTokPtr = ptr; |
|
1101 return XML_TOK_CLOSE_PAREN; |
|
1102 } |
|
1103 *nextTokPtr = ptr; |
|
1104 return XML_TOK_INVALID; |
|
1105 case BT_VERBAR: |
|
1106 *nextTokPtr = ptr + MINBPC(enc); |
|
1107 return XML_TOK_OR; |
|
1108 case BT_GT: |
|
1109 *nextTokPtr = ptr + MINBPC(enc); |
|
1110 return XML_TOK_DECL_CLOSE; |
|
1111 case BT_NUM: |
|
1112 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1113 #define LEAD_CASE(n) \ |
|
1114 case BT_LEAD ## n: \ |
|
1115 if (end - ptr < n) \ |
|
1116 return XML_TOK_PARTIAL_CHAR; \ |
|
1117 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ |
|
1118 ptr += n; \ |
|
1119 tok = XML_TOK_NAME; \ |
|
1120 break; \ |
|
1121 } \ |
|
1122 if (IS_NAME_CHAR(enc, ptr, n)) { \ |
|
1123 ptr += n; \ |
|
1124 tok = XML_TOK_NMTOKEN; \ |
|
1125 break; \ |
|
1126 } \ |
|
1127 *nextTokPtr = ptr; \ |
|
1128 return XML_TOK_INVALID; |
|
1129 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1130 #undef LEAD_CASE |
|
1131 case BT_NMSTRT: |
|
1132 case BT_HEX: |
|
1133 tok = XML_TOK_NAME; |
|
1134 ptr += MINBPC(enc); |
|
1135 break; |
|
1136 case BT_DIGIT: |
|
1137 case BT_NAME: |
|
1138 case BT_MINUS: |
|
1139 #ifdef XML_NS |
|
1140 case BT_COLON: |
|
1141 #endif |
|
1142 tok = XML_TOK_NMTOKEN; |
|
1143 ptr += MINBPC(enc); |
|
1144 break; |
|
1145 case BT_NONASCII: |
|
1146 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { |
|
1147 ptr += MINBPC(enc); |
|
1148 tok = XML_TOK_NAME; |
|
1149 break; |
|
1150 } |
|
1151 if (IS_NAME_CHAR_MINBPC(enc, ptr)) { |
|
1152 ptr += MINBPC(enc); |
|
1153 tok = XML_TOK_NMTOKEN; |
|
1154 break; |
|
1155 } |
|
1156 /* fall through */ |
|
1157 default: |
|
1158 *nextTokPtr = ptr; |
|
1159 return XML_TOK_INVALID; |
|
1160 } |
|
1161 while (ptr != end) { |
|
1162 switch (BYTE_TYPE(enc, ptr)) { |
|
1163 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
1164 case BT_GT: case BT_RPAR: case BT_COMMA: |
|
1165 case BT_VERBAR: case BT_LSQB: case BT_PERCNT: |
|
1166 case BT_S: case BT_CR: case BT_LF: |
|
1167 *nextTokPtr = ptr; |
|
1168 return tok; |
|
1169 #ifdef XML_NS |
|
1170 case BT_COLON: |
|
1171 ptr += MINBPC(enc); |
|
1172 switch (tok) { |
|
1173 case XML_TOK_NAME: |
|
1174 if (ptr == end) |
|
1175 return XML_TOK_PARTIAL; |
|
1176 tok = XML_TOK_PREFIXED_NAME; |
|
1177 switch (BYTE_TYPE(enc, ptr)) { |
|
1178 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) |
|
1179 default: |
|
1180 tok = XML_TOK_NMTOKEN; |
|
1181 break; |
|
1182 } |
|
1183 break; |
|
1184 case XML_TOK_PREFIXED_NAME: |
|
1185 tok = XML_TOK_NMTOKEN; |
|
1186 break; |
|
1187 } |
|
1188 break; |
|
1189 #endif |
|
1190 case BT_PLUS: |
|
1191 if (tok == XML_TOK_NMTOKEN) { |
|
1192 *nextTokPtr = ptr; |
|
1193 return XML_TOK_INVALID; |
|
1194 } |
|
1195 *nextTokPtr = ptr + MINBPC(enc); |
|
1196 return XML_TOK_NAME_PLUS; |
|
1197 case BT_AST: |
|
1198 if (tok == XML_TOK_NMTOKEN) { |
|
1199 *nextTokPtr = ptr; |
|
1200 return XML_TOK_INVALID; |
|
1201 } |
|
1202 *nextTokPtr = ptr + MINBPC(enc); |
|
1203 return XML_TOK_NAME_ASTERISK; |
|
1204 case BT_QUEST: |
|
1205 if (tok == XML_TOK_NMTOKEN) { |
|
1206 *nextTokPtr = ptr; |
|
1207 return XML_TOK_INVALID; |
|
1208 } |
|
1209 *nextTokPtr = ptr + MINBPC(enc); |
|
1210 return XML_TOK_NAME_QUESTION; |
|
1211 default: |
|
1212 *nextTokPtr = ptr; |
|
1213 return XML_TOK_INVALID; |
|
1214 } |
|
1215 } |
|
1216 return XML_TOK_PARTIAL; |
|
1217 } |
|
1218 |
|
1219 static |
|
1220 int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, |
|
1221 const char **nextTokPtr) |
|
1222 { |
|
1223 const char *start; |
|
1224 if (ptr == end) |
|
1225 return XML_TOK_NONE; |
|
1226 start = ptr; |
|
1227 while (ptr != end) { |
|
1228 switch (BYTE_TYPE(enc, ptr)) { |
|
1229 #define LEAD_CASE(n) \ |
|
1230 case BT_LEAD ## n: ptr += n; break; |
|
1231 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1232 #undef LEAD_CASE |
|
1233 case BT_AMP: |
|
1234 if (ptr == start) |
|
1235 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1236 *nextTokPtr = ptr; |
|
1237 return XML_TOK_DATA_CHARS; |
|
1238 case BT_LT: |
|
1239 /* this is for inside entity references */ |
|
1240 *nextTokPtr = ptr; |
|
1241 return XML_TOK_INVALID; |
|
1242 case BT_LF: |
|
1243 if (ptr == start) { |
|
1244 *nextTokPtr = ptr + MINBPC(enc); |
|
1245 return XML_TOK_DATA_NEWLINE; |
|
1246 } |
|
1247 *nextTokPtr = ptr; |
|
1248 return XML_TOK_DATA_CHARS; |
|
1249 case BT_CR: |
|
1250 if (ptr == start) { |
|
1251 ptr += MINBPC(enc); |
|
1252 if (ptr == end) |
|
1253 return XML_TOK_TRAILING_CR; |
|
1254 if (BYTE_TYPE(enc, ptr) == BT_LF) |
|
1255 ptr += MINBPC(enc); |
|
1256 *nextTokPtr = ptr; |
|
1257 return XML_TOK_DATA_NEWLINE; |
|
1258 } |
|
1259 *nextTokPtr = ptr; |
|
1260 return XML_TOK_DATA_CHARS; |
|
1261 case BT_S: |
|
1262 if (ptr == start) { |
|
1263 *nextTokPtr = ptr + MINBPC(enc); |
|
1264 return XML_TOK_ATTRIBUTE_VALUE_S; |
|
1265 } |
|
1266 *nextTokPtr = ptr; |
|
1267 return XML_TOK_DATA_CHARS; |
|
1268 default: |
|
1269 ptr += MINBPC(enc); |
|
1270 break; |
|
1271 } |
|
1272 } |
|
1273 *nextTokPtr = ptr; |
|
1274 return XML_TOK_DATA_CHARS; |
|
1275 } |
|
1276 |
|
1277 static |
|
1278 int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, |
|
1279 const char **nextTokPtr) |
|
1280 { |
|
1281 const char *start; |
|
1282 if (ptr == end) |
|
1283 return XML_TOK_NONE; |
|
1284 start = ptr; |
|
1285 while (ptr != end) { |
|
1286 switch (BYTE_TYPE(enc, ptr)) { |
|
1287 #define LEAD_CASE(n) \ |
|
1288 case BT_LEAD ## n: ptr += n; break; |
|
1289 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1290 #undef LEAD_CASE |
|
1291 case BT_AMP: |
|
1292 if (ptr == start) |
|
1293 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1294 *nextTokPtr = ptr; |
|
1295 return XML_TOK_DATA_CHARS; |
|
1296 case BT_PERCNT: |
|
1297 if (ptr == start) |
|
1298 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr); |
|
1299 *nextTokPtr = ptr; |
|
1300 return XML_TOK_DATA_CHARS; |
|
1301 case BT_LF: |
|
1302 if (ptr == start) { |
|
1303 *nextTokPtr = ptr + MINBPC(enc); |
|
1304 return XML_TOK_DATA_NEWLINE; |
|
1305 } |
|
1306 *nextTokPtr = ptr; |
|
1307 return XML_TOK_DATA_CHARS; |
|
1308 case BT_CR: |
|
1309 if (ptr == start) { |
|
1310 ptr += MINBPC(enc); |
|
1311 if (ptr == end) |
|
1312 return XML_TOK_TRAILING_CR; |
|
1313 if (BYTE_TYPE(enc, ptr) == BT_LF) |
|
1314 ptr += MINBPC(enc); |
|
1315 *nextTokPtr = ptr; |
|
1316 return XML_TOK_DATA_NEWLINE; |
|
1317 } |
|
1318 *nextTokPtr = ptr; |
|
1319 return XML_TOK_DATA_CHARS; |
|
1320 default: |
|
1321 ptr += MINBPC(enc); |
|
1322 break; |
|
1323 } |
|
1324 } |
|
1325 *nextTokPtr = ptr; |
|
1326 return XML_TOK_DATA_CHARS; |
|
1327 } |
|
1328 |
|
1329 static |
|
1330 int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, |
|
1331 const char **badPtr) |
|
1332 { |
|
1333 ptr += MINBPC(enc); |
|
1334 end -= MINBPC(enc); |
|
1335 for (; ptr != end; ptr += MINBPC(enc)) { |
|
1336 switch (BYTE_TYPE(enc, ptr)) { |
|
1337 case BT_DIGIT: |
|
1338 case BT_HEX: |
|
1339 case BT_MINUS: |
|
1340 case BT_APOS: |
|
1341 case BT_LPAR: |
|
1342 case BT_RPAR: |
|
1343 case BT_PLUS: |
|
1344 case BT_COMMA: |
|
1345 case BT_SOL: |
|
1346 case BT_EQUALS: |
|
1347 case BT_QUEST: |
|
1348 case BT_CR: |
|
1349 case BT_LF: |
|
1350 case BT_SEMI: |
|
1351 case BT_EXCL: |
|
1352 case BT_AST: |
|
1353 case BT_PERCNT: |
|
1354 case BT_NUM: |
|
1355 #ifdef XML_NS |
|
1356 case BT_COLON: |
|
1357 #endif |
|
1358 break; |
|
1359 case BT_S: |
|
1360 if (CHAR_MATCHES(enc, ptr, '\t')) { |
|
1361 *badPtr = ptr; |
|
1362 return 0; |
|
1363 } |
|
1364 break; |
|
1365 case BT_NAME: |
|
1366 case BT_NMSTRT: |
|
1367 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f)) |
|
1368 break; |
|
1369 default: |
|
1370 switch (BYTE_TO_ASCII(enc, ptr)) { |
|
1371 case 0x24: /* $ */ |
|
1372 case 0x40: /* @ */ |
|
1373 break; |
|
1374 default: |
|
1375 *badPtr = ptr; |
|
1376 return 0; |
|
1377 } |
|
1378 break; |
|
1379 } |
|
1380 } |
|
1381 return 1; |
|
1382 } |
|
1383 |
|
1384 /* This must only be called for a well-formed start-tag or empty element tag. |
|
1385 Returns the number of attributes. Pointers to the first attsMax attributes |
|
1386 are stored in atts. */ |
|
1387 |
|
1388 static |
|
1389 int PREFIX(getAtts)(const ENCODING *enc, const char *ptr, |
|
1390 int attsMax, ATTRIBUTE *atts) |
|
1391 { |
|
1392 enum { other, inName, inValue } state = inName; |
|
1393 int nAtts = 0; |
|
1394 int open; |
|
1395 |
|
1396 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) { |
|
1397 switch (BYTE_TYPE(enc, ptr)) { |
|
1398 #define START_NAME \ |
|
1399 if (state == other) { \ |
|
1400 if (nAtts < attsMax) { \ |
|
1401 atts[nAtts].name = ptr; \ |
|
1402 atts[nAtts].normalized = 1; \ |
|
1403 } \ |
|
1404 state = inName; \ |
|
1405 } |
|
1406 #define LEAD_CASE(n) \ |
|
1407 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; |
|
1408 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1409 #undef LEAD_CASE |
|
1410 case BT_NONASCII: |
|
1411 case BT_NMSTRT: |
|
1412 case BT_HEX: |
|
1413 START_NAME |
|
1414 break; |
|
1415 #undef START_NAME |
|
1416 case BT_QUOT: |
|
1417 if (state != inValue) { |
|
1418 if (nAtts < attsMax) |
|
1419 atts[nAtts].valuePtr = ptr + MINBPC(enc); |
|
1420 state = inValue; |
|
1421 open = BT_QUOT; |
|
1422 } |
|
1423 else if (open == BT_QUOT) { |
|
1424 state = other; |
|
1425 if (nAtts < attsMax) |
|
1426 atts[nAtts].valueEnd = ptr; |
|
1427 nAtts++; |
|
1428 } |
|
1429 break; |
|
1430 case BT_APOS: |
|
1431 if (state != inValue) { |
|
1432 if (nAtts < attsMax) |
|
1433 atts[nAtts].valuePtr = ptr + MINBPC(enc); |
|
1434 state = inValue; |
|
1435 open = BT_APOS; |
|
1436 } |
|
1437 else if (open == BT_APOS) { |
|
1438 state = other; |
|
1439 if (nAtts < attsMax) |
|
1440 atts[nAtts].valueEnd = ptr; |
|
1441 nAtts++; |
|
1442 } |
|
1443 break; |
|
1444 case BT_AMP: |
|
1445 if (nAtts < attsMax) |
|
1446 atts[nAtts].normalized = 0; |
|
1447 break; |
|
1448 case BT_S: |
|
1449 if (state == inName) |
|
1450 state = other; |
|
1451 else if (state == inValue |
|
1452 && nAtts < attsMax |
|
1453 && atts[nAtts].normalized |
|
1454 && (ptr == atts[nAtts].valuePtr |
|
1455 || BYTE_TO_ASCII(enc, ptr) != ' ' |
|
1456 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ' ' |
|
1457 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open)) |
|
1458 atts[nAtts].normalized = 0; |
|
1459 break; |
|
1460 case BT_CR: case BT_LF: |
|
1461 /* This case ensures that the first attribute name is counted |
|
1462 Apart from that we could just change state on the quote. */ |
|
1463 if (state == inName) |
|
1464 state = other; |
|
1465 else if (state == inValue && nAtts < attsMax) |
|
1466 atts[nAtts].normalized = 0; |
|
1467 break; |
|
1468 case BT_GT: |
|
1469 case BT_SOL: |
|
1470 if (state != inValue) |
|
1471 return nAtts; |
|
1472 break; |
|
1473 default: |
|
1474 break; |
|
1475 } |
|
1476 } |
|
1477 /* not reached */ |
|
1478 } |
|
1479 |
|
1480 static |
|
1481 int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) |
|
1482 { |
|
1483 int result = 0; |
|
1484 /* skip &# */ |
|
1485 ptr += 2*MINBPC(enc); |
|
1486 if (CHAR_MATCHES(enc, ptr, 'x')) { |
|
1487 for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) { |
|
1488 int c = BYTE_TO_ASCII(enc, ptr); |
|
1489 switch (c) { |
|
1490 case '0': case '1': case '2': case '3': case '4': |
|
1491 case '5': case '6': case '7': case '8': case '9': |
|
1492 result <<= 4; |
|
1493 result |= (c - '0'); |
|
1494 break; |
|
1495 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': |
|
1496 result <<= 4; |
|
1497 result += 10 + (c - 'A'); |
|
1498 break; |
|
1499 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': |
|
1500 result <<= 4; |
|
1501 result += 10 + (c - 'a'); |
|
1502 break; |
|
1503 } |
|
1504 if (result >= 0x110000) |
|
1505 return -1; |
|
1506 } |
|
1507 } |
|
1508 else { |
|
1509 for (; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) { |
|
1510 int c = BYTE_TO_ASCII(enc, ptr); |
|
1511 result *= 10; |
|
1512 result += (c - '0'); |
|
1513 if (result >= 0x110000) |
|
1514 return -1; |
|
1515 } |
|
1516 } |
|
1517 return checkCharRefNumber(result); |
|
1518 } |
|
1519 |
|
1520 static |
|
1521 int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end) |
|
1522 { |
|
1523 switch ((end - ptr)/MINBPC(enc)) { |
|
1524 case 2: |
|
1525 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), 't')) { |
|
1526 switch (BYTE_TO_ASCII(enc, ptr)) { |
|
1527 case 'l': |
|
1528 return '<'; |
|
1529 case 'g': |
|
1530 return '>'; |
|
1531 } |
|
1532 } |
|
1533 break; |
|
1534 case 3: |
|
1535 if (CHAR_MATCHES(enc, ptr, 'a')) { |
|
1536 ptr += MINBPC(enc); |
|
1537 if (CHAR_MATCHES(enc, ptr, 'm')) { |
|
1538 ptr += MINBPC(enc); |
|
1539 if (CHAR_MATCHES(enc, ptr, 'p')) |
|
1540 return '&'; |
|
1541 } |
|
1542 } |
|
1543 break; |
|
1544 case 4: |
|
1545 switch (BYTE_TO_ASCII(enc, ptr)) { |
|
1546 case 'q': |
|
1547 ptr += MINBPC(enc); |
|
1548 if (CHAR_MATCHES(enc, ptr, 'u')) { |
|
1549 ptr += MINBPC(enc); |
|
1550 if (CHAR_MATCHES(enc, ptr, 'o')) { |
|
1551 ptr += MINBPC(enc); |
|
1552 if (CHAR_MATCHES(enc, ptr, 't')) |
|
1553 return '"'; |
|
1554 } |
|
1555 } |
|
1556 break; |
|
1557 case 'a': |
|
1558 ptr += MINBPC(enc); |
|
1559 if (CHAR_MATCHES(enc, ptr, 'p')) { |
|
1560 ptr += MINBPC(enc); |
|
1561 if (CHAR_MATCHES(enc, ptr, 'o')) { |
|
1562 ptr += MINBPC(enc); |
|
1563 if (CHAR_MATCHES(enc, ptr, 's')) |
|
1564 return '\''; |
|
1565 } |
|
1566 } |
|
1567 break; |
|
1568 } |
|
1569 } |
|
1570 return 0; |
|
1571 } |
|
1572 |
|
1573 static |
|
1574 int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) |
|
1575 { |
|
1576 for (;;) { |
|
1577 switch (BYTE_TYPE(enc, ptr1)) { |
|
1578 #define LEAD_CASE(n) \ |
|
1579 case BT_LEAD ## n: \ |
|
1580 if (*ptr1++ != *ptr2++) \ |
|
1581 return 0; |
|
1582 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) |
|
1583 #undef LEAD_CASE |
|
1584 /* fall through */ |
|
1585 if (*ptr1++ != *ptr2++) |
|
1586 return 0; |
|
1587 break; |
|
1588 case BT_NONASCII: |
|
1589 case BT_NMSTRT: |
|
1590 #ifdef XML_NS |
|
1591 case BT_COLON: |
|
1592 #endif |
|
1593 case BT_HEX: |
|
1594 case BT_DIGIT: |
|
1595 case BT_NAME: |
|
1596 case BT_MINUS: |
|
1597 if (*ptr2++ != *ptr1++) |
|
1598 return 0; |
|
1599 if (MINBPC(enc) > 1) { |
|
1600 if (*ptr2++ != *ptr1++) |
|
1601 return 0; |
|
1602 if (MINBPC(enc) > 2) { |
|
1603 if (*ptr2++ != *ptr1++) |
|
1604 return 0; |
|
1605 if (MINBPC(enc) > 3) { |
|
1606 if (*ptr2++ != *ptr1++) |
|
1607 return 0; |
|
1608 } |
|
1609 } |
|
1610 } |
|
1611 break; |
|
1612 default: |
|
1613 if (MINBPC(enc) == 1 && *ptr1 == *ptr2) |
|
1614 return 1; |
|
1615 switch (BYTE_TYPE(enc, ptr2)) { |
|
1616 case BT_LEAD2: |
|
1617 case BT_LEAD3: |
|
1618 case BT_LEAD4: |
|
1619 case BT_NONASCII: |
|
1620 case BT_NMSTRT: |
|
1621 #ifdef XML_NS |
|
1622 case BT_COLON: |
|
1623 #endif |
|
1624 case BT_HEX: |
|
1625 case BT_DIGIT: |
|
1626 case BT_NAME: |
|
1627 case BT_MINUS: |
|
1628 return 0; |
|
1629 default: |
|
1630 return 1; |
|
1631 } |
|
1632 } |
|
1633 } |
|
1634 /* not reached */ |
|
1635 } |
|
1636 |
|
1637 static |
|
1638 int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, const char *ptr2) |
|
1639 { |
|
1640 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { |
|
1641 if (!CHAR_MATCHES(enc, ptr1, *ptr2)) |
|
1642 return 0; |
|
1643 } |
|
1644 switch (BYTE_TYPE(enc, ptr1)) { |
|
1645 case BT_LEAD2: |
|
1646 case BT_LEAD3: |
|
1647 case BT_LEAD4: |
|
1648 case BT_NONASCII: |
|
1649 case BT_NMSTRT: |
|
1650 #ifdef XML_NS |
|
1651 case BT_COLON: |
|
1652 #endif |
|
1653 case BT_HEX: |
|
1654 case BT_DIGIT: |
|
1655 case BT_NAME: |
|
1656 case BT_MINUS: |
|
1657 return 0; |
|
1658 default: |
|
1659 return 1; |
|
1660 } |
|
1661 } |
|
1662 |
|
1663 static |
|
1664 int PREFIX(nameLength)(const ENCODING *enc, const char *ptr) |
|
1665 { |
|
1666 const char *start = ptr; |
|
1667 for (;;) { |
|
1668 switch (BYTE_TYPE(enc, ptr)) { |
|
1669 #define LEAD_CASE(n) \ |
|
1670 case BT_LEAD ## n: ptr += n; break; |
|
1671 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1672 #undef LEAD_CASE |
|
1673 case BT_NONASCII: |
|
1674 case BT_NMSTRT: |
|
1675 #ifdef XML_NS |
|
1676 case BT_COLON: |
|
1677 #endif |
|
1678 case BT_HEX: |
|
1679 case BT_DIGIT: |
|
1680 case BT_NAME: |
|
1681 case BT_MINUS: |
|
1682 ptr += MINBPC(enc); |
|
1683 break; |
|
1684 default: |
|
1685 return ptr - start; |
|
1686 } |
|
1687 } |
|
1688 } |
|
1689 |
|
1690 static |
|
1691 const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr) |
|
1692 { |
|
1693 for (;;) { |
|
1694 switch (BYTE_TYPE(enc, ptr)) { |
|
1695 case BT_LF: |
|
1696 case BT_CR: |
|
1697 case BT_S: |
|
1698 ptr += MINBPC(enc); |
|
1699 break; |
|
1700 default: |
|
1701 return ptr; |
|
1702 } |
|
1703 } |
|
1704 } |
|
1705 |
|
1706 static |
|
1707 void PREFIX(updatePosition)(const ENCODING *enc, |
|
1708 const char *ptr, |
|
1709 const char *end, |
|
1710 POSITION *pos) |
|
1711 { |
|
1712 while (ptr != end) { |
|
1713 switch (BYTE_TYPE(enc, ptr)) { |
|
1714 #define LEAD_CASE(n) \ |
|
1715 case BT_LEAD ## n: \ |
|
1716 ptr += n; \ |
|
1717 break; |
|
1718 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) |
|
1719 #undef LEAD_CASE |
|
1720 case BT_LF: |
|
1721 pos->columnNumber = (unsigned)-1; |
|
1722 pos->lineNumber++; |
|
1723 ptr += MINBPC(enc); |
|
1724 break; |
|
1725 case BT_CR: |
|
1726 pos->lineNumber++; |
|
1727 ptr += MINBPC(enc); |
|
1728 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) |
|
1729 ptr += MINBPC(enc); |
|
1730 pos->columnNumber = (unsigned)-1; |
|
1731 break; |
|
1732 default: |
|
1733 ptr += MINBPC(enc); |
|
1734 break; |
|
1735 } |
|
1736 pos->columnNumber++; |
|
1737 } |
|
1738 } |
|
1739 |
|
1740 #undef DO_LEAD_CASE |
|
1741 #undef MULTIBYTE_CASES |
|
1742 #undef INVALID_CASES |
|
1743 #undef CHECK_NAME_CASE |
|
1744 #undef CHECK_NAME_CASES |
|
1745 #undef CHECK_NMSTRT_CASE |
|
1746 #undef CHECK_NMSTRT_CASES |