annotate mcabber/libjabber/xmltok.c @ 946:5c1dfc8e54d7 0.8.0

Ready for release 0.8.0
author Mikael Berthe <mikael@lilotux.net>
date Sun, 13 Aug 2006 21:01:30 +0200
parents 0aa9015f06df
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
25
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1 /*
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
2 The contents of this file are subject to the Mozilla Public License
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
3 Version 1.1 (the "License"); you may not use this file except in
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
4 compliance with the License. You may obtain a copy of the License at
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
5 http://www.mozilla.org/MPL/
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
6
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
7 Software distributed under the License is distributed on an "AS IS"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
8 basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
9 License for the specific language governing rights and limitations
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
10 under the License.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
11
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
12 The Original Code is expat.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
13
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
14 The Initial Developer of the Original Code is James Clark.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
15 Portions created by James Clark are Copyright (C) 1998, 1999
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
16 James Clark. All Rights Reserved.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
17
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
18 Contributor(s):
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
19
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
20 Alternatively, the contents of this file may be used under the terms
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
21 of the GNU General Public License (the "GPL"), in which case the
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
22 provisions of the GPL are applicable instead of those above. If you
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
23 wish to allow use of your version of this file only under the terms of
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
24 the GPL and not to allow others to use your version of this file under
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
25 the MPL, indicate your decision by deleting the provisions above and
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
26 replace them with the notice and other provisions required by the
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
27 GPL. If you do not delete the provisions above, a recipient may use
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
28 your version of this file under either the MPL or the GPL.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
29 */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
30
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
31 #include "xmldef.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
32 #include "xmltok.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
33 #include "nametab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
34
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Initializer fragments listing an encoding's tokenizer routines.
   Every entry is spelled through PREFIX(), so PREFIX must be defined at
   the point where these macros are expanded.  VTABLE is VTABLE1 followed
   by the two conversion routines. */
#define VTABLE1 \
  { PREFIX(prologTok), \
    PREFIX(contentTok), \
    PREFIX(cdataSectionTok) }, \
  { PREFIX(attributeValueTok), \
    PREFIX(entityValueTok) }, \
  PREFIX(sameName), \
  PREFIX(nameMatchesAscii), \
  PREFIX(nameLength), \
  PREFIX(skipS), \
  PREFIX(getAtts), \
  PREFIX(charRefNumber), \
  PREFIX(predefinedEntityName), \
  PREFIX(updatePosition), \
  PREFIX(isPublicId)

#define VTABLE \
  VTABLE1, \
  PREFIX(toUtf8), \
  PREFIX(toUtf16)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
49
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Test the namingBitmap bit for the character whose high byte is hi and
   low byte is lo.  pages[hi] selects a group of 8 bitmap words, the top
   3 bits of lo pick the word inside that group and the bottom 5 bits of
   lo pick the bit.  The mask is built from 1u so that selecting bit 31
   never shifts into the sign bit of a signed int. */
#define UCS2_GET_NAMING(pages, hi, lo) \
  (namingBitmap[((pages)[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
52
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* A 2 byte UTF-8 representation splits the character's 11 bits
   between the bottom 5 and 6 bits of the bytes.
   We need 8 bits to index into pages, 3 bits to add to that index and
   5 bits to generate the mask.
   byte must point at unsigned char data.  The mask is built from 1u so
   that selecting bit 31 never shifts into the sign bit of a signed int. */
#define UTF8_GET_NAMING2(pages, byte) \
  (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
                + ((((byte)[0]) & 3) << 1) \
                + ((((byte)[1]) >> 5) & 1)] \
   & (1u << (((byte)[1]) & 0x1F)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
62
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* A 3 byte UTF-8 representation splits the character's 16 bits
   between the bottom 4, 6 and 6 bits of the bytes.
   We need 8 bits to index into pages, 3 bits to add to that index and
   5 bits to generate the mask.
   byte must point at unsigned char data.  The mask is built from 1u so
   that selecting bit 31 never shifts into the sign bit of a signed int. */
#define UTF8_GET_NAMING3(pages, byte) \
  (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
                         + ((((byte)[1]) >> 2) & 0xF)] \
                 << 3) \
                + ((((byte)[1]) & 3) << 1) \
                + ((((byte)[2]) >> 5) & 1)] \
   & (1u << (((byte)[2]) & 0x1F)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
74
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Dispatch on the sequence length n: 2 and 3 byte sequences are looked
   up with UTF8_GET_NAMING2 / UTF8_GET_NAMING3 after viewing p as
   unsigned bytes; any other length yields 0. */
#define UTF8_GET_NAMING(pages, p, n) \
  ((n) == 2 ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
   : (n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) \
   : 0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
81
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Non-zero when the 3 byte sequence at p (unsigned bytes) is one this
   tokenizer rejects: a lead byte 0xED whose second byte has bit 0x20 set
   (the surrogate range U+D800..U+DFFF), or the exact sequences
   EF BF BE / EF BF BF (U+FFFE / U+FFFF).
   The argument is fully parenthesized so pointer expressions such as
   s + 1 can be passed safely. */
#define UTF8_INVALID3(p) \
  (*(p) == 0xED \
   ? (((p)[1] & 0x20) != 0) \
   : (*(p) == 0xEF \
      ? ((p)[1] == 0xBF && ((p)[2] == 0xBF || (p)[2] == 0xBE)) \
      : 0))

/* Non-zero when the 4 byte sequence at p (unsigned bytes) starts with
   0xF4 and its second byte has either of the 0x30 bits set, i.e. the
   encoded value lies above U+10FFFF. */
#define UTF8_INVALID4(p) (*(p) == 0xF4 && ((p)[1] & 0x30) != 0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
90
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
91 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
92 int isNever(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
93 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
94 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
95 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
96
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
97 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
98 int utf8_isName2(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
99 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
100 return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
101 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
102
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
103 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
104 int utf8_isName3(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
105 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
106 return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
107 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
108
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* The 4 byte name test is an alias for isNever, which always returns 0. */
#define utf8_isName4 isNever
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
110
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
111 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
112 int utf8_isNmstrt2(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
113 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
114 return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
115 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
116
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
117 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
118 int utf8_isNmstrt3(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
119 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
120 return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
121 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
122
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* The 4 byte name-start test and the 2 byte validity test are both
   aliases for isNever, which always returns 0. */
#define utf8_isNmstrt4 isNever

#define utf8_isInvalid2 isNever
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
126
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
127 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
128 int utf8_isInvalid3(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
129 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
130 return UTF8_INVALID3((const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
131 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
132
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
133 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
134 int utf8_isInvalid4(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
135 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
136 return UTF8_INVALID4((const unsigned char *)p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
137 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
138
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
139 struct normal_encoding {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
140 ENCODING enc;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
141 unsigned char type[256];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
142 #ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
143 int (*byteType)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
144 int (*isNameMin)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
145 int (*isNmstrtMin)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
146 int (*byteToAscii)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
147 int (*charMatches)(const ENCODING *, const char *, int);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
148 #endif /* XML_MIN_SIZE */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
149 int (*isName2)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
150 int (*isName3)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
151 int (*isName4)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
152 int (*isNmstrt2)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
153 int (*isNmstrt3)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
154 int (*isNmstrt4)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
155 int (*isInvalid2)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
156 int (*isInvalid3)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
157 int (*isInvalid4)(const ENCODING *, const char *);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
158 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
159
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* STANDARD_VTABLE(E) lists the E-prefixed routines for the XML_MIN_SIZE
   hook members of struct normal_encoding, in member order and with a
   trailing comma.  Without XML_MIN_SIZE those members do not exist and
   the macro expands to nothing. */
#ifdef XML_MIN_SIZE

#define STANDARD_VTABLE(E) \
  E##byteType, \
  E##isNameMin, \
  E##isNmstrtMin, \
  E##byteToAscii, \
  E##charMatches,

#else

#define STANDARD_VTABLE(E) /* as nothing */

#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
174
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* NORMAL_VTABLE(E) lists the nine E-prefixed predicates in the order of
   the isName2 .. isInvalid4 members of struct normal_encoding.  There is
   no trailing comma. */
#define NORMAL_VTABLE(E) \
  E##isName2, \
  E##isName3, \
  E##isName4, \
  E##isNmstrt2, \
  E##isNmstrt3, \
  E##isNmstrt4, \
  E##isInvalid2, \
  E##isInvalid3, \
  E##isInvalid4
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
185
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
186 static int checkCharRefNumber(int);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
187
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
188 #include "xmltok_impl.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
189
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
#ifdef XML_MIN_SIZE
/* Both minimum-size name tests are aliases for isNever. */
#define sb_isNameMin isNever
#define sb_isNmstrtMin isNever
/* minimum bytes per character, read from the encoding */
#define MINBPC(enc) ((enc)->minBytesPerChar)
#else
/* minimum bytes per character */
#define MINBPC(enc) 1
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
201
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Byte type of the byte at p: index the type table of the
   struct normal_encoding behind enc with the byte's unsigned value.
   The cast keeps the const qualifier, matching the other accessor macros
   in this file, so a const ENCODING pointer is never de-qualified. */
#define SB_BYTE_TYPE(enc, p) \
  (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
204
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
#ifdef XML_MIN_SIZE
/* Function form of SB_BYTE_TYPE; its signature matches the byteType
   member of struct normal_encoding. */
static int sb_byteType(const ENCODING *enc, const char *p)
{
  const int kind = SB_BYTE_TYPE(enc, p);
  return kind;
}
/* Classify the byte at p through the encoding's byteType hook. */
#define BYTE_TYPE(enc, p) \
  (((const struct normal_encoding *)(enc))->byteType(enc, p))
#else
/* Classify the byte at p by direct lookup in the type table. */
#define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
216
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* BYTE_TO_ASCII(enc, p) yields the byte at p.  With XML_MIN_SIZE it goes
   through the encoding's byteToAscii hook (sb_byteToAscii is the plain
   single-byte implementation); otherwise it dereferences p directly.
   The direct form parenthesizes p so that pointer expressions such as
   s + 1 expand correctly. */
#ifdef XML_MIN_SIZE
#define BYTE_TO_ASCII(enc, p) \
  (((const struct normal_encoding *)(enc))->byteToAscii(enc, p))
static
int sb_byteToAscii(const ENCODING *enc, const char *p)
{
  return *p;
}
#else
#define BYTE_TO_ASCII(enc, p) (*(p))
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
228
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Per-length character predicates.  n is pasted onto the member name, so
   IS_NAME_CHAR(enc, p, 2) calls the isName2 member of the
   struct normal_encoding behind enc, and likewise for isNmstrt and
   isInvalid. */
#define IS_NAME_CHAR(enc, p, n) \
  (((const struct normal_encoding *)(enc))->isName##n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n) \
  (((const struct normal_encoding *)(enc))->isNmstrt##n(enc, p))
#define IS_INVALID_CHAR(enc, p, n) \
  (((const struct normal_encoding *)(enc))->isInvalid##n(enc, p))

/* Minimum-size variants: hook calls under XML_MIN_SIZE, constant 0
   otherwise. */
#ifdef XML_MIN_SIZE
#define IS_NAME_CHAR_MINBPC(enc, p) \
  (((const struct normal_encoding *)(enc))->isNameMin(enc, p))
#define IS_NMSTRT_CHAR_MINBPC(enc, p) \
  (((const struct normal_encoding *)(enc))->isNmstrtMin(enc, p))
#else
#define IS_NAME_CHAR_MINBPC(enc, p) (0)
#define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
245
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* CHAR_MATCHES(enc, p, c) is non-zero when the byte at p equals c.  With
   XML_MIN_SIZE it goes through the encoding's charMatches hook
   (sb_charMatches is the plain single-byte implementation); otherwise it
   compares directly.  Both p and c are parenthesized in the direct form
   so that compound expressions, e.g. a conditional passed as c, are
   compared as a whole. */
#ifdef XML_MIN_SIZE
#define CHAR_MATCHES(enc, p, c) \
  (((const struct normal_encoding *)(enc))->charMatches(enc, p, c))
static
int sb_charMatches(const ENCODING *enc, const char *p, int c)
{
  return *p == c;
}
#else
/* c is an ASCII character */
#define CHAR_MATCHES(enc, p, c) (*(p) == (c))
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
258
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
259 #define PREFIX(ident) normal_ ## ident
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
260 #include "xmltok_impl_c.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
261
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
262 #undef MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
263 #undef BYTE_TYPE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
264 #undef BYTE_TO_ASCII
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
265 #undef CHAR_MATCHES
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
266 #undef IS_NAME_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
267 #undef IS_NAME_CHAR_MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
268 #undef IS_NMSTRT_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
269 #undef IS_NMSTRT_CHAR_MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
270 #undef IS_INVALID_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
271
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* UTF8_cvalN is value of masked first byte of N byte sequence */
enum {
  UTF8_cval1 = 0x00,
  UTF8_cval2 = 0xC0,
  UTF8_cval3 = 0xE0,
  UTF8_cval4 = 0xF0
};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
278
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
279 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
280 void utf8_toUtf8(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
281 const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
282 char **toP, const char *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
283 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
284 char *to;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
285 const char *from;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
286 if (fromLim - *fromP > toLim - *toP) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
287 /* Avoid copying partial characters. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
288 for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
289 if (((unsigned char)fromLim[-1] & 0xc0) != 0x80)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
290 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
291 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
292 for (to = *toP, from = *fromP; from != fromLim; from++, to++)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
293 *to = *from;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
294 *fromP = from;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
295 *toP = to;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
296 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
297
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
298 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
299 void utf8_toUtf16(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
300 const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
301 unsigned short **toP, const unsigned short *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
302 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
303 unsigned short *to = *toP;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
304 const char *from = *fromP;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
305 while (from != fromLim && to != toLim) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
306 switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
307 case BT_LEAD2:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
308 *to++ = ((from[0] & 0x1f) << 6) | (from[1] & 0x3f);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
309 from += 2;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
310 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
311 case BT_LEAD3:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
312 *to++ = ((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
313 from += 3;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
314 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
315 case BT_LEAD4:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
316 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
317 unsigned long n;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
318 if (to + 1 == toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
319 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
320 n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
321 n -= 0x10000;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
322 to[0] = (unsigned short)((n >> 10) | 0xD800);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
323 to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
324 to += 2;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
325 from += 4;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
326 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
327 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
328 default:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
329 *to++ = *from++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
330 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
331 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
332 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
333 *fromP = from;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
334 *toP = to;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
335 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
336
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
#ifdef XML_NS
/* UTF-8 encoding descriptor compiled only when XML_NS is defined.
   asciitab.h is included without remapping BT_COLON (compare the non-_ns
   table, which defines BT_COLON as BT_NMSTRT around the include). */
static const struct normal_encoding utf8_encoding_ns = {
  /* Base ENCODING part: VTABLE1 entries, the two converters, then three
     integer fields whose meaning is given by the ENCODING declaration. */
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
  /* Byte-type table: ASCII half followed by the UTF-8 half. */
  {
#include "asciitab.h"
#include "utf8tab.h"
  },
  STANDARD_VTABLE(sb_)
  NORMAL_VTABLE(utf8_)
};
#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
347
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
348 static const struct normal_encoding utf8_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
349 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
350 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
351 #define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
352 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
353 #undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
354 #include "utf8tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
355 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
356 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
357 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
358
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
#ifdef XML_NS

/* Internal UTF-8 encoding descriptor compiled only when XML_NS is defined.
   Same layout as utf8_encoding_ns, but the ASCII half of the byte-type
   table comes from iasciitab.h instead of asciitab.h. */
static const struct normal_encoding internal_utf8_encoding_ns = {
  /* Base ENCODING part: VTABLE1 entries, the two converters, then three
     integer fields whose meaning is given by the ENCODING declaration. */
  { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
  /* Byte-type table: internal ASCII half followed by the UTF-8 half. */
  {
#include "iasciitab.h"
#include "utf8tab.h"
  },
  STANDARD_VTABLE(sb_)
  NORMAL_VTABLE(utf8_)
};

#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
371
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
372 static const struct normal_encoding internal_utf8_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
373 { VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
374 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
375 #define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
376 #include "iasciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
377 #undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
378 #include "utf8tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
379 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
380 STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
381 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
382
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
383 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
384 void latin1_toUtf8(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
385 const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
386 char **toP, const char *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
387 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
388 for (;;) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
389 unsigned char c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
390 if (*fromP == fromLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
391 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
392 c = (unsigned char)**fromP;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
393 if (c & 0x80) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
394 if (toLim - *toP < 2)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
395 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
396 *(*toP)++ = ((c >> 6) | UTF8_cval2);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
397 *(*toP)++ = ((c & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
398 (*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
399 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
400 else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
401 if (*toP == toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
402 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
403 *(*toP)++ = *(*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
404 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
405 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
406 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
407
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
408 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
409 void latin1_toUtf16(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
410 const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
411 unsigned short **toP, const unsigned short *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
412 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
413 while (*fromP != fromLim && *toP != toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
414 *(*toP)++ = (unsigned char)*(*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
415 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
416
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
#ifdef XML_NS

/* Latin-1 encoding descriptor compiled only when XML_NS is defined.
   asciitab.h is included without remapping BT_COLON. */
static const struct normal_encoding latin1_encoding_ns = {
  /* Base ENCODING part: VTABLE1 entries, the two converters, then three
     integer fields whose meaning is given by the ENCODING declaration. */
  { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
  /* Byte-type table: ASCII half followed by the Latin-1 half. */
  {
#include "asciitab.h"
#include "latin1tab.h"
  },
  STANDARD_VTABLE(sb_)
};

#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
429
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
430 static const struct normal_encoding latin1_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
431 { VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
432 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
433 #define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
434 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
435 #undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
436 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
437 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
438 STANDARD_VTABLE(sb_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
439 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
440
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
441 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
442 void ascii_toUtf8(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
443 const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
444 char **toP, const char *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
445 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
446 while (*fromP != fromLim && *toP != toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
447 *(*toP)++ = *(*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
448 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
449
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
#ifdef XML_NS

/* ASCII encoding descriptor compiled only when XML_NS is defined.
   asciitab.h is included without remapping BT_COLON.  It shares
   latin1_toUtf16 as its UTF-16 converter. */
static const struct normal_encoding ascii_encoding_ns = {
  /* Base ENCODING part: VTABLE1 entries, the two converters, then three
     integer fields whose meaning is given by the ENCODING declaration. */
  { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
  /* Byte-type table: only the ASCII half is listed; entries not listed are
     zero-initialized. */
  {
#include "asciitab.h"
/* BT_NONXML == 0 */
  },
  STANDARD_VTABLE(sb_)
};

#endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
462
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
463 static const struct normal_encoding ascii_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
464 { VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
465 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
466 #define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
467 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
468 #undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
469 /* BT_NONXML == 0 */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
470 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
471 STANDARD_VTABLE(sb_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
472 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
473
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
474 static int unicode_byte_type(char hi, char lo)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
475 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
476 switch ((unsigned char)hi) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
477 case 0xD8: case 0xD9: case 0xDA: case 0xDB:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
478 return BT_LEAD4;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
479 case 0xDC: case 0xDD: case 0xDE: case 0xDF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
480 return BT_TRAIL;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
481 case 0xFF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
482 switch ((unsigned char)lo) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
483 case 0xFF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
484 case 0xFE:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
485 return BT_NONXML;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
486 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
487 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
488 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
489 return BT_NONASCII;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
490 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
491
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Defines `static void <E>toUtf8(...)', converting 2-byte units in
   [*fromP, fromLim) to UTF-8 in [*toP, toLim).  GET_LO and GET_HI must be
   defined at the point of expansion to pick the low and high byte of a
   unit.  Conversion stops early, with *fromP left on the unconverted unit,
   when the next character does not fit in the remaining output.
   NOTE(review): the second unit of a surrogate pair is read without a
   check against fromLim, and the loop ends only on exact equality with
   fromLim, so the input is assumed to hold whole units and whole pairs --
   confirm against callers. */
#define DEFINE_UTF16_TO_UTF8(E) \
static \
void E ## toUtf8(const ENCODING *enc, \
                 const char **fromP, const char *fromLim, \
                 char **toP, const char *toLim) \
{ \
  const char *src = *fromP; \
  char *dst = *toP; \
  while (src != fromLim) { \
    const unsigned char lo = GET_LO(src); \
    const unsigned char hi = GET_HI(src); \
    if (hi == 0 && lo < 0x80) { \
      /* one output byte */ \
      if (dst == toLim) \
        break; \
      *dst++ = lo; \
    } \
    else if (hi < 0x8) { \
      /* two output bytes */ \
      if (toLim - dst < 2) \
        break; \
      *dst++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \
      *dst++ = ((lo & 0x3f) | 0x80); \
    } \
    else if ((hi & 0xFC) == 0xD8) { \
      /* high byte 0xD8..0xDB: this unit and the next give four bytes */ \
      int plane; \
      unsigned char lo2; \
      if (toLim - dst < 4) \
        break; \
      plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \
      *dst++ = ((plane >> 2) | UTF8_cval4); \
      *dst++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \
      src += 2; \
      lo2 = GET_LO(src); \
      *dst++ = (((lo & 0x3) << 4) \
                | ((GET_HI(src) & 0x3) << 2) \
                | (lo2 >> 6) \
                | 0x80); \
      *dst++ = ((lo2 & 0x3f) | 0x80); \
    } \
    else { \
      /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \
      if (toLim - dst < 3) \
        break; \
      *dst++ = ((hi >> 4) | UTF8_cval3); \
      *dst++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \
      *dst++ = ((lo & 0x3f) | 0x80); \
    } \
    src += 2; \
  } \
  *fromP = src; \
  *toP = dst; \
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
554
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Defines `static void <E>toUtf16(...)', repacking 2-byte units from
   [*fromP, fromLim) into unsigned short values in [*toP, toLim) using the
   GET_HI / GET_LO accessors in effect at the point of expansion.  Stops at
   whichever limit is reached first and advances *fromP and *toP. */
#define DEFINE_UTF16_TO_UTF16(E) \
static \
void E ## toUtf16(const ENCODING *enc, \
                  const char **fromP, const char *fromLim, \
                  unsigned short **toP, const unsigned short *toLim) \
{ \
  const char *src = *fromP; \
  unsigned short *dst = *toP; \
  /* Avoid copying first half only of surrogate: when the input is longer \
     than the output can hold and its final unit has a high byte in \
     0xD8..0xDF, drop that final unit from this call. */ \
  if (fromLim - src > ((toLim - dst) << 1) \
      && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \
    fromLim -= 2; \
  while (src != fromLim && dst != toLim) { \
    *dst++ = (GET_HI(src) << 8) | GET_LO(src); \
    src += 2; \
  } \
  *fromP = src; \
  *toP = dst; \
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
568
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
569 #define SET2(ptr, ch) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
570 (((ptr)[0] = ((ch) & 0xff)), ((ptr)[1] = ((ch) >> 8)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
571 #define GET_LO(ptr) ((unsigned char)(ptr)[0])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
572 #define GET_HI(ptr) ((unsigned char)(ptr)[1])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
573
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
574 DEFINE_UTF16_TO_UTF8(little2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
575 DEFINE_UTF16_TO_UTF16(little2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
576
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
577 #undef SET2
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
578 #undef GET_LO
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
579 #undef GET_HI
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
580
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
581 #define SET2(ptr, ch) \
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
582 (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch) & 0xFF)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
583 #define GET_LO(ptr) ((unsigned char)(ptr)[1])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
584 #define GET_HI(ptr) ((unsigned char)(ptr)[0])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
585
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
586 DEFINE_UTF16_TO_UTF8(big2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
587 DEFINE_UTF16_TO_UTF16(big2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
588
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
589 #undef SET2
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
590 #undef GET_LO
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
591 #undef GET_HI
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
592
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Helpers for 2-byte units stored low byte first: p[0] is the low byte and
   p[1] the high byte.  Every macro argument is parenthesized at each use so
   that expression arguments (ternaries, pointer arithmetic, ...) expand
   with the intended precedence. */

/* Byte type of the unit at p: looked up in ENC's type table when the high
   byte is zero, otherwise computed by unicode_byte_type(). */
#define LITTLE2_BYTE_TYPE(enc, p) \
 ((p)[1] == 0 \
  ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \
  : unicode_byte_type((p)[1], (p)[0]))
/* The low byte when the high byte is zero, otherwise -1. */
#define LITTLE2_BYTE_TO_ASCII(enc, p) ((p)[1] == 0 ? (p)[0] : -1)
/* Non-zero when the unit at p has a zero high byte and low byte equal to c. */
#define LITTLE2_CHAR_MATCHES(enc, p, c) ((p)[1] == 0 && (p)[0] == (c))
/* Name / name-start tests via UCS2_GET_NAMING with (high, low) bytes. */
#define LITTLE2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[1], (unsigned char)(p)[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[1], (unsigned char)(p)[0])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
603
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
604 #ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
605
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
606 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
607 int little2_byteType(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
608 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
609 return LITTLE2_BYTE_TYPE(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
610 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
611
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
612 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
613 int little2_byteToAscii(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
614 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
615 return LITTLE2_BYTE_TO_ASCII(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
616 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
617
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
618 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
619 int little2_charMatches(const ENCODING *enc, const char *p, int c)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
620 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
621 return LITTLE2_CHAR_MATCHES(enc, p, c);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
622 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
623
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
624 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
625 int little2_isNameMin(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
626 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
627 return LITTLE2_IS_NAME_CHAR_MINBPC(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
628 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
629
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
630 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
631 int little2_isNmstrtMin(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
632 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
633 return LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
634 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
635
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* In XML_MIN_SIZE builds the little2 encodings use VTABLE1 followed by the
   little2 UTF-8 and UTF-16 converters as their VTABLE initializer list. */
#undef VTABLE
#define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
638
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
639 #else /* not XML_MIN_SIZE */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
640
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
641 #undef PREFIX
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
642 #define PREFIX(ident) little2_ ## ident
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
643 #define MINBPC(enc) 2
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
644 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
645 #define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
414
ec86d759ed54 Trailing whitespace cleanup
Mikael Berthe <mikael@lilotux.net>
parents: 237
diff changeset
646 #define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(enc, p)
25
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
647 #define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(enc, p, c)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
648 #define IS_NAME_CHAR(enc, p, n) 0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
649 #define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(enc, p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
650 #define IS_NMSTRT_CHAR(enc, p, n) (0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
651 #define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
652
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
653 #include "xmltok_impl_c.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
654
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
655 #undef MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
656 #undef BYTE_TYPE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
657 #undef BYTE_TO_ASCII
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
658 #undef CHAR_MATCHES
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
659 #undef IS_NAME_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
660 #undef IS_NAME_CHAR_MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
661 #undef IS_NMSTRT_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
662 #undef IS_NMSTRT_CHAR_MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
663 #undef IS_INVALID_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
664
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
665 #endif /* not XML_MIN_SIZE */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
666
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
667 #ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
668
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
669 static const struct normal_encoding little2_encoding_ns = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
670 { VTABLE, 2, 0,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
671 #if XML_BYTE_ORDER == 12
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
672 1
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
673 #else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
674 0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
675 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
676 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
677 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
678 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
679 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
680 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
681 STANDARD_VTABLE(little2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
682 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
683
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
684 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
685
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
686 static const struct normal_encoding little2_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
687 { VTABLE, 2, 0,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
688 #if XML_BYTE_ORDER == 12
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
689 1
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
690 #else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
691 0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
692 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
693 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
694 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
695 #define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
696 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
697 #undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
698 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
699 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
700 STANDARD_VTABLE(little2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
701 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
702
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
703 #if XML_BYTE_ORDER != 21
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
704
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
705 #ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
706
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
707 static const struct normal_encoding internal_little2_encoding_ns = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
708 { VTABLE, 2, 0, 1 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
709 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
710 #include "iasciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
711 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
712 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
713 STANDARD_VTABLE(little2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
714 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
715
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
716 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
717
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
718 static const struct normal_encoding internal_little2_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
719 { VTABLE, 2, 0, 1 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
720 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
721 #define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
722 #include "iasciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
723 #undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
724 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
725 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
726 STANDARD_VTABLE(little2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
727 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
728
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
729 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
730
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
731
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Helpers for two-byte, high-byte-first input: p addresses a two-byte unit
   in which p[0] must be zero for p[1] to be treated as a single-byte value. */

/* Byte type of the unit at p: when p[0] is zero the type comes from the
   encoding's type[] table indexed by p[1]; otherwise it is computed by
   unicode_byte_type(p[0], p[1]).  The cast keeps the const qualifier
   because the table is only read. */
#define BIG2_BYTE_TYPE(enc, p) \
  ((p)[0] == 0 \
   ? ((const struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \
   : unicode_byte_type((p)[0], (p)[1]))

/* Yields p[1] when p[0] is zero, otherwise -1.  `enc` is unused. */
#define BIG2_BYTE_TO_ASCII(enc, p) ((p)[0] == 0 ? (p)[1] : -1)

/* Nonzero when p[0] is zero and p[1] equals c.  Both p and c are
   parenthesized so that expression arguments expand safely. */
#define BIG2_CHAR_MATCHES(enc, p, c) ((p)[0] == 0 && (p)[1] == (c))

/* Name-character lookup: hands p[0] and then p[1] (as unsigned char) to
   UCS2_GET_NAMING together with the namePages table. */
#define BIG2_IS_NAME_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])

/* Name-start-character lookup: same as above but with nmstrtPages. */
#define BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p) \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
742
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
743 #ifdef XML_MIN_SIZE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
744
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
745 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
746 int big2_byteType(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
747 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
748 return BIG2_BYTE_TYPE(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
749 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
750
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
751 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
752 int big2_byteToAscii(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
753 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
754 return BIG2_BYTE_TO_ASCII(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
755 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
756
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
757 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
758 int big2_charMatches(const ENCODING *enc, const char *p, int c)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
759 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
760 return BIG2_CHAR_MATCHES(enc, p, c);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
761 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
762
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
763 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
764 int big2_isNameMin(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
765 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
766 return BIG2_IS_NAME_CHAR_MINBPC(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
767 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
768
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
769 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
770 int big2_isNmstrtMin(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
771 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
772 return BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
773 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
774
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* In XML_MIN_SIZE builds the big2 encodings use VTABLE1 followed by the
   big2 UTF-8 and UTF-16 converters as their VTABLE initializer list. */
#undef VTABLE
#define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
777
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
778 #else /* not XML_MIN_SIZE */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
779
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
780 #undef PREFIX
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
781 #define PREFIX(ident) big2_ ## ident
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
782 #define MINBPC(enc) 2
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
783 /* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
784 #define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
414
ec86d759ed54 Trailing whitespace cleanup
Mikael Berthe <mikael@lilotux.net>
parents: 237
diff changeset
785 #define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(enc, p)
25
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
786 #define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(enc, p, c)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
787 #define IS_NAME_CHAR(enc, p, n) 0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
788 #define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(enc, p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
789 #define IS_NMSTRT_CHAR(enc, p, n) (0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
790 #define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
791
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
792 #include "xmltok_impl_c.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
793
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
794 #undef MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
795 #undef BYTE_TYPE
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
796 #undef BYTE_TO_ASCII
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
797 #undef CHAR_MATCHES
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
798 #undef IS_NAME_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
799 #undef IS_NAME_CHAR_MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
800 #undef IS_NMSTRT_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
801 #undef IS_NMSTRT_CHAR_MINBPC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
802 #undef IS_INVALID_CHAR
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
803
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
804 #endif /* not XML_MIN_SIZE */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
805
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
806 #ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
807
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
808 static const struct normal_encoding big2_encoding_ns = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
809 { VTABLE, 2, 0,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
810 #if XML_BYTE_ORDER == 21
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
811 1
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
812 #else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
813 0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
814 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
815 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
816 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
817 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
818 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
819 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
820 STANDARD_VTABLE(big2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
821 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
822
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
823 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
824
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
825 static const struct normal_encoding big2_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
826 { VTABLE, 2, 0,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
827 #if XML_BYTE_ORDER == 21
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
828 1
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
829 #else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
830 0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
831 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
832 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
833 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
834 #define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
835 #include "asciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
836 #undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
837 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
838 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
839 STANDARD_VTABLE(big2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
840 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
841
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
842 #if XML_BYTE_ORDER != 12
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
843
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
844 #ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
845
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
846 static const struct normal_encoding internal_big2_encoding_ns = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
847 { VTABLE, 2, 0, 1 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
848 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
849 #include "iasciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
850 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
851 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
852 STANDARD_VTABLE(big2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
853 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
854
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
855 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
856
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
857 static const struct normal_encoding internal_big2_encoding = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
858 { VTABLE, 2, 0, 1 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
859 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
860 #define BT_COLON BT_NMSTRT
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
861 #include "iasciitab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
862 #undef BT_COLON
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
863 #include "latin1tab.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
864 },
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
865 STANDARD_VTABLE(big2_)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
866 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
867
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
868 #endif
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
869
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
870 #undef PREFIX
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
871
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Compare two NUL-terminated strings for equality, treating the letters
   'a'..'z' as equal to their upper-case forms.  Only that range is folded;
   no locale-dependent function is involved.
   Returns 1 when the strings match under that folding, 0 otherwise. */
static
int streqci(const char *s1, const char *s2)
{
  for (;;) {
    char c1 = *s1++;
    char c2 = *s2++;
    if ('a' <= c1 && c1 <= 'z') {
      c1 += 'A' - 'a';
    }
    if ('a' <= c2 && c2 <= 'z') {
      c2 += 'A' - 'a';
    }
    if (c1 != c2) {
      return 0;
    }
    /* c1 == c2 here, so a NUL means both strings ended together. */
    if (!c1) {
      break;
    }
  }
  return 1;
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
889
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
890 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
891 void initUpdatePosition(const ENCODING *enc, const char *ptr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
892 const char *end, POSITION *pos)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
893 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
894 normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
895 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
896
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
897 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
898 int toAscii(const ENCODING *enc, const char *ptr, const char *end)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
899 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
900 char buf[1];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
901 char *p = buf;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
902 XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
903 if (p == buf)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
904 return -1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
905 else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
906 return buf[0];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
907 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
908
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Returns 1 when c is one of the four values 0x20 (space), 0xD (carriage
   return), 0xA (line feed) or 0x9 (tab); returns 0 for every other value,
   including -1. */
static
int isSpace(int c)
{
  switch (c) {
  case 0x20:
  case 0xD:
  case 0xA:
  case 0x9:
    return 1;
  default:
    return 0;
  }
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
921
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
922 /* Return 1 if there's just optional white space
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
923 or there's an S followed by name=val. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
924 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
925 int parsePseudoAttribute(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
926 const char *ptr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
927 const char *end,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
928 const char **namePtr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
929 const char **valPtr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
930 const char **nextTokPtr)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
931 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
932 int c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
933 char open;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
934 if (ptr == end) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
935 *namePtr = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
936 return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
937 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
938 if (!isSpace(toAscii(enc, ptr, end))) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
939 *nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
940 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
941 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
942 do {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
943 ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
944 } while (isSpace(toAscii(enc, ptr, end)));
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
945 if (ptr == end) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
946 *namePtr = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
947 return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
948 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
949 *namePtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
950 for (;;) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
951 c = toAscii(enc, ptr, end);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
952 if (c == -1) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
953 *nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
954 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
955 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
956 if (c == '=')
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
957 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
958 if (isSpace(c)) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
959 do {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
960 ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
961 } while (isSpace(c = toAscii(enc, ptr, end)));
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
962 if (c != '=') {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
963 *nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
964 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
965 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
966 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
967 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
968 ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
969 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
970 if (ptr == *namePtr) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
971 *nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
972 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
973 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
974 ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
975 c = toAscii(enc, ptr, end);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
976 while (isSpace(c)) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
977 ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
978 c = toAscii(enc, ptr, end);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
979 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
980 if (c != '"' && c != '\'') {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
981 *nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
982 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
983 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
984 open = c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
985 ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
986 *valPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
987 for (;; ptr += enc->minBytesPerChar) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
988 c = toAscii(enc, ptr, end);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
989 if (c == open)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
990 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
991 if (!('a' <= c && c <= 'z')
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
992 && !('A' <= c && c <= 'Z')
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
993 && !('0' <= c && c <= '9')
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
994 && c != '.'
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
995 && c != '-'
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
996 && c != '_') {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
997 *nextTokPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
998 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
999 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1000 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1001 *nextTokPtr = ptr + enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1002 return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1003 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1004
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1005 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1006 int doParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1007 const char *,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1008 const char *),
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1009 int isGeneralTextEntity,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1010 const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1011 const char *ptr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1012 const char *end,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1013 const char **badPtr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1014 const char **versionPtr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1015 const char **encodingName,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1016 const ENCODING **encoding,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1017 int *standalone)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1018 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1019 const char *val = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1020 const char *name = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1021 ptr += 5 * enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1022 end -= 2 * enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1023 if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr) || !name) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1024 *badPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1025 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1026 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1027 if (!XmlNameMatchesAscii(enc, name, "version")) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1028 if (!isGeneralTextEntity) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1029 *badPtr = name;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1030 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1031 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1032 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1033 else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1034 if (versionPtr)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1035 *versionPtr = val;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1036 if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1037 *badPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1038 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1039 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1040 if (!name) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1041 if (isGeneralTextEntity) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1042 /* a TextDecl must have an EncodingDecl */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1043 *badPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1044 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1045 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1046 return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1047 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1048 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1049 if (XmlNameMatchesAscii(enc, name, "encoding")) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1050 int c = toAscii(enc, val, end);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1051 if (!('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z')) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1052 *badPtr = val;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1053 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1054 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1055 if (encodingName)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1056 *encodingName = val;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1057 if (encoding)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1058 *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1059 if (!parsePseudoAttribute(enc, ptr, end, &name, &val, &ptr)) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1060 *badPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1061 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1062 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1063 if (!name)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1064 return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1065 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1066 if (!XmlNameMatchesAscii(enc, name, "standalone") || isGeneralTextEntity) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1067 *badPtr = name;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1068 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1069 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1070 if (XmlNameMatchesAscii(enc, val, "yes")) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1071 if (standalone)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1072 *standalone = 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1073 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1074 else if (XmlNameMatchesAscii(enc, val, "no")) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1075 if (standalone)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1076 *standalone = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1077 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1078 else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1079 *badPtr = val;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1080 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1081 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1082 while (isSpace(toAscii(enc, ptr, end)))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1083 ptr += enc->minBytesPerChar;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1084 if (ptr != end) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1085 *badPtr = ptr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1086 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1087 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1088 return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1089 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1090
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1091 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1092 int checkCharRefNumber(int result)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1093 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1094 switch (result >> 8) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1095 case 0xD8: case 0xD9: case 0xDA: case 0xDB:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1096 case 0xDC: case 0xDD: case 0xDE: case 0xDF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1097 return -1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1098 case 0:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1099 if (latin1_encoding.type[result] == BT_NONXML)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1100 return -1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1101 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1102 case 0xFF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1103 if (result == 0xFFFE || result == 0xFFFF)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1104 return -1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1105 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1106 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1107 return result;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1108 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1109
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1110 int XmlUtf8Encode(int c, char *buf)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1111 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1112 enum {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1113 /* minN is minimum legal resulting value for N byte sequence */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1114 min2 = 0x80,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1115 min3 = 0x800,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1116 min4 = 0x10000
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1117 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1118
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1119 if (c < 0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1120 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1121 if (c < min2) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1122 buf[0] = (c | UTF8_cval1);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1123 return 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1124 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1125 if (c < min3) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1126 buf[0] = ((c >> 6) | UTF8_cval2);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1127 buf[1] = ((c & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1128 return 2;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1129 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1130 if (c < min4) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1131 buf[0] = ((c >> 12) | UTF8_cval3);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1132 buf[1] = (((c >> 6) & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1133 buf[2] = ((c & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1134 return 3;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1135 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1136 if (c < 0x110000) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1137 buf[0] = ((c >> 18) | UTF8_cval4);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1138 buf[1] = (((c >> 12) & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1139 buf[2] = (((c >> 6) & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1140 buf[3] = ((c & 0x3f) | 0x80);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1141 return 4;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1142 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1143 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1144 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1145
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Encode character number charNum as UTF-16 code units into buf.

   Returns 1 when a single unit was written (charNum below 0x10000),
   2 when a surrogate pair was written (charNum below 0x110000), and
   0 without touching buf when charNum is negative or too large. */
int XmlUtf16Encode(int charNum, unsigned short *buf)
{
  int offset;

  if (charNum < 0 || charNum >= 0x110000)
    return 0;

  if (charNum < 0x10000) {
    buf[0] = charNum;
    return 1;
  }

  /* Split the 20-bit offset above 0x10000 into a high and a low half. */
  offset = charNum - 0x10000;
  buf[0] = (offset >> 10) + 0xD800;
  buf[1] = (offset & 0x3FF) + 0xDC00;
  return 2;
}
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1162
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1163 struct unknown_encoding {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1164 struct normal_encoding normal;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1165 int (*convert)(void *userData, const char *p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1166 void *userData;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1167 unsigned short utf16[256];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1168 char utf8[256][4];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1169 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1170
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1171 int XmlSizeOfUnknownEncoding()
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1172 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1173 return sizeof(struct unknown_encoding);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1174 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1175
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1176 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1177 int unknown_isName(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1178 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1179 int c = ((const struct unknown_encoding *)enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1180 ->convert(((const struct unknown_encoding *)enc)->userData, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1181 if (c & ~0xFFFF)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1182 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1183 return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1184 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1185
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1186 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1187 int unknown_isNmstrt(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1188 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1189 int c = ((const struct unknown_encoding *)enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1190 ->convert(((const struct unknown_encoding *)enc)->userData, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1191 if (c & ~0xFFFF)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1192 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1193 return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1194 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1195
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1196 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1197 int unknown_isInvalid(const ENCODING *enc, const char *p)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1198 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1199 int c = ((const struct unknown_encoding *)enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1200 ->convert(((const struct unknown_encoding *)enc)->userData, p);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1201 return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1202 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1203
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1204 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1205 void unknown_toUtf8(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1206 const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1207 char **toP, const char *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1208 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1209 char buf[XML_UTF8_ENCODE_MAX];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1210 for (;;) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1211 const char *utf8;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1212 int n;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1213 if (*fromP == fromLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1214 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1215 utf8 = ((const struct unknown_encoding *)enc)->utf8[(unsigned char)**fromP];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1216 n = *utf8++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1217 if (n == 0) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1218 int c = ((const struct unknown_encoding *)enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1219 ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1220 n = XmlUtf8Encode(c, buf);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1221 if (n > toLim - *toP)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1222 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1223 utf8 = buf;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1224 *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1225 - (BT_LEAD2 - 2);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1226 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1227 else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1228 if (n > toLim - *toP)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1229 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1230 (*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1231 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1232 do {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1233 *(*toP)++ = *utf8++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1234 } while (--n != 0);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1235 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1236 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1237
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1238 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1239 void unknown_toUtf16(const ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1240 const char **fromP, const char *fromLim,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1241 unsigned short **toP, const unsigned short *toLim)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1242 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1243 while (*fromP != fromLim && *toP != toLim) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1244 unsigned short c
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1245 = ((const struct unknown_encoding *)enc)->utf16[(unsigned char)**fromP];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1246 if (c == 0) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1247 c = (unsigned short)((const struct unknown_encoding *)enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1248 ->convert(((const struct unknown_encoding *)enc)->userData, *fromP);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1249 *fromP += ((const struct normal_encoding *)enc)->type[(unsigned char)**fromP]
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1250 - (BT_LEAD2 - 2);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1251 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1252 else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1253 (*fromP)++;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1254 *(*toP)++ = c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1255 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1256 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1257
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1258 ENCODING *
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1259 XmlInitUnknownEncoding(void *mem,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1260 int *table,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1261 int (*convert)(void *userData, const char *p),
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1262 void *userData)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1263 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1264 int i;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1265 struct unknown_encoding *e = mem;
883
0aa9015f06df Remove some more libjabber warnings
Mikael Berthe <mikael@lilotux.net>
parents: 414
diff changeset
1266 for (i = 0; i < (int)sizeof(struct normal_encoding); i++)
25
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1267 ((char *)mem)[i] = ((char *)&latin1_encoding)[i];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1268 for (i = 0; i < 128; i++)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1269 if (latin1_encoding.type[i] != BT_OTHER
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1270 && latin1_encoding.type[i] != BT_NONXML
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1271 && table[i] != i)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1272 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1273 for (i = 0; i < 256; i++) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1274 int c = table[i];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1275 if (c == -1) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1276 e->normal.type[i] = BT_MALFORM;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1277 /* This shouldn't really get used. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1278 e->utf16[i] = 0xFFFF;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1279 e->utf8[i][0] = 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1280 e->utf8[i][1] = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1281 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1282 else if (c < 0) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1283 if (c < -4)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1284 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1285 e->normal.type[i] = BT_LEAD2 - (c + 2);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1286 e->utf8[i][0] = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1287 e->utf16[i] = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1288 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1289 else if (c < 0x80) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1290 if (latin1_encoding.type[c] != BT_OTHER
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1291 && latin1_encoding.type[c] != BT_NONXML
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1292 && c != i)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1293 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1294 e->normal.type[i] = latin1_encoding.type[c];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1295 e->utf8[i][0] = 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1296 e->utf8[i][1] = (char)c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1297 e->utf16[i] = c == 0 ? 0xFFFF : c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1298 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1299 else if (checkCharRefNumber(c) < 0) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1300 e->normal.type[i] = BT_NONXML;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1301 /* This shouldn't really get used. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1302 e->utf16[i] = 0xFFFF;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1303 e->utf8[i][0] = 1;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1304 e->utf8[i][1] = 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1305 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1306 else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1307 if (c > 0xFFFF)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1308 return 0;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1309 if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1310 e->normal.type[i] = BT_NMSTRT;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1311 else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1312 e->normal.type[i] = BT_NAME;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1313 else
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1314 e->normal.type[i] = BT_OTHER;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1315 e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1316 e->utf16[i] = c;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1317 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1318 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1319 e->userData = userData;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1320 e->convert = convert;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1321 if (convert) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1322 e->normal.isName2 = unknown_isName;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1323 e->normal.isName3 = unknown_isName;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1324 e->normal.isName4 = unknown_isName;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1325 e->normal.isNmstrt2 = unknown_isNmstrt;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1326 e->normal.isNmstrt3 = unknown_isNmstrt;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1327 e->normal.isNmstrt4 = unknown_isNmstrt;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1328 e->normal.isInvalid2 = unknown_isInvalid;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1329 e->normal.isInvalid3 = unknown_isInvalid;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1330 e->normal.isInvalid4 = unknown_isInvalid;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1331 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1332 e->normal.enc.utf8Convert = unknown_toUtf8;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1333 e->normal.enc.utf16Convert = unknown_toUtf16;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1334 return &(e->normal.enc);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1335 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1336
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
/* Indices of the built-in encodings.  getEncodingIndex and the encodings
   tables must be kept in step with any change made to this enumeration. */
enum {
  UNKNOWN_ENC = -1,
  ISO_8859_1_ENC = 0,
  US_ASCII_ENC = 1,
  UTF_8_ENC = 2,
  UTF_16_ENC = 3,
  UTF_16BE_ENC = 4,
  UTF_16LE_ENC = 5,
  /* must match encodingNames up to here */
  NO_ENC = 6
};
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1350
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1351 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1352 int getEncodingIndex(const char *name)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1353 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1354 static const char *encodingNames[] = {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1355 "ISO-8859-1",
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1356 "US-ASCII",
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1357 "UTF-8",
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1358 "UTF-16",
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1359 "UTF-16BE"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1360 "UTF-16LE",
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1361 };
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1362 int i;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1363 if (name == 0)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1364 return NO_ENC;
883
0aa9015f06df Remove some more libjabber warnings
Mikael Berthe <mikael@lilotux.net>
parents: 414
diff changeset
1365 for (i = 0; i < (int)(sizeof(encodingNames)/sizeof(encodingNames[0])); i++)
25
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1366 if (streqci(name, encodingNames[i]))
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1367 return i;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1368 return UNKNOWN_ENC;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1369 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1370
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1371 /* For binary compatibility, we store the index of the encoding specified
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1372 at initialization in the isUtf16 member. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1373
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1374 #define INIT_ENC_INDEX(enc) ((enc)->initEnc.isUtf16)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1375
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1376 /* This is what detects the encoding.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1377 encodingTable maps from encoding indices to encodings;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1378 INIT_ENC_INDEX(enc) is the index of the external (protocol) specified encoding;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1379 state is XML_CONTENT_STATE if we're parsing an external text entity,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1380 and XML_PROLOG_STATE otherwise.
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1381 */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1382
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1383
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1384 static
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1385 int initScan(const ENCODING **encodingTable,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1386 const INIT_ENCODING *enc,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1387 int state,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1388 const char *ptr,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1389 const char *end,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1390 const char **nextTokPtr)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1391 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1392 const ENCODING **encPtr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1393
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1394 if (ptr == end)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1395 return XML_TOK_NONE;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1396 encPtr = enc->encPtr;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1397 if (ptr + 1 == end) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1398 /* only a single byte available for auto-detection */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1399 /* a well-formed document entity must have more than one byte */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1400 if (state != XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1401 return XML_TOK_PARTIAL;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1402 /* so we're parsing an external text entity... */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1403 /* if UTF-16 was externally specified, then we need at least 2 bytes */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1404 switch (INIT_ENC_INDEX(enc)) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1405 case UTF_16_ENC:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1406 case UTF_16LE_ENC:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1407 case UTF_16BE_ENC:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1408 return XML_TOK_PARTIAL;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1409 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1410 switch ((unsigned char)*ptr) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1411 case 0xFE:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1412 case 0xFF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1413 case 0xEF: /* possibly first byte of UTF-8 BOM */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1414 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1415 && state == XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1416 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1417 /* fall through */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1418 case 0x00:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1419 case 0x3C:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1420 return XML_TOK_PARTIAL;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1421 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1422 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1423 else {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1424 switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1425 case 0xFEFF:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1426 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1427 && state == XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1428 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1429 *nextTokPtr = ptr + 2;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1430 *encPtr = encodingTable[UTF_16BE_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1431 return XML_TOK_BOM;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1432 /* 00 3C is handled in the default case */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1433 case 0x3C00:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1434 if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1435 || INIT_ENC_INDEX(enc) == UTF_16_ENC)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1436 && state == XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1437 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1438 *encPtr = encodingTable[UTF_16LE_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1439 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1440 case 0xFFFE:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1441 if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1442 && state == XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1443 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1444 *nextTokPtr = ptr + 2;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1445 *encPtr = encodingTable[UTF_16LE_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1446 return XML_TOK_BOM;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1447 case 0xEFBB:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1448 /* Maybe a UTF-8 BOM (EF BB BF) */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1449 /* If there's an explicitly specified (external) encoding
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1450 of ISO-8859-1 or some flavour of UTF-16
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1451 and this is an external text entity,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1452 don't look for the BOM,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1453 because it might be a legal data. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1454 if (state == XML_CONTENT_STATE) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1455 int e = INIT_ENC_INDEX(enc);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1456 if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC || e == UTF_16_ENC)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1457 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1458 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1459 if (ptr + 2 == end)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1460 return XML_TOK_PARTIAL;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1461 if ((unsigned char)ptr[2] == 0xBF) {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1462 *encPtr = encodingTable[UTF_8_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1463 return XML_TOK_BOM;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1464 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1465 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1466 default:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1467 if (ptr[0] == '\0') {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1468 /* 0 isn't a legal data character. Furthermore a document entity can only
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1469 start with ASCII characters. So the only way this can fail to be big-endian
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1470 UTF-16 if it it's an external parsed general entity that's labelled as
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1471 UTF-16LE. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1472 if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1473 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1474 *encPtr = encodingTable[UTF_16BE_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1475 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1476 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1477 else if (ptr[1] == '\0') {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1478 /* We could recover here in the case:
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1479 - parsing an external entity
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1480 - second byte is 0
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1481 - no externally specified encoding
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1482 - no encoding declaration
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1483 by assuming UTF-16LE. But we don't, because this would mean when
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1484 presented just with a single byte, we couldn't reliably determine
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1485 whether we needed further bytes. */
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1486 if (state == XML_CONTENT_STATE)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1487 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1488 *encPtr = encodingTable[UTF_16LE_ENC];
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1489 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1490 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1491 break;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1492 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1493 }
237
c8df64f43625 [/trunk] Changeset 250 by mikael
mikael
parents: 25
diff changeset
1494 *encPtr = encodingTable[(int)INIT_ENC_INDEX(enc)];
25
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1495 return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1496 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1497
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1498
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1499 #define NS(x) x
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1500 #define ns(x) x
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1501 #include "xmltok_ns_c.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1502 #undef NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1503 #undef ns
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1504
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1505 #ifdef XML_NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1506
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1507 #define NS(x) x ## NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1508 #define ns(x) x ## _ns
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1509
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1510 #include "xmltok_ns_c.h"
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1511
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1512 #undef NS
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1513 #undef ns
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1514
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1515 ENCODING *
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1516 XmlInitUnknownEncodingNS(void *mem,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1517 int *table,
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1518 int (*convert)(void *userData, const char *p),
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1519 void *userData)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1520 {
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1521 ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1522 if (enc)
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1523 ((struct normal_encoding *)enc)->type[':'] = BT_COLON;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1524 return enc;
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1525 }
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1526
bf3d6e241714 [/trunk] Changeset 41 by mikael
mikael
parents:
diff changeset
1527 #endif /* XML_NS */