1 /*
2 * "$Id: mxml-entity.c 408 2010-09-19 05:26:46Z mike $"
3 *
4 * Character entity support code for Mini-XML, a small XML-like
5 * file parsing library.
6 *
7 * Copyright 2003-2010 by Michael R Sweet.
8 *
9 * These coded instructions, statements, and computer programs are the
10 * property of Michael R Sweet and are protected by Federal copyright
11 * law. Distribution and use rights are outlined in the file "COPYING"
12 * which should have been included with this file. If this file is
13 * missing or damaged, see the license at:
14 *
15 * http://www.minixml.org/
16 *
17 * Contents:
18 *
19 * mxmlEntityAddCallback() - Add a callback to convert entities to
20 * Unicode.
21 * mxmlEntityGetName() - Get the name that corresponds to the
22 * character value.
23 * mxmlEntityGetValue() - Get the character corresponding to a named
24 * entity.
25 * mxmlEntityRemoveCallback() - Remove a callback.
26 * _mxml_entity_cb() - Lookup standard (X)HTML entities.
27 */
29 /*
30 * Include necessary headers...
31 */
33 #include "mxml-private.h"
36 /*
37 * 'mxmlEntityAddCallback()' - Add a callback to convert entities to Unicode.
38 */
40 int /* O - 0 on success, -1 on failure */
41 mxmlEntityAddCallback(
42 mxml_entity_cb_t cb) /* I - Callback function to add */
43 {
44 _mxml_global_t *global = _mxml_global();
45 /* Global data */
48 if (global->num_entity_cbs < (int)(sizeof(global->entity_cbs) / sizeof(global->entity_cbs[0])))
49 {
50 global->entity_cbs[global->num_entity_cbs] = cb;
51 global->num_entity_cbs ++;
53 return (0);
54 }
55 else
56 {
57 mxml_error("Unable to add entity callback!");
59 return (-1);
60 }
61 }
/*
 * 'mxmlEntityGetName()' - Get the name that corresponds to the character value.
 *
 * Only the ampersand, less-than, greater-than, and double-quote characters
 * map to a named entity here; NULL is returned for every other value.
 */

const char *				/* O - Entity name or NULL */
mxmlEntityGetName(int val)		/* I - Character value */
{
  if (val == '&')
    return ("amp");

  if (val == '<')
    return ("lt");

  if (val == '>')
    return ("gt");

  if (val == '\"')
    return ("quot");

 /*
  * Anything else does not need a named entity...
  */

  return (NULL);
}
93 /*
94 * 'mxmlEntityGetValue()' - Get the character corresponding to a named entity.
95 *
96 * The entity name can also be a numeric constant. -1 is returned if the
97 * name is not known.
98 */
100 int /* O - Character value or -1 on error */
101 mxmlEntityGetValue(const char *name) /* I - Entity name */
102 {
103 int i; /* Looping var */
104 int ch; /* Character value */
105 _mxml_global_t *global = _mxml_global();
106 /* Global data */
109 for (i = 0; i < global->num_entity_cbs; i ++)
110 if ((ch = (global->entity_cbs[i])(name)) >= 0)
111 return (ch);
113 return (-1);
114 }
117 /*
118 * 'mxmlEntityRemoveCallback()' - Remove a callback.
119 */
121 void
122 mxmlEntityRemoveCallback(
123 mxml_entity_cb_t cb) /* I - Callback function to remove */
124 {
125 int i; /* Looping var */
126 _mxml_global_t *global = _mxml_global();
127 /* Global data */
130 for (i = 0; i < global->num_entity_cbs; i ++)
131 if (cb == global->entity_cbs[i])
132 {
133 /*
134 * Remove the callback...
135 */
137 global->num_entity_cbs --;
139 if (i < global->num_entity_cbs)
140 memmove(global->entity_cbs + i, global->entity_cbs + i + 1,
141 (global->num_entity_cbs - i) * sizeof(global->entity_cbs[0]));
143 return;
144 }
145 }
/*
 * '_mxml_entity_cb()' - Lookup standard (X)HTML entities.
 *
 * The name is matched case-sensitively against a built-in table using a
 * binary search.  -1 is returned for a NULL name or a name that is not in
 * the table.
 */

int					/* O - Unicode value or -1 */
_mxml_entity_cb(const char *name)	/* I - Entity name */
{
  int	diff,				/* Difference between names */
	mid,				/* Current entity in search */
	lo,				/* First entity still in range */
	hi;				/* Last entity still in range */
  static const struct
  {
    const char	*name;			/* Entity name */
    int		val;			/* Character value */
  }	entities[] =			/* MUST stay sorted in strcmp() order */
  {
    { "AElig",		198 },
    { "Aacute",		193 },
    { "Acirc",		194 },
    { "Agrave",		192 },
    { "Alpha",		913 },
    { "Aring",		197 },
    { "Atilde",		195 },
    { "Auml",		196 },
    { "Beta",		914 },
    { "Ccedil",		199 },
    { "Chi",		935 },
    { "Dagger",		8225 },
    { "Delta",		916 },
    { "Dstrok",		208 },
    { "ETH",		208 },
    { "Eacute",		201 },
    { "Ecirc",		202 },
    { "Egrave",		200 },
    { "Epsilon",	917 },
    { "Eta",		919 },
    { "Euml",		203 },
    { "Gamma",		915 },
    { "Iacute",		205 },
    { "Icirc",		206 },
    { "Igrave",		204 },
    { "Iota",		921 },
    { "Iuml",		207 },
    { "Kappa",		922 },
    { "Lambda",		923 },
    { "Mu",		924 },
    { "Ntilde",		209 },
    { "Nu",		925 },
    { "OElig",		338 },
    { "Oacute",		211 },
    { "Ocirc",		212 },
    { "Ograve",		210 },
    { "Omega",		937 },
    { "Omicron",	927 },
    { "Oslash",		216 },
    { "Otilde",		213 },
    { "Ouml",		214 },
    { "Phi",		934 },
    { "Pi",		928 },
    { "Prime",		8243 },
    { "Psi",		936 },
    { "Rho",		929 },
    { "Scaron",		352 },
    { "Sigma",		931 },
    { "THORN",		222 },
    { "Tau",		932 },
    { "Theta",		920 },
    { "Uacute",		218 },
    { "Ucirc",		219 },
    { "Ugrave",		217 },
    { "Upsilon",	933 },
    { "Uuml",		220 },
    { "Xi",		926 },
    { "Yacute",		221 },
    { "Yuml",		376 },
    { "Zeta",		918 },
    { "aacute",		225 },
    { "acirc",		226 },
    { "acute",		180 },
    { "aelig",		230 },
    { "agrave",		224 },
    { "alefsym",	8501 },
    { "alpha",		945 },
    { "amp",		'&' },
    { "and",		8743 },
    { "ang",		8736 },
    { "apos",		'\'' },
    { "aring",		229 },
    { "asymp",		8776 },
    { "atilde",		227 },
    { "auml",		228 },
    { "bdquo",		8222 },
    { "beta",		946 },
    { "brkbar",		166 },
    { "brvbar",		166 },
    { "bull",		8226 },
    { "cap",		8745 },
    { "ccedil",		231 },
    { "cedil",		184 },
    { "cent",		162 },
    { "chi",		967 },
    { "circ",		710 },
    { "clubs",		9827 },
    { "cong",		8773 },
    { "copy",		169 },
    { "crarr",		8629 },
    { "cup",		8746 },
    { "curren",		164 },
    { "dArr",		8659 },
    { "dagger",		8224 },
    { "darr",		8595 },
    { "deg",		176 },
    { "delta",		948 },
    { "diams",		9830 },
    { "die",		168 },
    { "divide",		247 },
    { "eacute",		233 },
    { "ecirc",		234 },
    { "egrave",		232 },
    { "empty",		8709 },
    { "emsp",		8195 },
    { "ensp",		8194 },
    { "epsilon",	949 },
    { "equiv",		8801 },
    { "eta",		951 },
    { "eth",		240 },
    { "euml",		235 },
    { "euro",		8364 },
    { "exist",		8707 },
    { "fnof",		402 },
    { "forall",		8704 },
    { "frac12",		189 },
    { "frac14",		188 },
    { "frac34",		190 },
    { "frasl",		8260 },
    { "gamma",		947 },
    { "ge",		8805 },
    { "gt",		'>' },
    { "hArr",		8660 },
    { "harr",		8596 },
    { "hearts",		9829 },
    { "hellip",		8230 },
    { "hibar",		175 },
    { "iacute",		237 },
    { "icirc",		238 },
    { "iexcl",		161 },
    { "igrave",		236 },
    { "image",		8465 },
    { "infin",		8734 },
    { "int",		8747 },
    { "iota",		953 },
    { "iquest",		191 },
    { "isin",		8712 },
    { "iuml",		239 },
    { "kappa",		954 },
    { "lArr",		8656 },
    { "lambda",		955 },
    { "lang",		9001 },
    { "laquo",		171 },
    { "larr",		8592 },
    { "lceil",		8968 },
    { "ldquo",		8220 },
    { "le",		8804 },
    { "lfloor",		8970 },
    { "lowast",		8727 },
    { "loz",		9674 },
    { "lrm",		8206 },
    { "lsaquo",		8249 },
    { "lsquo",		8216 },
    { "lt",		'<' },
    { "macr",		175 },
    { "mdash",		8212 },
    { "micro",		181 },
    { "middot",		183 },
    { "minus",		8722 },
    { "mu",		956 },
    { "nabla",		8711 },
    { "nbsp",		160 },
    { "ndash",		8211 },
    { "ne",		8800 },
    { "ni",		8715 },
    { "not",		172 },
    { "notin",		8713 },
    { "nsub",		8836 },
    { "ntilde",		241 },
    { "nu",		957 },
    { "oacute",		243 },
    { "ocirc",		244 },
    { "oelig",		339 },
    { "ograve",		242 },
    { "oline",		8254 },
    { "omega",		969 },
    { "omicron",	959 },
    { "oplus",		8853 },
    { "or",		8744 },
    { "ordf",		170 },
    { "ordm",		186 },
    { "oslash",		248 },
    { "otilde",		245 },
    { "otimes",		8855 },
    { "ouml",		246 },
    { "para",		182 },
    { "part",		8706 },
    { "permil",		8240 },
    { "perp",		8869 },
    { "phi",		966 },
    { "pi",		960 },
    { "piv",		982 },
    { "plusmn",		177 },
    { "pound",		163 },
    { "prime",		8242 },
    { "prod",		8719 },
    { "prop",		8733 },
    { "psi",		968 },
    { "quot",		'\"' },
    { "rArr",		8658 },
    { "radic",		8730 },
    { "rang",		9002 },
    { "raquo",		187 },
    { "rarr",		8594 },
    { "rceil",		8969 },
    { "rdquo",		8221 },
    { "real",		8476 },
    { "reg",		174 },
    { "rfloor",		8971 },
    { "rho",		961 },
    { "rlm",		8207 },
    { "rsaquo",		8250 },
    { "rsquo",		8217 },
    { "sbquo",		8218 },
    { "scaron",		353 },
    { "sdot",		8901 },
    { "sect",		167 },
    { "shy",		173 },
    { "sigma",		963 },
    { "sigmaf",		962 },
    { "sim",		8764 },
    { "spades",		9824 },
    { "sub",		8834 },
    { "sube",		8838 },
    { "sum",		8721 },
    { "sup",		8835 },
    { "sup1",		185 },
    { "sup2",		178 },
    { "sup3",		179 },
    { "supe",		8839 },
    { "szlig",		223 },
    { "tau",		964 },
    { "there4",		8756 },
    { "theta",		952 },
    { "thetasym",	977 },
    { "thinsp",		8201 },
    { "thorn",		254 },
    { "tilde",		732 },
    { "times",		215 },
    { "trade",		8482 },
    { "uArr",		8657 },
    { "uacute",		250 },
    { "uarr",		8593 },
    { "ucirc",		251 },
    { "ugrave",		249 },
    { "uml",		168 },
    { "upsih",		978 },
    { "upsilon",	965 },
    { "uuml",		252 },
    { "weierp",		8472 },
    { "xi",		958 },
    { "yacute",		253 },
    { "yen",		165 },
    { "yuml",		255 },
    { "zeta",		950 },
    { "zwj",		8205 },
    { "zwnj",		8204 }
  };


 /*
  * A NULL name cannot match anything; passing it to strcmp() would be
  * undefined behavior, so bail out early...
  */

  if (!name)
    return (-1);

 /*
  * Do a binary search for the named entity over the closed range
  * [lo, hi]; the range shrinks by at least one entry per iteration...
  */

  lo = 0;
  hi = (int)(sizeof(entities) / sizeof(entities[0])) - 1;

  while (lo <= hi)
  {
    mid  = lo + (hi - lo) / 2;
    diff = strcmp(name, entities[mid].name);

    if (diff == 0)
      return (entities[mid].val);
    else if (diff < 0)
      hi = mid - 1;
    else
      lo = mid + 1;
  }

 /*
  * Range is empty, so the name is not a known entity...
  */

  return (-1);
}
458 /*
459 * End of "$Id: mxml-entity.c 408 2010-09-19 05:26:46Z mike $".
460 */