• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2001-2004 Brandon Long
3  * All Rights Reserved.
4  *
5  * ClearSilver Templating System
6  *
7  * This code is made available under the terms of the ClearSilver License.
8  * http://www.clearsilver.net/license.hdf
9  *
10  */
11 
12 #include "cs_config.h"
13 
14 #include <stdlib.h>
15 #include <string.h>
16 #include <sys/types.h>
17 #include <regex.h>
18 #include <ctype.h>
19 #include "util/neo_misc.h"
20 #include "util/neo_err.h"
21 #include "util/neo_str.h"
22 #include "html.h"
23 #include "cgi.h"
24 
/*
 * Heuristically decide whether a piece of plain text depends on its
 * whitespace layout.
 *
 *   src   text to scan; exactly slen bytes are read, no terminator is needed
 *   slen  number of bytes in src
 *
 * Returns
 *   2  more than three "ASCII art" punctuation characters were seen without
 *      an ordinary character in between
 *   1  a tab was seen, or an ordinary character was preceded by more than
 *      two counted spaces on its line or by more than two newlines
 *   0  otherwise
 *
 * A space that directly follows a '.' is not counted, so normal sentence
 * spacing does not trigger the heuristic.  '\r' is ignored entirely.
 */
static int has_space_formatting(const char *src, int slen)
{
  static const char art_chars[] = "/\\<>:[]!@#$%^&*()|";
  int spaces = 0;
  int returns = 0;
  int ascii_art = 0;
  int x;

  for (x = 0; x < slen; x++)
  {
    const char c = src[x];

    if (c == '\t') return 1;

    if (c == ' ')
    {
      /* a blank right after a full stop is ordinary sentence spacing */
      if (!(x && (src[x-1] == '.')))
	spaces++;
    }
    else if (c == '\n')
    {
      spaces = 0;
      returns++;
    }
    /* strchr() also matches the terminator of art_chars, so an embedded NUL
     * has to be excluded explicitly or it would count as punctuation */
    else if (c != '\0' && strchr (art_chars, c))
    {
      ascii_art++;
      if (ascii_art > 3) return 2;
    }
    else if (c != '\r')
    {
      /* ordinary character: judge the whitespace that led up to it */
      if (returns > 2) return 1;
      if (spaces > 2) return 1;
      returns = 0;
      spaces = 0;
      ascii_art = 0;
    }
  }

  return 0;
}
63 
64 /*
65 static int has_long_lines (char *s, int l)
66 {
67   char *ptr;
68   int x = 0;
69 
70   while (x < l)
71   {
72     ptr = strchr (s + x, '\n');
73     if (ptr == NULL)
74     {
75       if (l - x > 75) return 1;
76       return 0;
77     }
78     if (ptr - (s + x) > 75) return 1;
79     x = ptr - s + 1;
80   }
81   return 0;
82 }
83 */
84 
/*
 * Conversion is done in two passes.  The first pass uses the regular
 * expressions below to find every URL and e-mail address in the input and
 * records each hit as a struct _parts.  The second pass walks the input
 * again: text outside the recorded parts gets straight text->html escaping,
 * and each recorded part is emitted as a link.
 */
struct _parts {
  int begin;   /* offset of the first byte of the match in the source text */
  int end;     /* offset one past the last byte of the match */
  int type;    /* one of the SC_TYPE_* values */
};

#define SC_TYPE_TEXT  1
#define SC_TYPE_URL   2
#define SC_TYPE_EMAIL 3

/* POSIX extended regular expressions; compiled with REG_ICASE | REG_EXTENDED.
 * The pointers are const-qualified because they refer to string literals. */
static const char *EmailRe = "[^][@:;<>\\\"()[:space:][:cntrl:]]+@[-+a-zA-Z0-9]+\\.[-+a-zA-Z0-9\\.]+[-+a-zA-Z0-9]";
static const char *URLRe = "((http|https|ftp|mailto):(//)?[^[:space:]>\"\t]*|www\\.[-a-z0-9\\.]+)[^[:space:];\t\">]*";
102 
split_and_convert(const char * src,int slen,STRING * out,HTML_CONVERT_OPTS * opts)103 static NEOERR *split_and_convert (const char *src, int slen,
104                                   STRING *out, HTML_CONVERT_OPTS *opts)
105 {
106   NEOERR *err = STATUS_OK;
107   static int compiled = 0;
108   static regex_t email_re, url_re;
109   regmatch_t email_match, url_match;
110   int errcode;
111   char *ptr, *esc;
112   char errbuf[256];
113   struct _parts *parts;
114   int part_count;
115   int part;
116   int x, i;
117   int spaces = 0;
118 
119   if (!compiled)
120   {
121     if ((errcode = regcomp (&email_re, EmailRe, REG_ICASE | REG_EXTENDED)))
122     {
123       regerror (errcode, &email_re, errbuf, sizeof(errbuf));
124       return nerr_raise (NERR_PARSE, "Unable to compile EmailRE: %s", errbuf);
125     }
126     if ((errcode = regcomp (&url_re, URLRe, REG_ICASE | REG_EXTENDED)))
127     {
128       regerror (errcode, &url_re, errbuf, sizeof(errbuf));
129       return nerr_raise (NERR_PARSE, "Unable to compile URLRe: %s", errbuf);
130     }
131     compiled = 1;
132   }
133 
134   part_count = 20;
135   parts = (struct _parts *) malloc (sizeof(struct _parts) * part_count);
136   part = 0;
137 
138   x = 0;
139   if (regexec (&email_re, src+x, 1, &email_match, 0) != 0)
140   {
141     email_match.rm_so = -1;
142     email_match.rm_eo = -1;
143   }
144   else
145   {
146     email_match.rm_so += x;
147     email_match.rm_eo += x;
148   }
149   if (regexec (&url_re, src+x, 1, &url_match, 0) != 0)
150   {
151     url_match.rm_so = -1;
152     url_match.rm_eo = -1;
153   }
154   else
155   {
156     url_match.rm_so += x;
157     url_match.rm_eo += x;
158   }
159   while ((x < slen) && !((email_match.rm_so == -1) && (url_match.rm_so == -1)))
160   {
161     if (part >= part_count)
162     {
163       part_count *= 2;
164       parts = (struct _parts *) realloc (parts, sizeof(struct _parts) * part_count);
165     }
166     if ((url_match.rm_so != -1) && ((email_match.rm_so == -1) || (url_match.rm_so <= email_match.rm_so)))
167     {
168       parts[part].begin = url_match.rm_so;
169       parts[part].end = url_match.rm_eo;
170       parts[part].type = SC_TYPE_URL;
171       x = parts[part].end + 1;
172       part++;
173       if (x < slen)
174       {
175 	if (regexec (&url_re, src+x, 1, &url_match, 0) != 0)
176 	{
177 	  url_match.rm_so = -1;
178 	  url_match.rm_eo = -1;
179 	}
180 	else
181 	{
182 	  url_match.rm_so += x;
183 	  url_match.rm_eo += x;
184 	}
185 	if ((email_match.rm_so != -1) && (x > email_match.rm_so))
186 	{
187 	  if (regexec (&email_re, src+x, 1, &email_match, 0) != 0)
188 	  {
189 	    email_match.rm_so = -1;
190 	    email_match.rm_eo = -1;
191 	  }
192 	  else
193 	  {
194 	    email_match.rm_so += x;
195 	    email_match.rm_eo += x;
196 	  }
197 	}
198       }
199     }
200     else
201     {
202       parts[part].begin = email_match.rm_so;
203       parts[part].end = email_match.rm_eo;
204       parts[part].type = SC_TYPE_EMAIL;
205       x = parts[part].end + 1;
206       part++;
207       if (x < slen)
208       {
209 	if (regexec (&email_re, src+x, 1, &email_match, 0) != 0)
210 	{
211 	  email_match.rm_so = -1;
212 	  email_match.rm_eo = -1;
213 	}
214 	else
215 	{
216 	  email_match.rm_so += x;
217 	  email_match.rm_eo += x;
218 	}
219 	if ((url_match.rm_so != -1) && (x > url_match.rm_so))
220 	{
221 	  if (regexec (&url_re, src+x, 1, &url_match, 0) != 0)
222 	  {
223 	    url_match.rm_so = -1;
224 	    url_match.rm_eo = -1;
225 	  }
226 	  else
227 	  {
228 	    url_match.rm_so += x;
229 	    url_match.rm_eo += x;
230 	  }
231 	}
232       }
233     }
234   }
235 
236   i = 0;
237   x = 0;
238   while (x < slen)
239   {
240     if ((i >= part) || (x < parts[i].begin))
241     {
242       ptr = strpbrk(src + x, "&<>\r\n ");
243       if (ptr == NULL)
244       {
245 	if (spaces)
246 	{
247 	  int sp;
248 	  for (sp = 0; sp < spaces - 1; sp++)
249 	  {
250 	    err = string_append (out, "&nbsp;");
251 	    if (err != STATUS_OK) break;
252 	  }
253 	  if (err != STATUS_OK) break;
254 	  err = string_append_char (out, ' ');
255 	}
256 	spaces = 0;
257 	if (i < part)
258 	{
259 	  err = string_appendn (out, src + x, parts[i].begin - x);
260 	  x = parts[i].begin;
261 	}
262 	else
263 	{
264 	  err = string_append (out, src + x);
265 	  x = slen;
266 	}
267       }
268       else
269       {
270 	if ((i >= part) || ((ptr - src) < parts[i].begin))
271 	{
272 	  if (spaces)
273 	  {
274 	    int sp;
275 	    for (sp = 0; sp < spaces - 1; sp++)
276 	    {
277 	      err = string_append (out, "&nbsp;");
278 	      if (err != STATUS_OK) break;
279 	    }
280 	    if (err != STATUS_OK) break;
281 	    err = string_append_char (out, ' ');
282 	  }
283 	  spaces = 0;
284 	  err = string_appendn (out, src + x, (ptr - src) - x);
285 	  if (err != STATUS_OK) break;
286 	  x = ptr - src;
287 	  if (src[x] == ' ')
288 	  {
289 	    if (opts->space_convert)
290 	    {
291 	      spaces++;
292 	    }
293 	    else
294 	      err = string_append_char (out, ' ');
295 	  }
296 	  else
297 	  {
298 	    if (src[x] != '\n' && spaces)
299 	    {
300 	      int sp;
301 	      for (sp = 0; sp < spaces - 1; sp++)
302 	      {
303 		err = string_append (out, "&nbsp;");
304 		if (err != STATUS_OK) break;
305 	      }
306 	      if (err != STATUS_OK) break;
307 	      err = string_append_char (out, ' ');
308 	    }
309 	    spaces = 0;
310 
311 	    if (src[x] == '&')
312 	      err = string_append (out, "&amp;");
313 	    else if (src[x] == '<')
314 	      err = string_append (out, "&lt;");
315 	    else if (src[x] == '>')
316 	      err = string_append (out, "&gt;");
317 	    else if (src[x] == '\n')
318 	      if (opts->newlines_convert)
319 		err = string_append (out, "<br/>\n");
320 	      else if (x && src[x-1] == '\n')
321 		err = string_append (out, "<p/>\n");
322 	      else
323 		err = string_append_char (out, '\n');
324 	    else if (src[x] != '\r')
325 	      err = nerr_raise (NERR_ASSERT, "src[x] == '%c'", src[x]);
326 	  }
327 	  x++;
328 	}
329 	else
330 	{
331 	  if (spaces)
332 	  {
333 	    int sp;
334 	    for (sp = 0; sp < spaces - 1; sp++)
335 	    {
336 	      err = string_append (out, "&nbsp;");
337 	      if (err != STATUS_OK) break;
338 	    }
339 	    if (err != STATUS_OK) break;
340 	    err = string_append_char (out, ' ');
341 	  }
342 	  spaces = 0;
343 	  err = string_appendn (out, src + x, parts[i].begin - x);
344 	  x = parts[i].begin;
345 	}
346       }
347     }
348     else
349     {
350       if (spaces)
351       {
352 	int sp;
353 	for (sp = 0; sp < spaces - 1; sp++)
354 	{
355 	  err = string_append (out, "&nbsp;");
356 	  if (err != STATUS_OK) break;
357 	}
358 	if (err != STATUS_OK) break;
359 	err = string_append_char (out, ' ');
360       }
361       spaces = 0;
362       if (parts[i].type == SC_TYPE_URL)
363       {
364         char last_char = src[parts[i].end-1];
365         int suffix=0;
366         if (last_char == '.' || last_char == ',') { suffix=1; }
367 	err = string_append (out, " <a ");
368 	if (err != STATUS_OK) break;
369 	if (opts->url_class)
370 	{
371 	    err = string_appendf (out, "class=%s ", opts->url_class);
372 	    if (err) break;
373 	}
374 	if (opts->url_target)
375 	{
376 	  err = string_appendf (out, "target=\"%s\" ", opts->url_target);
377 	  if (err) break;
378 	}
379 	err = string_append(out, "href=\"");
380 	if (err) break;
381 	if (opts->bounce_url)
382 	{
383 	  char *url, *esc_url, *new_url;
384 	  int url_len;
385 	  if (!strncasecmp(src + x, "www.", 4))
386 	  {
387 	    url_len = 7 + parts[i].end - x - suffix;
388 	    url = (char *) malloc(url_len+1);
389 	    if (url == NULL)
390 	    {
391 	      err = nerr_raise(NERR_NOMEM,
392 		  "Unable to allocate memory to convert url");
393 	      break;
394 	    }
395 	    strcpy(url, "http://");
396 	    strncat(url, src + x, parts[i].end - x - suffix);
397 	  }
398 	  else
399 	  {
400 	    url_len = parts[i].end - x - suffix;
401 	    url = (char *) malloc(url_len+1);
402 	    if (url == NULL)
403 	    {
404 	      err = nerr_raise(NERR_NOMEM,
405 		  "Unable to allocate memory to convert url");
406 	      break;
407 	    }
408 	    strncpy(url, src + x, parts[i].end - x - suffix);
409 	    url[url_len] = '\0';
410 	  }
411 	  err = cgi_url_escape(url, &esc_url);
412 	  free(url);
413 	  if (err) {
414 	    free(esc_url);
415 	    break;
416 	  }
417 
418 	  new_url = sprintf_alloc(opts->bounce_url, esc_url);
419 	  free(esc_url);
420 	  if (new_url == NULL)
421 	  {
422 	    err = nerr_raise(NERR_NOMEM, "Unable to allocate memory to convert url");
423 	    break;
424 	  }
425 	  err = string_append (out, new_url);
426 	  free(new_url);
427 	  if (err) break;
428 	}
429 	else
430 	{
431 	  if (!strncasecmp(src + x, "www.", 4))
432 	  {
433 	    err = string_append (out, "http://");
434 	    if (err != STATUS_OK) break;
435 	  }
436 	  err = string_appendn (out, src + x, parts[i].end - x - suffix);
437 	  if (err != STATUS_OK) break;
438 	}
439 	err = string_append (out, "\">");
440 	if (err != STATUS_OK) break;
441         if (opts->link_name) {
442           err = html_escape_alloc((opts->link_name),
443                                   strlen(opts->link_name), &esc);
444         } else {
445           err = html_escape_alloc((src + x), parts[i].end - x - suffix, &esc);
446         }
447 	if (err != STATUS_OK) break;
448 	err = string_append (out, esc);
449 	free(esc);
450 	if (err != STATUS_OK) break;
451 	err = string_append (out, "</a>");
452         if (suffix) {
453             err  = string_appendn(out,src + parts[i].end - 1,1);
454 	    if (err != STATUS_OK) break;
455         }
456       }
457       else /* type == SC_TYPE_EMAIL */
458       {
459 	err = string_append (out, "<a ");
460 	if (err != STATUS_OK) break;
461 	if (opts->mailto_class)
462 	{
463 	    err = string_appendf (out, "class=%s ", opts->mailto_class);
464 	    if (err) break;
465 	}
466 	err = string_append(out, "href=\"mailto:");
467 	if (err) break;
468 	err = string_appendn (out, src + x, parts[i].end - x);
469 	if (err != STATUS_OK) break;
470 	err = string_append (out, "\">");
471 	if (err != STATUS_OK) break;
472 	err = html_escape_alloc(src + x, parts[i].end - x, &esc);
473 	if (err != STATUS_OK) break;
474 	err = string_append (out, esc);
475 	free(esc);
476 	if (err != STATUS_OK) break;
477 	err = string_append (out, "</a>");
478       }
479       x = parts[i].end;
480       i++;
481     }
482     if (err != STATUS_OK) break;
483   }
484   free (parts);
485   return err;
486 }
487 
/*
 * Remove trailing white space from every line of str, in place.
 *
 * For each '\n' found, the blanks immediately in front of it (back to the
 * previous '\n' or the start of the buffer) are removed and str->len is
 * reduced accordingly.  Once no further '\n' is found, all white space at
 * the very end of the buffer is removed as well.
 *
 * The scan now stops correctly at index 0, so a buffer that starts with
 * '\n' no longer loops forever and a non-blank first character is no
 * longer deleted.
 */
static void strip_white_space_end (STRING *str)
{
  int pos = 0;

  while (pos < str->len)
  {
    char *nl = strchr(str->buf + pos, '\n');
    int nl_at;
    int keep;

    if (nl == NULL)
    {
      /* just strip the white space at the end of the string */
      int end = strlen(str->buf);

      /* <ctype.h> functions need an unsigned char value */
      while (end && isspace((unsigned char) str->buf[end - 1]))
	end--;
      str->buf[end] = '\0';
      str->len = end;
      return;
    }

    /* walk back from the newline over the blanks that precede it */
    nl_at = nl - str->buf;
    keep = nl_at;
    while (keep > 0 && str->buf[keep - 1] != '\n' &&
	   isspace((unsigned char) str->buf[keep - 1]))
      keep--;

    if (keep != nl_at)
    {
      /* close the gap; the +1 carries the terminating NUL along */
      memmove (str->buf + keep, nl, str->len - nl_at + 1);
      str->len -= nl_at - keep;
    }
    /* the newline now sits at index keep; continue right after it */
    pos = keep + 1;
  }
}
527 
convert_text_html_alloc(const char * src,int slen,char ** out)528 NEOERR *convert_text_html_alloc (const char *src, int slen,
529                                  char **out)
530 {
531     return nerr_pass(convert_text_html_alloc_options(src, slen, out, NULL));
532 }
533 
convert_text_html_alloc_options(const char * src,int slen,char ** out,HTML_CONVERT_OPTS * opts)534 NEOERR *convert_text_html_alloc_options (const char *src, int slen,
535                                          char **out,
536                                          HTML_CONVERT_OPTS *opts)
537 {
538   NEOERR *err;
539   STRING out_s;
540   int formatting = 0;
541   HTML_CONVERT_OPTS my_opts;
542 
543   string_init(&out_s);
544 
545   if (opts == NULL)
546   {
547     opts = &my_opts;
548     opts->bounce_url = NULL;
549     opts->url_class = NULL;
550     opts->url_target = "_blank";
551     opts->mailto_class = NULL;
552     opts->long_lines = 0;
553     opts->space_convert = 0;
554     opts->newlines_convert = 1;
555     opts->longline_width = 75; /* This hasn't been used in a while, actually */
556     opts->check_ascii_art = 1;
557     opts->link_name = NULL;
558   }
559 
560   do
561   {
562     if  (opts->check_ascii_art)
563     {
564 	formatting = has_space_formatting (src, slen);
565 	if (formatting) opts->space_convert = 1;
566     }
567     if (formatting == 2)
568     {
569       /* Do <pre> formatting */
570       opts->newlines_convert = 1;
571       err = string_append (&out_s, "<tt>");
572       if (err != STATUS_OK) break;
573       err = split_and_convert(src, slen, &out_s, opts);
574       if (err != STATUS_OK) break;
575       err = string_append (&out_s, "</tt>");
576       if (err != STATUS_OK) break;
577       /* Strip white space at end of lines */
578       strip_white_space_end (&out_s);
579     }
580     else
581     {
582       /* int nl = has_long_lines (src, slen); */
583       err = split_and_convert(src, slen, &out_s, opts);
584     }
585   } while (0);
586   if (err != STATUS_OK)
587   {
588     string_clear (&out_s);
589     return nerr_pass (err);
590   }
591   if (out_s.buf == NULL)
592   {
593     *out = strdup("");
594   }
595   else
596   {
597     *out = out_s.buf;
598   }
599   return STATUS_OK;
600 }
601 
html_escape_alloc(const char * src,int slen,char ** out)602 NEOERR *html_escape_alloc (const char *src, int slen,
603                            char **out)
604 {
605   return nerr_pass(neos_html_escape(src, slen, out));
606 }
607 
/*
 * Translate the body of an HTML character reference (the text between '&'
 * and ';') into a single ISO-8859-1 byte.
 *
 *   s  entity body, e.g. "amp", "eacute", "#233" or "#xe9"
 *
 * Returns the byte value, or 0 when the name is unknown, the string is
 * empty, or a numeric reference lies outside 1..255 and therefore has no
 * ISO-8859-1 representation (such values used to be truncated to an
 * unrelated byte).
 */
static unsigned char _expand_amp_8859_1_char (const char *s)
{
  static const struct {
    const char *name;
    unsigned char code;
  } entities[] = {
    { "agrave", 0xe0 }, { "aacute", 0xe1 }, { "acirc",  0xe2 },
    { "atilde", 0xe3 }, { "auml",   0xe4 }, { "aring",  0xe5 },
    { "aelig",  0xe6 }, { "amp",    '&'  },
    { "ccedil", 0xe7 },
    { "egrave", 0xe8 }, { "eacute", 0xe9 }, { "ecirc",  0xea },
    { "euml",   0xeb }, { "eth",    0xf0 },
    { "igrave", 0xec }, { "iacute", 0xed }, { "icirc",  0xee },
    { "iuml",   0xef },
    { "gt",     '>'  },
    { "lt",     '<'  },
    { "ntilde", 0xf1 }, { "nbsp",   ' '  },
    { "ograve", 0xf2 }, { "oacute", 0xf3 }, { "ocirc",  0xf4 },
    { "otilde", 0xf5 }, { "ouml",   0xf6 }, { "oslash", 0xf8 },
    { "quot",   '"'  },
    { "szlig",  0xdf },
    { "thorn",  0xfe },
    { "ugrave", 0xf9 }, { "uacute", 0xfa }, { "ucirc",  0xfb },
    { "uuml",   0xfc },
    { "yacute", 0xfd }
  };
  size_t i;

  if (s[0] == '\0')
    return 0;

  if (s[0] == '#')
  {
    /* numeric reference: "#xHH" is hexadecimal, "#NNN" is decimal */
    long value;

    if (s[1] == 'x')
      value = strtol (s+2, NULL, 16);
    else
      value = strtol (s+1, NULL, 10);
    if (value < 1 || value > 0xff)
      return 0;
    return (unsigned char) value;
  }

  for (i = 0; i < sizeof(entities) / sizeof(entities[0]); i++)
  {
    if (!strcmp(s, entities[i].name))
      return entities[i].code;
  }
  return 0;
}
683 
/*
 * Expand the body of an HTML character reference to text.
 *
 *   amp  entity body without the surrounding '&' and ';'
 *   buf  caller-supplied scratch space; two bytes are written when the
 *        entity maps to a single character
 *
 * Returns buf holding the one-character string when the entity is known,
 * the constant "(C)" for "copy", and the constant "" for anything else.
 */
char *html_expand_amp_8859_1(const char *amp,
                                      char *buf)
{
  unsigned char code = _expand_amp_8859_1_char(amp);

  if (code != '\0')
  {
    buf[0] = (char)code;
    buf[1] = '\0';
    return buf;
  }

  /* no single-byte equivalent: only "copy" has a text replacement */
  if (strcmp(amp, "copy") == 0)
    return "(C)";
  return "";
}
701 
html_strip_alloc(const char * src,int slen,char ** out)702 NEOERR *html_strip_alloc(const char *src, int slen,
703                          char **out)
704 {
705   NEOERR *err = STATUS_OK;
706   STRING out_s;
707   int x = 0;
708   int strip_match = -1;
709   int state = 0;
710   char amp[10];
711   int amp_start = 0;
712   char buf[10];
713   int ampl = 0;
714 
715   string_init(&out_s);
716   err = string_append (&out_s, "");
717   if (err) return nerr_pass (err);
718 
719   while (x < slen)
720   {
721     switch (state) {
722       case 0:
723 	/* Default */
724 	if (src[x] == '&')
725 	{
726 	  state = 3;
727 	  ampl = 0;
728 	  amp_start = x;
729 	}
730 	else if (src[x] == '<')
731 	{
732 	  state = 1;
733 	}
734 	else
735 	{
736 	  if (strip_match == -1)
737 	  {
738 	    err = string_append_char(&out_s, src[x]);
739 	    if (err) break;
740 	  }
741 	}
742 	x++;
743 	break;
744       case 1:
745 	/* Starting TAG */
746 	if (src[x] == '>')
747 	{
748 	  state = 0;
749 	}
750 	else if (src[x] == '/')
751 	{
752 	}
753 	else
754 	{
755 	}
756 	x++;
757 	break;
758       case 2:
759 	/* In TAG */
760 	if (src[x] == '>')
761 	{
762 	  state = 0;
763 	}
764 	x++;
765 	break;
766       case 3:
767 	/* In AMP */
768 	if (src[x] == ';')
769 	{
770 	  amp[ampl] = '\0';
771 	  state = 0;
772 	  err = string_append(&out_s, html_expand_amp_8859_1(amp, buf));
773 	  if (err) break;
774 	}
775 	else
776 	{
777 	  if (ampl < sizeof(amp)-1)
778 	    amp[ampl++] = tolower(src[x]);
779 	  else
780 	  {
781 	    /* broken html... just back up */
782 	    x = amp_start;
783 	    err = string_append_char(&out_s, src[x]);
784 	    if (err) break;
785 	    state = 0;
786 	  }
787 	}
788 	x++;
789 	break;
790     }
791     if (err) break;
792   }
793 
794 
795   if (err)
796   {
797     string_clear (&out_s);
798     return nerr_pass (err);
799   }
800   *out = out_s.buf;
801   return STATUS_OK;
802 }
803