1 /* Convert ASCII quotations to Unicode quotations.
2 Copyright (C) 2014-2016 Free Software Foundation, Inc.
3 Written by Daiki Ueno <ueno@gnu.org>, 2014.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
22 /* Specification. */
23 #include "filters.h"
24
25 #include "quote.h"
26 #include <stdbool.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include "xalloc.h"
30
/* Terminal escape sequences placed around quoted text when bold output
   is requested.  */
#define BOLD_START "\x1b[1m"
#define BOLD_END "\x1b[0m"

/* Output state passed to convert_quote_callback through its DATA
   argument.  */
struct result
{
  /* Start of the output buffer.  */
  char *output;
  /* Position in the output buffer where the next byte is written.  */
  char *offset;
  /* Whether quoted text is wrapped in BOLD_START / BOLD_END.  */
  bool bold;
};

/* Append LENGTH bytes starting at BYTES to RESULT and advance its write
   position.  Nothing is done when LENGTH is zero, so BYTES is not handed
   to memcpy in that case.  */
static void
append_bytes (struct result *result, const char *bytes, size_t length)
{
  if (length == 0)
    return;

  memcpy (result->offset, bytes, length);
  result->offset += length;
}

/* Append OPENING, the QUOTED_LENGTH bytes at QUOTED, and CLOSING to RESULT.
   OPENING and CLOSING are NUL-terminated strings; the terminator is not
   copied.  When RESULT->bold is set, the quoted bytes are additionally
   surrounded by BOLD_START and BOLD_END.  */
static void
append_quotation (struct result *result,
                  const char *opening, const char *closing,
                  const char *quoted, size_t quoted_length)
{
  append_bytes (result, opening, strlen (opening));
  if (result->bold)
    append_bytes (result, BOLD_START, strlen (BOLD_START));
  append_bytes (result, quoted, quoted_length);
  if (result->bold)
    append_bytes (result, BOLD_END, strlen (BOLD_END));
  append_bytes (result, closing, strlen (closing));
}

/* Callback handed to scan_quoted.  DATA points to a struct result whose
   buffer receives the converted text.

   QUOTE selects the conversion applied to the QUOTED_LENGTH bytes at QUOTED:
     '\0'  the bytes are copied unchanged;
     '"'   the bytes are wrapped in U+201C and U+201D;
     '\''  the bytes are wrapped in U+2018 and U+2019.
   Any other value of QUOTE produces no output.

   No bounds checking is done here; the buffer in DATA must already be
   large enough.  */
static void
convert_quote_callback (char quote, const char *quoted, size_t quoted_length,
                        void *data)
{
  struct result *result = data;

  switch (quote)
    {
    case '\0':
      append_bytes (result, quoted, quoted_length);
      break;

    case '"':
      /* U+201C: LEFT DOUBLE QUOTATION MARK,
         U+201D: RIGHT DOUBLE QUOTATION MARK */
      append_quotation (result, "\xe2\x80\x9c", "\xe2\x80\x9d",
                        quoted, quoted_length);
      break;

    case '\'':
      /* U+2018: LEFT SINGLE QUOTATION MARK,
         U+2019: RIGHT SINGLE QUOTATION MARK */
      append_quotation (result, "\xe2\x80\x98", "\xe2\x80\x99",
                        quoted, quoted_length);
      break;

    default:
      /* Unrecognized quote kinds emit nothing.  */
      break;
    }
}
97
98 /* This is a direct translation of po/quot.sed and po/boldquot.sed. */
99 static void
convert_ascii_quote_to_unicode(const char * input,size_t input_len,char ** output_p,size_t * output_len_p,bool bold)100 convert_ascii_quote_to_unicode (const char *input, size_t input_len,
101 char **output_p, size_t *output_len_p,
102 bool bold)
103 {
104 const char *p;
105 size_t quote_count;
106 struct result result;
107
108 /* Count the number of quotation characters. */
109 quote_count = 0;
110 for (p = input; p < input + input_len; p++)
111 {
112 size_t len;
113
114 p = strpbrk (p, "`'\"");
115 if (!p)
116 break;
117
118 len = strspn (p, "`'\"");
119 quote_count += len;
120 p += len;
121 }
122
123 /* Large enough. */
124 result.output = XNMALLOC (input_len - quote_count
125 + (bold ? 7 : 3) * quote_count + 1,
126 char);
127 result.offset = result.output;
128 result.bold = bold;
129
130 scan_quoted (input, input_len, convert_quote_callback, &result);
131
132 *output_p = result.output;
133 *output_len_p = result.offset - result.output;
134 }
135
/* Convert the INPUT_LEN bytes at INPUT through
   convert_ascii_quote_to_unicode with bold markup disabled.  The results
   are stored through OUTPUT_P and OUTPUT_LEN_P.  */
void
ascii_quote_to_unicode (const char *input, size_t input_len,
                        char **output_p, size_t *output_len_p)
{
  const bool use_bold = false;

  convert_ascii_quote_to_unicode (input, input_len, output_p, output_len_p,
                                  use_bold);
}
144
/* Convert the INPUT_LEN bytes at INPUT through
   convert_ascii_quote_to_unicode with bold markup enabled.  The results
   are stored through OUTPUT_P and OUTPUT_LEN_P.  */
void
ascii_quote_to_unicode_bold (const char *input, size_t input_len,
                             char **output_p, size_t *output_len_p)
{
  const bool use_bold = true;

  convert_ascii_quote_to_unicode (input, input_len, output_p, output_len_p,
                                  use_bold);
}
153