1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2003-2013, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: ucdstrip.c
11 * encoding: US-ASCII
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2003feb20
16 * created by: Markus W. Scherer
17 *
18 * Simple tool for Unicode Character Database files with semicolon-delimited fields.
19 * Removes comments behind data lines but not in others.
20 *
21 * To compile, just call a C compiler/linker with this source file.
22 * On Windows: cl ucdstrip.c
23 */
24
25 #include <stdio.h>
26 #include <string.h>
27
/*
 * Truncates `line` in place at a trailing data-line comment.
 *
 * The comment is taken to start at the last '#' in the line, but only if
 * that '#' is directly preceded by a space or tab. This protects data such
 * as a literal '#' in UCA_Rules.txt, whole-line comments that begin at
 * column 0, and a comment that directly follows the UTF-8 signature byte
 * sequence EF BB BF (U+FEFF "BOM") on the first line of a UTF-8 text file.
 * The white space between the data and the comment is removed as well.
 *
 * line: NUL-terminated string without a line terminator; modified in place.
 */
static void
stripComment(char *line) {
    char *end=strrchr(line, '#');

    if(end==NULL || end==line) {
        return; /* no '#' at all, or the whole line is a comment */
    }
    if(end[-1]!=' ' && end[-1]!='\t') {
        return; /* '#' is part of the data, not the start of a comment */
    }
    /* ignore whitespace before the comment */
    while(end!=line && (end[-1]==' ' || end[-1]=='\t')) {
        --end;
    }
    *end=0;
}

/*
 * Copies stdin to stdout line by line, removing comments behind data lines.
 *
 * Lines are read with fgets() so that input can never overflow the line
 * buffer. A line that does not fit into the buffer is copied through
 * unchanged (its comment is not stripped) and reported on stderr.
 *
 * Returns 0 on success, 1 if reading stdin or writing stdout failed.
 */
extern int
main(int argc, const char *argv[]) {
    static char line[2000];
    unsigned long lineNumber=1;
    int inLongLine=0; /* nonzero while copying pieces of an overlong line */

    (void)argc;
    (void)argv;

    while(fgets(line, (int)sizeof(line), stdin)!=NULL) {
        size_t length=strlen(line);
        int hasNewline= length>0 && line[length-1]=='\n';

        if(!hasNewline && length==sizeof(line)-1) {
            /* The buffer filled up before the end of the line was reached. */
            if(!inLongLine) {
                fprintf(stderr,
                        "ucdstrip: line %lu is longer than %lu bytes, copied unchanged\n",
                        lineNumber, (unsigned long)(sizeof(line)-1));
                inLongLine=1;
            }
            fputs(line, stdout);
            continue;
        }

        if(hasNewline) {
            line[length-1]=0; /* puts() below writes the newline again */
        }
        if(inLongLine) {
            /* last piece of an overlong line: pass it through as is */
            inLongLine=0;
        } else {
            stripComment(line);
        }
        puts(line);
        ++lineNumber;
    }

    if(inLongLine) {
        /* input ended inside an overlong line; terminate it like any other */
        putchar('\n');
    }

    if(fflush(stdout)!=0 || ferror(stdin) || ferror(stdout)) {
        return 1;
    }
    return 0;
}
56