// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 2003-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  ucdstrip.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2003feb20
*   created by: Markus W. Scherer
*
*   Simple tool for Unicode Character Database files with semicolon-delimited fields.
*   Removes comments behind data lines but not in others.
*
*   To compile, just call a C compiler/linker with this source file.
*   On Windows: cl ucdstrip.c
*/
24 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
27 
/*
 * Removes a trailing comment from one NUL-terminated line, in place.
 *
 * The comment is taken to start at the last '#' in the line, but only if
 * that '#' is not the first character and is directly preceded by a space
 * or a tab. The white space before the comment is removed as well.
 *
 * Requiring preceding white space keeps comment-only lines that start
 * with '#' intact, protects data like '#' in UCA_Rules.txt, and leaves a
 * comment alone that directly follows the UTF-8 signature byte sequence
 * EF BB BF (U+FEFF "BOM") on the first line of a UTF-8 text file.
 */
static void
strip_trailing_comment(char *line) {
    char *end=strrchr(line, '#');

    if(end==NULL || end==line || (end[-1]!=' ' && end[-1]!='\t')) {
        return;
    }

    /* ignore whitespace before the comment */
    while(end!=line && (end[-1]==' ' || end[-1]=='\t')) {
        --end;
    }
    *end=0;
}

/*
 * Copies standard input to standard output line by line, removing
 * comments behind data lines.
 *
 * Lines are read with fgets() so that input can never overrun the buffer.
 * A line that does not fit into the buffer is copied through unchanged
 * rather than being processed in pieces, because the last '#' of a piece
 * is not necessarily the last '#' of the line.
 *
 * The command-line arguments are not used.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if reading standard input or
 * writing standard output failed.
 */
extern int
main(int argc, const char *argv[]) {
    static char line[2000];

    (void)argc;
    (void)argv;

    while(fgets(line, (int)sizeof(line), stdin)!=NULL) {
        size_t length=strlen(line);

        if(length>0 && line[length-1]=='\n') {
            /* complete line: drop the newline, puts() writes it back */
            line[length-1]=0;
        } else if(length==sizeof(line)-1) {
            /*
             * The buffer is full and holds no newline.
             * Look at the next character to find out whether the line
             * just fits or continues beyond the buffer.
             */
            int c=getchar();
            if(c!=EOF && c!='\n') {
                /* overlong line: pass all of it through unchanged */
                fputs(line, stdout);
                do {
                    putchar(c);
                } while((c=getchar())!=EOF && c!='\n');
                putchar('\n');
                continue;
            }
            /* the line fits exactly; its terminator has been consumed */
        }
        /* else: last line of the input without a trailing newline */

        strip_trailing_comment(line);
        puts(line);
    }

    if(ferror(stdin) || fflush(stdout)!=0 || ferror(stdout)) {
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
56