/* wgram - extract word N-grams from text read from a file or stdin. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <getopt.h>

#define PACKAGE "wgram"   /* program name used in help and error messages */
#define VERSION "0.0.4"   /* program version string */
#define MAXLINE 1024      /* size of the line buffer handed to fgets() */
#define MAXGRAM 32        /* largest n-gram length accepted by -n */

/* Print the usage text and terminate the process with exit status exval. */
void print_help(int exval);

main(int argc,char * argv[])14 int main (int argc, char *argv[]) {
15  /* word delimeter for strtok() */
16  char delim[] = ".,:;`/\"+-_(){}[]<>*&^%$#@!?~/|\\=1234567890 \t\n";
17  char line[MAXLINE];     /* input buff, fgets() */
18  char *stray = NULL;     /* returned value by strtok() */
19  char **strarray = NULL; /* array to hold all entrys */
20  int i = 0;              /* general counter */
21  int strcount = 0;       /* number of entrys in pointer array */
22  int N = 3, pos = 0;     /* ngram size, 3 in this case */
23  int opt = 0;            /* holds command line opt nr.. */
24  int word_flag = 0;      /* print only the `raw' words */
25  FILE *fp = stdin;       /* read input from `FILE', default is stdin */
26 
27  while((opt = getopt(argc, argv, "hvn:wf:")) != -1) {
28   switch(opt) {
29    case 'h':
30     print_help(0);
31     break;
32    case 'v':
33   exit(0);
34     break;
35    case 'n':
36     N = atoi(optarg);
37     if(N > MAXGRAM || N < 2) {
38      fprintf(stderr, "%s: Error - Ngram length `%d' out of range `0-%d'\n",
39        PACKAGE, N, MAXGRAM);
40      return 1;
41     }
42     break;
43    case 'w':
44     word_flag = 1;
45     break;
46    case 'f':
47     if(freopen(optarg, "r", fp) == NULL) {
48      fprintf(stderr, "%s: Error - opening `%s'\n", PACKAGE, optarg);
49      return 1;
50     }
51     break;
52    case '?':
53     fprintf(stderr, "%s: Error - No such option: `%c'\n\n", PACKAGE, optopt);
54     print_help(1);
55   } /* switch */
56  } /* while */
57 
58  /* start reading lines from file pointer, add all entrys to **strarray */
59  while((fgets(line, MAXLINE, fp)) != NULL) {
60   if(strlen(line) < 2)
61    continue;
62 
63   stray = strtok(line, delim);
64   while(stray != NULL) {
65    strarray = (char **)realloc(strarray, (strcount + 1) * sizeof(char *));
66    strarray[strcount++] = strdup(stray);
67    stray = strtok(NULL, delim);
68   }
69  }
70 
71  if(word_flag == 0) {
72   /*
73   // print the array of strings, jumping back each time
74   // (N - 1) positions if a whole ngram of words has been printed
75   */
76   for(i = 0, pos = N; i < strcount; i++, pos--) {
77    if(pos == 0) pos = N, i -= (N - 1), printf("\n");
78     printf("%s ", strarray[i]);
79   }
80   printf("\n");
81  } else {
82   /* print raw words */
83   for(i = 0; i < strcount; i++)
84    printf("%s\n", strarray[i]);
85  }
86 
87  /* free the string array */
88  for(i = 0; i < strcount; i++)
89   free(strarray[i]);
90 
91  free(strarray);
92  return 0;
93 }
94 
95 /* status epilepticus .. print help */
print_help(int exval)96 void print_help(int exval) {
97  printf("%s,%s extract N-grams from text data\n", PACKAGE, VERSION);
98  printf("Usage: %s [-h] [-v] [-n INT] [-w] [-f FILE]\n\n", PACKAGE);
99 
100  printf(" -h        print this help and exit\n");
101  printf(" -v        print version and exit\n\n");
102 
103  printf(" -n INT    set ngram length (default=3)\n");
104  printf(" -w        print only the extracted words\n");
105  printf(" -f FILE   read input from `FILE' (default=stdin)\n\n");
106  exit(exval);
107 }
108