1 /* $NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $ */
2 /* $FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $ */
3 /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
4
5 /*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #if HAVE_NBTOOL_CONFIG_H
34 #include "nbtool_config.h"
35 #endif
36
37 #include <sys/cdefs.h>
38 __RCSID("$NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $");
39
40 #include <sys/param.h>
41 #include <sys/types.h>
42 #include <sys/stat.h>
43
44 #ifndef __ANDROID__
45 #include <bzlib.h>
46 #endif
47 #include <err.h>
48 #include <errno.h>
49 #include <fcntl.h>
50 #include <stddef.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include <unistd.h>
54 #include <wchar.h>
55 #include <wctype.h>
56 #ifndef __ANDROID__
57 #include <zlib.h>
58 #endif
59
60 #include "grep.h"
61
62 #define MAXBUFSIZ (32 * 1024)
63 #define LNBUFBUMP 80
64
65 #ifndef __ANDROID__
66 static gzFile gzbufdesc;
67 static BZFILE* bzbufdesc;
68 #endif
69
70 static unsigned char buffer[MAXBUFSIZ];
71 static unsigned char *bufpos;
72 static size_t bufrem;
73
74 static unsigned char *lnbuf;
75 static size_t lnbuflen;
76
77 static inline int
grep_refill(struct file * f)78 grep_refill(struct file *f)
79 {
80 ssize_t nr;
81 #ifndef __ANDROID__
82 int bzerr;
83 #endif
84
85 bufpos = buffer;
86 bufrem = 0;
87
88 #ifndef __ANDROID__
89 if (filebehave == FILE_GZIP)
90 nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
91 else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
92 nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
93 switch (bzerr) {
94 case BZ_OK:
95 case BZ_STREAM_END:
96 /* No problem, nr will be okay */
97 break;
98 case BZ_DATA_ERROR_MAGIC:
99 /*
100 * As opposed to gzread(), which simply returns the
101 * plain file data, if it is not in the correct
102 * compressed format, BZ2_bzRead() instead aborts.
103 *
104 * So, just restart at the beginning of the file again,
105 * and use plain reads from now on.
106 */
107 BZ2_bzReadClose(&bzerr, bzbufdesc);
108 bzbufdesc = NULL;
109 if (lseek(f->fd, 0, SEEK_SET) == -1)
110 return (-1);
111 nr = read(f->fd, buffer, MAXBUFSIZ);
112 break;
113 default:
114 /* Make sure we exit with an error */
115 nr = -1;
116 }
117 } else
118 #endif
119 nr = read(f->fd, buffer, MAXBUFSIZ);
120
121 if (nr < 0)
122 return (-1);
123
124 bufrem = nr;
125 return (0);
126 }
127
128 static inline int
grep_lnbufgrow(size_t newlen)129 grep_lnbufgrow(size_t newlen)
130 {
131
132 if (lnbuflen < newlen) {
133 lnbuf = grep_realloc(lnbuf, newlen);
134 lnbuflen = newlen;
135 }
136
137 return (0);
138 }
139
140 char *
grep_fgetln(struct file * f,size_t * lenp)141 grep_fgetln(struct file *f, size_t *lenp)
142 {
143 unsigned char *p;
144 char *ret;
145 size_t len;
146 size_t off;
147 ptrdiff_t diff;
148
149 /* Fill the buffer, if necessary */
150 if (bufrem == 0 && grep_refill(f) != 0)
151 goto error;
152
153 if (bufrem == 0) {
154 /* Return zero length to indicate EOF */
155 *lenp = 0;
156 return ((char *)bufpos);
157 }
158
159 /* Look for a newline in the remaining part of the buffer */
160 if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) {
161 ++p; /* advance over newline */
162 ret = (char *)bufpos;
163 len = p - bufpos;
164 bufrem -= len;
165 bufpos = p;
166 *lenp = len;
167 return (ret);
168 }
169
170 /* We have to copy the current buffered data to the line buffer */
171 for (len = bufrem, off = 0; ; len += bufrem) {
172 /* Make sure there is room for more data */
173 if (grep_lnbufgrow(len + LNBUFBUMP))
174 goto error;
175 memcpy(lnbuf + off, bufpos, len - off);
176 off = len;
177 if (grep_refill(f) != 0)
178 goto error;
179 if (bufrem == 0)
180 /* EOF: return partial line */
181 break;
182 if ((p = memchr(bufpos, line_sep, bufrem)) == NULL)
183 continue;
184 /* got it: finish up the line (like code above) */
185 ++p;
186 diff = p - bufpos;
187 len += diff;
188 if (grep_lnbufgrow(len))
189 goto error;
190 memcpy(lnbuf + off, bufpos, diff);
191 bufrem -= diff;
192 bufpos = p;
193 break;
194 }
195 *lenp = len;
196 return ((char *)lnbuf);
197
198 error:
199 *lenp = 0;
200 return (NULL);
201 }
202
203 static inline struct file *
grep_file_init(struct file * f)204 grep_file_init(struct file *f)
205 {
206
207 #ifndef __ANDROID__
208 if (filebehave == FILE_GZIP &&
209 (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
210 goto error;
211
212 if (filebehave == FILE_BZIP &&
213 (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
214 goto error;
215 #endif
216
217 /* Fill read buffer, also catches errors early */
218 if (grep_refill(f) != 0)
219 goto error;
220
221 /* Check for binary stuff, if necessary */
222 if (!nulldataflag && binbehave != BINFILE_TEXT &&
223 memchr(bufpos, '\0', bufrem) != NULL)
224 f->binary = true;
225
226 return (f);
227 error:
228 close(f->fd);
229 free(f);
230 return (NULL);
231 }
232
233 /*
234 * Opens a file for processing.
235 */
236 struct file *
grep_open(const char * path)237 grep_open(const char *path)
238 {
239 struct file *f;
240
241 f = grep_malloc(sizeof *f);
242 memset(f, 0, sizeof *f);
243 if (path == NULL) {
244 /* Processing stdin implies --line-buffered. */
245 lbflag = true;
246 f->fd = STDIN_FILENO;
247 } else if ((f->fd = open(path, O_RDONLY)) == -1) {
248 free(f);
249 return (NULL);
250 }
251
252 return (grep_file_init(f));
253 }
254
255 /*
256 * Closes a file.
257 */
258 void
grep_close(struct file * f)259 grep_close(struct file *f)
260 {
261
262 close(f->fd);
263
264 /* Reset read buffer and line buffer */
265 bufpos = buffer;
266 bufrem = 0;
267
268 free(lnbuf);
269 lnbuf = NULL;
270 lnbuflen = 0;
271 }
272