1 /* $NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $ */
2 /* $FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $ */
3 /* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
4
5 /*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #if HAVE_NBTOOL_CONFIG_H
34 #include "nbtool_config.h"
35 #endif
36
37 #include <sys/cdefs.h>
38 __RCSID("$NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $");
39
40 #include <sys/param.h>
41 #include <sys/types.h>
42 #include <sys/stat.h>
43
44 #ifndef ANDROID
45 #include <bzlib.h>
46 #endif
47 #include <err.h>
48 #include <errno.h>
49 #include <fcntl.h>
50 #include <stddef.h>
51 #include <stdlib.h>
52 #include <string.h>
53 #include <unistd.h>
54 #include <wchar.h>
55 #include <wctype.h>
56 #ifndef ANDROID
57 #include <zlib.h>
58 #endif
59
60 #include "grep.h"
61
62 #define MAXBUFSIZ (32 * 1024)
63 #define LNBUFBUMP 80
64
65 #ifndef ANDROID
66 static gzFile gzbufdesc;
67 static BZFILE* bzbufdesc;
68 #endif
69
70 static unsigned char buffer[MAXBUFSIZ];
71 static unsigned char *bufpos;
72 static size_t bufrem;
73
74 static unsigned char *lnbuf;
75 static size_t lnbuflen;
76
77 static inline int
grep_refill(struct file * f)78 grep_refill(struct file *f)
79 {
80 ssize_t nr;
81 int bzerr;
82
83 bufpos = buffer;
84 bufrem = 0;
85
86 #ifndef ANDROID
87 if (filebehave == FILE_GZIP)
88 nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
89 else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
90 nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
91 switch (bzerr) {
92 case BZ_OK:
93 case BZ_STREAM_END:
94 /* No problem, nr will be okay */
95 break;
96 case BZ_DATA_ERROR_MAGIC:
97 /*
98 * As opposed to gzread(), which simply returns the
99 * plain file data, if it is not in the correct
100 * compressed format, BZ2_bzRead() instead aborts.
101 *
102 * So, just restart at the beginning of the file again,
103 * and use plain reads from now on.
104 */
105 BZ2_bzReadClose(&bzerr, bzbufdesc);
106 bzbufdesc = NULL;
107 if (lseek(f->fd, 0, SEEK_SET) == -1)
108 return (-1);
109 nr = read(f->fd, buffer, MAXBUFSIZ);
110 break;
111 default:
112 /* Make sure we exit with an error */
113 nr = -1;
114 }
115 } else
116 #endif
117 nr = read(f->fd, buffer, MAXBUFSIZ);
118
119 if (nr < 0)
120 return (-1);
121
122 bufrem = nr;
123 return (0);
124 }
125
126 static inline int
grep_lnbufgrow(size_t newlen)127 grep_lnbufgrow(size_t newlen)
128 {
129
130 if (lnbuflen < newlen) {
131 lnbuf = grep_realloc(lnbuf, newlen);
132 lnbuflen = newlen;
133 }
134
135 return (0);
136 }
137
138 char *
grep_fgetln(struct file * f,size_t * lenp)139 grep_fgetln(struct file *f, size_t *lenp)
140 {
141 unsigned char *p;
142 char *ret;
143 size_t len;
144 size_t off;
145 ptrdiff_t diff;
146
147 /* Fill the buffer, if necessary */
148 if (bufrem == 0 && grep_refill(f) != 0)
149 goto error;
150
151 if (bufrem == 0) {
152 /* Return zero length to indicate EOF */
153 *lenp = 0;
154 return ((char *)bufpos);
155 }
156
157 /* Look for a newline in the remaining part of the buffer */
158 if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) {
159 ++p; /* advance over newline */
160 ret = (char *)bufpos;
161 len = p - bufpos;
162 bufrem -= len;
163 bufpos = p;
164 *lenp = len;
165 return (ret);
166 }
167
168 /* We have to copy the current buffered data to the line buffer */
169 for (len = bufrem, off = 0; ; len += bufrem) {
170 /* Make sure there is room for more data */
171 if (grep_lnbufgrow(len + LNBUFBUMP))
172 goto error;
173 memcpy(lnbuf + off, bufpos, len - off);
174 off = len;
175 if (grep_refill(f) != 0)
176 goto error;
177 if (bufrem == 0)
178 /* EOF: return partial line */
179 break;
180 if ((p = memchr(bufpos, line_sep, bufrem)) == NULL)
181 continue;
182 /* got it: finish up the line (like code above) */
183 ++p;
184 diff = p - bufpos;
185 len += diff;
186 if (grep_lnbufgrow(len))
187 goto error;
188 memcpy(lnbuf + off, bufpos, diff);
189 bufrem -= diff;
190 bufpos = p;
191 break;
192 }
193 *lenp = len;
194 return ((char *)lnbuf);
195
196 error:
197 *lenp = 0;
198 return (NULL);
199 }
200
201 static inline struct file *
grep_file_init(struct file * f)202 grep_file_init(struct file *f)
203 {
204
205 #ifndef ANDROID
206 if (filebehave == FILE_GZIP &&
207 (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
208 goto error;
209
210 if (filebehave == FILE_BZIP &&
211 (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
212 goto error;
213 #endif
214
215 /* Fill read buffer, also catches errors early */
216 if (grep_refill(f) != 0)
217 goto error;
218
219 /* Check for binary stuff, if necessary */
220 if (!nulldataflag && binbehave != BINFILE_TEXT &&
221 memchr(bufpos, '\0', bufrem) != NULL)
222 f->binary = true;
223
224 return (f);
225 error:
226 close(f->fd);
227 free(f);
228 return (NULL);
229 }
230
231 /*
232 * Opens a file for processing.
233 */
234 struct file *
grep_open(const char * path)235 grep_open(const char *path)
236 {
237 struct file *f;
238
239 f = grep_malloc(sizeof *f);
240 memset(f, 0, sizeof *f);
241 if (path == NULL) {
242 /* Processing stdin implies --line-buffered. */
243 lbflag = true;
244 f->fd = STDIN_FILENO;
245 } else if ((f->fd = open(path, O_RDONLY)) == -1) {
246 free(f);
247 return (NULL);
248 }
249
250 return (grep_file_init(f));
251 }
252
253 /*
254 * Closes a file.
255 */
256 void
grep_close(struct file * f)257 grep_close(struct file *f)
258 {
259
260 close(f->fd);
261
262 /* Reset read buffer and line buffer */
263 bufpos = buffer;
264 bufrem = 0;
265
266 free(lnbuf);
267 lnbuf = NULL;
268 lnbuflen = 0;
269 }
270