• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <fcntl.h>
18 #include <stdlib.h>
19 #include <sys/mman.h>
20 
21 extern "C" {
22     #include <fec.h>
23 }
24 
25 #include "fec_private.h"
26 
27 using rs_unique_ptr = std::unique_ptr<void, decltype(&free_rs_char)>;
28 
29 /* prints a hexdump of `data' using warn(...) */
dump(const char * name,uint64_t value,const uint8_t * data,size_t size)30 static void dump(const char *name, uint64_t value, const uint8_t *data,
31         size_t size)
32 {
33     const int bytes_per_line = 16;
34     char hex[bytes_per_line * 3 + 1];
35     char prn[bytes_per_line + 1];
36 
37     warn("%s (%" PRIu64 ") (%zu bytes):", name ? name : "", value, size);
38 
39     if (!data) {
40         warn("    (null)");
41         return;
42     }
43 
44     for (size_t n = 0; n < size; n += bytes_per_line) {
45         memset(hex, 0, sizeof(hex));
46         memset(prn, 0, sizeof(prn));
47 
48         for (size_t m = 0; m < bytes_per_line; ++m) {
49             if (n + m < size) {
50                 sprintf(&hex[m * 3], "%02x ", data[n + m]);
51 
52                 if (isprint(data[n + m])) {
53                     prn[m] = data[n + m];
54                 } else {
55                     prn[m] = '.';
56                 }
57             } else {
58                 strcpy(&hex[m * 3], "   ");
59             }
60         }
61 
62         warn("    %04zu   %s  %s", n, hex, prn);
63     }
64 }
65 
66 /* checks if `offset' is within a corrupted block */
is_erasure(fec_handle * f,uint64_t offset,const uint8_t * data)67 static inline bool is_erasure(fec_handle *f, uint64_t offset,
68         const uint8_t *data)
69 {
70     if (unlikely(offset >= f->data_size)) {
71         return false;
72     }
73 
74     /* ideally, we would like to know if a specific byte on this block has
75        been corrupted, but knowing whether any of them is can be useful as
76        well, because often the entire block is corrupted */
77 
78     uint64_t n = offset / FEC_BLOCKSIZE;
79 
80     return !verity_check_block(f, &f->verity.hash[n * SHA256_DIGEST_LENGTH],
81                 data);
82 }
83 
84 /* check if `offset' is within a block expected to contain zeros */
is_zero(fec_handle * f,uint64_t offset)85 static inline bool is_zero(fec_handle *f, uint64_t offset)
86 {
87     verity_info *v = &f->verity;
88 
89     if (!v->hash || unlikely(offset >= f->data_size)) {
90         return false;
91     }
92 
93     uint64_t hash_offset = (offset / FEC_BLOCKSIZE) * SHA256_DIGEST_LENGTH;
94 
95     if (unlikely(hash_offset >
96             v->hash_data_blocks * FEC_BLOCKSIZE - SHA256_DIGEST_LENGTH)) {
97         return false;
98     }
99 
100     return !memcmp(v->zero_hash, &v->hash[hash_offset], SHA256_DIGEST_LENGTH);
101 }
102 
103 /* reads and decodes a single block starting from `offset', returns the number
104    of bytes corrected in `errors' */
__ecc_read(fec_handle * f,void * rs,uint8_t * dest,uint64_t offset,bool use_erasures,uint8_t * ecc_data,size_t * errors)105 static int __ecc_read(fec_handle *f, void *rs, uint8_t *dest, uint64_t offset,
106         bool use_erasures, uint8_t *ecc_data, size_t *errors)
107 {
108     check(offset % FEC_BLOCKSIZE == 0);
109     ecc_info *e = &f->ecc;
110 
111     /* reverse interleaving: calculate the RS block that includes the requested
112        offset */
113     uint64_t rsb = offset - (offset / (e->rounds * FEC_BLOCKSIZE)) *
114                         e->rounds * FEC_BLOCKSIZE;
115     int data_index = -1;
116     int erasures[e->rsn];
117     int neras = 0;
118 
119     /* verity is required to check for erasures */
120     check(!use_erasures || f->verity.hash);
121 
122     for (int i = 0; i < e->rsn; ++i) {
123         uint64_t interleaved = fec_ecc_interleave(rsb * e->rsn + i, e->rsn,
124                                     e->rounds);
125 
126         if (interleaved == offset) {
127             data_index = i;
128         }
129 
130         /* to improve our chances of correcting IO errors, initialize the
131            buffer to zeros even if we are going to read to it later */
132         uint8_t bbuf[FEC_BLOCKSIZE] = {0};
133 
134         if (likely(interleaved < e->start) && !is_zero(f, interleaved)) {
135             /* copy raw data to reconstruct the RS block */
136             if (!raw_pread(f, bbuf, FEC_BLOCKSIZE, interleaved)) {
137                 warn("failed to read: %s", strerror(errno));
138 
139                 /* treat errors as corruption */
140                 if (use_erasures && neras <= e->roots) {
141                     erasures[neras++] = i;
142                 }
143             } else if (use_erasures && neras <= e->roots &&
144                             is_erasure(f, interleaved, bbuf)) {
145                 erasures[neras++] = i;
146             }
147         }
148 
149         for (int j = 0; j < FEC_BLOCKSIZE; ++j) {
150             ecc_data[j * FEC_RSM + i] = bbuf[j];
151         }
152     }
153 
154     check(data_index >= 0);
155 
156     size_t nerrs = 0;
157     uint8_t copy[FEC_RSM];
158 
159     for (int i = 0; i < FEC_BLOCKSIZE; ++i) {
160         /* copy parity data */
161         if (!raw_pread(f, &ecc_data[i * FEC_RSM + e->rsn], e->roots,
162                 e->start + (i + rsb) * e->roots)) {
163             error("failed to read ecc data: %s", strerror(errno));
164             return -1;
165         }
166 
167         /* for debugging decoding failures, because decode_rs_char can mangle
168            ecc_data */
169         if (unlikely(use_erasures)) {
170             memcpy(copy, &ecc_data[i * FEC_RSM], FEC_RSM);
171         }
172 
173         /* decode */
174         int rc = decode_rs_char(rs, &ecc_data[i * FEC_RSM], erasures, neras);
175 
176         if (unlikely(rc < 0)) {
177             if (use_erasures) {
178                 error("RS block %" PRIu64 ": decoding failed (%d erasures)",
179                     rsb, neras);
180                 dump("raw RS block", rsb, copy, FEC_RSM);
181             } else if (!f->verity.hash) {
182                 warn("RS block %" PRIu64 ": decoding failed", rsb);
183             } else {
184                 debug("RS block %" PRIu64 ": decoding failed", rsb);
185             }
186 
187             errno = EIO;
188             return -1;
189         } else if (unlikely(rc > 0)) {
190             check(rc <= (use_erasures ? e->roots : e->roots / 2));
191             nerrs += rc;
192         }
193 
194         dest[i] = ecc_data[i * FEC_RSM + data_index];
195     }
196 
197     if (nerrs) {
198         warn("RS block %" PRIu64 ": corrected %zu errors", rsb, nerrs);
199         *errors += nerrs;
200     }
201 
202     return FEC_BLOCKSIZE;
203 }
204 
205 /* initializes RS decoder and allocates memory for interleaving */
ecc_init(fec_handle * f,rs_unique_ptr & rs,std::unique_ptr<uint8_t[]> & ecc_data)206 static int ecc_init(fec_handle *f, rs_unique_ptr& rs,
207         std::unique_ptr<uint8_t[]>& ecc_data)
208 {
209     check(f);
210 
211     rs.reset(init_rs_char(FEC_PARAMS(f->ecc.roots)));
212 
213     if (unlikely(!rs)) {
214         error("failed to initialize RS");
215         errno = ENOMEM;
216         return -1;
217     }
218 
219     ecc_data.reset(new (std::nothrow) uint8_t[FEC_RSM * FEC_BLOCKSIZE]);
220 
221     if (unlikely(!ecc_data)) {
222         error("failed to allocate ecc buffer");
223         errno = ENOMEM;
224         return -1;
225     }
226 
227     return 0;
228 }
229 
230 /* reads `count' bytes from `offset' and corrects possible errors without
231    erasure detection, returning the number of corrected bytes in `errors' */
ecc_read(fec_handle * f,uint8_t * dest,size_t count,uint64_t offset,size_t * errors)232 static ssize_t ecc_read(fec_handle *f, uint8_t *dest, size_t count,
233         uint64_t offset, size_t *errors)
234 {
235     check(f);
236     check(dest);
237     check(offset < f->data_size);
238     check(offset + count <= f->data_size);
239     check(errors);
240 
241     debug("[%" PRIu64 ", %" PRIu64 ")", offset, offset + count);
242 
243     rs_unique_ptr rs(NULL, free_rs_char);
244     std::unique_ptr<uint8_t[]> ecc_data;
245 
246     if (ecc_init(f, rs, ecc_data) == -1) {
247         return -1;
248     }
249 
250     uint64_t curr = offset / FEC_BLOCKSIZE;
251     size_t coff = (size_t)(offset - curr * FEC_BLOCKSIZE);
252     size_t left = count;
253 
254     uint8_t data[FEC_BLOCKSIZE];
255 
256     while (left > 0) {
257         /* there's no erasure detection without verity metadata */
258         if (__ecc_read(f, rs.get(), data, curr * FEC_BLOCKSIZE, false,
259                 ecc_data.get(), errors) == -1) {
260             return -1;
261         }
262 
263         size_t copy = FEC_BLOCKSIZE - coff;
264 
265         if (copy > left) {
266             copy = left;
267         }
268 
269         memcpy(dest, &data[coff], copy);
270 
271         dest += copy;
272         left -= copy;
273         coff = 0;
274         ++curr;
275     }
276 
277     return count;
278 }
279 
280 /* reads `count' bytes from `offset', corrects possible errors with
281    erasure detection, and verifies the integrity of read data using
282    verity hash tree; returns the number of corrections in `errors' */
verity_read(fec_handle * f,uint8_t * dest,size_t count,uint64_t offset,size_t * errors)283 static ssize_t verity_read(fec_handle *f, uint8_t *dest, size_t count,
284         uint64_t offset, size_t *errors)
285 {
286     check(f);
287     check(dest);
288     check(offset < f->data_size);
289     check(offset + count <= f->data_size);
290     check(f->verity.hash);
291     check(errors);
292 
293     debug("[%" PRIu64 ", %" PRIu64 ")", offset, offset + count);
294 
295     rs_unique_ptr rs(NULL, free_rs_char);
296     std::unique_ptr<uint8_t[]> ecc_data;
297 
298     if (f->ecc.start && ecc_init(f, rs, ecc_data) == -1) {
299         return -1;
300     }
301 
302     uint64_t curr = offset / FEC_BLOCKSIZE;
303     size_t coff = (size_t)(offset - curr * FEC_BLOCKSIZE);
304     size_t left = count;
305     uint8_t data[FEC_BLOCKSIZE];
306 
307     uint64_t max_hash_block = (f->verity.hash_data_blocks * FEC_BLOCKSIZE -
308                                 SHA256_DIGEST_LENGTH) / SHA256_DIGEST_LENGTH;
309 
310     while (left > 0) {
311         check(curr <= max_hash_block);
312 
313         uint8_t *hash = &f->verity.hash[curr * SHA256_DIGEST_LENGTH];
314         uint64_t curr_offset = curr * FEC_BLOCKSIZE;
315 
316         bool expect_zeros = is_zero(f, curr_offset);
317 
318         /* if we are in read-only mode and expect to read a zero block,
319            skip reading and just return zeros */
320         if (f->mode & O_RDONLY && expect_zeros) {
321             memset(data, 0, FEC_BLOCKSIZE);
322             goto valid;
323         }
324 
325         /* copy raw data without error correction */
326         if (!raw_pread(f, data, FEC_BLOCKSIZE, curr_offset)) {
327             error("failed to read: %s", strerror(errno));
328             return -1;
329         }
330 
331         if (likely(verity_check_block(f, hash, data))) {
332             goto valid;
333         }
334 
335         /* we know the block is supposed to contain zeros, so return zeros
336            instead of trying to correct it */
337         if (expect_zeros) {
338             memset(data, 0, FEC_BLOCKSIZE);
339             goto corrected;
340         }
341 
342         if (!f->ecc.start) {
343             /* fatal error without ecc */
344             error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64,
345                 offset, offset + count, curr);
346             return -1;
347         } else {
348             debug("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64,
349                 offset, offset + count, curr);
350         }
351 
352         /* try to correct without erasures first, because checking for
353            erasure locations is slower */
354         if (__ecc_read(f, rs.get(), data, curr_offset, false, ecc_data.get(),
355                 errors) == FEC_BLOCKSIZE &&
356             verity_check_block(f, hash, data)) {
357             goto corrected;
358         }
359 
360         /* try to correct with erasures */
361         if (__ecc_read(f, rs.get(), data, curr_offset, true, ecc_data.get(),
362                 errors) == FEC_BLOCKSIZE &&
363             verity_check_block(f, hash, data)) {
364             goto corrected;
365         }
366 
367         error("[%" PRIu64 ", %" PRIu64 "): corrupted block %" PRIu64
368             " (offset %" PRIu64 ") cannot be recovered",
369             offset, offset + count, curr, curr_offset);
370         dump("decoded block", curr, data, FEC_BLOCKSIZE);
371 
372         errno = EIO;
373         return -1;
374 
375 corrected:
376         /* update the corrected block to the file if we are in r/w mode */
377         if (f->mode & O_RDWR &&
378                 !raw_pwrite(f, data, FEC_BLOCKSIZE, curr_offset)) {
379             error("failed to write: %s", strerror(errno));
380             return -1;
381         }
382 
383 valid:
384         size_t copy = FEC_BLOCKSIZE - coff;
385 
386         if (copy > left) {
387             copy = left;
388         }
389 
390         memcpy(dest, &data[coff], copy);
391 
392         dest += copy;
393         left -= copy;
394         coff = 0;
395         ++curr;
396     }
397 
398     return count;
399 }
400 
401 /* sets the internal file position to `offset' relative to `whence' */
fec_seek(struct fec_handle * f,int64_t offset,int whence)402 int fec_seek(struct fec_handle *f, int64_t offset, int whence)
403 {
404     check(f);
405 
406     if (whence == SEEK_SET) {
407         if (offset < 0) {
408             errno = EOVERFLOW;
409             return -1;
410         }
411 
412         f->pos = offset;
413     } else if (whence == SEEK_CUR) {
414         if (offset < 0 && f->pos < (uint64_t)-offset) {
415             errno = EOVERFLOW;
416             return -1;
417         } else if (offset > 0 && (uint64_t)offset > UINT64_MAX - f->pos) {
418             errno = EOVERFLOW;
419             return -1;
420         }
421 
422         f->pos += offset;
423     } else if (whence == SEEK_END) {
424         if (offset >= 0) {
425             errno = ENXIO;
426             return -1;
427         } else if ((uint64_t)-offset > f->size) {
428             errno = EOVERFLOW;
429             return -1;
430         }
431 
432         f->pos = f->size + offset;
433     } else {
434         errno = EINVAL;
435         return -1;
436     }
437 
438     return 0;
439 }
440 
441 /* reads up to `count' bytes starting from the internal file position using
442    error correction and integrity validation, if available */
fec_read(struct fec_handle * f,void * buf,size_t count)443 ssize_t fec_read(struct fec_handle *f, void *buf, size_t count)
444 {
445     ssize_t rc = fec_pread(f, buf, count, f->pos);
446 
447     if (rc > 0) {
448         check(f->pos < UINT64_MAX - rc);
449         f->pos += rc;
450     }
451 
452     return rc;
453 }
454 
455 /* for a file with size `max', returns the number of bytes we can read starting
456    from `offset', up to `count' bytes */
get_max_count(uint64_t offset,size_t count,uint64_t max)457 static inline size_t get_max_count(uint64_t offset, size_t count, uint64_t max)
458 {
459     if (offset >= max) {
460         return 0;
461     } else if (offset > max - count) {
462         return (size_t)(max - offset);
463     }
464 
465     return count;
466 }
467 
468 /* reads `count' bytes from `f->fd' starting from `offset', and copies the
469    data to `buf' */
raw_pread(fec_handle * f,void * buf,size_t count,uint64_t offset)470 bool raw_pread(fec_handle *f, void *buf, size_t count, uint64_t offset)
471 {
472     check(f);
473     check(buf);
474 
475     uint8_t *p = (uint8_t *)buf;
476     size_t remaining = count;
477 
478     while (remaining > 0) {
479         ssize_t n = TEMP_FAILURE_RETRY(pread64(f->fd, p, remaining, offset));
480 
481         if (n <= 0) {
482             return false;
483         }
484 
485         p += n;
486         remaining -= n;
487         offset += n;
488     }
489 
490     return true;
491 }
492 
493 /* writes `count' bytes from `buf' to `f->fd' to a file position `offset' */
raw_pwrite(fec_handle * f,const void * buf,size_t count,uint64_t offset)494 bool raw_pwrite(fec_handle *f, const void *buf, size_t count, uint64_t offset)
495 {
496     check(f);
497     check(buf);
498 
499     const uint8_t *p = (const uint8_t *)buf;
500     size_t remaining = count;
501 
502     while (remaining > 0) {
503         ssize_t n = TEMP_FAILURE_RETRY(pwrite64(f->fd, p, remaining, offset));
504 
505         if (n <= 0) {
506             return false;
507         }
508 
509         p += n;
510         remaining -= n;
511         offset += n;
512     }
513 
514     return true;
515 }
516 
517 /* reads up to `count' bytes starting from `offset' using error correction and
518    integrity validation, if available */
fec_pread(struct fec_handle * f,void * buf,size_t count,uint64_t offset)519 ssize_t fec_pread(struct fec_handle *f, void *buf, size_t count,
520         uint64_t offset)
521 {
522     check(f);
523     check(buf);
524 
525     if (unlikely(offset > UINT64_MAX - count)) {
526         errno = EOVERFLOW;
527         return -1;
528     }
529 
530     if (f->verity.hash) {
531         return process(f, (uint8_t *)buf,
532                     get_max_count(offset, count, f->data_size), offset,
533                     verity_read);
534     } else if (f->ecc.start) {
535         check(f->ecc.start < f->size);
536 
537         count = get_max_count(offset, count, f->data_size);
538         ssize_t rc = process(f, (uint8_t *)buf, count, offset, ecc_read);
539 
540         if (rc >= 0) {
541             return rc;
542         }
543 
544         /* return raw data if pure ecc read fails; due to interleaving
545            the specific blocks the caller wants may still be fine */
546     } else {
547         count = get_max_count(offset, count, f->size);
548     }
549 
550     if (raw_pread(f, buf, count, offset)) {
551         return count;
552     }
553 
554     return -1;
555 }
556