/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "snapuserd_core.h"

namespace android {
namespace snapshot {

using namespace android;
using namespace android::dm;
using android::base::unique_fd;

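// Each worker gets its own copy of the device paths and a reference to the
// shared SnapshotHandler; file descriptors are opened later in
// InitializeFds().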
Worker::Worker(const std::string& cow_device, const std::string& backing_device,
               const std::string& control_device, const std::string& misc_name,
               const std::string& base_path_merge, std::shared_ptr<SnapshotHandler> snapuserd) {
    cow_device_ = cow_device;
    backing_store_device_ = backing_device;
    control_device_ = control_device;
    misc_name_ = misc_name;
    base_path_merge_ = base_path_merge;
    snapuserd_ = snapuserd;
}

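// Open the backing store, COW, dm-user control and base merge devices.
// All four descriptors must be valid before the worker can serve I/O.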
bool Worker::InitializeFds() {
    backing_store_fd_.reset(open(backing_store_device_.c_str(), O_RDONLY));
    if (backing_store_fd_ < 0) {
        SNAP_PLOG(ERROR) << "Open Failed: " << backing_store_device_;
        return false;
    }

    cow_fd_.reset(open(cow_device_.c_str(), O_RDWR));
    if (cow_fd_ < 0) {
        SNAP_PLOG(ERROR) << "Open Failed: " << cow_device_;
        return false;
    }

    ctrl_fd_.reset(open(control_device_.c_str(), O_RDWR));
    if (ctrl_fd_ < 0) {
        SNAP_PLOG(ERROR) << "Unable to open " << control_device_;
        return false;
    }

    // Base device used by merge thread
    base_path_merge_fd_.reset(open(base_path_merge_.c_str(), O_RDWR));
    if (base_path_merge_fd_ < 0) {
        SNAP_PLOG(ERROR) << "Open Failed: " << base_path_merge_;
        return false;
    }

    return true;
}

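// Clone a COW reader from the shared handler so this worker can parse the
// COW device independently. The reader takes ownership of cow_fd_.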
bool Worker::InitReader() {
    reader_ = snapuserd_->CloneReaderForWorker();

    if (!reader_->InitForMerge(std::move(cow_fd_))) {
        return false;
    }
    return true;
}

// Start the replace operation. This will read the
// internal COW format and, if the block is compressed,
// decompress it.
bool Worker::ProcessReplaceOp(const CowOperation* cow_op) {
    if (!reader_->ReadData(*cow_op, &bufsink_)) {
        SNAP_LOG(ERROR) << "ProcessReplaceOp failed for block " << cow_op->new_block;
        return false;
    }

    return true;
}

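// Read one 4k block from the backing store into the payload buffer. For
// copy ops, cow_op->source is a block number; for xor ops it is already a
// byte offset.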
bool Worker::ReadFromSourceDevice(const CowOperation* cow_op) {
    void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
    if (buffer == nullptr) {
        SNAP_LOG(ERROR) << "ReadFromSourceDevice: Failed to get payload buffer";
        return false;
    }
    SNAP_LOG(DEBUG) << "ReadFromSourceDevice: new-block: " << cow_op->new_block
                    << " Source: " << cow_op->source;
    uint64_t offset = cow_op->source;
    if (cow_op->type == kCowCopyOp) {
        offset *= BLOCK_SZ;
    }
    if (!android::base::ReadFullyAtOffset(backing_store_fd_, buffer, BLOCK_SZ, offset)) {
        std::string op;
        if (cow_op->type == kCowCopyOp) {
            op = "Copy-op";
        } else {
            op = "Xor-op";
        }
        SNAP_PLOG(ERROR) << op << " failed. Read from backing store: " << backing_store_device_
                         << " at block: " << offset / BLOCK_SZ << " offset: " << offset % BLOCK_SZ;
        return false;
    }

    return true;
}

// Start the copy operation. This will read the backing
// block device which is represented by cow_op->source.
bool Worker::ProcessCopyOp(const CowOperation* cow_op) {
    if (!ReadFromSourceDevice(cow_op)) {
        return false;
    }

    return true;
}

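// Start the xor operation. Read the source block from the backing device,
// then apply the xor data from the COW on top of it via the xorsink.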
bool Worker::ProcessXorOp(const CowOperation* cow_op) {
    if (!ReadFromSourceDevice(cow_op)) {
        return false;
    }
    xorsink_.Reset();
    if (!reader_->ReadData(*cow_op, &xorsink_)) {
        SNAP_LOG(ERROR) << "ProcessXorOp failed for block " << cow_op->new_block;
        return false;
    }

    return true;
}

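// Start the zero operation. The target block has no backing data; the
// payload buffer is simply zero-filled.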
bool Worker::ProcessZeroOp() {
    // Zero out the entire block
    void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
    if (buffer == nullptr) {
        SNAP_LOG(ERROR) << "ProcessZeroOp: Failed to get payload buffer";
        return false;
    }

    memset(buffer, 0, BLOCK_SZ);
    return true;
}

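// Serve an ordered op (copy or xor), coordinating with the merge and
// read-ahead (RA) threads. Depending on the merge state of the block, the
// data is read from the base device, processed inline, or taken as-is from
// the read-ahead buffer.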
bool Worker::ProcessOrderedOp(const CowOperation* cow_op) {
    void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
    if (buffer == nullptr) {
        SNAP_LOG(ERROR) << "ProcessOrderedOp: Failed to get payload buffer";
        return false;
    }

    MERGE_GROUP_STATE state = snapuserd_->ProcessMergingBlock(cow_op->new_block, buffer);

    switch (state) {
        case MERGE_GROUP_STATE::GROUP_MERGE_COMPLETED: {
            // Merge is completed for this COW op; just read directly from
            // the base device
            SNAP_LOG(DEBUG) << "Merge-completed: Reading from base device sector: "
                            << ChunkToSector(cow_op->new_block)
                            << " Block-number: " << cow_op->new_block;
            if (!ReadDataFromBaseDevice(ChunkToSector(cow_op->new_block), BLOCK_SZ)) {
                SNAP_LOG(ERROR) << "ReadDataFromBaseDevice failed at sector: "
                                << ChunkToSector(cow_op->new_block) << " after merge-complete.";
                return false;
            }
            return true;
        }
        case MERGE_GROUP_STATE::GROUP_MERGE_PENDING: {
            bool ret;
            if (cow_op->type == kCowCopyOp) {
                ret = ProcessCopyOp(cow_op);
            } else {
                ret = ProcessXorOp(cow_op);
            }

            // I/O is complete - decrement the refcount irrespective of the return
            // status
            snapuserd_->NotifyIOCompletion(cow_op->new_block);
            return ret;
        }
        // We already have the data in the buffer retrieved from the RA thread.
        // Nothing to process further.
        case MERGE_GROUP_STATE::GROUP_MERGE_RA_READY: {
            [[fallthrough]];
        }
        case MERGE_GROUP_STATE::GROUP_MERGE_IN_PROGRESS: {
            return true;
        }
        default: {
            // All other states (GROUP_MERGE_FAILED and GROUP_INVALID) fail the I/O
            return false;
        }
    }

    return false;
}

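// Dispatch a single COW op to the handler for its operation type.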
bool Worker::ProcessCowOp(const CowOperation* cow_op) {
    if (cow_op == nullptr) {
        SNAP_LOG(ERROR) << "ProcessCowOp: Invalid cow_op";
        return false;
    }

    switch (cow_op->type) {
        case kCowReplaceOp: {
            return ProcessReplaceOp(cow_op);
        }

        case kCowZeroOp: {
            return ProcessZeroOp();
        }

        case kCowCopyOp:
            [[fallthrough]];
        case kCowXorOp: {
            return ProcessOrderedOp(cow_op);
        }

        default: {
            SNAP_LOG(ERROR) << "Unknown operation-type found: " << cow_op->type;
        }
    }
    return false;
}

void Worker::InitializeBufsink() {
    // Allocate the buffer which is used to communicate between
    // the daemon and dm-user. The buffer comprises a header and a fixed
    // payload. If dm-user requests a big I/O, the I/O will be broken into
    // chunks of PAYLOAD_BUFFER_SZ.
    size_t buf_size = sizeof(struct dm_user_header) + PAYLOAD_BUFFER_SZ;
    bufsink_.Initialize(buf_size);
}

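// One-time worker initialization: set up the buffer sinks, open the device
// descriptors and clone the COW reader.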
bool Worker::Init() {
    InitializeBufsink();
    xorsink_.Initialize(&bufsink_, BLOCK_SZ);

    if (!InitializeFds()) {
        return false;
    }

    if (!InitReader()) {
        return false;
    }

    return true;
}

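// Worker main loop: serve dm-user I/O requests until ProcessIORequest()
// fails, then release the device and COW descriptors.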
bool Worker::RunThread() {
    SNAP_LOG(INFO) << "Processing snapshot I/O requests...";
    // Start serving IO
    while (true) {
        if (!ProcessIORequest()) {
            break;
        }
    }

    CloseFds();
    reader_->CloseCowFd();

    return true;
}

// Read the header from the dm-user misc device. This gives
// us the sector number for which I/O is issued by the dm-snapshot device.
bool Worker::ReadDmUserHeader() {
    if (!android::base::ReadFully(ctrl_fd_, bufsink_.GetBufPtr(), sizeof(struct dm_user_header))) {
        if (errno != ENOTBLK) {
            SNAP_PLOG(ERROR) << "Control-read failed";
        }

        SNAP_PLOG(DEBUG) << "ReadDmUserHeader failed";
        return false;
    }

    return true;
}

// Send the payload/data back to the dm-user misc device. If header_response
// is true, the dm_user_header is sent along with the payload.
bool Worker::WriteDmUserPayload(size_t size, bool header_response) {
    size_t payload_size = size;
    void* buf = bufsink_.GetPayloadBufPtr();
    if (header_response) {
        payload_size += sizeof(struct dm_user_header);
        buf = bufsink_.GetBufPtr();
    }

    if (!android::base::WriteFully(ctrl_fd_, buf, payload_size)) {
        SNAP_PLOG(ERROR) << "Write to dm-user failed size: " << payload_size;
        return false;
    }

    return true;
}

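// Read up to one block of data from the base (merge) device at the given
// sector into the payload buffer.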
bool Worker::ReadDataFromBaseDevice(sector_t sector, size_t read_size) {
    CHECK(read_size <= BLOCK_SZ);

    void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
    if (buffer == nullptr) {
        SNAP_LOG(ERROR) << "ReadDataFromBaseDevice: Failed to get payload buffer";
        return false;
    }

    loff_t offset = sector << SECTOR_SHIFT;
    if (!android::base::ReadFullyAtOffset(base_path_merge_fd_, buffer, read_size, offset)) {
        SNAP_PLOG(ERROR) << "ReadDataFromBaseDevice failed. fd: " << base_path_merge_fd_
                         << " at sector: " << sector << " size: " << read_size;
        return false;
    }

    return true;
}

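// Serve a block-aligned read request in chunks of up to PAYLOAD_BUFFER_SZ.
// Each 4k block is either routed to the base device (no COW mapping) or
// served by processing the mapped COW op.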
bool Worker::ReadAlignedSector(sector_t sector, size_t sz, bool header_response) {
    struct dm_user_header* header = bufsink_.GetHeaderPtr();
    size_t remaining_size = sz;
    std::vector<std::pair<sector_t, const CowOperation*>>& chunk_vec = snapuserd_->GetChunkVec();
    bool io_error = false;
    int ret = 0;

    do {
        // Process 1MB payload at a time
        size_t read_size = std::min(PAYLOAD_BUFFER_SZ, remaining_size);

        header->type = DM_USER_RESP_SUCCESS;
        size_t total_bytes_read = 0;
        io_error = false;
        bufsink_.ResetBufferOffset();

        while (read_size) {
            // We need to check every 4k block to verify if it is
            // present in the mapping.
            size_t size = std::min(BLOCK_SZ, read_size);

            auto it = std::lower_bound(chunk_vec.begin(), chunk_vec.end(),
                                       std::make_pair(sector, nullptr), SnapshotHandler::compare);
            bool not_found = (it == chunk_vec.end() || it->first != sector);

            if (not_found) {
                // Block not found in map - which means this block was not
                // changed as per the OTA. Just route the I/O to the base
                // device.
                if (!ReadDataFromBaseDevice(sector, size)) {
                    SNAP_LOG(ERROR) << "ReadDataFromBaseDevice failed";
                    header->type = DM_USER_RESP_ERROR;
                }

                ret = size;
            } else {
                // We found the sector in the mapping. Check the type of COW op
                // and process it.
                if (!ProcessCowOp(it->second)) {
                    SNAP_LOG(ERROR) << "ProcessCowOp failed";
                    header->type = DM_USER_RESP_ERROR;
                }

                ret = BLOCK_SZ;
            }

            // Just return the header if it is an error
            if (header->type == DM_USER_RESP_ERROR) {
                if (!RespondIOError(header_response)) {
                    return false;
                }

                io_error = true;
                break;
            }

            read_size -= ret;
            total_bytes_read += ret;
            sector += (ret >> SECTOR_SHIFT);
            bufsink_.UpdateBufferOffset(ret);
        }

        if (!io_error) {
            if (!WriteDmUserPayload(total_bytes_read, header_response)) {
                return false;
            }

            SNAP_LOG(DEBUG) << "WriteDmUserPayload success total_bytes_read: " << total_bytes_read
                            << " header-response: " << header_response
                            << " remaining_size: " << remaining_size;
            header_response = false;
            remaining_size -= total_bytes_read;
        }
    } while (remaining_size > 0 && !io_error);

    return true;
}

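// Serve the unaligned head of a read request. Process the COW op covering
// the requested sector, shift out the leading sectors of the 4k block that
// precede the request, and return the number of bytes served, or -1 on
// failure.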
int Worker::ReadUnalignedSector(
        sector_t sector, size_t size,
        std::vector<std::pair<sector_t, const CowOperation*>>::iterator& it) {
    size_t skip_sector_size = 0;

    SNAP_LOG(DEBUG) << "ReadUnalignedSector: sector " << sector << " size: " << size
                    << " Aligned sector: " << it->first;

    if (!ProcessCowOp(it->second)) {
        SNAP_LOG(ERROR) << "ReadUnalignedSector: " << sector << " failed for size: " << size
                        << " Aligned sector: " << it->first;
        return -1;
    }

    int num_sectors_skip = sector - it->first;

    if (num_sectors_skip > 0) {
        skip_sector_size = num_sectors_skip << SECTOR_SHIFT;
        char* buffer = reinterpret_cast<char*>(bufsink_.GetBufPtr());
        struct dm_user_message* msg = (struct dm_user_message*)(&(buffer[0]));

        if (skip_sector_size == BLOCK_SZ) {
            SNAP_LOG(ERROR) << "Invalid un-aligned IO request at sector: " << sector
                            << " Base-sector: " << it->first;
            return -1;
        }

        memmove(msg->payload.buf, (char*)msg->payload.buf + skip_sector_size,
                (BLOCK_SZ - skip_sector_size));
    }

    bufsink_.ResetBufferOffset();
    return std::min(size, (BLOCK_SZ - skip_sector_size));
}

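// Top-level handler for a read request whose start sector is not block
// aligned. Serves the unaligned head from the mapped COW op or the base
// device, then hands any remaining (now aligned) tail to
// ReadAlignedSector().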
bool Worker::ReadUnalignedSector(sector_t sector, size_t size) {
    struct dm_user_header* header = bufsink_.GetHeaderPtr();
    header->type = DM_USER_RESP_SUCCESS;
    bufsink_.ResetBufferOffset();
    std::vector<std::pair<sector_t, const CowOperation*>>& chunk_vec = snapuserd_->GetChunkVec();

    auto it = std::lower_bound(chunk_vec.begin(), chunk_vec.end(), std::make_pair(sector, nullptr),
                               SnapshotHandler::compare);

    // |-------|-------|-------|
    // 0       1       2       3
    //
    // Block 0 - op 1
    // Block 1 - op 2
    // Block 2 - op 3
    //
    // chunk_vec will have blocks 0, 1 and 2, which map to the relevant COW ops.
    //
    // Each block is 4k bytes. Thus, the last block will span 8 sectors
    // ranging till block 3 (however, block 3 won't be in chunk_vec as
    // it doesn't have any mapping to COW ops). Now, if we get an I/O request
    // for a sector spanning between block 2 and block 3, we need to step back
    // and get hold of the last element.
    //
    // Additionally, we need to make sure that the requested sector is
    // indeed within the range of the final sector. It is perfectly valid
    // to get an I/O request for block 3 and beyond which are not mapped
    // to any COW ops. In that case, we just need to read from the base
    // device.
    bool merge_complete = false;
    bool header_response = true;
    if (it == chunk_vec.end()) {
        if (chunk_vec.size() > 0) {
            // I/O request beyond the last mapped sector
            it = std::prev(chunk_vec.end());
        } else {
            // This can happen when a partition merge is complete but the
            // snapshot state in /metadata is not yet deleted; if the device is
            // rebooted during this window, the subsequent boot attempt will
            // mount the snapshot. However, since the merge was completed, we
            // won't have any mapping to COW ops and chunk_vec will be empty.
            // In that case, mark this as merge_complete and route the I/O to
            // the base device.
            merge_complete = true;
        }
    } else if (it->first != sector) {
        if (it != chunk_vec.begin()) {
            --it;
        }
    } else {
        return ReadAlignedSector(sector, size, header_response);
    }

    loff_t requested_offset = sector << SECTOR_SHIFT;

    loff_t final_offset = 0;
    if (!merge_complete) {
        final_offset = it->first << SECTOR_SHIFT;
    }

    // Since a COW op spans a 4k block, we need to make sure that the requested
    // offset is within the 4k region. Consider the following case:
    //
    // |-------|-------|-------|
    // 0       1       2       3
    //
    // Block 0 - op 1
    // Block 1 - op 2
    //
    // We have an I/O request for a sector between block 2 and block 3. However,
    // we have mapping to COW ops only for block 0 and block 1. Thus, the
    // requested offset in this case is beyond the last mapped COW op size (which
    // is block 1 in this case).

    size_t total_bytes_read = 0;
    size_t remaining_size = size;
    int ret = 0;
    if (!merge_complete && (requested_offset >= final_offset) &&
        (requested_offset - final_offset) < BLOCK_SZ) {
        // Read the partial un-aligned data
        ret = ReadUnalignedSector(sector, remaining_size, it);
        if (ret < 0) {
            SNAP_LOG(ERROR) << "ReadUnalignedSector failed for sector: " << sector
                            << " size: " << size << " it->sector: " << it->first;
            return RespondIOError(header_response);
        }

        remaining_size -= ret;
        total_bytes_read += ret;
        sector += (ret >> SECTOR_SHIFT);

        // Send the data back
        if (!WriteDmUserPayload(total_bytes_read, header_response)) {
            return false;
        }

        header_response = false;
        // If we still have pending data to be processed, this will be aligned I/O
        if (remaining_size) {
            return ReadAlignedSector(sector, remaining_size, header_response);
        }
    } else {
        // This is all about handling an I/O request to be routed to the base
        // device, as the I/O is not mapped to any of the COW ops.
        loff_t aligned_offset = requested_offset;
        // Align to the nearest 4k
        aligned_offset += BLOCK_SZ - 1;
        aligned_offset &= ~(BLOCK_SZ - 1);
        // Find the diff of the aligned offset
        size_t diff_size = aligned_offset - requested_offset;
        CHECK(diff_size <= BLOCK_SZ);
        if (remaining_size < diff_size) {
            if (!ReadDataFromBaseDevice(sector, remaining_size)) {
                return RespondIOError(header_response);
            }
            total_bytes_read += remaining_size;

            if (!WriteDmUserPayload(total_bytes_read, header_response)) {
                return false;
            }
        } else {
            if (!ReadDataFromBaseDevice(sector, diff_size)) {
                return RespondIOError(header_response);
            }

            total_bytes_read += diff_size;

            if (!WriteDmUserPayload(total_bytes_read, header_response)) {
                return false;
            }

            remaining_size -= diff_size;
            size_t num_sectors_read = (diff_size >> SECTOR_SHIFT);
            sector += num_sectors_read;
            CHECK(IsBlockAligned(sector << SECTOR_SHIFT));
            header_response = false;

            // If we still have pending data to be processed, this will be aligned I/O
            return ReadAlignedSector(sector, remaining_size, header_response);
        }
    }

    return true;
}

bool Worker::RespondIOError(bool header_response) {
    struct dm_user_header* header = bufsink_.GetHeaderPtr();
    header->type = DM_USER_RESP_ERROR;
    // This is an issue with the dm-user interface. There
    // is no way to propagate the I/O error back to dm-user
    // if we have already communicated the header back. The header
    // is sent only once, at the beginning; however, I/O can
    // be processed in chunks. If we encounter an I/O error
    // somewhere in the middle of the processing, we can't communicate
    // this back to dm-user.
    //
    // TODO: Fix the interface
    CHECK(header_response);

    if (!WriteDmUserPayload(0, header_response)) {
        return false;
    }

    // There is no need to process further as we have already seen
    // an I/O error
    return true;
}

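// Entry point for a dm-user read request: dispatch based on whether the
// starting sector is block-aligned.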
bool Worker::DmuserReadRequest() {
    struct dm_user_header* header = bufsink_.GetHeaderPtr();

    // Unaligned I/O request
    if (!IsBlockAligned(header->sector << SECTOR_SHIFT)) {
        return ReadUnalignedSector(header->sector, header->len);
    }

    return ReadAlignedSector(header->sector, header->len, true);
}

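// Read one request header from dm-user and service it. A return value of
// false terminates the worker loop.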
bool Worker::ProcessIORequest() {
    struct dm_user_header* header = bufsink_.GetHeaderPtr();

    if (!ReadDmUserHeader()) {
        return false;
    }

    SNAP_LOG(DEBUG) << "Daemon: msg->seq: " << std::dec << header->seq;
    SNAP_LOG(DEBUG) << "Daemon: msg->len: " << std::dec << header->len;
    SNAP_LOG(DEBUG) << "Daemon: msg->sector: " << std::dec << header->sector;
    SNAP_LOG(DEBUG) << "Daemon: msg->type: " << std::dec << header->type;
    SNAP_LOG(DEBUG) << "Daemon: msg->flags: " << std::dec << header->flags;

    switch (header->type) {
        case DM_USER_REQ_MAP_READ: {
            if (!DmuserReadRequest()) {
                return false;
            }
            break;
        }

        case DM_USER_REQ_MAP_WRITE: {
            // TODO: We should not get any write request
            // to dm-user as we mount all partitions
            // as read-only. Need to verify how TRIM commands
            // are handled during mount.
            return false;
        }
    }

    return true;
}

}  // namespace snapshot
}  // namespace android