/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "snapuserd_core.h"

namespace android {
namespace snapshot {

using namespace android;
using namespace android::dm;
using android::base::unique_fd;

Worker::Worker(const std::string& cow_device, const std::string& backing_device,
               const std::string& control_device, const std::string& misc_name,
               const std::string& base_path_merge, std::shared_ptr<SnapshotHandler> snapuserd) {
    cow_device_ = cow_device;
    backing_store_device_ = backing_device;
    control_device_ = control_device;
    misc_name_ = misc_name;
    base_path_merge_ = base_path_merge;
    snapuserd_ = snapuserd;
}

bool Worker::InitializeFds() {
    backing_store_fd_.reset(open(backing_store_device_.c_str(), O_RDONLY));
    if (backing_store_fd_ < 0) {
        SNAP_PLOG(ERROR) << "Open Failed: " << backing_store_device_;
        return false;
    }

    cow_fd_.reset(open(cow_device_.c_str(), O_RDWR));
    if (cow_fd_ < 0) {
        SNAP_PLOG(ERROR) << "Open Failed: " << cow_device_;
        return false;
    }

    ctrl_fd_.reset(open(control_device_.c_str(), O_RDWR));
    if (ctrl_fd_ < 0) {
        SNAP_PLOG(ERROR) << "Unable to open " << control_device_;
        return false;
    }

    // Base device used by merge thread
    base_path_merge_fd_.reset(open(base_path_merge_.c_str(), O_RDWR));
    if (base_path_merge_fd_ < 0) {
        SNAP_PLOG(ERROR) << "Open Failed: " << base_path_merge_;
        return false;
    }

    return true;
}

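// Clone the COW reader from the handler so that each worker thread walks the
// COW metadata with its own reader state; InitForMerge() takes ownership of
// the COW fd.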
bool Worker::InitReader() {
    reader_ = snapuserd_->CloneReaderForWorker();

    if (!reader_->InitForMerge(std::move(cow_fd_))) {
        return false;
    }
    return true;
}

// Start the replace operation. This will read the
// internal COW format and, if the block is compressed,
// decompress it.
bool Worker::ProcessReplaceOp(const CowOperation* cow_op) {
    if (!reader_->ReadData(*cow_op, &bufsink_)) {
        SNAP_LOG(ERROR) << "ProcessReplaceOp failed for block " << cow_op->new_block;
        return false;
    }

    return true;
}

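// Read one 4k block from the backing store into the payload buffer. For copy
// ops, cow_op->source holds a block number and is scaled to a byte offset
// below; for xor ops, the value is used as the byte offset directly.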
bool Worker::ReadFromSourceDevice(const CowOperation* cow_op) {
    void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
    if (buffer == nullptr) {
        SNAP_LOG(ERROR) << "ReadFromSourceDevice: Failed to get payload buffer";
        return false;
    }
    SNAP_LOG(DEBUG) << "ReadFromSourceDevice: new-block: " << cow_op->new_block
                    << " Source: " << cow_op->source;
    uint64_t offset = cow_op->source;
    if (cow_op->type == kCowCopyOp) {
        offset *= BLOCK_SZ;
    }
    if (!android::base::ReadFullyAtOffset(backing_store_fd_, buffer, BLOCK_SZ, offset)) {
        std::string op;
        if (cow_op->type == kCowCopyOp) {
            op = "Copy-op";
        } else {
            op = "Xor-op";
        }
        SNAP_PLOG(ERROR) << op << " failed. Read from backing store: " << backing_store_device_
                         << " at block: " << offset / BLOCK_SZ << " offset: " << offset % BLOCK_SZ;
        return false;
    }

    return true;
}

// Start the copy operation. This will read from the backing
// block device at the location given by cow_op->source.
bool Worker::ProcessCopyOp(const CowOperation* cow_op) {
    if (!ReadFromSourceDevice(cow_op)) {
        return false;
    }

    return true;
}

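// Start the xor operation. Read the source block into the payload buffer,
// then have the reader apply the COW xor data over it via xorsink_.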
bool Worker::ProcessXorOp(const CowOperation* cow_op) {
    if (!ReadFromSourceDevice(cow_op)) {
        return false;
    }
    xorsink_.Reset();
    if (!reader_->ReadData(*cow_op, &xorsink_)) {
        SNAP_LOG(ERROR) << "ProcessXorOp failed for block " << cow_op->new_block;
        return false;
    }

    return true;
}

bool Worker::ProcessZeroOp() {
    // Zero out the entire block
    void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
    if (buffer == nullptr) {
        SNAP_LOG(ERROR) << "ProcessZeroOp: Failed to get payload buffer";
        return false;
    }

    memset(buffer, 0, BLOCK_SZ);
    return true;
}

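// Copy and xor ops are "ordered" ops that race with the merge thread; the
// block's merge state decides where the data comes from: the base device if
// the merge already completed, reconstruction via ProcessCopyOp() or
// ProcessXorOp() if the merge is pending, or the buffer already filled by
// the read-ahead thread.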
bool Worker::ProcessOrderedOp(const CowOperation* cow_op) {
    void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
    if (buffer == nullptr) {
        SNAP_LOG(ERROR) << "ProcessOrderedOp: Failed to get payload buffer";
        return false;
    }

    MERGE_GROUP_STATE state = snapuserd_->ProcessMergingBlock(cow_op->new_block, buffer);

    switch (state) {
        case MERGE_GROUP_STATE::GROUP_MERGE_COMPLETED: {
            // Merge is completed for this COW op; just read directly from
            // the base device
            SNAP_LOG(DEBUG) << "Merge-completed: Reading from base device sector: "
                            << (cow_op->new_block >> SECTOR_SHIFT)
                            << " Block-number: " << cow_op->new_block;
            if (!ReadDataFromBaseDevice(ChunkToSector(cow_op->new_block), BLOCK_SZ)) {
                SNAP_LOG(ERROR) << "ReadDataFromBaseDevice at sector: "
                                << (cow_op->new_block >> SECTOR_SHIFT) << " after merge-complete.";
                return false;
            }
            return true;
        }
        case MERGE_GROUP_STATE::GROUP_MERGE_PENDING: {
            bool ret;
            if (cow_op->type == kCowCopyOp) {
                ret = ProcessCopyOp(cow_op);
            } else {
                ret = ProcessXorOp(cow_op);
            }

            // I/O is complete - decrement the refcount irrespective of the return
            // status
            snapuserd_->NotifyIOCompletion(cow_op->new_block);
            return ret;
        }
        // We already have the data in the buffer retrieved from the RA thread.
        // Nothing further to process.
        case MERGE_GROUP_STATE::GROUP_MERGE_RA_READY: {
            [[fallthrough]];
        }
        case MERGE_GROUP_STATE::GROUP_MERGE_IN_PROGRESS: {
            return true;
        }
        default: {
            // Fail the I/O for all other states, viz. GROUP_MERGE_FAILED
            // and GROUP_INVALID.
            return false;
        }
    }

    return false;
}

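// Dispatch a single COW op. Replace and zero ops are self-contained; copy and
// xor ops depend on merge ordering and go through ProcessOrderedOp().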
bool Worker::ProcessCowOp(const CowOperation* cow_op) {
    if (cow_op == nullptr) {
        SNAP_LOG(ERROR) << "ProcessCowOp: Invalid cow_op";
        return false;
    }

    switch (cow_op->type) {
        case kCowReplaceOp: {
            return ProcessReplaceOp(cow_op);
        }

        case kCowZeroOp: {
            return ProcessZeroOp();
        }

        case kCowCopyOp:
            [[fallthrough]];
        case kCowXorOp: {
            return ProcessOrderedOp(cow_op);
        }

        default: {
            SNAP_LOG(ERROR) << "Unknown operation-type found: " << cow_op->type;
        }
    }
    return false;
}

void Worker::InitializeBufsink() {
    // Allocate the buffer which is used to communicate between
    // the daemon and dm-user. The buffer comprises a header and a fixed
    // payload. If dm-user requests a big I/O, the I/O will be broken into
    // chunks of PAYLOAD_BUFFER_SZ.
    size_t buf_size = sizeof(struct dm_user_header) + PAYLOAD_BUFFER_SZ;
    bufsink_.Initialize(buf_size);
}

bool Worker::Init() {
    InitializeBufsink();
    xorsink_.Initialize(&bufsink_, BLOCK_SZ);

    if (!InitializeFds()) {
        return false;
    }

    if (!InitReader()) {
        return false;
    }

    return true;
}

bool Worker::RunThread() {
    SNAP_LOG(INFO) << "Processing snapshot I/O requests....";
    // Start serving IO
    while (true) {
        if (!ProcessIORequest()) {
            break;
        }
    }

    CloseFds();
    reader_->CloseCowFd();

    return true;
}

// Read the header from the dm-user misc device. This gives us the sector
// number for which I/O is issued by the dm-snapshot device.
bool Worker::ReadDmUserHeader() {
    if (!android::base::ReadFully(ctrl_fd_, bufsink_.GetBufPtr(), sizeof(struct dm_user_header))) {
        if (errno != ENOTBLK) {
            SNAP_PLOG(ERROR) << "Control-read failed";
        }

        SNAP_PLOG(DEBUG) << "ReadDmUserHeader failed....";
        return false;
    }

    return true;
}

// Send the payload/data back to dm-user misc device.
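// header_response is true only for the first write of a response: the header
// is sent once up front, and subsequent chunks carry payload only.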
bool Worker::WriteDmUserPayload(size_t size, bool header_response) {
    size_t payload_size = size;
    void* buf = bufsink_.GetPayloadBufPtr();
    if (header_response) {
        payload_size += sizeof(struct dm_user_header);
        buf = bufsink_.GetBufPtr();
    }

    if (!android::base::WriteFully(ctrl_fd_, buf, payload_size)) {
        SNAP_PLOG(ERROR) << "Write to dm-user failed size: " << payload_size;
        return false;
    }

    return true;
}

bool Worker::ReadDataFromBaseDevice(sector_t sector, size_t read_size) {
    CHECK(read_size <= BLOCK_SZ);

    void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
    if (buffer == nullptr) {
        SNAP_LOG(ERROR) << "ReadDataFromBaseDevice: Failed to get payload buffer";
        return false;
    }

    loff_t offset = sector << SECTOR_SHIFT;
    if (!android::base::ReadFullyAtOffset(base_path_merge_fd_, buffer, read_size, offset)) {
        SNAP_PLOG(ERROR) << "ReadDataFromBaseDevice failed. fd: " << base_path_merge_fd_
                         << " at sector: " << sector << " size: " << read_size;
        return false;
    }

    return true;
}

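// Serve a block-aligned read request. The request is processed in chunks of
// up to PAYLOAD_BUFFER_SZ; within each chunk, every 4k block is looked up in
// chunk_vec. Blocks with no COW mapping are read straight from the base
// device; mapped blocks are reconstructed from their COW ops.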
bool Worker::ReadAlignedSector(sector_t sector, size_t sz, bool header_response) {
    struct dm_user_header* header = bufsink_.GetHeaderPtr();
    size_t remaining_size = sz;
    std::vector<std::pair<sector_t, const CowOperation*>>& chunk_vec = snapuserd_->GetChunkVec();
    bool io_error = false;
    int ret = 0;

    do {
        // Process 1MB payload at a time
        size_t read_size = std::min(PAYLOAD_BUFFER_SZ, remaining_size);

        header->type = DM_USER_RESP_SUCCESS;
        size_t total_bytes_read = 0;
        io_error = false;
        bufsink_.ResetBufferOffset();

        while (read_size) {
            // We need to check every 4k block to verify if it is
            // present in the mapping.
            size_t size = std::min(BLOCK_SZ, read_size);

            auto it = std::lower_bound(chunk_vec.begin(), chunk_vec.end(),
                                       std::make_pair(sector, nullptr), SnapshotHandler::compare);
            bool not_found = (it == chunk_vec.end() || it->first != sector);

            if (not_found) {
                // Block not found in the map - which means this block was not
                // changed as per the OTA. Just route the I/O to the base
                // device.
                if (!ReadDataFromBaseDevice(sector, size)) {
                    SNAP_LOG(ERROR) << "ReadDataFromBaseDevice failed";
                    header->type = DM_USER_RESP_ERROR;
                }

                ret = size;
            } else {
                // We found the sector in the mapping. Check the type of COW op
                // and process it.
                if (!ProcessCowOp(it->second)) {
                    SNAP_LOG(ERROR) << "ProcessCowOp failed";
                    header->type = DM_USER_RESP_ERROR;
                }

                ret = BLOCK_SZ;
            }

            // Just return the header if it is an error
            if (header->type == DM_USER_RESP_ERROR) {
                if (!RespondIOError(header_response)) {
                    return false;
                }

                io_error = true;
                break;
            }

            read_size -= ret;
            total_bytes_read += ret;
            sector += (ret >> SECTOR_SHIFT);
            bufsink_.UpdateBufferOffset(ret);
        }

        if (!io_error) {
            if (!WriteDmUserPayload(total_bytes_read, header_response)) {
                return false;
            }

            SNAP_LOG(DEBUG) << "WriteDmUserPayload success total_bytes_read: " << total_bytes_read
                            << " header-response: " << header_response
                            << " remaining_size: " << remaining_size;
            header_response = false;
            remaining_size -= total_bytes_read;
        }
    } while (remaining_size > 0 && !io_error);

    return true;
}

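// Handle the leading partial block of an unaligned request. The covering COW
// op is processed into the payload buffer, and the sectors preceding the
// requested sector are shifted out with memmove() so that the payload starts
// at the requested offset. Returns the number of bytes produced, or -1 on
// failure.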
int Worker::ReadUnalignedSector(
        sector_t sector, size_t size,
        std::vector<std::pair<sector_t, const CowOperation*>>::iterator& it) {
    size_t skip_sector_size = 0;

    SNAP_LOG(DEBUG) << "ReadUnalignedSector: sector " << sector << " size: " << size
                    << " Aligned sector: " << it->first;

    if (!ProcessCowOp(it->second)) {
        SNAP_LOG(ERROR) << "ReadUnalignedSector: " << sector << " failed of size: " << size
                        << " Aligned sector: " << it->first;
        return -1;
    }

    int num_sectors_skip = sector - it->first;

    if (num_sectors_skip > 0) {
        skip_sector_size = num_sectors_skip << SECTOR_SHIFT;
        struct dm_user_message* msg =
                reinterpret_cast<struct dm_user_message*>(bufsink_.GetBufPtr());

        if (skip_sector_size == BLOCK_SZ) {
            SNAP_LOG(ERROR) << "Invalid un-aligned IO request at sector: " << sector
                            << " Base-sector: " << it->first;
            return -1;
        }

        memmove(msg->payload.buf, (char*)msg->payload.buf + skip_sector_size,
                (BLOCK_SZ - skip_sector_size));
    }

    bufsink_.ResetBufferOffset();
    return std::min(size, (BLOCK_SZ - skip_sector_size));
}

bool Worker::ReadUnalignedSector(sector_t sector, size_t size) {
    struct dm_user_header* header = bufsink_.GetHeaderPtr();
    header->type = DM_USER_RESP_SUCCESS;
    bufsink_.ResetBufferOffset();
    std::vector<std::pair<sector_t, const CowOperation*>>& chunk_vec = snapuserd_->GetChunkVec();

    auto it = std::lower_bound(chunk_vec.begin(), chunk_vec.end(), std::make_pair(sector, nullptr),
                               SnapshotHandler::compare);

    // |-------|-------|-------|
    // 0       1       2       3
    //
    // Block 0 - op 1
    // Block 1 - op 2
    // Block 2 - op 3
    //
    // chunk_vec will have blocks 0, 1 and 2, which map to the relevant COW ops.
    //
    // Each block is 4k bytes; thus, the last block spans 8 sectors,
    // ranging up to block 3. (However, block 3 won't be in chunk_vec as
    // it doesn't have any mapping to COW ops.) Now, if we get an I/O request
    // for a sector between block 2 and block 3, we need to step back
    // and get hold of the last element.
    //
    // Additionally, we need to make sure that the requested sector is
    // indeed within the range of the final sector. It is perfectly valid
    // to get an I/O request for block 3 and beyond which are not mapped
    // to any COW ops. In that case, we just need to read from the base
    // device.
    bool merge_complete = false;
    bool header_response = true;
    if (it == chunk_vec.end()) {
        if (chunk_vec.size() > 0) {
            // I/O request beyond the last mapped sector
            it = std::prev(chunk_vec.end());
        } else {
            // This can happen when a partition merge is complete but the
            // snapshot state in /metadata is not yet deleted; if the device is
            // rebooted during this window, the subsequent attempt will mount
            // the snapshot. However, since the merge was completed, we wouldn't
            // have any mapping to COW ops and thus chunk_vec will be empty. In
            // that case, mark this as merge_complete and route the I/O to the
            // base device.
            merge_complete = true;
        }
    } else if (it->first != sector) {
        if (it != chunk_vec.begin()) {
            --it;
        }
    } else {
        return ReadAlignedSector(sector, size, header_response);
    }

    loff_t requested_offset = sector << SECTOR_SHIFT;

    loff_t final_offset = 0;
    if (!merge_complete) {
        final_offset = it->first << SECTOR_SHIFT;
    }

    // Since a COW op spans a 4k block, we need to make sure that the requested
    // offset is within the 4k region. Consider the following case:
    //
    // |-------|-------|-------|
    // 0       1       2       3
    //
    // Block 0 - op 1
    // Block 1 - op 2
    //
    // We have an I/O request for a sector between block 2 and block 3. However,
    // we have mappings to COW ops only for block 0 and block 1. Thus, the
    // requested offset in this case is beyond the last mapped COW op (which
    // is block 1 in this case).

    size_t total_bytes_read = 0;
    size_t remaining_size = size;
    int ret = 0;
    if (!merge_complete && (requested_offset >= final_offset) &&
        (requested_offset - final_offset) < BLOCK_SZ) {
        // Read the partial un-aligned data
        ret = ReadUnalignedSector(sector, remaining_size, it);
        if (ret < 0) {
            SNAP_LOG(ERROR) << "ReadUnalignedSector failed for sector: " << sector
                            << " size: " << size << " it->sector: " << it->first;
            return RespondIOError(header_response);
        }

        remaining_size -= ret;
        total_bytes_read += ret;
        sector += (ret >> SECTOR_SHIFT);

        // Send the data back
        if (!WriteDmUserPayload(total_bytes_read, header_response)) {
            return false;
        }

        header_response = false;
        // If we still have pending data to be processed, this will be aligned I/O
        if (remaining_size) {
            return ReadAlignedSector(sector, remaining_size, header_response);
        }
    } else {
        // This is all about handling an I/O request that is routed to the base
        // device, as the I/O is not mapped to any of the COW ops.
        loff_t aligned_offset = requested_offset;
        // Align to the nearest 4k
        aligned_offset += BLOCK_SZ - 1;
        aligned_offset &= ~(BLOCK_SZ - 1);
        // Find the diff of the aligned offset
        size_t diff_size = aligned_offset - requested_offset;
        CHECK(diff_size <= BLOCK_SZ);
        if (remaining_size < diff_size) {
            if (!ReadDataFromBaseDevice(sector, remaining_size)) {
                return RespondIOError(header_response);
            }
            total_bytes_read += remaining_size;

            if (!WriteDmUserPayload(total_bytes_read, header_response)) {
                return false;
            }
        } else {
            if (!ReadDataFromBaseDevice(sector, diff_size)) {
                return RespondIOError(header_response);
            }

            total_bytes_read += diff_size;

            if (!WriteDmUserPayload(total_bytes_read, header_response)) {
                return false;
            }

            remaining_size -= diff_size;
            size_t num_sectors_read = (diff_size >> SECTOR_SHIFT);
            sector += num_sectors_read;
            CHECK(IsBlockAligned(sector << SECTOR_SHIFT));
            header_response = false;

            // If we still have pending data to be processed, this will be aligned I/O
            return ReadAlignedSector(sector, remaining_size, header_response);
        }
    }

    return true;
}

bool Worker::RespondIOError(bool header_response) {
    struct dm_user_header* header = bufsink_.GetHeaderPtr();
    header->type = DM_USER_RESP_ERROR;
    // This is an issue with the dm-user interface. There
    // is no way to propagate the I/O error back to dm-user
    // if we have already communicated the header back. The header
    // is sent once at the beginning; however, I/O can
    // be processed in chunks. If we encounter an I/O error
    // somewhere in the middle of the processing, we can't communicate
    // this back to dm-user.
    //
    // TODO: Fix the interface
    CHECK(header_response);

    if (!WriteDmUserPayload(0, header_response)) {
        return false;
    }

    // There is no need to process further as we have already seen
    // an I/O error
    return true;
}

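// Route a read request based on its alignment: block-aligned requests take
// the aligned path directly, anything else goes through the unaligned
// handler first.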
bool Worker::DmuserReadRequest() {
    struct dm_user_header* header = bufsink_.GetHeaderPtr();

    // Unaligned I/O request
    if (!IsBlockAligned(header->sector << SECTOR_SHIFT)) {
        return ReadUnalignedSector(header->sector, header->len);
    }

    return ReadAlignedSector(header->sector, header->len, true);
}

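// One iteration of the worker loop: read the next request header from
// dm-user and dispatch it. Returning false terminates the worker thread.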
bool Worker::ProcessIORequest() {
    struct dm_user_header* header = bufsink_.GetHeaderPtr();

    if (!ReadDmUserHeader()) {
        return false;
    }

    SNAP_LOG(DEBUG) << "Daemon: msg->seq: " << std::dec << header->seq;
    SNAP_LOG(DEBUG) << "Daemon: msg->len: " << std::dec << header->len;
    SNAP_LOG(DEBUG) << "Daemon: msg->sector: " << std::dec << header->sector;
    SNAP_LOG(DEBUG) << "Daemon: msg->type: " << std::dec << header->type;
    SNAP_LOG(DEBUG) << "Daemon: msg->flags: " << std::dec << header->flags;

    switch (header->type) {
        case DM_USER_REQ_MAP_READ: {
            if (!DmuserReadRequest()) {
                return false;
            }
            break;
        }

        case DM_USER_REQ_MAP_WRITE: {
            // TODO: We should not get any write requests
            // to dm-user as we mount all partitions
            // as read-only. Need to verify how TRIM commands
            // are handled during mount.
            return false;
        }
    }

    return true;
}

}  // namespace snapshot
}  // namespace android