1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "apexd"
18
19 #include "apexd_loop.h"
20
21 #include <mutex>
22
23 #include <dirent.h>
24 #include <fcntl.h>
25 #include <linux/fs.h>
26 #include <linux/loop.h>
27 #include <sys/ioctl.h>
28 #include <sys/stat.h>
29 #include <sys/types.h>
30 #include <unistd.h>
31
32 #include <android-base/file.h>
33 #include <android-base/logging.h>
34 #include <android-base/stringprintf.h>
35 #include <android-base/strings.h>
36
37 #include "apexd_utils.h"
38 #include "string_log.h"
39
40 using android::base::Error;
41 using android::base::Result;
42 using android::base::StartsWith;
43 using android::base::StringPrintf;
44 using android::base::unique_fd;
45
46 #ifndef LOOP_CONFIGURE
47 // These can be removed whenever we pull in the Linux v5.8 UAPI headers
48 struct loop_config {
49 __u32 fd;
50 __u32 block_size;
51 struct loop_info64 info;
52 __u64 __reserved[8];
53 };
54 #define LOOP_CONFIGURE 0x4C0A
55 #endif
56
57 namespace android {
58 namespace apex {
59 namespace loop {
60
// Tag that configureLoopDevice() stores in lo_crypt_name and that
// DestroyLoopDevice() looks for to recognise loop devices set up by apexd.
static constexpr const char* kApexLoopIdPrefix = "apex:";

// Value written to the loop device's read_ahead_kb sysfs attribute:
// 128 kB read-ahead, which we currently use for /system as well.
static constexpr const char* kReadAheadKb = "128";

// Number of times createLoopDevice() tries to open the device node.
//
// TODO(b/122059364): Even though the kernel has created the loop
// device, we still depend on ueventd to run to actually create the
// device node in userspace. To solve this properly we should listen on
// the netlink socket for uevents, or use inotify. For now, this will
// have to do.
static constexpr size_t kLoopDeviceRetryAttempts = 3u;
72
MaybeCloseBad()73 void LoopbackDeviceUniqueFd::MaybeCloseBad() {
74 if (device_fd.get() != -1) {
75 // Disassociate any files.
76 if (ioctl(device_fd.get(), LOOP_CLR_FD) == -1) {
77 PLOG(ERROR) << "Unable to clear fd for loopback device";
78 }
79 }
80 }
81
configureReadAhead(const std::string & device_path)82 Result<void> configureReadAhead(const std::string& device_path) {
83 auto pos = device_path.find("/dev/block/");
84 if (pos != 0) {
85 return Error() << "Device path does not start with /dev/block.";
86 }
87 pos = device_path.find_last_of('/');
88 std::string device_name = device_path.substr(pos + 1, std::string::npos);
89
90 std::string sysfs_device =
91 StringPrintf("/sys/block/%s/queue/read_ahead_kb", device_name.c_str());
92 unique_fd sysfs_fd(open(sysfs_device.c_str(), O_RDWR | O_CLOEXEC));
93 if (sysfs_fd.get() == -1) {
94 return ErrnoError() << "Failed to open " << sysfs_device;
95 }
96
97 int ret = TEMP_FAILURE_RETRY(
98 write(sysfs_fd.get(), kReadAheadKb, strlen(kReadAheadKb) + 1));
99 if (ret < 0) {
100 return ErrnoError() << "Failed to write to " << sysfs_device;
101 }
102
103 return {};
104 }
105
preAllocateLoopDevices(size_t num)106 Result<void> preAllocateLoopDevices(size_t num) {
107 Result<void> loopReady = WaitForFile("/dev/loop-control", 20s);
108 if (!loopReady.ok()) {
109 return loopReady;
110 }
111 unique_fd ctl_fd(
112 TEMP_FAILURE_RETRY(open("/dev/loop-control", O_RDWR | O_CLOEXEC)));
113 if (ctl_fd.get() == -1) {
114 return ErrnoError() << "Failed to open loop-control";
115 }
116
117 // Assumption: loop device ID [0..num) is valid.
118 // This is because pre-allocation happens during bootstrap.
119 // Anyway Kernel pre-allocated loop devices
120 // as many as CONFIG_BLK_DEV_LOOP_MIN_COUNT,
121 // Within the amount of kernel-pre-allocation,
122 // LOOP_CTL_ADD will fail with EEXIST
123 for (size_t id = 0ul; id < num; ++id) {
124 int ret = ioctl(ctl_fd.get(), LOOP_CTL_ADD, id);
125 if (ret < 0 && errno != EEXIST) {
126 return ErrnoError() << "Failed LOOP_CTL_ADD";
127 }
128 }
129
130 // Don't wait until the dev nodes are actually created, which
131 // will delay the boot. By simply returing here, the creation of the dev
132 // nodes will be done in parallel with other boot processes, and we
133 // just optimistally hope that they are all created when we actually
134 // access them for activating APEXes. If the dev nodes are not ready
135 // even then, we wait 50ms and warning message will be printed (see below
136 // createLoopDevice()).
137 LOG(INFO) << "Pre-allocated " << num << " loopback devices";
138 return {};
139 }
140
configureLoopDevice(const int device_fd,const std::string & target,const int32_t imageOffset,const size_t imageSize)141 Result<void> configureLoopDevice(const int device_fd, const std::string& target,
142 const int32_t imageOffset,
143 const size_t imageSize) {
144 static bool useLoopConfigure;
145 static std::once_flag onceFlag;
146 std::call_once(onceFlag, [&]() {
147 // LOOP_CONFIGURE is a new ioctl in Linux 5.8 (and backported in Android
148 // common) that allows atomically configuring a loop device. It is a lot
149 // faster than the traditional LOOP_SET_FD/LOOP_SET_STATUS64 combo, but
150 // it may not be available on updating devices, so try once before
151 // deciding.
152 struct loop_config config;
153 memset(&config, 0, sizeof(config));
154 config.fd = -1;
155 if (ioctl(device_fd, LOOP_CONFIGURE, &config) == -1 && errno == EBADF) {
156 // If the IOCTL exists, it will fail with EBADF for the -1 fd
157 useLoopConfigure = true;
158 }
159 });
160
161 /*
162 * Using O_DIRECT will tell the kernel that we want to use Direct I/O
163 * on the underlying file, which we want to do to avoid double caching.
164 * Note that Direct I/O won't be enabled immediately, because the block
165 * size of the underlying block device may not match the default loop
166 * device block size (512); when we call LOOP_SET_BLOCK_SIZE below, the
167 * kernel driver will automatically enable Direct I/O when it sees that
168 * condition is now met.
169 */
170 unique_fd target_fd(open(target.c_str(), O_RDONLY | O_CLOEXEC | O_DIRECT));
171 if (target_fd.get() == -1) {
172 return ErrnoError() << "Failed to open " << target;
173 }
174
175 struct loop_info64 li;
176 memset(&li, 0, sizeof(li));
177 strlcpy((char*)li.lo_crypt_name, kApexLoopIdPrefix, LO_NAME_SIZE);
178 li.lo_offset = imageOffset;
179 li.lo_sizelimit = imageSize;
180
181 if (useLoopConfigure) {
182 struct loop_config config;
183 memset(&config, 0, sizeof(config));
184 li.lo_flags |= LO_FLAGS_DIRECT_IO;
185 config.fd = target_fd.get();
186 config.info = li;
187 config.block_size = 4096;
188
189 if (ioctl(device_fd, LOOP_CONFIGURE, &config) == -1) {
190 return ErrnoError() << "Failed to LOOP_CONFIGURE";
191 }
192
193 return {};
194 } else {
195 if (ioctl(device_fd, LOOP_SET_FD, target_fd.get()) == -1) {
196 return ErrnoError() << "Failed to LOOP_SET_FD";
197 }
198
199 if (ioctl(device_fd, LOOP_SET_STATUS64, &li) == -1) {
200 return ErrnoError() << "Failed to LOOP_SET_STATUS64";
201 }
202
203 if (ioctl(device_fd, BLKFLSBUF, 0) == -1) {
204 // This works around a kernel bug where the following happens.
205 // 1) The device runs with a value of loop.max_part > 0
206 // 2) As part of LOOP_SET_FD above, we do a partition scan, which loads
207 // the first 2 pages of the underlying file into the buffer cache
208 // 3) When we then change the offset with LOOP_SET_STATUS64, those pages
209 // are not invalidated from the cache.
210 // 4) When we try to mount an ext4 filesystem on the loop device, the ext4
211 // code will try to find a superblock by reading 4k at offset 0; but,
212 // because we still have the old pages at offset 0 lying in the cache,
213 // those pages will be returned directly. However, those pages contain
214 // the data at offset 0 in the underlying file, not at the offset that
215 // we configured
216 // 5) the ext4 driver fails to find a superblock in the (wrong) data, and
217 // fails to mount the filesystem.
218 //
219 // To work around this, explicitly flush the block device, which will
220 // flush the buffer cache and make sure we actually read the data at the
221 // correct offset.
222 return ErrnoError() << "Failed to flush buffers on the loop device";
223 }
224
225 // Direct-IO requires the loop device to have the same block size as the
226 // underlying filesystem.
227 if (ioctl(device_fd, LOOP_SET_BLOCK_SIZE, 4096) == -1) {
228 PLOG(WARNING) << "Failed to LOOP_SET_BLOCK_SIZE";
229 }
230 }
231 return {};
232 }
233
createLoopDevice(const std::string & target,const int32_t imageOffset,const size_t imageSize)234 Result<LoopbackDeviceUniqueFd> createLoopDevice(const std::string& target,
235 const int32_t imageOffset,
236 const size_t imageSize) {
237 unique_fd ctl_fd(open("/dev/loop-control", O_RDWR | O_CLOEXEC));
238 if (ctl_fd.get() == -1) {
239 return ErrnoError() << "Failed to open loop-control";
240 }
241
242 int num = ioctl(ctl_fd.get(), LOOP_CTL_GET_FREE);
243 if (num == -1) {
244 return ErrnoError() << "Failed LOOP_CTL_GET_FREE";
245 }
246
247 std::string device = StringPrintf("/dev/block/loop%d", num);
248
249 LoopbackDeviceUniqueFd device_fd;
250 {
251 // See comment on kLoopDeviceRetryAttempts.
252 unique_fd sysfs_fd;
253 for (size_t i = 0; i != kLoopDeviceRetryAttempts; ++i) {
254 sysfs_fd.reset(open(device.c_str(), O_RDWR | O_CLOEXEC));
255 if (sysfs_fd.get() != -1) {
256 break;
257 }
258 PLOG(WARNING) << "Loopback device " << device
259 << " not ready. Waiting 50ms...";
260 usleep(50000);
261 }
262 if (sysfs_fd.get() == -1) {
263 return ErrnoError() << "Failed to open " << device;
264 }
265 device_fd = LoopbackDeviceUniqueFd(std::move(sysfs_fd), device);
266 CHECK_NE(device_fd.get(), -1);
267 }
268
269 Result<void> configureStatus =
270 configureLoopDevice(device_fd.get(), target, imageOffset, imageSize);
271 if (!configureStatus.ok()) {
272 return configureStatus.error();
273 }
274
275 Result<void> readAheadStatus = configureReadAhead(device);
276 if (!readAheadStatus.ok()) {
277 return readAheadStatus.error();
278 }
279 return device_fd;
280 }
281
DestroyLoopDevice(const std::string & path,const DestroyLoopFn & extra)282 void DestroyLoopDevice(const std::string& path, const DestroyLoopFn& extra) {
283 unique_fd fd(open(path.c_str(), O_RDWR | O_CLOEXEC));
284 if (fd.get() == -1) {
285 if (errno != ENOENT) {
286 PLOG(WARNING) << "Failed to open " << path;
287 }
288 return;
289 }
290
291 struct loop_info64 li;
292 if (ioctl(fd.get(), LOOP_GET_STATUS64, &li) < 0) {
293 if (errno != ENXIO) {
294 PLOG(WARNING) << "Failed to LOOP_GET_STATUS64 " << path;
295 }
296 return;
297 }
298
299 auto id = std::string((char*)li.lo_crypt_name);
300 if (StartsWith(id, kApexLoopIdPrefix)) {
301 extra(path, id);
302
303 if (ioctl(fd.get(), LOOP_CLR_FD, 0) < 0) {
304 PLOG(WARNING) << "Failed to LOOP_CLR_FD " << path;
305 }
306 }
307 }
308
309 } // namespace loop
310 } // namespace apex
311 } // namespace android
312