1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "apexd"
18
19 #include "apexd_loop.h"
20
21 #include <mutex>
22
23 #include <dirent.h>
24 #include <fcntl.h>
25 #include <linux/fs.h>
26 #include <linux/loop.h>
27 #include <sys/ioctl.h>
28 #include <sys/stat.h>
29 #include <sys/statfs.h>
30 #include <sys/types.h>
31 #include <unistd.h>
32
33 #include <android-base/file.h>
34 #include <android-base/logging.h>
35 #include <android-base/parseint.h>
36 #include <android-base/properties.h>
37 #include <android-base/stringprintf.h>
38 #include <android-base/strings.h>
39
40 #include "apexd_utils.h"
41 #include "string_log.h"
42
43 using android::base::Basename;
44 using android::base::ErrnoError;
45 using android::base::Error;
46 using android::base::GetBoolProperty;
47 using android::base::ParseUint;
48 using android::base::Result;
49 using android::base::StartsWith;
50 using android::base::StringPrintf;
51 using android::base::unique_fd;
52
#if !defined(LOOP_CONFIGURE)
// Fallback declarations for building against UAPI headers that do not yet
// provide LOOP_CONFIGURE. They can be dropped once the Linux v5.8 UAPI
// headers are pulled in. Member order and types must not be altered.
struct loop_config {
  // Descriptor of the backing file to attach.
  __u32 fd;
  // Block size requested for the loop device.
  __u32 block_size;
  // Remaining settings, in the same form LOOP_SET_STATUS64 takes.
  struct loop_info64 info;
  __u64 __reserved[8];
};
#define LOOP_CONFIGURE 0x4C0A
#endif  // !defined(LOOP_CONFIGURE)
63
64 namespace android {
65 namespace apex {
66 namespace loop {
67
// Tag stored in loop_info64::lo_crypt_name for every loop device set up by
// this file; DestroyLoopDevice() only touches devices carrying it.
static constexpr char const* kApexLoopIdPrefix{"apex:"};

// Read-ahead size, in kB, written to the loop device's sysfs attribute.
// 128 kB is the value currently used for /system as well.
static constexpr char const* kReadAheadKb{"128"};

// How many times opening a loop device node is retried.
//
// TODO(b/122059364): Even though the kernel has created the loop
// device, we still depend on ueventd to run to actually create the
// device node in userspace. To solve this properly we should listen on
// the netlink socket for uevents, or use inotify. For now, this will
// have to do.
static constexpr size_t kLoopDeviceRetryAttempts{3u};
79
MaybeCloseBad()80 void LoopbackDeviceUniqueFd::MaybeCloseBad() {
81 if (device_fd.get() != -1) {
82 // Disassociate any files.
83 if (ioctl(device_fd.get(), LOOP_CLR_FD) == -1) {
84 PLOG(ERROR) << "Unable to clear fd for loopback device";
85 }
86 }
87 }
88
ConfigureReadAhead(const std::string & device_path)89 Result<void> ConfigureReadAhead(const std::string& device_path) {
90 CHECK(StartsWith(device_path, "/dev/"));
91 std::string device_name = Basename(device_path);
92
93 std::string sysfs_device =
94 StringPrintf("/sys/block/%s/queue/read_ahead_kb", device_name.c_str());
95 unique_fd sysfs_fd(open(sysfs_device.c_str(), O_RDWR | O_CLOEXEC));
96 if (sysfs_fd.get() == -1) {
97 return ErrnoError() << "Failed to open " << sysfs_device;
98 }
99
100 int ret = TEMP_FAILURE_RETRY(
101 write(sysfs_fd.get(), kReadAheadKb, strlen(kReadAheadKb) + 1));
102 if (ret < 0) {
103 return ErrnoError() << "Failed to write to " << sysfs_device;
104 }
105
106 return {};
107 }
108
PreAllocateLoopDevices(size_t num)109 Result<void> PreAllocateLoopDevices(size_t num) {
110 Result<void> loop_ready = WaitForFile("/dev/loop-control", 20s);
111 if (!loop_ready.ok()) {
112 return loop_ready;
113 }
114 unique_fd ctl_fd(
115 TEMP_FAILURE_RETRY(open("/dev/loop-control", O_RDWR | O_CLOEXEC)));
116 if (ctl_fd.get() == -1) {
117 return ErrnoError() << "Failed to open loop-control";
118 }
119
120 bool found = false;
121 size_t start_id = 0;
122 constexpr const char* kLoopPrefix = "loop";
123 WalkDir("/dev/block", [&](const std::filesystem::directory_entry& entry) {
124 std::string devname = entry.path().filename().string();
125 if (StartsWith(devname, kLoopPrefix)) {
126 size_t id;
127 auto parse_ok = ParseUint(
128 devname.substr(std::char_traits<char>::length(kLoopPrefix)), &id);
129 if (parse_ok && id > start_id) {
130 start_id = id;
131 found = true;
132 }
133 }
134 });
135 if (found) ++start_id;
136
137 // Assumption: loop device ID [0..num) is valid.
138 // This is because pre-allocation happens during bootstrap.
139 // Anyway Kernel pre-allocated loop devices
140 // as many as CONFIG_BLK_DEV_LOOP_MIN_COUNT,
141 // Within the amount of kernel-pre-allocation,
142 // LOOP_CTL_ADD will fail with EEXIST
143 for (size_t id = start_id; id < num + start_id; ++id) {
144 int ret = ioctl(ctl_fd.get(), LOOP_CTL_ADD, id);
145 if (ret < 0 && errno != EEXIST) {
146 return ErrnoError() << "Failed LOOP_CTL_ADD";
147 }
148 }
149
150 // Don't wait until the dev nodes are actually created, which
151 // will delay the boot. By simply returing here, the creation of the dev
152 // nodes will be done in parallel with other boot processes, and we
153 // just optimistally hope that they are all created when we actually
154 // access them for activating APEXes. If the dev nodes are not ready
155 // even then, we wait 50ms and warning message will be printed (see below
156 // CreateLoopDevice()).
157 LOG(INFO) << "Pre-allocated " << num << " loopback devices";
158 return {};
159 }
160
ConfigureLoopDevice(const int device_fd,const std::string & target,const int32_t image_offset,const size_t image_size)161 Result<void> ConfigureLoopDevice(const int device_fd, const std::string& target,
162 const int32_t image_offset,
163 const size_t image_size) {
164 static bool use_loop_configure;
165 static std::once_flag once_flag;
166 std::call_once(once_flag, [&]() {
167 // LOOP_CONFIGURE is a new ioctl in Linux 5.8 (and backported in Android
168 // common) that allows atomically configuring a loop device. It is a lot
169 // faster than the traditional LOOP_SET_FD/LOOP_SET_STATUS64 combo, but
170 // it may not be available on updating devices, so try once before
171 // deciding.
172 struct loop_config config;
173 memset(&config, 0, sizeof(config));
174 config.fd = -1;
175 if (ioctl(device_fd, LOOP_CONFIGURE, &config) == -1 && errno == EBADF) {
176 // If the IOCTL exists, it will fail with EBADF for the -1 fd
177 use_loop_configure = true;
178 }
179 });
180
181 /*
182 * Using O_DIRECT will tell the kernel that we want to use Direct I/O
183 * on the underlying file, which we want to do to avoid double caching.
184 * Note that Direct I/O won't be enabled immediately, because the block
185 * size of the underlying block device may not match the default loop
186 * device block size (512); when we call LOOP_SET_BLOCK_SIZE below, the
187 * kernel driver will automatically enable Direct I/O when it sees that
188 * condition is now met.
189 */
190 unique_fd target_fd(open(target.c_str(), O_RDONLY | O_CLOEXEC | O_DIRECT));
191 if (target_fd.get() == -1) {
192 struct statfs stbuf;
193 int saved_errno = errno;
194 // let's give another try with buffered I/O for EROFS and squashfs
195 if (statfs(target.c_str(), &stbuf) != 0 ||
196 (stbuf.f_type != EROFS_SUPER_MAGIC_V1 &&
197 stbuf.f_type != SQUASHFS_MAGIC &&
198 stbuf.f_type != OVERLAYFS_SUPER_MAGIC)) {
199 return Error(saved_errno) << "Failed to open " << target;
200 }
201 LOG(WARNING) << "Fallback to buffered I/O for " << target;
202 target_fd.reset(open(target.c_str(), O_RDONLY | O_CLOEXEC));
203 if (target_fd.get() == -1) {
204 return ErrnoError() << "Failed to open " << target;
205 }
206 }
207
208 struct loop_info64 li;
209 memset(&li, 0, sizeof(li));
210 strlcpy((char*)li.lo_crypt_name, kApexLoopIdPrefix, LO_NAME_SIZE);
211 li.lo_offset = image_offset;
212 li.lo_sizelimit = image_size;
213 // Automatically free loop device on last close.
214 li.lo_flags |= LO_FLAGS_AUTOCLEAR;
215
216 if (use_loop_configure) {
217 struct loop_config config;
218 memset(&config, 0, sizeof(config));
219 li.lo_flags |= LO_FLAGS_DIRECT_IO;
220 config.fd = target_fd.get();
221 config.info = li;
222 config.block_size = 4096;
223
224 if (ioctl(device_fd, LOOP_CONFIGURE, &config) == -1) {
225 return ErrnoError() << "Failed to LOOP_CONFIGURE";
226 }
227
228 return {};
229 } else {
230 if (ioctl(device_fd, LOOP_SET_FD, target_fd.get()) == -1) {
231 return ErrnoError() << "Failed to LOOP_SET_FD";
232 }
233
234 if (ioctl(device_fd, LOOP_SET_STATUS64, &li) == -1) {
235 return ErrnoError() << "Failed to LOOP_SET_STATUS64";
236 }
237
238 if (ioctl(device_fd, BLKFLSBUF, 0) == -1) {
239 // This works around a kernel bug where the following happens.
240 // 1) The device runs with a value of loop.max_part > 0
241 // 2) As part of LOOP_SET_FD above, we do a partition scan, which loads
242 // the first 2 pages of the underlying file into the buffer cache
243 // 3) When we then change the offset with LOOP_SET_STATUS64, those pages
244 // are not invalidated from the cache.
245 // 4) When we try to mount an ext4 filesystem on the loop device, the ext4
246 // code will try to find a superblock by reading 4k at offset 0; but,
247 // because we still have the old pages at offset 0 lying in the cache,
248 // those pages will be returned directly. However, those pages contain
249 // the data at offset 0 in the underlying file, not at the offset that
250 // we configured
251 // 5) the ext4 driver fails to find a superblock in the (wrong) data, and
252 // fails to mount the filesystem.
253 //
254 // To work around this, explicitly flush the block device, which will
255 // flush the buffer cache and make sure we actually read the data at the
256 // correct offset.
257 return ErrnoError() << "Failed to flush buffers on the loop device";
258 }
259
260 // Direct-IO requires the loop device to have the same block size as the
261 // underlying filesystem.
262 if (ioctl(device_fd, LOOP_SET_BLOCK_SIZE, 4096) == -1) {
263 PLOG(WARNING) << "Failed to LOOP_SET_BLOCK_SIZE";
264 }
265 }
266 return {};
267 }
268
WaitForDevice(int num)269 Result<LoopbackDeviceUniqueFd> WaitForDevice(int num) {
270 std::string opened_device;
271 const std::vector<std::string> candidate_devices = {
272 StringPrintf("/dev/block/loop%d", num),
273 StringPrintf("/dev/loop%d", num),
274 };
275
276 // apexd-bootstrap runs in parallel with ueventd to optimize boot time. In
277 // rare cases apexd would try attempt to mount an apex before ueventd created
278 // a loop device for it. To work around this we keep polling for loop device
279 // to be created until ueventd's cold boot sequence is done.
280 // See comment on kLoopDeviceRetryAttempts.
281 unique_fd sysfs_fd;
282 bool cold_boot_done = GetBoolProperty("ro.cold_boot_done", false);
283 for (size_t i = 0; i != kLoopDeviceRetryAttempts; ++i) {
284 if (!cold_boot_done) {
285 cold_boot_done = GetBoolProperty("ro.cold_boot_done", false);
286 }
287 for (const auto& device : candidate_devices) {
288 sysfs_fd.reset(open(device.c_str(), O_RDWR | O_CLOEXEC));
289 if (sysfs_fd.get() != -1) {
290 return LoopbackDeviceUniqueFd(std::move(sysfs_fd), device);
291 }
292 }
293 PLOG(WARNING) << "Loopback device " << num << " not ready. Waiting 50ms...";
294 usleep(50000);
295 if (!cold_boot_done) {
296 // ueventd hasn't finished cold boot yet, keep trying.
297 i = 0;
298 }
299 }
300
301 return Error() << "Faled to open loopback device " << num;
302 }
303
CreateLoopDevice(const std::string & target,const int32_t image_offset,const size_t image_size)304 Result<LoopbackDeviceUniqueFd> CreateLoopDevice(const std::string& target,
305 const int32_t image_offset,
306 const size_t image_size) {
307 unique_fd ctl_fd(open("/dev/loop-control", O_RDWR | O_CLOEXEC));
308 if (ctl_fd.get() == -1) {
309 return ErrnoError() << "Failed to open loop-control";
310 }
311
312 static std::mutex mlock;
313 std::lock_guard lock(mlock);
314 int num = ioctl(ctl_fd.get(), LOOP_CTL_GET_FREE);
315 if (num == -1) {
316 return ErrnoError() << "Failed LOOP_CTL_GET_FREE";
317 }
318
319 Result<LoopbackDeviceUniqueFd> loop_device = WaitForDevice(num);
320 if (!loop_device.ok()) {
321 return loop_device.error();
322 }
323 CHECK_NE(loop_device->device_fd.get(), -1);
324
325 Result<void> configureStatus = ConfigureLoopDevice(
326 loop_device->device_fd.get(), target, image_offset, image_size);
327 if (!configureStatus.ok()) {
328 return configureStatus.error();
329 }
330
331 Result<void> read_ahead_status = ConfigureReadAhead(loop_device->name);
332 if (!read_ahead_status.ok()) {
333 return read_ahead_status.error();
334 }
335
336 return loop_device;
337 }
338
DestroyLoopDevice(const std::string & path,const DestroyLoopFn & extra)339 void DestroyLoopDevice(const std::string& path, const DestroyLoopFn& extra) {
340 unique_fd fd(open(path.c_str(), O_RDWR | O_CLOEXEC));
341 if (fd.get() == -1) {
342 if (errno != ENOENT) {
343 PLOG(WARNING) << "Failed to open " << path;
344 }
345 return;
346 }
347
348 struct loop_info64 li;
349 if (ioctl(fd.get(), LOOP_GET_STATUS64, &li) < 0) {
350 if (errno != ENXIO) {
351 PLOG(WARNING) << "Failed to LOOP_GET_STATUS64 " << path;
352 }
353 return;
354 }
355
356 auto id = std::string((char*)li.lo_crypt_name);
357 if (StartsWith(id, kApexLoopIdPrefix)) {
358 extra(path, id);
359
360 if (ioctl(fd.get(), LOOP_CLR_FD, 0) < 0) {
361 PLOG(WARNING) << "Failed to LOOP_CLR_FD " << path;
362 }
363 }
364 }
365
366 } // namespace loop
367 } // namespace apex
368 } // namespace android
369