/* SPDX-License-Identifier: MIT */
/*
 * Proof-of-concept for doing file digests using the kernel's AF_ALG API.
 * Needs a bit of error handling.
 */
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <linux/if_alg.h>
#include "liburing.h"

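/*
 * Overall flow: main() opens the input file and an AF_ALG "hash" socket
 * (socket + bind + accept), digest_file() streams the file into the hash
 * socket with io_uring reads and MSG_MORE sends, and get_result() then
 * reads the finished digest back from the same socket.
 */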
#define QD		64
#define WAIT_BATCH	(QD / 8)
#define BS		(64*1024)

#define BGID		1
#define BID_MASK	(QD - 1)

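/*
 * QD is both the queue depth and the number of BS-sized I/O buffers.
 * BID_MASK assumes QD is a power of two, so buffer IDs can wrap around
 * the provided-buffer ring with a plain mask.
 */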
enum req_state {
	IO_INIT = 0,
	IO_READ,
	IO_READ_COMPLETE,
	IO_WRITE,
	IO_WRITE_COMPLETE,
};

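/*
 * Request lifecycle: a slot in IO_INIT or IO_WRITE_COMPLETE is free for a
 * new read, and a slot in IO_READ_COMPLETE is ready to be sent.
 * reap_completions() advances a request with req->state++, which is why
 * each _COMPLETE state directly follows its in-flight counterpart.
 */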
struct req {
	off_t offset;
	enum req_state state;
	struct iovec iov;
};

struct kdigest {
	struct io_uring ring;
	struct io_uring_buf_ring *br;
	struct req reqs[QD];
	/* heap allocated, aligned QD*BS buffer */
	uint8_t *bufs;
};

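/*
 * Request slot i always uses the BS-sized slice at bufs[i * BS], so
 * buffer ownership follows the slot and no per-I/O allocation is needed.
 */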
static int infd, outfd;

static int get_file_size(int fd, size_t *size)
{
	struct stat st;

	if (fstat(fd, &st) < 0)
		return -1;
	if (S_ISREG(st.st_mode)) {
		*size = st.st_size;
	} else if (S_ISBLK(st.st_mode)) {
		unsigned long long bytes;

		if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
			return -1;

		*size = bytes;
	} else {
		return -1;
	}

	return 0;
}

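/*
 * Drain whatever CQEs are currently available without consuming them one
 * at a time: io_uring_for_each_cqe() walks the CQ ring in place, and a
 * single io_uring_cq_advance() releases all nr entries at the end.
 */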
static int reap_completions(struct io_uring *ring, int *inflight,
			    size_t *outsize)
{
	struct io_uring_cqe *cqe;
	unsigned head;
	int ret = 0, nr;

	nr = 0;
	io_uring_for_each_cqe(ring, head, cqe) {
		struct req *req;

		req = io_uring_cqe_get_data(cqe);
		assert(req->state == IO_READ || req->state == IO_WRITE);
		if (cqe->res < 0) {
			fprintf(stderr, "%s: cqe error %d\n",
				req->state == IO_WRITE ? "send" : "read",
				cqe->res);
			*outsize = 0;
			ret = 1;
			break;
		}

		(*inflight)--;
		req->state++;
		if (req->state == IO_WRITE_COMPLETE)
			*outsize -= cqe->res;
		nr++;
	}

	io_uring_cq_advance(ring, nr);
	return ret;
}

/*
 * Add buffers to the outgoing ring, and submit a single bundle send that
 * will finish when all of them have completed.
 */
static void submit_sends_br(struct kdigest *kdigest, int *write_idx,
			    int *inflight)
{
	struct io_uring_buf_ring *br = kdigest->br;
	struct req *req, *first_req = NULL;
	struct io_uring_sqe *sqe;
	int nr = 0;

	/*
	 * Find any completed reads, and add the buffers to the outgoing
	 * send ring. That will serialize the data sent.
	 */
	while (kdigest->reqs[*write_idx].state == IO_READ_COMPLETE) {
		req = &kdigest->reqs[*write_idx];
		io_uring_buf_ring_add(br, req->iov.iov_base, req->iov.iov_len,
				      *write_idx, BID_MASK, nr++);
		/*
		 * Mark it as a write/send if it's the first one, as that
		 * serves as the "barrier" in the array. The rest can be
		 * marked complete upfront, if there's more in this bundle,
		 * as the first one will serve as the stopping point.
		 */
		if (!first_req) {
			req->state = IO_WRITE;
			first_req = req;
		} else {
			req->state = IO_WRITE_COMPLETE;
		}
		*write_idx = (*write_idx + 1) % QD;
	}

	/*
	 * If any completed reads were found and we added buffers, advance
	 * the buffer ring and prepare a single bundle send for all of them.
	 */
	if (first_req) {
		io_uring_buf_ring_advance(br, nr);

		sqe = io_uring_get_sqe(&kdigest->ring);
		io_uring_prep_send_bundle(sqe, outfd, 0, MSG_MORE);
		sqe->flags |= IOSQE_BUFFER_SELECT;
		sqe->buf_group = BGID;
		io_uring_sqe_set_data(sqe, first_req);
		(*inflight)++;
	}
}

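/*
 * Note that every send uses MSG_MORE: with AF_ALG, that keeps the hash in
 * its update state so the digest spans the whole stream, and the final
 * recv in get_result() is what produces the result.
 */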
/*
 * Serialize multiple writes with IOSQE_IO_LINK. Not the most efficient
 * way, as it's both more expensive on the kernel side to handle links,
 * and if there's bundle support, all of the below can be done with a
 * single send rather than multiple ones.
 */
static void submit_sends_linked(struct kdigest *kdigest, int *write_idx,
				int *inflight)
{
	struct io_uring_sqe *sqe;
	struct req *req;

	/* Queue up any possible writes. Link flag ensures ordering. */
	sqe = NULL;
	while (kdigest->reqs[*write_idx].state == IO_READ_COMPLETE) {
		if (sqe)
			sqe->flags |= IOSQE_IO_LINK;

		req = &kdigest->reqs[*write_idx];
		req->state = IO_WRITE;
		sqe = io_uring_get_sqe(&kdigest->ring);
		io_uring_prep_send(sqe, outfd, req->iov.iov_base,
				   req->iov.iov_len, MSG_MORE);
		io_uring_sqe_set_data(sqe, req);
		(*inflight)++;

		*write_idx = (*write_idx + 1) % QD;
	}
}

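/* Dispatch to bundled or linked sends, depending on kernel support. */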
static void submit_sends(struct kdigest *kdigest, int *write_idx, int *inflight)
{
	if (kdigest->br)
		submit_sends_br(kdigest, write_idx, inflight);
	else
		submit_sends_linked(kdigest, write_idx, inflight);
}

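/*
 * Main pipeline: each loop iteration turns completed reads into sends,
 * refills free request slots with new reads, then submits and reaps
 * roughly half of what's inflight before going around again. insize
 * tracks bytes left to read, outsize bytes left to be sent.
 */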
static int digest_file(struct kdigest *kdigest, size_t insize)
{
	struct io_uring *ring = &kdigest->ring;
	off_t read_off = 0;
	size_t outsize = insize;
	int read_idx = 0, write_idx = 0, inflight = 0;

	while (outsize) {
		struct io_uring_sqe *sqe;
		struct req *req;
		int to_wait;

		submit_sends(kdigest, &write_idx, &inflight);

		/* Queue up any reads. Completions may arrive out of order. */
		while (insize && (kdigest->reqs[read_idx].state == IO_INIT
		    || kdigest->reqs[read_idx].state == IO_WRITE_COMPLETE)) {
			size_t this_size = (insize < BS ? insize : BS);

			req = &kdigest->reqs[read_idx];
			req->state = IO_READ;
			req->offset = read_off;
			req->iov.iov_base = &kdigest->bufs[read_idx * BS];
			req->iov.iov_len = this_size;

			sqe = io_uring_get_sqe(ring);
			io_uring_prep_read(sqe, infd, req->iov.iov_base,
					   req->iov.iov_len, read_off);
			io_uring_sqe_set_data(sqe, req);

			read_off += this_size;
			insize -= this_size;
			inflight++;

			read_idx = (read_idx + 1) % QD;
		}

		/* wait for about half queue completion before resubmit */
		for (to_wait = (inflight >> 1) | 1; to_wait; to_wait--) {
			int ret, wait_nr;

			wait_nr = inflight;
			if (wait_nr > WAIT_BATCH)
				wait_nr = WAIT_BATCH;

			ret = io_uring_submit_and_wait(ring, wait_nr);
			if (ret < 0) {
				fprintf(stderr, "wait cqe: %s\n",
					strerror(-ret));
				return 1;
			}

			if (reap_completions(ring, &inflight, &outsize))
				return 1;
		}
	}
	assert(!inflight);

	return 0;
}

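/*
 * All file data has been sent at this point. A single recv on the AF_ALG
 * socket finalizes the hash and returns the digest; cqe->res is the
 * digest length in bytes.
 */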
static int get_result(struct kdigest *kdigest, const char *alg, const char *file)
{
	struct io_uring *ring = &kdigest->ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int i, ret;

	/* reuse I/O buf block to stash hash result */
	sqe = io_uring_get_sqe(ring);
	io_uring_prep_recv(sqe, outfd, kdigest->bufs, BS, 0);

	if (io_uring_submit_and_wait(ring, 1) < 0)
		return 1;

	ret = io_uring_peek_cqe(ring, &cqe);
	if (ret < 0) {
		fprintf(stderr, "peek cqe: %s\n", strerror(-ret));
		return 1;
	}

	if (cqe->res < 0) {
		fprintf(stderr, "cqe error: %s\n", strerror(-cqe->res));
		ret = 1;
		goto err;
	}

	fprintf(stdout, "uring %s%s(%s) returned(len=%u): ",
		kdigest->br ? "bundled " : "", alg, file, cqe->res);
	for (i = 0; i < cqe->res; i++)
		fprintf(stdout, "%02x", kdigest->bufs[i]);
	putc('\n', stdout);
	ret = 0;
err:
	io_uring_cqe_seen(ring, cqe);
	return ret;
}

int main(int argc, char *argv[])
{
	const char *alg;
	const char *infile;
	size_t alg_len, insize;
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type = "hash",
	};
	struct kdigest kdigest = { };
	struct io_uring_params p = { };
	int sfd, ret;

	if (argc < 3) {
		fprintf(stderr, "%s: algorithm infile\n", argv[0]);
		return 1;
	}

	alg = argv[1];
	infile = argv[2];
	alg_len = strlen(alg);
	if (alg_len >= sizeof(sa.salg_name)) {
		fprintf(stderr, "algorithm name too long\n");
		return 1;
	}
	/* +1 for null terminator */
	memcpy(sa.salg_name, alg, alg_len + 1);

	infd = open(infile, O_RDONLY);
	if (infd < 0) {
		perror("open infile");
		return 1;
	}

	sfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
	if (sfd < 0) {
		if (errno == EAFNOSUPPORT)
			fprintf(stderr, "kernel AF_ALG support not available. "
				"CONFIG_CRYPTO_USER_API_HASH required.\n");
		else
			perror("AF_ALG socket");
		return 1;
	}

	if (bind(sfd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
		if (errno == ENOENT)
			fprintf(stderr, "AF_ALG bind(%s): hash not available. "
				"See /proc/crypto hash algorithm list.\n",
				alg);
		else
			fprintf(stderr, "AF_ALG bind(%s): %s\n",
				alg, strerror(errno));
		return 1;
	}

	outfd = accept(sfd, NULL, 0);
	if (outfd < 0) {
		perror("AF_ALG accept");
		return 1;
	}

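	/*
	 * outfd is now the AF_ALG operation fd: data sent to it is hashed,
	 * and the digest is read back from it once the stream is done.
	 */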
	if (posix_memalign((void **)&kdigest.bufs, 4096, QD * BS)) {
		fprintf(stderr, "failed to alloc I/O bufs\n");
		return 1;
	}

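	/*
	 * Prefer single-issuer mode with deferred task running; if the
	 * running kernel doesn't support those setup flags, retry once
	 * with no flags set.
	 */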
	p.flags = IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN;
	do {
		ret = io_uring_queue_init_params(QD, &kdigest.ring, &p);
		if (!ret)
			break;
		if (!p.flags) {
			fprintf(stderr, "queue_init: %s\n", strerror(-ret));
			return 1;
		}
		p.flags = 0;
	} while (1);

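	/*
	 * With send bundle support, a provided-buffer ring lets one send
	 * SQE cover multiple queued buffers in order; without it, fall
	 * back to individually linked sends (see submit_sends()).
	 */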
	/* use send bundles, if available */
	if (p.features & IORING_FEAT_RECVSEND_BUNDLE) {
		kdigest.br = io_uring_setup_buf_ring(&kdigest.ring, QD, BGID, 0, &ret);
		if (!kdigest.br) {
			fprintf(stderr, "Failed setting up bundle buffer ring: %d\n", ret);
			return 1;
		}
	}

	if (get_file_size(infd, &insize))
		return 1;

	ret = digest_file(&kdigest, insize);
	if (ret) {
		fprintf(stderr, "%s digest failed\n", alg);
		return 1;
	}

	ret = get_result(&kdigest, alg, infile);
	if (ret) {
		fprintf(stderr, "failed to retrieve %s digest result\n", alg);
		return 1;
	}

	if (kdigest.br)
		io_uring_free_buf_ring(&kdigest.ring, kdigest.br, QD, BGID);
	io_uring_queue_exit(&kdigest.ring);
	free(kdigest.bufs);
	if (close(infd) < 0)
		ret |= 1;
	if (close(sfd) < 0)
		ret |= 1;
	if (close(outfd) < 0)
		ret |= 1;
	return ret;
}