1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright 2020-21 IBM Corp.
4 */
5
6 #define pr_fmt(fmt) "vas: " fmt
7
8 #include <linux/module.h>
9 #include <linux/kernel.h>
10 #include <linux/export.h>
11 #include <linux/types.h>
12 #include <linux/delay.h>
13 #include <linux/slab.h>
14 #include <linux/interrupt.h>
15 #include <linux/irqdomain.h>
16 #include <asm/machdep.h>
17 #include <asm/hvcall.h>
18 #include <asm/plpar_wrappers.h>
19 #include <asm/vas.h>
20 #include "vas.h"
21
22 #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul
23 #define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul
24 /* The hypervisor allows one credit per window right now */
25 #define DEF_WIN_CREDS 1
26
27 static struct vas_all_caps caps_all;
28 static bool copypaste_feat;
29
30 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
31 static DEFINE_MUTEX(vas_pseries_mutex);
32
hcall_return_busy_check(long rc)33 static long hcall_return_busy_check(long rc)
34 {
35 /* Check if we are stalled for some time */
36 if (H_IS_LONG_BUSY(rc)) {
37 msleep(get_longbusy_msecs(rc));
38 rc = H_BUSY;
39 } else if (rc == H_BUSY) {
40 cond_resched();
41 }
42
43 return rc;
44 }
45
46 /*
47 * Allocate VAS window hcall
48 */
h_allocate_vas_window(struct pseries_vas_window * win,u64 * domain,u8 wintype,u16 credits)49 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
50 u8 wintype, u16 credits)
51 {
52 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
53 long rc;
54
55 do {
56 rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
57 credits, domain[0], domain[1], domain[2],
58 domain[3], domain[4], domain[5]);
59
60 rc = hcall_return_busy_check(rc);
61 } while (rc == H_BUSY);
62
63 if (rc == H_SUCCESS) {
64 if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
65 pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
66 return -ENOTSUPP;
67 }
68 win->vas_win.winid = retbuf[0];
69 win->win_addr = retbuf[1];
70 win->complete_irq = retbuf[2];
71 win->fault_irq = retbuf[3];
72 return 0;
73 }
74
75 pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
76 rc, wintype, credits);
77
78 return -EIO;
79 }
80
81 /*
82 * Deallocate VAS window hcall.
83 */
h_deallocate_vas_window(u64 winid)84 static int h_deallocate_vas_window(u64 winid)
85 {
86 long rc;
87
88 do {
89 rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
90
91 rc = hcall_return_busy_check(rc);
92 } while (rc == H_BUSY);
93
94 if (rc == H_SUCCESS)
95 return 0;
96
97 pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
98 rc, winid);
99 return -EIO;
100 }
101
102 /*
103 * Modify VAS window.
104 * After the window is opened with allocate window hcall, configure it
105 * with flags and LPAR PID before using.
106 */
h_modify_vas_window(struct pseries_vas_window * win)107 static int h_modify_vas_window(struct pseries_vas_window *win)
108 {
109 long rc;
110 u32 lpid = mfspr(SPRN_PID);
111
112 /*
113 * AMR value is not supported in Linux VAS implementation.
114 * The hypervisor ignores it if 0 is passed.
115 */
116 do {
117 rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
118 win->vas_win.winid, lpid, 0,
119 VAS_MOD_WIN_FLAGS, 0);
120
121 rc = hcall_return_busy_check(rc);
122 } while (rc == H_BUSY);
123
124 if (rc == H_SUCCESS)
125 return 0;
126
127 pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u lpid %u\n",
128 rc, win->vas_win.winid, lpid);
129 return -EIO;
130 }
131
132 /*
133 * This hcall is used to determine the capabilities from the hypervisor.
134 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
135 * @query_type: If 0 is passed, the hypervisor returns the overall
136 * capabilities which provides all feature(s) that are
137 * available. Then query the hypervisor to get the
138 * corresponding capabilities for the specific feature.
139 * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
140 * and VAS GZIP Default capabilities.
141 * H_QUERY_NX_CAPABILITIES provides NX GZIP
142 * capabilities.
143 * @result: Return buffer to save capabilities.
144 */
h_query_vas_capabilities(const u64 hcall,u8 query_type,u64 result)145 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
146 {
147 long rc;
148
149 rc = plpar_hcall_norets(hcall, query_type, result);
150
151 if (rc == H_SUCCESS)
152 return 0;
153
154 pr_err("HCALL(%llx) error %ld, query_type %u, result buffer 0x%llx\n",
155 hcall, rc, query_type, result);
156 return -EIO;
157 }
158 EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
159
160 /*
161 * hcall to get fault CRB from the hypervisor.
162 */
h_get_nx_fault(u32 winid,u64 buffer)163 static int h_get_nx_fault(u32 winid, u64 buffer)
164 {
165 long rc;
166
167 rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
168
169 if (rc == H_SUCCESS)
170 return 0;
171
172 pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
173 rc, winid, buffer);
174 return -EIO;
175
176 }
177
178 /*
179 * Handle the fault interrupt.
180 * When the fault interrupt is received for each window, query the
181 * hypervisor to get the fault CRB on the specific fault. Then
182 * process the CRB by updating CSB or send signal if the user space
183 * CSB is invalid.
184 * Note: The hypervisor forwards an interrupt for each fault request.
185 * So one fault CRB to process for each H_GET_NX_FAULT hcall.
186 */
pseries_vas_fault_thread_fn(int irq,void * data)187 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
188 {
189 struct pseries_vas_window *txwin = data;
190 struct coprocessor_request_block crb;
191 struct vas_user_win_ref *tsk_ref;
192 int rc;
193
194 rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
195 if (!rc) {
196 tsk_ref = &txwin->vas_win.task_ref;
197 vas_dump_crb(&crb);
198 vas_update_csb(&crb, tsk_ref);
199 }
200
201 return IRQ_HANDLED;
202 }
203
204 /*
205 * Allocate window and setup IRQ mapping.
206 */
allocate_setup_window(struct pseries_vas_window * txwin,u64 * domain,u8 wintype)207 static int allocate_setup_window(struct pseries_vas_window *txwin,
208 u64 *domain, u8 wintype)
209 {
210 int rc;
211
212 rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
213 if (rc)
214 return rc;
215 /*
216 * On PowerVM, the hypervisor setup and forwards the fault
217 * interrupt per window. So the IRQ setup and fault handling
218 * will be done for each open window separately.
219 */
220 txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
221 if (!txwin->fault_virq) {
222 pr_err("Failed irq mapping %d\n", txwin->fault_irq);
223 rc = -EINVAL;
224 goto out_win;
225 }
226
227 txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
228 txwin->vas_win.winid);
229 if (!txwin->name) {
230 rc = -ENOMEM;
231 goto out_irq;
232 }
233
234 rc = request_threaded_irq(txwin->fault_virq, NULL,
235 pseries_vas_fault_thread_fn, IRQF_ONESHOT,
236 txwin->name, txwin);
237 if (rc) {
238 pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
239 txwin->vas_win.winid, txwin->fault_virq, rc);
240 goto out_free;
241 }
242
243 txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
244
245 return 0;
246 out_free:
247 kfree(txwin->name);
248 out_irq:
249 irq_dispose_mapping(txwin->fault_virq);
250 out_win:
251 h_deallocate_vas_window(txwin->vas_win.winid);
252 return rc;
253 }
254
free_irq_setup(struct pseries_vas_window * txwin)255 static inline void free_irq_setup(struct pseries_vas_window *txwin)
256 {
257 free_irq(txwin->fault_virq, txwin);
258 kfree(txwin->name);
259 irq_dispose_mapping(txwin->fault_virq);
260 }
261
/*
 * Open and configure a VAS window (the open_win operation).
 *
 * @vas_id:	-1 requests a window on the VAS instance where the calling
 *		process is executing; the domain values are then obtained
 *		with H_HOME_NODE_ASSOCIATIVITY. For any other value the
 *		default domain is passed to the hypervisor.
 * @flags:	VAS_TX_WIN_FLAG_QOS_CREDIT selects QoS credits, otherwise
 *		default credits are used.
 * @cop_type:	Not used by this implementation.
 *
 * Returns a pointer to the vas_window embedded in the newly allocated
 * pseries window, or an ERR_PTR() carrying a negative errno.
 */
static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
					      enum vas_cop_type cop_type)
{
	/*
	 * Only domain[0] is set to VAS_DEFAULT_DOMAIN_ID, the remaining
	 * entries are zero-initialized.
	 * NOTE(review): confirm that the hypervisor only looks at the
	 * first entry when selecting the default VAS instance.
	 */
	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
	struct vas_cop_feat_caps *cop_feat_caps;
	struct vas_caps *caps;
	struct pseries_vas_window *txwin;
	int rc;

	txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
	if (!txwin)
		return ERR_PTR(-ENOMEM);

	/*
	 * A VAS window can have many credits which means that many
	 * requests can be issued simultaneously. But the hypervisor
	 * restricts one credit per window.
	 * The hypervisor introduces 2 different types of credits:
	 * Default credit type (Uses normal priority FIFO):
	 *	A limited number of credits are assigned to partitions
	 *	based on processor entitlement. But these credits may be
	 *	over-committed on a system depending on whether the CPUs
	 *	are in shared or dedicated modes - that is, more requests
	 *	may be issued across the system than NX can service at
	 *	once which can result in paste command failure (RMA_busy).
	 *	Then the process has to resend requests or fall back to
	 *	SW compression.
	 * Quality of Service (QoS) credit type (Uses high priority FIFO):
	 *	To avoid NX HW contention, the system admins can assign
	 *	QoS credits for each LPAR so that this partition is
	 *	guaranteed access to NX resources. These credits are
	 *	assigned to partitions via the HMC.
	 *	Refer to PAPR for more information.
	 *
	 * Allocate the window with QoS credits if the user requested
	 * them. Otherwise default credits are used.
	 */
	if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
		caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
	else
		caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];

	cop_feat_caps = &caps->caps;

	/* Reserve a credit up front; it is returned at 'out' on failure */
	if (atomic_inc_return(&cop_feat_caps->used_lpar_creds) >
			atomic_read(&cop_feat_caps->target_lpar_creds)) {
		pr_err("Credits are not available to allocate window\n");
		rc = -EINVAL;
		goto out;
	}

	if (vas_id == -1) {
		/*
		 * The user space is requesting to allocate a window on
		 * a VAS instance where the process is executing.
		 * On PowerVM, domain values are passed to the hypervisor
		 * to select the VAS instance. Useful if the process has
		 * affinity to a NUMA node.
		 * The hypervisor selects the VAS instance if
		 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
		 * The h_allocate_vas_window hcall is defined to take
		 * domain values as specified by h_home_node_associativity,
		 * so no unpacking needs to be done.
		 */
		rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
				  VPHN_FLAG_VCPU, hard_smp_processor_id());
		if (rc != H_SUCCESS) {
			pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
			/*
			 * rc holds a hypervisor status, not an errno. It
			 * must not reach ERR_PTR(): a positive status would
			 * yield a pointer that IS_ERR() does not recognise.
			 */
			rc = -EIO;
			goto out;
		}
	}

	/*
	 * Allocate / Deallocate window hcalls and setup / free IRQs
	 * have to be protected with mutex.
	 * Open VAS window: Allocate window hcall and setup IRQ
	 * Close VAS window: Deallocate window hcall and free IRQ
	 *	The hypervisor waits until all NX requests are
	 *	completed before closing the window. So it expects the
	 *	OS to handle NX faults, which means the IRQ can be freed
	 *	only after the deallocate window hcall has returned.
	 * So once the window is closed with the deallocate hcall and
	 * before the IRQ is freed, the hypervisor can assign the same
	 * fault IRQ to a new allocate hcall. That can make the IRQ
	 * setup fail for the new window since the same fault IRQ has
	 * not been freed by the OS yet.
	 */
	mutex_lock(&vas_pseries_mutex);
	rc = allocate_setup_window(txwin, (u64 *)&domain[0],
				   cop_feat_caps->win_type);
	mutex_unlock(&vas_pseries_mutex);
	if (rc)
		goto out;

	/*
	 * Modify window and it is ready to use.
	 */
	rc = h_modify_vas_window(txwin);
	if (!rc)
		rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
	if (rc)
		goto out_free;

	vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
	txwin->win_type = cop_feat_caps->win_type;
	mutex_lock(&vas_pseries_mutex);
	list_add(&txwin->win_list, &caps->list);
	mutex_unlock(&vas_pseries_mutex);

	return &txwin->vas_win;

out_free:
	/*
	 * Window is not operational. Free the IRQ before closing the
	 * window so that the mutex does not have to be held.
	 */
	free_irq_setup(txwin);
	h_deallocate_vas_window(txwin->vas_win.winid);
out:
	atomic_dec(&cop_feat_caps->used_lpar_creds);
	kfree(txwin);
	return ERR_PTR(rc);
}
385
vas_paste_address(struct vas_window * vwin)386 static u64 vas_paste_address(struct vas_window *vwin)
387 {
388 struct pseries_vas_window *win;
389
390 win = container_of(vwin, struct pseries_vas_window, vas_win);
391 return win->win_addr;
392 }
393
deallocate_free_window(struct pseries_vas_window * win)394 static int deallocate_free_window(struct pseries_vas_window *win)
395 {
396 int rc = 0;
397
398 /*
399 * The hypervisor waits for all requests including faults
400 * are processed before closing the window - Means all
401 * credits have to be returned. In the case of fault
402 * request, a credit is returned after OS issues
403 * H_GET_NX_FAULT hcall.
404 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
405 * hcall.
406 */
407 rc = h_deallocate_vas_window(win->vas_win.winid);
408 if (!rc)
409 free_irq_setup(win);
410
411 return rc;
412 }
413
vas_deallocate_window(struct vas_window * vwin)414 static int vas_deallocate_window(struct vas_window *vwin)
415 {
416 struct pseries_vas_window *win;
417 struct vas_cop_feat_caps *caps;
418 int rc = 0;
419
420 if (!vwin)
421 return -EINVAL;
422
423 win = container_of(vwin, struct pseries_vas_window, vas_win);
424
425 /* Should not happen */
426 if (win->win_type >= VAS_MAX_FEAT_TYPE) {
427 pr_err("Window (%u): Invalid window type %u\n",
428 vwin->winid, win->win_type);
429 return -EINVAL;
430 }
431
432 caps = &vascaps[win->win_type].caps;
433 mutex_lock(&vas_pseries_mutex);
434 rc = deallocate_free_window(win);
435 if (rc) {
436 mutex_unlock(&vas_pseries_mutex);
437 return rc;
438 }
439
440 list_del(&win->win_list);
441 atomic_dec(&caps->used_lpar_creds);
442 mutex_unlock(&vas_pseries_mutex);
443
444 mm_context_remove_vas_window(vwin->task_ref.mm);
445 put_vas_user_win_ref(&vwin->task_ref);
446
447 kfree(win);
448 return 0;
449 }
450
451 static const struct vas_user_win_ops vops_pseries = {
452 .open_win = vas_allocate_window, /* Open and configure window */
453 .paste_addr = vas_paste_address, /* To do copy/paste */
454 .close_win = vas_deallocate_window, /* Close window */
455 };
456
457 /*
458 * Supporting only nx-gzip coprocessor type now, but this API code
459 * extended to other coprocessor types later.
460 */
vas_register_api_pseries(struct module * mod,enum vas_cop_type cop_type,const char * name)461 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
462 const char *name)
463 {
464 int rc;
465
466 if (!copypaste_feat)
467 return -ENOTSUPP;
468
469 rc = vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
470
471 return rc;
472 }
473 EXPORT_SYMBOL_GPL(vas_register_api_pseries);
474
/*
 * Counterpart of vas_register_api_pseries(); simply forwards to
 * vas_unregister_coproc_api().
 */
void vas_unregister_api_pseries(void)
{
	vas_unregister_coproc_api();
}
EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
480
481 /*
482 * Get the specific capabilities based on the feature type.
483 * Right now supports GZIP default and GZIP QoS capabilities.
484 */
get_vas_capabilities(u8 feat,enum vas_cop_feat_type type,struct hv_vas_cop_feat_caps * hv_caps)485 static int get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
486 struct hv_vas_cop_feat_caps *hv_caps)
487 {
488 struct vas_cop_feat_caps *caps;
489 struct vas_caps *vcaps;
490 int rc = 0;
491
492 vcaps = &vascaps[type];
493 memset(vcaps, 0, sizeof(*vcaps));
494 INIT_LIST_HEAD(&vcaps->list);
495
496 caps = &vcaps->caps;
497
498 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
499 (u64)virt_to_phys(hv_caps));
500 if (rc)
501 return rc;
502
503 caps->user_mode = hv_caps->user_mode;
504 if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
505 pr_err("User space COPY/PASTE is not supported\n");
506 return -ENOTSUPP;
507 }
508
509 caps->descriptor = be64_to_cpu(hv_caps->descriptor);
510 caps->win_type = hv_caps->win_type;
511 if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
512 pr_err("Unsupported window type %u\n", caps->win_type);
513 return -EINVAL;
514 }
515 caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
516 caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
517 atomic_set(&caps->target_lpar_creds,
518 be16_to_cpu(hv_caps->target_lpar_creds));
519 if (feat == VAS_GZIP_DEF_FEAT) {
520 caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
521
522 if (caps->max_win_creds < DEF_WIN_CREDS) {
523 pr_err("Window creds(%u) > max allowed window creds(%u)\n",
524 DEF_WIN_CREDS, caps->max_win_creds);
525 return -EINVAL;
526 }
527 }
528
529 copypaste_feat = true;
530
531 return 0;
532 }
533
pseries_vas_init(void)534 static int __init pseries_vas_init(void)
535 {
536 struct hv_vas_cop_feat_caps *hv_cop_caps;
537 struct hv_vas_all_caps *hv_caps;
538 int rc;
539
540 /*
541 * Linux supports user space COPY/PASTE only with Radix
542 */
543 if (!radix_enabled()) {
544 pr_err("API is supported only with radix page tables\n");
545 return -ENOTSUPP;
546 }
547
548 hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
549 if (!hv_caps)
550 return -ENOMEM;
551 /*
552 * Get VAS overall capabilities by passing 0 to feature type.
553 */
554 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
555 (u64)virt_to_phys(hv_caps));
556 if (rc)
557 goto out;
558
559 caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
560 caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
561
562 hv_cop_caps = kmalloc(sizeof(*hv_cop_caps), GFP_KERNEL);
563 if (!hv_cop_caps) {
564 rc = -ENOMEM;
565 goto out;
566 }
567 /*
568 * QOS capabilities available
569 */
570 if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
571 rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
572 VAS_GZIP_QOS_FEAT_TYPE, hv_cop_caps);
573
574 if (rc)
575 goto out_cop;
576 }
577 /*
578 * Default capabilities available
579 */
580 if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT) {
581 rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
582 VAS_GZIP_DEF_FEAT_TYPE, hv_cop_caps);
583 if (rc)
584 goto out_cop;
585 }
586
587 pr_info("GZIP feature is available\n");
588
589 out_cop:
590 kfree(hv_cop_caps);
591 out:
592 kfree(hv_caps);
593 return rc;
594 }
595 machine_device_initcall(pseries, pseries_vas_init);
596