// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"
#include "../include/hw_ip/mmu/mmu_general.h"

#include <linux/genalloc.h>
#include <linux/slab.h>

static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);

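/*
 * get_pgt_info() - find the pgt_info entry that tracks a shadow hop.
 * @ctx: pointer to the context structure.
 * @hop_addr: shadow (host-resident) address of the hop table.
 *
 * Looks the hop up in the context's shadow hash. Returns NULL if the hop
 * was never allocated through alloc_hop().
 */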
static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = NULL;

	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
				(unsigned long) hop_addr)
		if (hop_addr == pgt_info->shadow_addr)
			break;

	return pgt_info;
}

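/*
 * _free_hop() - release a hop that is no longer referenced: return its
 * physical page table to the device gen pool, remove it from the shadow
 * hash and free the host-side shadow copy. free_hop() is a convenience
 * wrapper that first looks up the pgt_info by its shadow address.
 */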
static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
{
	struct hl_device *hdev = ctx->hdev;

	gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, pgt_info->phys_addr,
			hdev->asic_prop.mmu_hop_table_size);
	hash_del(&pgt_info->node);
	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
	kfree(pgt_info);
}

static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);

	_free_hop(ctx, pgt_info);
}

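/*
 * alloc_hop() - allocate a new hop: a physical page table from the device
 * gen pool plus a zeroed host-side shadow copy, tracked by a pgt_info entry
 * in the context's shadow hash. Returns the shadow address of the new hop,
 * or ULLONG_MAX on failure.
 */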
static u64 alloc_hop(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pgt_info *pgt_info;
	u64 phys_addr, shadow_addr;

	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
	if (!pgt_info)
		return ULLONG_MAX;

	phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.mmu_pgt_pool,
					prop->mmu_hop_table_size);
	if (!phys_addr) {
		dev_err(hdev->dev, "failed to allocate page\n");
		goto pool_add_err;
	}

	shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
						GFP_KERNEL);
	if (!shadow_addr)
		goto shadow_err;

	pgt_info->phys_addr = phys_addr;
	pgt_info->shadow_addr = shadow_addr;
	pgt_info->ctx = ctx;
	pgt_info->num_of_ptes = 0;
	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);

	return shadow_addr;

shadow_err:
	gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, phys_addr,
			prop->mmu_hop_table_size);
pool_add_err:
	kfree(pgt_info);

	return ULLONG_MAX;
}

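/*
 * Hop0 tables are pre-allocated per ASID at fixed offsets: these helpers
 * return the device (physical) and host (shadow) addresses of this
 * context's hop0 table, respectively.
 */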
static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
{
	return ctx->hdev->asic_prop.mmu_pgt_addr +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static inline u64 get_hop0_addr(struct hl_ctx *ctx)
{
	return (u64) (uintptr_t) ctx->hdev->mmu_priv.mmu_shadow_hop0 +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

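/*
 * flush() - make sure PTE updates have reached the device. The dummy read
 * from hop0 serves to flush the preceding writes toward the device.
 */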
static void flush(struct hl_ctx *ctx)
{
	/* flush all writes from all cores to reach PCI */
	mb();
	ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
}

/* transform the value to physical address when writing to H/W */
static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
	/*
	 * The value to write is actually the address of the next shadow hop +
	 * flags at the 12 LSBs.
	 * Hence in order to get the value to write to the physical PTE, we
	 * clear the 12 LSBs and translate the shadow hop to its associated
	 * physical hop, and add back the original 12 LSBs.
	 */
	u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
				(val & FLAGS_MASK);

	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					phys_val);

	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* do not transform the value to physical address when writing to H/W */
static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
					u64 val)
{
	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					val);
	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* clear the last and present bits */
static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
	/* no need to transform the value to physical address */
	write_final_pte(ctx, pte_addr, 0);
}

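/* get_pte - increment the number of PTEs that point to the given hop */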
static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	get_pgt_info(ctx, hop_addr)->num_of_ptes++;
}

/*
 * put_pte - decrement the num of ptes and free the hop if possible
 *
 * @ctx: pointer to the context structure
 * @hop_addr: addr of the hop
 *
 * This function returns the number of ptes left on this hop. If the number is
 * 0, it means the hop was freed.
 */
static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
	int num_of_ptes_left;

	pgt_info->num_of_ptes--;

	/*
	 * Need to save the number of ptes left because free_hop might free
	 * the pgt_info
	 */
	num_of_ptes_left = pgt_info->num_of_ptes;
	if (!num_of_ptes_left)
		_free_hop(ctx, pgt_info);

	return num_of_ptes_left;
}

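/*
 * get_hopN_pte_addr() - compute the shadow address of the PTE that covers
 * virt_addr inside the given hop, using that hop level's mask and shift.
 */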
static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
					u64 virt_addr, u64 mask, u64 shift)
{
	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
			((virt_addr & mask) >> shift);
}

static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
					mmu_prop->hop0_shift);
}

static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
					mmu_prop->hop1_shift);
}

static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
					mmu_prop->hop2_shift);
}

static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
					mmu_prop->hop3_shift);
}

static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
					mmu_prop->hop4_shift);
}

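/*
 * get_next_hop_addr() - return the address of the next hop a PTE points to,
 * or ULLONG_MAX if the PTE is not present.
 */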
static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
	if (curr_pte & PAGE_PRESENT_MASK)
		return curr_pte & HOP_PHYS_ADDR_MASK;
	else
		return ULLONG_MAX;
}

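/*
 * get_alloc_next_hop_addr() - same as get_next_hop_addr(), but allocates a
 * new hop if the PTE is not present. *is_new_hop is updated only when an
 * allocation was attempted, and is true if it succeeded.
 */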
static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
						bool *is_new_hop)
{
	u64 hop_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop_addr == ULLONG_MAX) {
		hop_addr = alloc_hop(ctx);
		*is_new_hop = (hop_addr != ULLONG_MAX);
	}

	return hop_addr;
}

/* translates shadow address inside hop to a physical address */
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
	u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
	u64 shadow_hop_addr = shadow_addr & ~page_mask;
	u64 pte_offset = shadow_addr & page_mask;
	u64 phys_hop_addr;

	if (shadow_hop_addr != get_hop0_addr(ctx))
		phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
	else
		phys_hop_addr = get_phys_hop0_addr(ctx);

	return phys_hop_addr + pte_offset;
}

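/*
 * dram_default_mapping_init() - if the ASIC uses a default DRAM page
 * mapping, build the hop1/hop2/hop3 tables so that every DRAM page of this
 * context points to the default page. The allocated hops are saved in
 * ctx->dram_default_hops so they can be torn down later. Does nothing for
 * the kernel ASID or when the feature is not supported.
 */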
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr, pte_val;
	int rc, i, j, hop3_allocated = 0;

	if ((!hdev->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return 0;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;

	ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
	if (!ctx->dram_default_hops)
		return -ENOMEM;

	hop0_addr = get_hop0_addr(ctx);

	hop1_addr = alloc_hop(ctx);
	if (hop1_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 1\n");
		rc = -ENOMEM;
		goto hop1_err;
	}

	ctx->dram_default_hops[total_hops - 1] = hop1_addr;

	hop2_addr = alloc_hop(ctx);
	if (hop2_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 2\n");
		rc = -ENOMEM;
		goto hop2_err;
	}

	ctx->dram_default_hops[total_hops - 2] = hop2_addr;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		ctx->dram_default_hops[i] = alloc_hop(ctx);
		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
			rc = -ENOMEM;
			goto hop3_err;
		}
		hop3_allocated++;
	}

	/* need only pte 0 in hops 0 and 1 */
	pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop0_addr, pte_val);

	pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop1_addr, pte_val);
	get_pte(ctx, hop1_addr);

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
				PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, pte_val);
		get_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
			LAST_MASK | PAGE_PRESENT_MASK;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			write_final_pte(ctx, hop3_pte_addr, pte_val);
			get_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	flush(ctx);

	return 0;

hop3_err:
	for (i = 0 ; i < hop3_allocated ; i++)
		free_hop(ctx, ctx->dram_default_hops[i]);

	free_hop(ctx, hop2_addr);
hop2_err:
	free_hop(ctx, hop1_addr);
hop1_err:
	kfree(ctx->dram_default_hops);

	return rc;
}

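/*
 * dram_default_mapping_fini() - undo dram_default_mapping_init(): clear the
 * default-page PTEs, drop the hop reference counts and free the saved hops.
 */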
static void dram_default_mapping_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr;
	int i, j;

	if ((!hdev->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	hop0_addr = get_hop0_addr(ctx);
	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;
	hop1_addr = ctx->dram_default_hops[total_hops - 1];
	hop2_addr = ctx->dram_default_hops[total_hops - 2];

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			clear_pte(ctx, hop3_pte_addr);
			put_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		clear_pte(ctx, hop2_pte_addr);
		put_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	clear_pte(ctx, hop1_addr);
	put_pte(ctx, hop1_addr);
	clear_pte(ctx, hop0_addr);

	kfree(ctx->dram_default_hops);

	flush(ctx);
}

/**
 * hl_mmu_v1_init() - initialize the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Create a pool of pages for pgt_infos.
 * - Create a shadow table for pgt
 *
 * Return: 0 for success, non-zero for failure.
 */
static int hl_mmu_v1_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	hdev->mmu_priv.mmu_pgt_pool =
			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);

	if (!hdev->mmu_priv.mmu_pgt_pool) {
		dev_err(hdev->dev, "Failed to create page gen pool\n");
		return -ENOMEM;
	}

	rc = gen_pool_add(hdev->mmu_priv.mmu_pgt_pool, prop->mmu_pgt_addr +
			prop->mmu_hop0_tables_total_size,
			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
			-1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
		goto err_pool_add;
	}

	hdev->mmu_priv.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
						prop->mmu_hop_table_size,
						GFP_KERNEL | __GFP_ZERO);
	if (ZERO_OR_NULL_PTR(hdev->mmu_priv.mmu_shadow_hop0)) {
		rc = -ENOMEM;
		goto err_pool_add;
	}

	/* MMU H/W init will be done in device hw_init() */

	return 0;

err_pool_add:
	gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);

	return rc;
}

/**
 * hl_mmu_v1_fini() - release the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Disable MMU in H/W.
 * - Free the pgt_infos pool.
 *
 * All contexts should be freed before calling this function.
 */
static void hl_mmu_v1_fini(struct hl_device *hdev)
{
	/* MMU H/W fini was already done in device hw_fini() */

	kvfree(hdev->mmu_priv.mmu_shadow_hop0);
	gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);
}

/**
 * hl_mmu_v1_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a mutex to protect the concurrent mapping flow and a hash to hold
 * all the page table hops related to this context.
 * Return: 0 on success, non-zero otherwise.
 */
static int hl_mmu_v1_ctx_init(struct hl_ctx *ctx)
{
	mutex_init(&ctx->mmu_lock);
	hash_init(ctx->mmu_shadow_hash);

	return dram_default_mapping_init(ctx);
}

/*
 * hl_mmu_v1_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 * - Free the mutex
 * - Free DRAM default page mapping hops
 */
static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info;
	struct hlist_node *tmp;
	int i;

	dram_default_mapping_fini(ctx);

	if (!hash_empty(ctx->mmu_shadow_hash))
		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
			ctx->asid);

	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
		dev_err_ratelimited(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
		_free_hop(ctx, pgt_info);
	}

	mutex_destroy(&ctx->mmu_lock);
}

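/*
 * _hl_mmu_v1_unmap() - unmap a single page (regular or huge) at virt_addr.
 * Walks the hops top-down to the final PTE, clears it, and releases every
 * hop that is left with no PTEs. For DRAM addresses with a default page
 * mapping, the PTE is restored to point to the default page instead.
 * Returns 0 on success, negative error code if the address is not mapped.
 */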
static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
				u64 virt_addr, bool is_dram_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte;
	bool is_huge, clear_hop3 = true;

	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop1_addr == ULLONG_MAX)
		goto not_mapped;

	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop2_addr == ULLONG_MAX)
		goto not_mapped;

	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop3_addr == ULLONG_MAX)
		goto not_mapped;

	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	is_huge = curr_pte & LAST_MASK;

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev,
				"DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!is_huge) {
		hop4_addr = get_next_hop_addr(ctx, curr_pte);

		if (hop4_addr == ULLONG_MAX)
			goto not_mapped;

		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
							virt_addr);

		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;

		clear_hop3 = false;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
				HOP_PHYS_ADDR_MASK) | LAST_MASK |
					PAGE_PRESENT_MASK;
		if (curr_pte == default_pte) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		if (!(curr_pte & PAGE_PRESENT_MASK)) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		write_final_pte(ctx, hop3_pte_addr, default_pte);
		put_pte(ctx, hop3_addr);
	} else {
		if (!(curr_pte & PAGE_PRESENT_MASK))
			goto not_mapped;

		if (hop4_addr)
			clear_pte(ctx, hop4_pte_addr);
		else
			clear_pte(ctx, hop3_pte_addr);

		if (hop4_addr && !put_pte(ctx, hop4_addr))
			clear_hop3 = true;

		if (!clear_hop3)
			goto mapped;

		clear_pte(ctx, hop3_pte_addr);

		if (put_pte(ctx, hop3_addr))
			goto mapped;

		clear_pte(ctx, hop2_pte_addr);

		if (put_pte(ctx, hop2_addr))
			goto mapped;

		clear_pte(ctx, hop1_pte_addr);

		if (put_pte(ctx, hop1_addr))
			goto mapped;

		clear_pte(ctx, hop0_pte_addr);
	}

mapped:
	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
		virt_addr);

	return -EINVAL;
}

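/*
 * _hl_mmu_v1_map() - map a single page (regular or huge) of virt_addr to
 * phys_addr. Walks the hops top-down, allocating missing hops on the way,
 * writes the final PTE and then links any newly created hops into their
 * parents. Newly allocated hops are freed on error.
 */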
static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
			u32 page_size, bool is_dram_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte = 0;
	bool hop1_new = false, hop2_new = false, hop3_new = false,
		hop4_new = false, is_huge;
	int rc = -ENOMEM;

	/*
	 * This mapping function can map a page or a huge page. For huge page
	 * there are only 3 hops rather than 4. Currently the DRAM allocation
	 * uses huge pages only but user memory could have been allocated with
	 * one of the two page sizes. Since this is common code for all three
	 * cases, we need this huge page check.
	 */
	if (is_dram_addr) {
		mmu_prop = &prop->dmmu;
		is_huge = true;
	} else if (page_size == prop->pmmu_huge.page_size) {
		mmu_prop = &prop->pmmu_huge;
		is_huge = true;
	} else {
		mmu_prop = &prop->pmmu;
		is_huge = false;
	}

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
	if (hop1_addr == ULLONG_MAX)
		goto err;

	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
	if (hop2_addr == ULLONG_MAX)
		goto err;

	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
	if (hop3_addr == ULLONG_MAX)
		goto err;

	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	if (!is_huge) {
		hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
		if (hop4_addr == ULLONG_MAX)
			goto err;

		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
							virt_addr);
		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
					HOP_PHYS_ADDR_MASK) | LAST_MASK |
						PAGE_PRESENT_MASK;

		if (curr_pte != default_pte) {
			dev_err(hdev->dev,
				"DRAM: mapping already exists for virt_addr 0x%llx\n",
					virt_addr);
			rc = -EINVAL;
			goto err;
		}

		if (hop1_new || hop2_new || hop3_new || hop4_new) {
			dev_err(hdev->dev,
				"DRAM mapping should not allocate more hops\n");
			rc = -EFAULT;
			goto err;
		}
	} else if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev,
			"mapping already exists for virt_addr 0x%llx\n",
				virt_addr);

		dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
		dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
		dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
		dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);

		if (!is_huge)
			dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
				*(u64 *) (uintptr_t) hop4_pte_addr,
				hop4_pte_addr);

		rc = -EINVAL;
		goto err;
	}

	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
			| PAGE_PRESENT_MASK;

	if (is_huge)
		write_final_pte(ctx, hop3_pte_addr, curr_pte);
	else
		write_final_pte(ctx, hop4_pte_addr, curr_pte);

	if (hop1_new) {
		curr_pte =
			(hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop0_pte_addr, curr_pte);
	}
	if (hop2_new) {
		curr_pte =
			(hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop1_pte_addr, curr_pte);
		get_pte(ctx, hop1_addr);
	}
	if (hop3_new) {
		curr_pte =
			(hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, curr_pte);
		get_pte(ctx, hop2_addr);
	}

	if (!is_huge) {
		if (hop4_new) {
			curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
					PAGE_PRESENT_MASK;
			write_pte(ctx, hop3_pte_addr, curr_pte);
			get_pte(ctx, hop3_addr);
		}

		get_pte(ctx, hop4_addr);
	} else {
		get_pte(ctx, hop3_addr);
	}

	return 0;

err:
	if (hop4_new)
		free_hop(ctx, hop4_addr);
	if (hop3_new)
		free_hop(ctx, hop3_addr);
	if (hop2_new)
		free_hop(ctx, hop2_addr);
	if (hop1_new)
		free_hop(ctx, hop1_addr);

	return rc;
}

/*
 * hl_mmu_v1_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v1_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v1_set_funcs - set the MMU functions for working with MMU v1
 *
 * @hdev: pointer to the device structure
 */
void hl_mmu_v1_set_funcs(struct hl_device *hdev)
{
	struct hl_mmu_funcs *mmu = &hdev->mmu_func;

	mmu->init = hl_mmu_v1_init;
	mmu->fini = hl_mmu_v1_fini;
	mmu->ctx_init = hl_mmu_v1_ctx_init;
	mmu->ctx_fini = hl_mmu_v1_ctx_fini;
	mmu->map = _hl_mmu_v1_map;
	mmu->unmap = _hl_mmu_v1_unmap;
	mmu->flush = flush;
	mmu->swap_out = hl_mmu_v1_swap_out;
	mmu->swap_in = hl_mmu_v1_swap_in;
}