1 //===------- state-queuei.h - OpenMP GPU State Queue ------------- CUDA -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the implementation of a queue to hand out OpenMP state
10 // objects to teams of one or more kernels.
11 //
12 // Reference:
13 // Thomas R.W. Scogland and Wu-chun Feng. 2015.
14 // Design and Evaluation of Scalable Concurrent Queues for Many-Core
15 // Architectures. International Conference on Performance Engineering.
16 //
17 //===----------------------------------------------------------------------===//
18
19 #include "state-queue.h"
20 #include "common/target_atomic.h"
21
/// Draw the next enqueue ticket.
///
/// Performs an atomic add of one on `tail` and returns whatever
/// __kmpc_atomic_add hands back (presumably the pre-increment value, as with
/// CUDA's atomicAdd -- confirm against common/target_atomic.h).
template <typename ElementType, uint32_t SIZE>
INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::ENQUEUE_TICKET() {
  unsigned int *tailCounter = (unsigned int *)&tail;
  uint32_t ticket = __kmpc_atomic_add(tailCounter, 1u);
  return ticket;
}
26
/// Draw the next dequeue ticket.
///
/// Performs an atomic add of one on `head` and returns whatever
/// __kmpc_atomic_add hands back (presumably the pre-increment value, as with
/// CUDA's atomicAdd -- confirm against common/target_atomic.h).
template <typename ElementType, uint32_t SIZE>
INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::DEQUEUE_TICKET() {
  unsigned int *headCounter = (unsigned int *)&head;
  uint32_t ticket = __kmpc_atomic_add(headCounter, 1u);
  return ticket;
}
31
/// Map a ticket to the base (even) id of the round it belongs to.
///
/// The round is the number of whole passes over the SIZE slots that precede
/// \p ticket; the returned id is twice that round. Dequeue waits on this even
/// value, Enqueue waits on this value plus one.
///
/// NOTE(review): the result is not reduced modulo MAX_ID, whereas DoneServing
/// wraps the stored id with `% MAX_ID` -- confirm that tickets cannot grow far
/// enough for the two to disagree.
template <typename ElementType, uint32_t SIZE>
INLINE uint32_t
omptarget_nvptx_Queue<ElementType, SIZE>::ID(uint32_t ticket) {
  uint32_t round = ticket / SIZE;
  return round * 2;
}
37
/// Check whether \p slot is currently serving \p id.
///
/// The slot's id is read through __kmpc_atomic_add with an addend of zero, so
/// the stored value is left unchanged, and then compared against \p id.
///
/// \returns true when the value obtained for ids[slot] equals \p id.
template <typename ElementType, uint32_t SIZE>
INLINE bool omptarget_nvptx_Queue<ElementType, SIZE>::IsServing(uint32_t slot,
                                                                uint32_t id) {
  unsigned int *slotId = (unsigned int *)&ids[slot];
  // Adding zero acts as a read that goes through the atomic helper.
  uint32_t observed = __kmpc_atomic_add(slotId, 0u);
  return observed == id;
}
43
/// Store \p element into elementQueue[slot].
///
/// The pointer is converted to an unsigned long long and written with
/// __kmpc_atomic_exchange; the value returned by the exchange is discarded.
template <typename ElementType, uint32_t SIZE>
INLINE void
omptarget_nvptx_Queue<ElementType, SIZE>::PushElement(uint32_t slot,
                                                      ElementType *element) {
  unsigned long long *cell = (unsigned long long *)&elementQueue[slot];
  unsigned long long encoded = (unsigned long long)element;
  __kmpc_atomic_exchange(cell, encoded);
}
51
/// Read the element pointer held in elementQueue[slot].
///
/// The entry is fetched through __kmpc_atomic_add with an addend of zero, so
/// the stored value is left in place, and the result is converted back to an
/// ElementType pointer.
template <typename ElementType, uint32_t SIZE>
INLINE ElementType *
omptarget_nvptx_Queue<ElementType, SIZE>::PopElement(uint32_t slot) {
  unsigned long long *cell = (unsigned long long *)&elementQueue[slot];
  // Adding zero acts as a read that goes through the atomic helper.
  unsigned long long encoded = __kmpc_atomic_add(cell, 0ull);
  return (ElementType *)encoded;
}
58
/// Advance \p slot past \p id.
///
/// Writes (id + 1) modulo MAX_ID into ids[slot] with __kmpc_atomic_exchange;
/// the value returned by the exchange is discarded.
template <typename ElementType, uint32_t SIZE>
INLINE void omptarget_nvptx_Queue<ElementType, SIZE>::DoneServing(uint32_t slot,
                                                                  uint32_t id) {
  unsigned int *slotId = (unsigned int *)&ids[slot];
  unsigned int successor = (id + 1) % MAX_ID;
  __kmpc_atomic_exchange(slotId, successor);
}
64
/// Put \p element into the queue.
///
/// Takes an enqueue ticket, derives the slot (ticket modulo SIZE) and the id
/// to wait for (ID(ticket) + 1, i.e. the odd id of the ticket's round), spins
/// until the slot reports that id, stores the element and finally advances the
/// slot's id.
template <typename ElementType, uint32_t SIZE>
INLINE void
omptarget_nvptx_Queue<ElementType, SIZE>::Enqueue(ElementType *element) {
  uint32_t myTicket = ENQUEUE_TICKET();
  uint32_t mySlot = myTicket % SIZE;
  uint32_t myTurn = ID(myTicket) + 1;

  // Busy-wait until the slot reaches this caller's turn.
  for (;;) {
    if (IsServing(mySlot, myTurn))
      break;
  }

  PushElement(mySlot, element);
  DoneServing(mySlot, myTurn);
}
76
/// Take an element out of the queue.
///
/// Takes a dequeue ticket, derives the slot (ticket modulo SIZE) and the id to
/// wait for (ID(ticket), i.e. the even id of the ticket's round), spins until
/// the slot reports that id, reads the slot's element pointer and finally
/// advances the slot's id.
///
/// \returns the pointer read from the slot, or the address of elements[slot]
/// when the slot held a null pointer.
template <typename ElementType, uint32_t SIZE>
INLINE ElementType *omptarget_nvptx_Queue<ElementType, SIZE>::Dequeue() {
  uint32_t myTicket = DEQUEUE_TICKET();
  uint32_t mySlot = myTicket % SIZE;
  uint32_t myTurn = ID(myTicket);

  // Busy-wait until the slot reaches this caller's turn.
  for (;;) {
    if (IsServing(mySlot, myTurn))
      break;
  }

  ElementType *result = PopElement(mySlot);
  // GPU code has no constructors to pre-fill the queue, so a null entry is
  // populated on demand with the slot's own backing element.
  if (!result)
    result = &elements[mySlot];

  DoneServing(mySlot, myTurn);
  return result;
}
91