1 // Copyright 2024 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14
15 /*
16 * Copyright 2018 - 2022 NXP
17 * All rights reserved.
18 *
19 *
20 * SPDX-License-Identifier: BSD-3-Clause
21 */
22
23 #include "pw_stream_uart_mcuxpresso/dma_stream.h"
24
25 #include "pw_assert/check.h"
26 #include "pw_preprocessor/util.h"
27
28 namespace pw::stream {
29
30 // Deinitialize the DMA channels and USART.
Deinit()31 void UartDmaStreamMcuxpresso::Deinit() {
32 // We need to touch register space that can be shared
33 // among several DMA peripherals, hence we need to access
34 // it exclusively. We achieve exclusive access on non-SMP systems as
35 // a side effect of acquiring the interrupt_lock_, since acquiring the
36 // interrupt_lock_ disables interrupts on the current CPU, which means
37 // we cannot get descheduled until we release the interrupt_lock_.
38 interrupt_lock_.lock();
39 DMA_DisableChannel(config_.dma_base, config_.tx_dma_ch);
40 DMA_DisableChannel(config_.dma_base, config_.rx_dma_ch);
41 interrupt_lock_.unlock();
42
43 USART_Deinit(config_.usart_base);
44 clock_tree_element_controller_.Release().IgnoreError();
45 }
46
~UartDmaStreamMcuxpresso()47 UartDmaStreamMcuxpresso::~UartDmaStreamMcuxpresso() {
48 if (!initialized_) {
49 return;
50 }
51 Deinit();
52 }
53
54 // Initialize the USART and DMA channels based on the configuration
55 // specified during object creation.
Init(uint32_t srcclk)56 Status UartDmaStreamMcuxpresso::Init(uint32_t srcclk) {
57 if (srcclk == 0) {
58 return Status::InvalidArgument();
59 }
60 if (config_.usart_base == nullptr) {
61 return Status::InvalidArgument();
62 }
63 if (config_.baud_rate == 0) {
64 return Status::InvalidArgument();
65 }
66 if (config_.dma_base == nullptr) {
67 return Status::InvalidArgument();
68 }
69
70 usart_config_t defconfig_;
71 USART_GetDefaultConfig(&defconfig_);
72
73 defconfig_.baudRate_Bps = config_.baud_rate;
74 defconfig_.parityMode = config_.parity;
75 defconfig_.enableTx = true;
76 defconfig_.enableRx = true;
77
78 PW_TRY(clock_tree_element_controller_.Acquire());
79 status_t status = USART_Init(config_.usart_base, &defconfig_, srcclk);
80 if (status != kStatus_Success) {
81 clock_tree_element_controller_.Release().IgnoreError();
82 return Status::Internal();
83 }
84
85 // We need to touch register space that can be shared
86 // among several DMA peripherals, hence we need to access
87 // it exclusively. We achieve exclusive access on non-SMP systems as
88 // a side effect of acquiring the interrupt_lock_, since acquiring the
89 // interrupt_lock_ disables interrupts on the current CPU, which means
90 // we cannot get descheduled until we release the interrupt_lock_.
91 interrupt_lock_.lock();
92
93 INPUTMUX_Init(INPUTMUX);
94 // Enable DMA request.
95 INPUTMUX_EnableSignal(
96 INPUTMUX, config_.rx_input_mux_dmac_ch_request_en, true);
97 INPUTMUX_EnableSignal(
98 INPUTMUX, config_.tx_input_mux_dmac_ch_request_en, true);
99 // Turnoff clock to inputmux to save power. Clock is only needed to make
100 // changes.
101 INPUTMUX_Deinit(INPUTMUX);
102
103 DMA_EnableChannel(config_.dma_base, config_.tx_dma_ch);
104 DMA_EnableChannel(config_.dma_base, config_.rx_dma_ch);
105
106 DMA_CreateHandle(&tx_data_.dma_handle, config_.dma_base, config_.tx_dma_ch);
107 DMA_CreateHandle(&rx_data_.dma_handle, config_.dma_base, config_.rx_dma_ch);
108
109 interrupt_lock_.unlock();
110
111 status = USART_TransferCreateHandleDMA(config_.usart_base,
112 &uart_dma_handle_,
113 TxRxCompletionCallback,
114 this,
115 &tx_data_.dma_handle,
116 &rx_data_.dma_handle);
117
118 if (status != kStatus_Success) {
119 Deinit();
120 return Status::Internal();
121 }
122
123 // Read into the rx ring buffer.
124 interrupt_lock_.lock();
125 TriggerReadDma();
126 interrupt_lock_.unlock();
127
128 initialized_ = true;
129 return OkStatus();
130 }
131
132 // DMA usart data into ring buffer
133 //
134 // At most kUsartDmaMaxTransferCount bytes can be copied per DMA transfer.
135 // If completion_size is specified and dataSize is larger than completion_size,
136 // the dataSize will be limited to completion_size so that the completion
137 // callback will be called once completion_size bytes have been received.
TriggerReadDma()138 void UartDmaStreamMcuxpresso::TriggerReadDma() {
139 uint8_t* ring_buffer =
140 reinterpret_cast<uint8_t*>(rx_data_.ring_buffer.data());
141 rx_data_.transfer.data = &ring_buffer[rx_data_.ring_buffer_write_idx];
142
143 if (rx_data_.ring_buffer_write_idx + kUsartDmaMaxTransferCount >
144 rx_data_.ring_buffer.size_bytes()) {
145 rx_data_.transfer.dataSize =
146 rx_data_.ring_buffer.size_bytes() - rx_data_.ring_buffer_write_idx;
147 } else {
148 rx_data_.transfer.dataSize = kUsartDmaMaxTransferCount;
149 }
150
151 if (rx_data_.completion_size > 0 &&
152 rx_data_.transfer.dataSize > rx_data_.completion_size) {
153 // Completion callback will be called once this transfer completes.
154 rx_data_.transfer.dataSize = rx_data_.completion_size;
155 }
156
157 USART_TransferReceiveDMA(
158 config_.usart_base, &uart_dma_handle_, &rx_data_.transfer);
159 }
160
161 // DMA send buffer data
TriggerWriteDma()162 void UartDmaStreamMcuxpresso::TriggerWriteDma() {
163 const uint8_t* tx_buffer =
164 reinterpret_cast<const uint8_t*>(tx_data_.buffer.data());
165 tx_data_.transfer.txData = &tx_buffer[tx_data_.tx_idx];
166 if (tx_data_.tx_idx + kUsartDmaMaxTransferCount >
167 tx_data_.buffer.size_bytes()) {
168 // Completion callback will be called once this transfer completes.
169 tx_data_.transfer.dataSize = tx_data_.buffer.size_bytes() - tx_data_.tx_idx;
170 } else {
171 tx_data_.transfer.dataSize = kUsartDmaMaxTransferCount;
172 }
173
174 USART_TransferSendDMA(
175 config_.usart_base, &uart_dma_handle_, &tx_data_.transfer);
176 }
177
178 // Completion callback for TX and RX transactions
TxRxCompletionCallback(USART_Type *,usart_dma_handle_t *,status_t status,void * param)179 void UartDmaStreamMcuxpresso::TxRxCompletionCallback(
180 USART_Type* /* base */,
181 usart_dma_handle_t* /* state */,
182 status_t status,
183 void* param) {
184 UartDmaStreamMcuxpresso* stream =
185 reinterpret_cast<UartDmaStreamMcuxpresso*>(param);
186
187 if (status == kStatus_USART_RxIdle) {
188 // RX transfer
189
190 // Acquire the interrupt_lock_ to ensure that on SMP systems
191 // access to the rx_data is synchronized.
192 stream->interrupt_lock_.lock();
193
194 struct UsartDmaRxData* rx_data = &stream->rx_data_;
195 rx_data->ring_buffer_write_idx += rx_data->transfer.dataSize;
196 rx_data->data_received += rx_data->transfer.dataSize;
197
198 PW_DCHECK_INT_LE(rx_data->ring_buffer_write_idx,
199 rx_data->ring_buffer.size_bytes());
200 if (rx_data->ring_buffer_write_idx == rx_data->ring_buffer.size_bytes()) {
201 rx_data->ring_buffer_write_idx = 0;
202 }
203
204 bool notify_rx_completion = false;
205 if (rx_data->completion_size > 0) {
206 PW_DCHECK_INT_GE(rx_data->completion_size, rx_data->transfer.dataSize);
207 rx_data->completion_size -= rx_data->transfer.dataSize;
208 if (rx_data->completion_size == 0) {
209 // We have satisified the receive request, we must wake up the receiver.
210 // Before we can issue the wake up, we must trigger the next DMA read
211 // operation, since the notification might yield the CPU.
212 notify_rx_completion = true;
213 }
214 }
215 stream->TriggerReadDma();
216
217 stream->interrupt_lock_.unlock();
218
219 if (notify_rx_completion) {
220 rx_data->notification.release();
221 }
222 } else if (status == kStatus_USART_TxIdle) {
223 // Tx transfer
224 UsartDmaTxData* tx_data = &stream->tx_data_;
225 tx_data->tx_idx += tx_data->transfer.dataSize;
226 if (tx_data->tx_idx == tx_data->buffer.size_bytes()) {
227 // We have completed the send request, we must wake up the sender.
228 tx_data->notification.release();
229 } else {
230 PW_CHECK_INT_LT(tx_data->tx_idx, tx_data->buffer.size_bytes());
231 stream->TriggerWriteDma();
232 }
233 }
234 }
235
236 // Get the amount of bytes that have been received, but haven't been copied yet
237 //
238 // Note: The caller must ensure that the interrupt handler cannot execute.
TransferGetReceiveDMACountLockHeld()239 StatusWithSize UartDmaStreamMcuxpresso::TransferGetReceiveDMACountLockHeld() {
240 uint32_t count = 0;
241
242 // If no in-flight transfer is in progress, there is no pending data
243 // available. We have initialized count to 0 to account for that.
244 (void)USART_TransferGetReceiveCountDMA(
245 config_.usart_base, &uart_dma_handle_, &count);
246
247 // We must be executing with the interrupt_lock_ held, so that the interrupt
248 // handler cannot change data_received.
249 count += rx_data_.data_received - rx_data_.data_copied;
250 // Check whether we hit an overflow condition
251 if (count > rx_data_.ring_buffer.size_bytes()) {
252 return StatusWithSize(Status::DataLoss(), 0);
253 }
254 return StatusWithSize(count);
255 }
256
257 // Get the amount of bytes that have been received, but haven't been copied yet
TransferGetReceiveDMACount()258 StatusWithSize UartDmaStreamMcuxpresso::TransferGetReceiveDMACount() {
259 // We need to acquire the interrupt_lock_ , so that the interrupt handler
260 // cannot run to change rxRingBufferWriteIdx.
261 interrupt_lock_.lock();
262 StatusWithSize status = TransferGetReceiveDMACountLockHeld();
263 interrupt_lock_.unlock();
264 return status;
265 }
266
267 // Get the amount of bytes that have not been yet received for the current
268 // transfer
269 //
270 // Note: This function may only be called once the RX transaction has been
271 // aborted.
GetReceiveTransferRemainingBytes()272 size_t UartDmaStreamMcuxpresso::GetReceiveTransferRemainingBytes() {
273 return DMA_GetRemainingBytes(uart_dma_handle_.rxDmaHandle->base,
274 uart_dma_handle_.rxDmaHandle->channel);
275 }
276
277 // Wait for more receive bytes to arrive to satisfy request
278 //
279 // Once we have acquired the interrupt_lock_, we check whether we can
280 // satisfy the request, and if not, we will abort the current
281 // transaction if the current transaction will be able to satisfy
282 // the outstanding request. Once the transaction has been aborted
283 // we can specify the completion_size, so that the completion callback
284 // can wake us up when the bytes_needed bytes have been received.
285 //
286 // If more than one transaction is required to satisfy the request,
287 // we don't need to abort the transaction and instead can leverage
288 // the fact that the completion callback won't be triggered since we
289 // have acquired the interrupt_lock_ . This allows us to specify
290 // the completion_size that will be seen by the completion callback
291 // when it executes. A subsequent completion callback will wake us up
292 // when the bytes_needed have been received.
WaitForReceiveBytes(size_t bytes_needed)293 Status UartDmaStreamMcuxpresso::WaitForReceiveBytes(size_t bytes_needed) {
294 // Acquire the interrupt_lock_, so that the interrupt handler cannot
295 // execute and modify the shared state.
296 interrupt_lock_.lock();
297
298 // Recheck what the current amount of available bytes is.
299 StatusWithSize status = TransferGetReceiveDMACountLockHeld();
300 if (!status.ok()) {
301 interrupt_lock_.unlock();
302 return status.status();
303 }
304
305 size_t rx_count = status.size();
306 if (rx_count >= bytes_needed) {
307 interrupt_lock_.unlock();
308 return OkStatus();
309 }
310
311 // Not enough bytes available yet.
312 // We check whether more bytes are needed than the transfer's
313 // dataSize, which means that at least one more transfer must
314 // complete to satisfy this receive request.
315 size_t pos_in_transfer =
316 rx_data_.data_copied + rx_count - rx_data_.data_received;
317 PW_DCHECK_INT_LE(pos_in_transfer, rx_data_.transfer.dataSize);
318
319 size_t transfer_bytes_needed =
320 bytes_needed + rx_data_.data_copied - rx_data_.data_received;
321 bool aborted = false;
322
323 if (transfer_bytes_needed < rx_data_.transfer.dataSize) {
324 // Abort the current transfer, so that we can schedule a receive
325 // transfer to satisfy this request.
326 USART_TransferAbortReceiveDMA(config_.usart_base, &uart_dma_handle_);
327 size_t remaining_transfer_bytes = GetReceiveTransferRemainingBytes();
328 if (remaining_transfer_bytes == 0) {
329 // We have received all bytes for the current transfer, we will
330 // restart the loop in the caller's context.
331 // The interrupt handler will execute and call TriggerReadDma
332 // to schedule the next receive DMA transfer.
333 interrupt_lock_.unlock();
334 return OkStatus();
335 }
336 // We have successfully aborted an in-flight transfer. No interrupt
337 // callback will be called for it.
338 aborted = true;
339 // We need to fix up the transfer size for the aborted transfer.
340 rx_data_.transfer.dataSize -= remaining_transfer_bytes;
341 } else {
342 // We require at least as much data as provided by the current
343 // transfer. We know that this code cannot execute while the
344 // receive transaction isn't active, so we know that the
345 // completion callback will still execute.
346 }
347
348 // Tell the transfer callback when to deliver the completion
349 // notification.
350 rx_data_.completion_size = transfer_bytes_needed;
351
352 // Since a caller could request a receive amount that exceeds the ring
353 // buffer size, we must cap the rxCompletionSize. In addition, we
354 // don't want that the rxRingBuffer overflows, so we cap the
355 // rxCompletionSize to 25% of the ringBufferSize to ensure that the
356 // ring buffer gets drained frequently enough.
357 if (rx_data_.completion_size >
358 rx_data_.ring_buffer.size_bytes() / kUsartRxRingBufferSplitCount) {
359 rx_data_.completion_size =
360 rx_data_.ring_buffer.size_bytes() / kUsartRxRingBufferSplitCount;
361 }
362
363 interrupt_lock_.unlock();
364
365 if (aborted) {
366 // We have received data, but we haven't accounted for it, since the
367 // callback won't execute due to the abort. Execute the callback
368 // from here instead. Since the DMA transfer has been aborted, and
369 // the available data isn't sufficient to satisfy this request, the
370 // next receive DMA transfer will unblock this thread.
371 TxRxCompletionCallback(
372 config_.usart_base, &uart_dma_handle_, kStatus_USART_RxIdle, this);
373 }
374
375 // Wait for the interrupt handler to deliver the completion
376 // notificiation.
377 rx_data_.notification.acquire();
378 // We have received bytes that can be copied out, we will restart
379 // the loop in the caller's context.
380 return OkStatus();
381 }
382
383 // Copy the data from the receive ring buffer into the destination data buffer
CopyReceiveData(ByteBuilder & bb,size_t copy_size)384 void UartDmaStreamMcuxpresso::CopyReceiveData(ByteBuilder& bb,
385 size_t copy_size) {
386 ByteSpan ring_buffer = rx_data_.ring_buffer;
387 reinterpret_cast<uint8_t*>(rx_data_.ring_buffer.data());
388 // Check whether we need to perform a wrap around copy operation or end
389 // right at the end of the buffer.
390 if (rx_data_.ring_buffer_read_idx + copy_size >=
391 rx_data_.ring_buffer.size_bytes()) {
392 size_t first_copy_size =
393 rx_data_.ring_buffer.size_bytes() - rx_data_.ring_buffer_read_idx;
394 bb.append(
395 ring_buffer.subspan(rx_data_.ring_buffer_read_idx, first_copy_size));
396 size_t second_copy_size = copy_size - first_copy_size;
397 // Source buffer is at offset 0.
398 bb.append(ring_buffer.subspan(0, second_copy_size));
399 rx_data_.ring_buffer_read_idx = second_copy_size;
400 } else {
401 // Normal copy operation
402 PW_DCHECK_INT_LT(rx_data_.ring_buffer_read_idx + copy_size,
403 rx_data_.ring_buffer.size_bytes());
404 bb.append(ring_buffer.subspan(rx_data_.ring_buffer_read_idx, copy_size));
405 rx_data_.ring_buffer_read_idx += copy_size;
406 }
407 rx_data_.data_copied += copy_size;
408 }
409
410 // Copy data from the RX ring buffer into the caller provided buffer
411 //
412 // If the ring buffer can already satisfy the read request, the
413 // data will be copied from the ring buffer into the provided buffer.
414 // If no data, or not sufficient data is available to satisfy the
415 // read request, the caller will wait for the completion callback to
416 // signal that data is available and can be copied from the ring buffer
417 // to the provided buffer.
418 //
419 // Note: A reader may request to read more data than can be stored
420 // inside the RX ring buffer.
421 //
422 // Note: Only one thread should be calling this function,
423 // otherwise DoRead calls might fail due to contention for
424 // the USART RX channel.
DoRead(ByteSpan data)425 StatusWithSize UartDmaStreamMcuxpresso::DoRead(ByteSpan data) {
426 size_t length = data.size();
427 if (length == 0) {
428 return StatusWithSize(Status::InvalidArgument(), 0);
429 }
430
431 // We only allow a single thread to read from the USART at a time.
432 bool was_busy = rx_data_.busy.exchange(true);
433 if (was_busy) {
434 return StatusWithSize(Status::FailedPrecondition(), 0);
435 }
436
437 size_t rx_count = 0;
438 ByteBuilder bb(data);
439
440 for (size_t buf_idx = 0; buf_idx < length;) {
441 size_t bytes_needed = length - buf_idx;
442
443 while (rx_count == 0) {
444 StatusWithSize status_with_size = TransferGetReceiveDMACount();
445 if (!status_with_size.ok()) {
446 rx_data_.busy.store(false);
447 return StatusWithSize(status_with_size.status(), buf_idx);
448 }
449 rx_count = status_with_size.size();
450 if (rx_count < bytes_needed) {
451 // Wait to receive more bytes.
452 Status status = WaitForReceiveBytes(bytes_needed);
453 if (!status.ok()) {
454 rx_data_.busy.store(false);
455 return StatusWithSize(status, buf_idx);
456 }
457 // Restart the loop and refetch rx_count. We should be able
458 // to copy out data to the destination data buffer.
459 rx_count = 0;
460 continue;
461 }
462 }
463
464 size_t copy_size = MIN(bytes_needed, rx_count);
465 CopyReceiveData(bb, copy_size);
466 buf_idx += copy_size;
467 PW_DCHECK(rx_count == copy_size || buf_idx == length);
468 }
469
470 rx_data_.busy.store(false);
471 return StatusWithSize(length);
472 }
473
474 // Write data to USART using DMA transactions
475 //
476 // Note: Only one thread should be calling this function,
477 // otherwise DoWrite calls might fail due to contention for
478 // the USART TX channel.
DoWrite(ConstByteSpan data)479 Status UartDmaStreamMcuxpresso::DoWrite(ConstByteSpan data) {
480 if (data.size() == 0) {
481 return OkStatus();
482 }
483
484 bool was_busy = tx_data_.busy.exchange(true);
485 if (was_busy) {
486 // Another thread is already transmitting data.
487 return Status::FailedPrecondition();
488 }
489
490 tx_data_.buffer = data;
491 tx_data_.tx_idx = 0;
492
493 TriggerWriteDma();
494
495 tx_data_.notification.acquire();
496
497 tx_data_.busy.store(false);
498
499 return OkStatus();
500 }
501
502 } // namespace pw::stream
503