1 /** 2 * Copyright (C) 2021 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.android.car.voicecontrol; 17 18 import android.content.Context; 19 import android.content.Intent; 20 import android.os.Bundle; 21 import android.speech.RecognitionListener; 22 import android.speech.RecognizerIntent; 23 import android.speech.SpeechRecognizer; 24 import android.util.Log; 25 import android.util.Pair; 26 27 import java.util.ArrayList; 28 import java.util.List; 29 import java.util.Locale; 30 import java.util.stream.Collectors; 31 import java.util.stream.IntStream; 32 33 /** 34 * Sample implementation of voice recognition module. This implementation uses Google Assistant's 35 * voice recognizer. 36 * 37 * TODO: Replace this with pre-recorded messages as we can't depend on Google Assistant in AOSP. 38 */ 39 public class SpeechToTextImpl implements SpeechToText { 40 private static final String TAG = "Mica.SpeechToTextImpl"; 41 42 private Listener mListener; 43 private final SpeechRecognizer mRecognizer; 44 private final Intent mRecognizerIntent; 45 private final RecognitionListener mRecognizerListener = new RecognitionListener() { 46 @Override 47 public void onReadyForSpeech(Bundle params) { 48 Log.d(TAG, "Speech recognition ready"); 49 if (mListener != null) { 50 mListener.onRecognitionStarted(); 51 } 52 } 53 54 @Override 55 public void onBeginningOfSpeech() { 56 if (mListener != null) { 57 mListener.onPartialRecognition(new ArrayList<>()); 58 } 59 } 60 61 @Override 62 public void onRmsChanged(float rmsdB) { 63 // Ignored 64 } 65 66 @Override 67 public void onBufferReceived(byte[] buffer) { 68 // Ignored 69 } 70 71 @Override 72 public void onEndOfSpeech() { 73 // Ignored 74 } 75 76 @Override 77 public void onError(int error) { 78 Log.d(TAG, "Speech recognition finished with error: " + getErrorMsg(error)); 79 if (mListener != null) { 80 mListener.onRecognitionFinished(new ArrayList<>()); 81 stopListening(); 82 } 83 } 84 85 private String getErrorMsg(int error) { 86 switch (error) { 87 case SpeechRecognizer.ERROR_NETWORK_TIMEOUT: 88 return "ERROR_NETWORK_TIMEOUT"; 89 case SpeechRecognizer.ERROR_NETWORK: 90 return "ERROR_NETWORK"; 91 case SpeechRecognizer.ERROR_AUDIO: 92 return "ERROR_AUDIO"; 93 case SpeechRecognizer.ERROR_SERVER: 94 return "ERROR_SERVER"; 95 case SpeechRecognizer.ERROR_CLIENT: 96 return "ERROR_CLIENT"; 97 case SpeechRecognizer.ERROR_SPEECH_TIMEOUT: 98 return "ERROR_SPEECH_TIMEOUT"; 99 case SpeechRecognizer.ERROR_NO_MATCH: 100 return "ERROR_NO_MATCH"; 101 case SpeechRecognizer.ERROR_RECOGNIZER_BUSY: 102 return "ERROR_RECOGNIZER_BUSY"; 103 case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS: 104 return "ERROR_INSUFFICIENT_PERMISSIONS"; 105 } 106 return "ERROR_UNKNOWN"; 107 } 108 109 @Override 110 public void onResults(Bundle results) { 111 Log.d(TAG, "Speech recognition finished with results: " + results.toString()); 112 if (mListener != null) { 113 mListener.onRecognitionFinished(getResultsInConfidenceOrder(results)); 114 stopListening(); 115 } 116 } 117 118 @Override 119 public void onPartialResults(Bundle partialResults) { 120 if (mListener != null) { 121 mListener.onPartialRecognition(getResultsInConfidenceOrder(partialResults)); 122 } 123 } 124 125 @Override 126 public void onEvent(int eventType, Bundle params) { 127 Log.d(TAG, "Speech recognition event: " + eventType + ", params: " + params); 128 // Ignored 129 } 130 131 private List<String> getResultsInConfidenceOrder(Bundle partialResults) { 132 List<String> values = 133 partialResults.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION); 134 float[] scores = 135 partialResults.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES); 136 if (scores == null || values == null || scores.length != values.size()) { 137 return values != null ? values : new ArrayList<>(); 138 } 139 List<Pair<String, Float>> resultsWithConfidence = IntStream.range(0, values.size()) 140 .mapToObj(i -> Pair.create(values.get(i), scores[i])) 141 .collect(Collectors.toList()); 142 Log.d(TAG, "Results confidences: " + resultsWithConfidence.stream() 143 .map(p -> String.format(Locale.US, "%s [%01.2f]", p.first, p.second)) 144 .collect(Collectors.joining(", "))); 145 return resultsWithConfidence.stream() 146 .sorted((o1, o2) -> -o1.second.compareTo(o2.second)) 147 .map(p -> p.first) 148 .collect(Collectors.toList()); 149 } 150 }; 151 SpeechToTextImpl(Context context)152 public SpeechToTextImpl(Context context) { 153 // Use system default recognition service 154 mRecognizer = SpeechRecognizer.createSpeechRecognizer(context); 155 mRecognizer.setRecognitionListener(mRecognizerListener); 156 mRecognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH); 157 mRecognizerIntent.putExtra( 158 RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM); 159 mRecognizerIntent.putExtra( 160 RecognizerIntent.EXTRA_CALLING_PACKAGE, context.getPackageName()); 161 mRecognizerIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true); 162 } 163 164 @Override startListening(Listener listener)165 public void startListening(Listener listener) { 166 if (mListener != null) { 167 stopListening(); 168 } 169 mListener = listener; 170 mRecognizer.startListening(mRecognizerIntent); 171 } 172 173 @Override stopListening()174 public void stopListening() { 175 mListener = null; 176 mRecognizer.cancel(); 177 } 178 179 @Override destroy()180 public void destroy() { 181 mListener = null; 182 mRecognizer.destroy(); 183 } 184 } 185