• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (C) 2021 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package com.android.car.voicecontrol;
17 
18 import android.content.Context;
19 import android.content.Intent;
20 import android.os.Bundle;
21 import android.speech.RecognitionListener;
22 import android.speech.RecognizerIntent;
23 import android.speech.SpeechRecognizer;
24 import android.util.Log;
25 import android.util.Pair;
26 
27 import java.util.ArrayList;
28 import java.util.List;
29 import java.util.Locale;
30 import java.util.stream.Collectors;
31 import java.util.stream.IntStream;
32 
33 /**
34  * Sample implementation of voice recognition module. This implementation uses Google Assistant's
35  * voice recognizer.
36  *
37  * TODO: Replace this with pre-recorded messages as we can't depend on Google Assistant in AOSP.
38  */
39 public class SpeechToTextImpl implements SpeechToText {
40     private static final String TAG = "Mica.SpeechToTextImpl";
41 
42     private Listener mListener;
43     private final SpeechRecognizer mRecognizer;
44     private final Intent mRecognizerIntent;
45     private final RecognitionListener mRecognizerListener = new RecognitionListener() {
46         @Override
47         public void onReadyForSpeech(Bundle params) {
48             Log.d(TAG, "Speech recognition ready");
49             if (mListener != null) {
50                 mListener.onRecognitionStarted();
51             }
52         }
53 
54         @Override
55         public void onBeginningOfSpeech() {
56             if (mListener != null) {
57                 mListener.onPartialRecognition(new ArrayList<>());
58             }
59         }
60 
61         @Override
62         public void onRmsChanged(float rmsdB) {
63             // Ignored
64         }
65 
66         @Override
67         public void onBufferReceived(byte[] buffer) {
68             // Ignored
69         }
70 
71         @Override
72         public void onEndOfSpeech() {
73             // Ignored
74         }
75 
76         @Override
77         public void onError(int error) {
78             Log.d(TAG, "Speech recognition finished with error: " + getErrorMsg(error));
79             if (mListener != null) {
80                 mListener.onRecognitionFinished(new ArrayList<>());
81                 stopListening();
82             }
83         }
84 
85         private String getErrorMsg(int error) {
86             switch (error) {
87                 case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
88                     return "ERROR_NETWORK_TIMEOUT";
89                 case SpeechRecognizer.ERROR_NETWORK:
90                     return "ERROR_NETWORK";
91                 case SpeechRecognizer.ERROR_AUDIO:
92                     return "ERROR_AUDIO";
93                 case SpeechRecognizer.ERROR_SERVER:
94                     return "ERROR_SERVER";
95                 case SpeechRecognizer.ERROR_CLIENT:
96                     return "ERROR_CLIENT";
97                 case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
98                     return "ERROR_SPEECH_TIMEOUT";
99                 case SpeechRecognizer.ERROR_NO_MATCH:
100                     return "ERROR_NO_MATCH";
101                 case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
102                     return "ERROR_RECOGNIZER_BUSY";
103                 case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
104                     return "ERROR_INSUFFICIENT_PERMISSIONS";
105             }
106             return "ERROR_UNKNOWN";
107         }
108 
109         @Override
110         public void onResults(Bundle results) {
111             Log.d(TAG, "Speech recognition finished with results: " + results.toString());
112             if (mListener != null) {
113                 mListener.onRecognitionFinished(getResultsInConfidenceOrder(results));
114                 stopListening();
115             }
116         }
117 
118         @Override
119         public void onPartialResults(Bundle partialResults) {
120             if (mListener != null) {
121                 mListener.onPartialRecognition(getResultsInConfidenceOrder(partialResults));
122             }
123         }
124 
125         @Override
126         public void onEvent(int eventType, Bundle params) {
127             Log.d(TAG, "Speech recognition event: " + eventType + ", params: " + params);
128             // Ignored
129         }
130 
131         private List<String> getResultsInConfidenceOrder(Bundle partialResults) {
132             List<String> values =
133                     partialResults.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
134             float[] scores =
135                     partialResults.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES);
136             if (scores == null || values == null || scores.length != values.size()) {
137                 return values != null ? values : new ArrayList<>();
138             }
139             List<Pair<String, Float>> resultsWithConfidence = IntStream.range(0, values.size())
140                     .mapToObj(i -> Pair.create(values.get(i), scores[i]))
141                     .collect(Collectors.toList());
142             Log.d(TAG, "Results confidences: " + resultsWithConfidence.stream()
143                     .map(p -> String.format(Locale.US, "%s [%01.2f]", p.first, p.second))
144                     .collect(Collectors.joining(", ")));
145             return resultsWithConfidence.stream()
146                     .sorted((o1, o2) -> -o1.second.compareTo(o2.second))
147                     .map(p -> p.first)
148                     .collect(Collectors.toList());
149         }
150     };
151 
SpeechToTextImpl(Context context)152     public SpeechToTextImpl(Context context) {
153         // Use system default recognition service
154         mRecognizer = SpeechRecognizer.createSpeechRecognizer(context);
155         mRecognizer.setRecognitionListener(mRecognizerListener);
156         mRecognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
157         mRecognizerIntent.putExtra(
158                 RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);
159         mRecognizerIntent.putExtra(
160                 RecognizerIntent.EXTRA_CALLING_PACKAGE, context.getPackageName());
161         mRecognizerIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true);
162     }
163 
164     @Override
startListening(Listener listener)165     public void startListening(Listener listener) {
166         if (mListener != null) {
167             stopListening();
168         }
169         mListener = listener;
170         mRecognizer.startListening(mRecognizerIntent);
171     }
172 
173     @Override
stopListening()174     public void stopListening() {
175         mListener = null;
176         mRecognizer.cancel();
177     }
178 
179     @Override
destroy()180     public void destroy() {
181         mListener = null;
182         mRecognizer.destroy();
183     }
184 }
185