// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
7 
#include <string>

#include "base/basictypes.h"
#include "base/memory/scoped_ptr.h"
#include "content/browser/speech/endpointer/endpointer.h"
#include "content/browser/speech/speech_recognition_engine.h"
#include "content/browser/speech/speech_recognizer.h"
#include "content/public/common/speech_recognition_error.h"
#include "content/public/common/speech_recognition_result.h"
#include "media/audio/audio_input_controller.h"
#include "net/url_request/url_request_context_getter.h"
17 
// Forward declarations of the media types that this header only refers to
// through pointers.
namespace media {
class AudioBus;
class AudioManager;
}  // namespace media
22 
23 namespace content {
24 
25 class SpeechRecognitionEventListener;
26 
27 // Handles speech recognition for a session (identified by |session_id|), taking
28 // care of audio capture, silence detection/endpointer and interaction with the
29 // SpeechRecognitionEngine.
30 class CONTENT_EXPORT SpeechRecognizerImpl
31     : public SpeechRecognizer,
32       public media::AudioInputController::EventHandler,
33       public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {
34  public:
35   static const int kAudioSampleRate;
36   static const media::ChannelLayout kChannelLayout;
37   static const int kNumBitsPerAudioSample;
38   static const int kNoSpeechTimeoutMs;
39   static const int kEndpointerEstimationTimeMs;
40 
41   static void SetAudioManagerForTesting(media::AudioManager* audio_manager);
42 
43   SpeechRecognizerImpl(SpeechRecognitionEventListener* listener,
44                        int session_id,
45                        bool continuous,
46                        bool provisional_results,
47                        SpeechRecognitionEngine* engine);
48 
49   virtual void StartRecognition(const std::string& device_id) OVERRIDE;
50   virtual void AbortRecognition() OVERRIDE;
51   virtual void StopAudioCapture() OVERRIDE;
52   virtual bool IsActive() const OVERRIDE;
53   virtual bool IsCapturingAudio() const OVERRIDE;
54   const SpeechRecognitionEngine& recognition_engine() const;
55 
56  private:
57   friend class SpeechRecognizerTest;
58 
59   enum FSMState {
60     STATE_IDLE = 0,
61     STATE_STARTING,
62     STATE_ESTIMATING_ENVIRONMENT,
63     STATE_WAITING_FOR_SPEECH,
64     STATE_RECOGNIZING,
65     STATE_WAITING_FINAL_RESULT,
66     STATE_ENDED,
67     STATE_MAX_VALUE = STATE_ENDED
68   };
69 
70   enum FSMEvent {
71     EVENT_ABORT = 0,
72     EVENT_START,
73     EVENT_STOP_CAPTURE,
74     EVENT_AUDIO_DATA,
75     EVENT_ENGINE_RESULT,
76     EVENT_ENGINE_ERROR,
77     EVENT_AUDIO_ERROR,
78     EVENT_MAX_VALUE = EVENT_AUDIO_ERROR
79   };
80 
81   struct FSMEventArgs {
82     explicit FSMEventArgs(FSMEvent event_value);
83     ~FSMEventArgs();
84 
85     FSMEvent event;
86     scoped_refptr<AudioChunk> audio_data;
87     SpeechRecognitionResults engine_results;
88     SpeechRecognitionError engine_error;
89   };
90 
91   virtual ~SpeechRecognizerImpl();
92 
93   // Entry point for pushing any new external event into the recognizer FSM.
94   void DispatchEvent(const FSMEventArgs& event_args);
95 
96   // Defines the behavior of the recognizer FSM, selecting the appropriate
97   // transition according to the current state and event.
98   FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);
99 
100   // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).
101   void ProcessAudioPipeline(const AudioChunk& raw_audio);
102 
103   // The methods below handle transitions of the recognizer FSM.
104   FSMState StartRecording(const FSMEventArgs& event_args);
105   FSMState StartRecognitionEngine(const FSMEventArgs& event_args);
106   FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);
107   FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);
108   FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);
109   FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);
110   FSMState ProcessFinalResult(const FSMEventArgs& event_args);
111   FSMState AbortSilently(const FSMEventArgs& event_args);
112   FSMState AbortWithError(const FSMEventArgs& event_args);
113   FSMState Abort(const SpeechRecognitionError& error);
114   FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);
115   FSMState DoNothing(const FSMEventArgs& event_args) const;
116   FSMState NotFeasible(const FSMEventArgs& event_args);
117 
118   // Returns the time span of captured audio samples since the start of capture.
119   int GetElapsedTimeMs() const;
120 
121   // Calculates the input volume to be displayed in the UI, triggering the
122   // OnAudioLevelsChange event accordingly.
123   void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);
124 
125   void CloseAudioControllerAsynchronously();
126 
127   // Callback called on IO thread by audio_controller->Close().
128   void OnAudioClosed(media::AudioInputController*);
129 
130   // AudioInputController::EventHandler methods.
OnCreated(media::AudioInputController * controller)131   virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}
OnRecording(media::AudioInputController * controller)132   virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}
133   virtual void OnError(media::AudioInputController* controller,
134       media::AudioInputController::ErrorCode error_code) OVERRIDE;
135   virtual void OnData(media::AudioInputController* controller,
136                       const media::AudioBus* data) OVERRIDE;
OnLog(media::AudioInputController * controller,const std::string & message)137   virtual void OnLog(media::AudioInputController* controller,
138                      const std::string& message) OVERRIDE {}
139 
140   // SpeechRecognitionEngineDelegate methods.
141   virtual void OnSpeechRecognitionEngineResults(
142       const SpeechRecognitionResults& results) OVERRIDE;
143   virtual void OnSpeechRecognitionEngineError(
144       const SpeechRecognitionError& error) OVERRIDE;
145 
146   static media::AudioManager* audio_manager_for_tests_;
147 
148   scoped_ptr<SpeechRecognitionEngine> recognition_engine_;
149   Endpointer endpointer_;
150   scoped_refptr<media::AudioInputController> audio_controller_;
151   int num_samples_recorded_;
152   float audio_level_;
153   bool is_dispatching_event_;
154   bool provisional_results_;
155   FSMState state_;
156   std::string device_id_;
157 
158   class OnDataConverter;
159 
160   // Converts data between native input format and a WebSpeech specific
161   // output format.
162   scoped_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_;
163 
164   DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
165 };
166 
167 }  // namespace content
168 
169 #endif  // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
170