• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "content/browser/speech/speech_input_manager.h"
6 
7 #include <map>
8 #include <string>
9 
10 #include "base/lazy_instance.h"
11 #include "base/memory/ref_counted.h"
12 #include "base/synchronization/lock.h"
13 #include "base/threading/thread_restrictions.h"
14 #include "base/utf_string_conversions.h"
15 #include "chrome/browser/browser_process.h"
16 #include "chrome/browser/platform_util.h"
17 #include "chrome/browser/prefs/pref_service.h"
18 #include "chrome/browser/speech/speech_input_bubble_controller.h"
19 #include "chrome/browser/tab_contents/tab_util.h"
20 #include "chrome/common/chrome_switches.h"
21 #include "chrome/common/pref_names.h"
22 #include "content/browser/browser_thread.h"
23 #include "content/browser/speech/speech_recognizer.h"
24 #include "grit/generated_resources.h"
25 #include "media/audio/audio_manager.h"
26 #include "ui/base/l10n/l10n_util.h"
27 
28 #if defined(OS_WIN)
29 #include "chrome/installer/util/wmi.h"
30 #endif
31 
32 namespace speech_input {
33 
34 namespace {
35 
36 // Asynchronously fetches the PC and audio hardware/driver info if
37 // the user has opted into UMA. This information is sent with speech input
38 // requests to the server for identifying and improving quality issues with
39 // specific device configurations.
40 class OptionalRequestInfo
41     : public base::RefCountedThreadSafe<OptionalRequestInfo> {
42  public:
OptionalRequestInfo()43   OptionalRequestInfo() : can_report_metrics_(false) {}
44 
Refresh()45   void Refresh() {
46     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
47     // UMA opt-in can be checked only from the UI thread, so switch to that.
48     BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
49         NewRunnableMethod(this,
50                           &OptionalRequestInfo::CheckUMAAndGetHardwareInfo));
51   }
52 
CheckUMAAndGetHardwareInfo()53   void CheckUMAAndGetHardwareInfo() {
54     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
55     if (g_browser_process->local_state()->GetBoolean(
56         prefs::kMetricsReportingEnabled)) {
57       // Access potentially slow OS calls from the FILE thread.
58       BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE,
59           NewRunnableMethod(this, &OptionalRequestInfo::GetHardwareInfo));
60     }
61   }
62 
GetHardwareInfo()63   void GetHardwareInfo() {
64     DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
65     base::AutoLock lock(lock_);
66     can_report_metrics_ = true;
67 #if defined(OS_WIN)
68     value_ = UTF16ToUTF8(
69         installer::WMIComputerSystem::GetModel() + L"|" +
70         AudioManager::GetAudioManager()->GetAudioInputDeviceModel());
71 #else  // defined(OS_WIN)
72     value_ = UTF16ToUTF8(
73         AudioManager::GetAudioManager()->GetAudioInputDeviceModel());
74 #endif  // defined(OS_WIN)
75   }
76 
value()77   std::string value() {
78     base::AutoLock lock(lock_);
79     return value_;
80   }
81 
can_report_metrics()82   bool can_report_metrics() {
83     base::AutoLock lock(lock_);
84     return can_report_metrics_;
85   }
86 
87  private:
88   base::Lock lock_;
89   std::string value_;
90   bool can_report_metrics_;
91 
92   DISALLOW_COPY_AND_ASSIGN(OptionalRequestInfo);
93 };
94 
95 class SpeechInputManagerImpl : public SpeechInputManager,
96                                public SpeechInputBubbleControllerDelegate,
97                                public SpeechRecognizerDelegate {
98  public:
99   // SpeechInputManager methods.
100   virtual void StartRecognition(SpeechInputManagerDelegate* delegate,
101                                 int caller_id,
102                                 int render_process_id,
103                                 int render_view_id,
104                                 const gfx::Rect& element_rect,
105                                 const std::string& language,
106                                 const std::string& grammar,
107                                 const std::string& origin_url);
108   virtual void CancelRecognition(int caller_id);
109   virtual void StopRecording(int caller_id);
110   virtual void CancelAllRequestsWithDelegate(
111       SpeechInputManagerDelegate* delegate);
112 
113   // SpeechRecognizer::Delegate methods.
114   virtual void DidStartReceivingAudio(int caller_id);
115   virtual void SetRecognitionResult(int caller_id,
116                                     bool error,
117                                     const SpeechInputResultArray& result);
118   virtual void DidCompleteRecording(int caller_id);
119   virtual void DidCompleteRecognition(int caller_id);
120   virtual void OnRecognizerError(int caller_id,
121                                  SpeechRecognizer::ErrorCode error);
122   virtual void DidCompleteEnvironmentEstimation(int caller_id);
123   virtual void SetInputVolume(int caller_id, float volume, float noise_volume);
124 
125   // SpeechInputBubbleController::Delegate methods.
126   virtual void InfoBubbleButtonClicked(int caller_id,
127                                        SpeechInputBubble::Button button);
128   virtual void InfoBubbleFocusChanged(int caller_id);
129 
130  private:
131   struct SpeechInputRequest {
132     SpeechInputManagerDelegate* delegate;
133     scoped_refptr<SpeechRecognizer> recognizer;
134     bool is_active;  // Set to true when recording or recognition is going on.
135   };
136 
137   // Private constructor to enforce singleton.
138   friend struct base::DefaultLazyInstanceTraits<SpeechInputManagerImpl>;
139   SpeechInputManagerImpl();
140   virtual ~SpeechInputManagerImpl();
141 
142   bool HasPendingRequest(int caller_id) const;
143   SpeechInputManagerDelegate* GetDelegate(int caller_id) const;
144 
145   void CancelRecognitionAndInformDelegate(int caller_id);
146 
147   // Starts/restarts recognition for an existing request.
148   void StartRecognitionForRequest(int caller_id);
149 
150   typedef std::map<int, SpeechInputRequest> SpeechRecognizerMap;
151   SpeechRecognizerMap requests_;
152   int recording_caller_id_;
153   scoped_refptr<SpeechInputBubbleController> bubble_controller_;
154   scoped_refptr<OptionalRequestInfo> optional_request_info_;
155 };
156 
157 base::LazyInstance<SpeechInputManagerImpl> g_speech_input_manager_impl(
158     base::LINKER_INITIALIZED);
159 
160 }  // namespace
161 
Get()162 SpeechInputManager* SpeechInputManager::Get() {
163   return g_speech_input_manager_impl.Pointer();
164 }
165 
ShowAudioInputSettings()166 void SpeechInputManager::ShowAudioInputSettings() {
167   // Since AudioManager::ShowAudioInputSettings can potentially launch external
168   // processes, do that in the FILE thread to not block the calling threads.
169   if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) {
170     BrowserThread::PostTask(
171         BrowserThread::FILE, FROM_HERE,
172         NewRunnableFunction(&SpeechInputManager::ShowAudioInputSettings));
173     return;
174   }
175 
176   DCHECK(AudioManager::GetAudioManager()->CanShowAudioInputSettings());
177   if (AudioManager::GetAudioManager()->CanShowAudioInputSettings())
178     AudioManager::GetAudioManager()->ShowAudioInputSettings();
179 }
180 
SpeechInputManagerImpl()181 SpeechInputManagerImpl::SpeechInputManagerImpl()
182     : recording_caller_id_(0),
183       bubble_controller_(new SpeechInputBubbleController(
184           ALLOW_THIS_IN_INITIALIZER_LIST(this))) {
185 }
186 
~SpeechInputManagerImpl()187 SpeechInputManagerImpl::~SpeechInputManagerImpl() {
188   while (requests_.begin() != requests_.end())
189     CancelRecognition(requests_.begin()->first);
190 }
191 
HasPendingRequest(int caller_id) const192 bool SpeechInputManagerImpl::HasPendingRequest(int caller_id) const {
193   return requests_.find(caller_id) != requests_.end();
194 }
195 
GetDelegate(int caller_id) const196 SpeechInputManagerDelegate* SpeechInputManagerImpl::GetDelegate(
197     int caller_id) const {
198   return requests_.find(caller_id)->second.delegate;
199 }
200 
StartRecognition(SpeechInputManagerDelegate * delegate,int caller_id,int render_process_id,int render_view_id,const gfx::Rect & element_rect,const std::string & language,const std::string & grammar,const std::string & origin_url)201 void SpeechInputManagerImpl::StartRecognition(
202     SpeechInputManagerDelegate* delegate,
203     int caller_id,
204     int render_process_id,
205     int render_view_id,
206     const gfx::Rect& element_rect,
207     const std::string& language,
208     const std::string& grammar,
209     const std::string& origin_url) {
210   DCHECK(!HasPendingRequest(caller_id));
211 
212   bubble_controller_->CreateBubble(caller_id, render_process_id, render_view_id,
213                                    element_rect);
214 
215   if (!optional_request_info_.get()) {
216     optional_request_info_ = new OptionalRequestInfo();
217     // Since hardware info is optional with speech input requests, we start an
218     // asynchronous fetch here and move on with recording audio. This first
219     // speech input request would send an empty string for hardware info and
220     // subsequent requests may have the hardware info available if the fetch
221     // completed before them. This way we don't end up stalling the user with
222     // a long wait and disk seeks when they click on a UI element and start
223     // speaking.
224     optional_request_info_->Refresh();
225   }
226 
227   SpeechInputRequest* request = &requests_[caller_id];
228   request->delegate = delegate;
229   request->recognizer = new SpeechRecognizer(
230       this, caller_id, language, grammar, optional_request_info_->value(),
231       optional_request_info_->can_report_metrics() ? origin_url : "");
232   request->is_active = false;
233 
234   StartRecognitionForRequest(caller_id);
235 }
236 
StartRecognitionForRequest(int caller_id)237 void SpeechInputManagerImpl::StartRecognitionForRequest(int caller_id) {
238   DCHECK(HasPendingRequest(caller_id));
239 
240   // If we are currently recording audio for another caller, abort that cleanly.
241   if (recording_caller_id_)
242     CancelRecognitionAndInformDelegate(recording_caller_id_);
243 
244   if (!AudioManager::GetAudioManager()->HasAudioInputDevices()) {
245     bubble_controller_->SetBubbleMessage(
246         caller_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_NO_MIC));
247   } else {
248     recording_caller_id_ = caller_id;
249     requests_[caller_id].is_active = true;
250     requests_[caller_id].recognizer->StartRecording();
251     bubble_controller_->SetBubbleWarmUpMode(caller_id);
252   }
253 }
254 
CancelRecognition(int caller_id)255 void SpeechInputManagerImpl::CancelRecognition(int caller_id) {
256   DCHECK(HasPendingRequest(caller_id));
257   if (requests_[caller_id].is_active)
258     requests_[caller_id].recognizer->CancelRecognition();
259   requests_.erase(caller_id);
260   if (recording_caller_id_ == caller_id)
261     recording_caller_id_ = 0;
262   bubble_controller_->CloseBubble(caller_id);
263 }
264 
CancelAllRequestsWithDelegate(SpeechInputManagerDelegate * delegate)265 void SpeechInputManagerImpl::CancelAllRequestsWithDelegate(
266     SpeechInputManagerDelegate* delegate) {
267   SpeechRecognizerMap::iterator it = requests_.begin();
268   while (it != requests_.end()) {
269     if (it->second.delegate == delegate) {
270       CancelRecognition(it->first);
271       // This map will have very few elements so it is simpler to restart.
272       it = requests_.begin();
273     } else {
274       ++it;
275     }
276   }
277 }
278 
StopRecording(int caller_id)279 void SpeechInputManagerImpl::StopRecording(int caller_id) {
280   DCHECK(HasPendingRequest(caller_id));
281   requests_[caller_id].recognizer->StopRecording();
282 }
283 
SetRecognitionResult(int caller_id,bool error,const SpeechInputResultArray & result)284 void SpeechInputManagerImpl::SetRecognitionResult(
285     int caller_id, bool error, const SpeechInputResultArray& result) {
286   DCHECK(HasPendingRequest(caller_id));
287   GetDelegate(caller_id)->SetRecognitionResult(caller_id, result);
288 }
289 
DidCompleteRecording(int caller_id)290 void SpeechInputManagerImpl::DidCompleteRecording(int caller_id) {
291   DCHECK(recording_caller_id_ == caller_id);
292   DCHECK(HasPendingRequest(caller_id));
293   recording_caller_id_ = 0;
294   GetDelegate(caller_id)->DidCompleteRecording(caller_id);
295   bubble_controller_->SetBubbleRecognizingMode(caller_id);
296 }
297 
DidCompleteRecognition(int caller_id)298 void SpeechInputManagerImpl::DidCompleteRecognition(int caller_id) {
299   GetDelegate(caller_id)->DidCompleteRecognition(caller_id);
300   requests_.erase(caller_id);
301   bubble_controller_->CloseBubble(caller_id);
302 }
303 
OnRecognizerError(int caller_id,SpeechRecognizer::ErrorCode error)304 void SpeechInputManagerImpl::OnRecognizerError(
305     int caller_id, SpeechRecognizer::ErrorCode error) {
306   if (caller_id == recording_caller_id_)
307     recording_caller_id_ = 0;
308 
309   requests_[caller_id].is_active = false;
310 
311   struct ErrorMessageMapEntry {
312     SpeechRecognizer::ErrorCode error;
313     int message_id;
314   };
315   ErrorMessageMapEntry error_message_map[] = {
316     {
317       SpeechRecognizer::RECOGNIZER_ERROR_CAPTURE, IDS_SPEECH_INPUT_MIC_ERROR
318     }, {
319       SpeechRecognizer::RECOGNIZER_ERROR_NO_SPEECH, IDS_SPEECH_INPUT_NO_SPEECH
320     }, {
321       SpeechRecognizer::RECOGNIZER_ERROR_NO_RESULTS, IDS_SPEECH_INPUT_NO_RESULTS
322     }, {
323       SpeechRecognizer::RECOGNIZER_ERROR_NETWORK, IDS_SPEECH_INPUT_NET_ERROR
324     }
325   };
326   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(error_message_map); ++i) {
327     if (error_message_map[i].error == error) {
328       bubble_controller_->SetBubbleMessage(
329           caller_id,
330           l10n_util::GetStringUTF16(error_message_map[i].message_id));
331       return;
332     }
333   }
334 
335   NOTREACHED() << "unknown error " << error;
336 }
337 
DidStartReceivingAudio(int caller_id)338 void SpeechInputManagerImpl::DidStartReceivingAudio(int caller_id) {
339   DCHECK(HasPendingRequest(caller_id));
340   DCHECK(recording_caller_id_ == caller_id);
341   bubble_controller_->SetBubbleRecordingMode(caller_id);
342 }
343 
DidCompleteEnvironmentEstimation(int caller_id)344 void SpeechInputManagerImpl::DidCompleteEnvironmentEstimation(int caller_id) {
345   DCHECK(HasPendingRequest(caller_id));
346   DCHECK(recording_caller_id_ == caller_id);
347 }
348 
SetInputVolume(int caller_id,float volume,float noise_volume)349 void SpeechInputManagerImpl::SetInputVolume(int caller_id, float volume,
350                                             float noise_volume) {
351   DCHECK(HasPendingRequest(caller_id));
352   DCHECK_EQ(recording_caller_id_, caller_id);
353 
354   bubble_controller_->SetBubbleInputVolume(caller_id, volume, noise_volume);
355 }
356 
CancelRecognitionAndInformDelegate(int caller_id)357 void SpeechInputManagerImpl::CancelRecognitionAndInformDelegate(int caller_id) {
358   SpeechInputManagerDelegate* cur_delegate = GetDelegate(caller_id);
359   CancelRecognition(caller_id);
360   cur_delegate->DidCompleteRecording(caller_id);
361   cur_delegate->DidCompleteRecognition(caller_id);
362 }
363 
InfoBubbleButtonClicked(int caller_id,SpeechInputBubble::Button button)364 void SpeechInputManagerImpl::InfoBubbleButtonClicked(
365     int caller_id, SpeechInputBubble::Button button) {
366   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
367   // Ignore if the caller id was not in our active recognizers list because the
368   // user might have clicked more than once, or recognition could have been
369   // cancelled due to other reasons before the user click was processed.
370   if (!HasPendingRequest(caller_id))
371     return;
372 
373   if (button == SpeechInputBubble::BUTTON_CANCEL) {
374     CancelRecognitionAndInformDelegate(caller_id);
375   } else if (button == SpeechInputBubble::BUTTON_TRY_AGAIN) {
376     StartRecognitionForRequest(caller_id);
377   }
378 }
379 
InfoBubbleFocusChanged(int caller_id)380 void SpeechInputManagerImpl::InfoBubbleFocusChanged(int caller_id) {
381   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
382   // Ignore if the caller id was not in our active recognizers list because the
383   // user might have clicked more than once, or recognition could have been
384   // ended due to other reasons before the user click was processed.
385   if (HasPendingRequest(caller_id)) {
386     // If this is an ongoing recording or if we were displaying an error message
387     // to the user, abort it since user has switched focus. Otherwise
388     // recognition has started and keep that going so user can start speaking to
389     // another element while this gets the results in parallel.
390     if (recording_caller_id_ == caller_id || !requests_[caller_id].is_active) {
391       CancelRecognitionAndInformDelegate(caller_id);
392     }
393   }
394 }
395 
396 }  // namespace speech_input
397