1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/browser/speech/speech_input_manager.h"
6
7 #include <map>
8 #include <string>
9
10 #include "base/lazy_instance.h"
11 #include "base/memory/ref_counted.h"
12 #include "base/synchronization/lock.h"
13 #include "base/threading/thread_restrictions.h"
14 #include "base/utf_string_conversions.h"
15 #include "chrome/browser/browser_process.h"
16 #include "chrome/browser/platform_util.h"
17 #include "chrome/browser/prefs/pref_service.h"
18 #include "chrome/browser/speech/speech_input_bubble_controller.h"
19 #include "chrome/browser/tab_contents/tab_util.h"
20 #include "chrome/common/chrome_switches.h"
21 #include "chrome/common/pref_names.h"
22 #include "content/browser/browser_thread.h"
23 #include "content/browser/speech/speech_recognizer.h"
24 #include "grit/generated_resources.h"
25 #include "media/audio/audio_manager.h"
26 #include "ui/base/l10n/l10n_util.h"
27
28 #if defined(OS_WIN)
29 #include "chrome/installer/util/wmi.h"
30 #endif
31
32 namespace speech_input {
33
34 namespace {
35
36 // Asynchronously fetches the PC and audio hardware/driver info if
37 // the user has opted into UMA. This information is sent with speech input
38 // requests to the server for identifying and improving quality issues with
39 // specific device configurations.
40 class OptionalRequestInfo
41 : public base::RefCountedThreadSafe<OptionalRequestInfo> {
42 public:
OptionalRequestInfo()43 OptionalRequestInfo() : can_report_metrics_(false) {}
44
Refresh()45 void Refresh() {
46 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
47 // UMA opt-in can be checked only from the UI thread, so switch to that.
48 BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
49 NewRunnableMethod(this,
50 &OptionalRequestInfo::CheckUMAAndGetHardwareInfo));
51 }
52
CheckUMAAndGetHardwareInfo()53 void CheckUMAAndGetHardwareInfo() {
54 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
55 if (g_browser_process->local_state()->GetBoolean(
56 prefs::kMetricsReportingEnabled)) {
57 // Access potentially slow OS calls from the FILE thread.
58 BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE,
59 NewRunnableMethod(this, &OptionalRequestInfo::GetHardwareInfo));
60 }
61 }
62
GetHardwareInfo()63 void GetHardwareInfo() {
64 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
65 base::AutoLock lock(lock_);
66 can_report_metrics_ = true;
67 #if defined(OS_WIN)
68 value_ = UTF16ToUTF8(
69 installer::WMIComputerSystem::GetModel() + L"|" +
70 AudioManager::GetAudioManager()->GetAudioInputDeviceModel());
71 #else // defined(OS_WIN)
72 value_ = UTF16ToUTF8(
73 AudioManager::GetAudioManager()->GetAudioInputDeviceModel());
74 #endif // defined(OS_WIN)
75 }
76
value()77 std::string value() {
78 base::AutoLock lock(lock_);
79 return value_;
80 }
81
can_report_metrics()82 bool can_report_metrics() {
83 base::AutoLock lock(lock_);
84 return can_report_metrics_;
85 }
86
87 private:
88 base::Lock lock_;
89 std::string value_;
90 bool can_report_metrics_;
91
92 DISALLOW_COPY_AND_ASSIGN(OptionalRequestInfo);
93 };
94
95 class SpeechInputManagerImpl : public SpeechInputManager,
96 public SpeechInputBubbleControllerDelegate,
97 public SpeechRecognizerDelegate {
98 public:
99 // SpeechInputManager methods.
100 virtual void StartRecognition(SpeechInputManagerDelegate* delegate,
101 int caller_id,
102 int render_process_id,
103 int render_view_id,
104 const gfx::Rect& element_rect,
105 const std::string& language,
106 const std::string& grammar,
107 const std::string& origin_url);
108 virtual void CancelRecognition(int caller_id);
109 virtual void StopRecording(int caller_id);
110 virtual void CancelAllRequestsWithDelegate(
111 SpeechInputManagerDelegate* delegate);
112
113 // SpeechRecognizer::Delegate methods.
114 virtual void DidStartReceivingAudio(int caller_id);
115 virtual void SetRecognitionResult(int caller_id,
116 bool error,
117 const SpeechInputResultArray& result);
118 virtual void DidCompleteRecording(int caller_id);
119 virtual void DidCompleteRecognition(int caller_id);
120 virtual void OnRecognizerError(int caller_id,
121 SpeechRecognizer::ErrorCode error);
122 virtual void DidCompleteEnvironmentEstimation(int caller_id);
123 virtual void SetInputVolume(int caller_id, float volume, float noise_volume);
124
125 // SpeechInputBubbleController::Delegate methods.
126 virtual void InfoBubbleButtonClicked(int caller_id,
127 SpeechInputBubble::Button button);
128 virtual void InfoBubbleFocusChanged(int caller_id);
129
130 private:
131 struct SpeechInputRequest {
132 SpeechInputManagerDelegate* delegate;
133 scoped_refptr<SpeechRecognizer> recognizer;
134 bool is_active; // Set to true when recording or recognition is going on.
135 };
136
137 // Private constructor to enforce singleton.
138 friend struct base::DefaultLazyInstanceTraits<SpeechInputManagerImpl>;
139 SpeechInputManagerImpl();
140 virtual ~SpeechInputManagerImpl();
141
142 bool HasPendingRequest(int caller_id) const;
143 SpeechInputManagerDelegate* GetDelegate(int caller_id) const;
144
145 void CancelRecognitionAndInformDelegate(int caller_id);
146
147 // Starts/restarts recognition for an existing request.
148 void StartRecognitionForRequest(int caller_id);
149
150 typedef std::map<int, SpeechInputRequest> SpeechRecognizerMap;
151 SpeechRecognizerMap requests_;
152 int recording_caller_id_;
153 scoped_refptr<SpeechInputBubbleController> bubble_controller_;
154 scoped_refptr<OptionalRequestInfo> optional_request_info_;
155 };
156
157 base::LazyInstance<SpeechInputManagerImpl> g_speech_input_manager_impl(
158 base::LINKER_INITIALIZED);
159
160 } // namespace
161
Get()162 SpeechInputManager* SpeechInputManager::Get() {
163 return g_speech_input_manager_impl.Pointer();
164 }
165
ShowAudioInputSettings()166 void SpeechInputManager::ShowAudioInputSettings() {
167 // Since AudioManager::ShowAudioInputSettings can potentially launch external
168 // processes, do that in the FILE thread to not block the calling threads.
169 if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) {
170 BrowserThread::PostTask(
171 BrowserThread::FILE, FROM_HERE,
172 NewRunnableFunction(&SpeechInputManager::ShowAudioInputSettings));
173 return;
174 }
175
176 DCHECK(AudioManager::GetAudioManager()->CanShowAudioInputSettings());
177 if (AudioManager::GetAudioManager()->CanShowAudioInputSettings())
178 AudioManager::GetAudioManager()->ShowAudioInputSettings();
179 }
180
SpeechInputManagerImpl()181 SpeechInputManagerImpl::SpeechInputManagerImpl()
182 : recording_caller_id_(0),
183 bubble_controller_(new SpeechInputBubbleController(
184 ALLOW_THIS_IN_INITIALIZER_LIST(this))) {
185 }
186
~SpeechInputManagerImpl()187 SpeechInputManagerImpl::~SpeechInputManagerImpl() {
188 while (requests_.begin() != requests_.end())
189 CancelRecognition(requests_.begin()->first);
190 }
191
HasPendingRequest(int caller_id) const192 bool SpeechInputManagerImpl::HasPendingRequest(int caller_id) const {
193 return requests_.find(caller_id) != requests_.end();
194 }
195
GetDelegate(int caller_id) const196 SpeechInputManagerDelegate* SpeechInputManagerImpl::GetDelegate(
197 int caller_id) const {
198 return requests_.find(caller_id)->second.delegate;
199 }
200
StartRecognition(SpeechInputManagerDelegate * delegate,int caller_id,int render_process_id,int render_view_id,const gfx::Rect & element_rect,const std::string & language,const std::string & grammar,const std::string & origin_url)201 void SpeechInputManagerImpl::StartRecognition(
202 SpeechInputManagerDelegate* delegate,
203 int caller_id,
204 int render_process_id,
205 int render_view_id,
206 const gfx::Rect& element_rect,
207 const std::string& language,
208 const std::string& grammar,
209 const std::string& origin_url) {
210 DCHECK(!HasPendingRequest(caller_id));
211
212 bubble_controller_->CreateBubble(caller_id, render_process_id, render_view_id,
213 element_rect);
214
215 if (!optional_request_info_.get()) {
216 optional_request_info_ = new OptionalRequestInfo();
217 // Since hardware info is optional with speech input requests, we start an
218 // asynchronous fetch here and move on with recording audio. This first
219 // speech input request would send an empty string for hardware info and
220 // subsequent requests may have the hardware info available if the fetch
221 // completed before them. This way we don't end up stalling the user with
222 // a long wait and disk seeks when they click on a UI element and start
223 // speaking.
224 optional_request_info_->Refresh();
225 }
226
227 SpeechInputRequest* request = &requests_[caller_id];
228 request->delegate = delegate;
229 request->recognizer = new SpeechRecognizer(
230 this, caller_id, language, grammar, optional_request_info_->value(),
231 optional_request_info_->can_report_metrics() ? origin_url : "");
232 request->is_active = false;
233
234 StartRecognitionForRequest(caller_id);
235 }
236
StartRecognitionForRequest(int caller_id)237 void SpeechInputManagerImpl::StartRecognitionForRequest(int caller_id) {
238 DCHECK(HasPendingRequest(caller_id));
239
240 // If we are currently recording audio for another caller, abort that cleanly.
241 if (recording_caller_id_)
242 CancelRecognitionAndInformDelegate(recording_caller_id_);
243
244 if (!AudioManager::GetAudioManager()->HasAudioInputDevices()) {
245 bubble_controller_->SetBubbleMessage(
246 caller_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_NO_MIC));
247 } else {
248 recording_caller_id_ = caller_id;
249 requests_[caller_id].is_active = true;
250 requests_[caller_id].recognizer->StartRecording();
251 bubble_controller_->SetBubbleWarmUpMode(caller_id);
252 }
253 }
254
CancelRecognition(int caller_id)255 void SpeechInputManagerImpl::CancelRecognition(int caller_id) {
256 DCHECK(HasPendingRequest(caller_id));
257 if (requests_[caller_id].is_active)
258 requests_[caller_id].recognizer->CancelRecognition();
259 requests_.erase(caller_id);
260 if (recording_caller_id_ == caller_id)
261 recording_caller_id_ = 0;
262 bubble_controller_->CloseBubble(caller_id);
263 }
264
CancelAllRequestsWithDelegate(SpeechInputManagerDelegate * delegate)265 void SpeechInputManagerImpl::CancelAllRequestsWithDelegate(
266 SpeechInputManagerDelegate* delegate) {
267 SpeechRecognizerMap::iterator it = requests_.begin();
268 while (it != requests_.end()) {
269 if (it->second.delegate == delegate) {
270 CancelRecognition(it->first);
271 // This map will have very few elements so it is simpler to restart.
272 it = requests_.begin();
273 } else {
274 ++it;
275 }
276 }
277 }
278
StopRecording(int caller_id)279 void SpeechInputManagerImpl::StopRecording(int caller_id) {
280 DCHECK(HasPendingRequest(caller_id));
281 requests_[caller_id].recognizer->StopRecording();
282 }
283
SetRecognitionResult(int caller_id,bool error,const SpeechInputResultArray & result)284 void SpeechInputManagerImpl::SetRecognitionResult(
285 int caller_id, bool error, const SpeechInputResultArray& result) {
286 DCHECK(HasPendingRequest(caller_id));
287 GetDelegate(caller_id)->SetRecognitionResult(caller_id, result);
288 }
289
DidCompleteRecording(int caller_id)290 void SpeechInputManagerImpl::DidCompleteRecording(int caller_id) {
291 DCHECK(recording_caller_id_ == caller_id);
292 DCHECK(HasPendingRequest(caller_id));
293 recording_caller_id_ = 0;
294 GetDelegate(caller_id)->DidCompleteRecording(caller_id);
295 bubble_controller_->SetBubbleRecognizingMode(caller_id);
296 }
297
DidCompleteRecognition(int caller_id)298 void SpeechInputManagerImpl::DidCompleteRecognition(int caller_id) {
299 GetDelegate(caller_id)->DidCompleteRecognition(caller_id);
300 requests_.erase(caller_id);
301 bubble_controller_->CloseBubble(caller_id);
302 }
303
OnRecognizerError(int caller_id,SpeechRecognizer::ErrorCode error)304 void SpeechInputManagerImpl::OnRecognizerError(
305 int caller_id, SpeechRecognizer::ErrorCode error) {
306 if (caller_id == recording_caller_id_)
307 recording_caller_id_ = 0;
308
309 requests_[caller_id].is_active = false;
310
311 struct ErrorMessageMapEntry {
312 SpeechRecognizer::ErrorCode error;
313 int message_id;
314 };
315 ErrorMessageMapEntry error_message_map[] = {
316 {
317 SpeechRecognizer::RECOGNIZER_ERROR_CAPTURE, IDS_SPEECH_INPUT_MIC_ERROR
318 }, {
319 SpeechRecognizer::RECOGNIZER_ERROR_NO_SPEECH, IDS_SPEECH_INPUT_NO_SPEECH
320 }, {
321 SpeechRecognizer::RECOGNIZER_ERROR_NO_RESULTS, IDS_SPEECH_INPUT_NO_RESULTS
322 }, {
323 SpeechRecognizer::RECOGNIZER_ERROR_NETWORK, IDS_SPEECH_INPUT_NET_ERROR
324 }
325 };
326 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(error_message_map); ++i) {
327 if (error_message_map[i].error == error) {
328 bubble_controller_->SetBubbleMessage(
329 caller_id,
330 l10n_util::GetStringUTF16(error_message_map[i].message_id));
331 return;
332 }
333 }
334
335 NOTREACHED() << "unknown error " << error;
336 }
337
DidStartReceivingAudio(int caller_id)338 void SpeechInputManagerImpl::DidStartReceivingAudio(int caller_id) {
339 DCHECK(HasPendingRequest(caller_id));
340 DCHECK(recording_caller_id_ == caller_id);
341 bubble_controller_->SetBubbleRecordingMode(caller_id);
342 }
343
DidCompleteEnvironmentEstimation(int caller_id)344 void SpeechInputManagerImpl::DidCompleteEnvironmentEstimation(int caller_id) {
345 DCHECK(HasPendingRequest(caller_id));
346 DCHECK(recording_caller_id_ == caller_id);
347 }
348
SetInputVolume(int caller_id,float volume,float noise_volume)349 void SpeechInputManagerImpl::SetInputVolume(int caller_id, float volume,
350 float noise_volume) {
351 DCHECK(HasPendingRequest(caller_id));
352 DCHECK_EQ(recording_caller_id_, caller_id);
353
354 bubble_controller_->SetBubbleInputVolume(caller_id, volume, noise_volume);
355 }
356
CancelRecognitionAndInformDelegate(int caller_id)357 void SpeechInputManagerImpl::CancelRecognitionAndInformDelegate(int caller_id) {
358 SpeechInputManagerDelegate* cur_delegate = GetDelegate(caller_id);
359 CancelRecognition(caller_id);
360 cur_delegate->DidCompleteRecording(caller_id);
361 cur_delegate->DidCompleteRecognition(caller_id);
362 }
363
InfoBubbleButtonClicked(int caller_id,SpeechInputBubble::Button button)364 void SpeechInputManagerImpl::InfoBubbleButtonClicked(
365 int caller_id, SpeechInputBubble::Button button) {
366 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
367 // Ignore if the caller id was not in our active recognizers list because the
368 // user might have clicked more than once, or recognition could have been
369 // cancelled due to other reasons before the user click was processed.
370 if (!HasPendingRequest(caller_id))
371 return;
372
373 if (button == SpeechInputBubble::BUTTON_CANCEL) {
374 CancelRecognitionAndInformDelegate(caller_id);
375 } else if (button == SpeechInputBubble::BUTTON_TRY_AGAIN) {
376 StartRecognitionForRequest(caller_id);
377 }
378 }
379
InfoBubbleFocusChanged(int caller_id)380 void SpeechInputManagerImpl::InfoBubbleFocusChanged(int caller_id) {
381 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
382 // Ignore if the caller id was not in our active recognizers list because the
383 // user might have clicked more than once, or recognition could have been
384 // ended due to other reasons before the user click was processed.
385 if (HasPendingRequest(caller_id)) {
386 // If this is an ongoing recording or if we were displaying an error message
387 // to the user, abort it since user has switched focus. Otherwise
388 // recognition has started and keep that going so user can start speaking to
389 // another element while this gets the results in parallel.
390 if (recording_caller_id_ == caller_id || !requests_[caller_id].is_active) {
391 CancelRecognitionAndInformDelegate(caller_id);
392 }
393 }
394 }
395
396 } // namespace speech_input
397