1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/speech/tts_controller.h"
6
7 #include <string>
8 #include <vector>
9
10 #include "base/float_util.h"
11 #include "base/values.h"
12 #include "chrome/browser/browser_process.h"
13 #include "chrome/browser/profiles/profile.h"
14 #include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
15 #include "chrome/browser/speech/extension_api/tts_extension_api.h"
16 #include "chrome/browser/speech/tts_platform.h"
17 #include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h"
18 #include "extensions/browser/extension_system.h"
19 #include "extensions/common/extension.h"
20
namespace {

// A value to be used to indicate that there is no char index available.
const int kInvalidCharIndex = -1;

// Given a language/region code such as 'fr-FR' or 'fil-PH', returns just the
// primary language portion ('fr', 'fil').
//
// The code is only trimmed when a two- or three-letter language is followed
// by '-' and at least two more characters; anything else (for example 'fr',
// 'en-U' or the empty string) is returned unchanged.
std::string TrimLanguageCode(const std::string& lang) {
  // Try the two-letter form first so that every input the two-letter-only
  // check used to trim still produces exactly the same result.
  for (size_t language_length = 2; language_length <= 3; ++language_length) {
    if (lang.size() >= language_length + 3 && lang[language_length] == '-')
      return lang.substr(0, language_length);
  }
  return lang;
}

}  // namespace
35
IsFinalTtsEventType(TtsEventType event_type)36 bool IsFinalTtsEventType(TtsEventType event_type) {
37 return (event_type == TTS_EVENT_END ||
38 event_type == TTS_EVENT_INTERRUPTED ||
39 event_type == TTS_EVENT_CANCELLED ||
40 event_type == TTS_EVENT_ERROR);
41 }
42
43 //
44 // UtteranceContinuousParameters
45 //
46
47
UtteranceContinuousParameters()48 UtteranceContinuousParameters::UtteranceContinuousParameters()
49 : rate(-1),
50 pitch(-1),
51 volume(-1) {}
52
53
54 //
55 // VoiceData
56 //
57
58
VoiceData()59 VoiceData::VoiceData()
60 : gender(TTS_GENDER_NONE),
61 remote(false),
62 native(false) {}
63
~VoiceData()64 VoiceData::~VoiceData() {}
65
66
67 //
68 // Utterance
69 //
70
71 // static
72 int Utterance::next_utterance_id_ = 0;
73
Utterance(Profile * profile)74 Utterance::Utterance(Profile* profile)
75 : profile_(profile),
76 id_(next_utterance_id_++),
77 src_id_(-1),
78 gender_(TTS_GENDER_NONE),
79 can_enqueue_(false),
80 char_index_(0),
81 finished_(false) {
82 options_.reset(new base::DictionaryValue());
83 }
84
~Utterance()85 Utterance::~Utterance() {
86 DCHECK(finished_);
87 }
88
OnTtsEvent(TtsEventType event_type,int char_index,const std::string & error_message)89 void Utterance::OnTtsEvent(TtsEventType event_type,
90 int char_index,
91 const std::string& error_message) {
92 if (char_index >= 0)
93 char_index_ = char_index;
94 if (IsFinalTtsEventType(event_type))
95 finished_ = true;
96
97 if (event_delegate_)
98 event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
99 if (finished_)
100 event_delegate_.reset();
101 }
102
Finish()103 void Utterance::Finish() {
104 finished_ = true;
105 }
106
set_options(const base::Value * options)107 void Utterance::set_options(const base::Value* options) {
108 options_.reset(options->DeepCopy());
109 }
110
111 //
112 // TtsController
113 //
114
115 // static
GetInstance()116 TtsController* TtsController::GetInstance() {
117 return Singleton<TtsController>::get();
118 }
119
TtsController()120 TtsController::TtsController()
121 : current_utterance_(NULL),
122 paused_(false),
123 platform_impl_(NULL) {
124 }
125
~TtsController()126 TtsController::~TtsController() {
127 if (current_utterance_) {
128 current_utterance_->Finish();
129 delete current_utterance_;
130 }
131
132 // Clear any queued utterances too.
133 ClearUtteranceQueue(false); // Don't sent events.
134 }
135
SpeakOrEnqueue(Utterance * utterance)136 void TtsController::SpeakOrEnqueue(Utterance* utterance) {
137 // If we're paused and we get an utterance that can't be queued,
138 // flush the queue but stay in the paused state.
139 if (paused_ && !utterance->can_enqueue()) {
140 Stop();
141 paused_ = true;
142 return;
143 }
144
145 if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
146 utterance_queue_.push(utterance);
147 } else {
148 Stop();
149 SpeakNow(utterance);
150 }
151 }
152
SpeakNow(Utterance * utterance)153 void TtsController::SpeakNow(Utterance* utterance) {
154 // Get all available voices and try to find a matching voice.
155 std::vector<VoiceData> voices;
156 GetVoices(utterance->profile(), &voices);
157 int index = GetMatchingVoice(utterance, voices);
158
159 VoiceData voice;
160 if (index != -1) {
161 // Select the matching voice.
162 voice = voices[index];
163 } else {
164 // However, if no match was found on a platform without native tts voices,
165 // attempt to get a voice based only on the current locale without respect
166 // to any supplied voice names.
167 std::vector<VoiceData> native_voices;
168
169 if (GetPlatformImpl()->PlatformImplAvailable())
170 GetPlatformImpl()->GetVoices(&native_voices);
171
172 if (native_voices.empty() && !voices.empty()) {
173 // TODO(dtseng): Notify extension caller of an error.
174 utterance->set_voice_name("");
175 utterance->set_lang(g_browser_process->GetApplicationLocale());
176 index = GetMatchingVoice(utterance, voices);
177
178 // If even that fails, just take the first available voice.
179 if (index == -1)
180 index = 0;
181 voice = voices[index];
182 } else {
183 // Otherwise, simply give native voices a chance to handle this utterance.
184 voice.native = true;
185 }
186 }
187
188 GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);
189
190 if (!voice.native) {
191 #if !defined(OS_ANDROID)
192 DCHECK(!voice.extension_id.empty());
193 current_utterance_ = utterance;
194 utterance->set_extension_id(voice.extension_id);
195 ExtensionTtsEngineSpeak(utterance, voice);
196 bool sends_end_event =
197 voice.events.find(TTS_EVENT_END) != voice.events.end();
198 if (!sends_end_event) {
199 utterance->Finish();
200 delete utterance;
201 current_utterance_ = NULL;
202 SpeakNextUtterance();
203 }
204 #endif
205 } else {
206 // It's possible for certain platforms to send start events immediately
207 // during |speak|.
208 current_utterance_ = utterance;
209 GetPlatformImpl()->clear_error();
210 bool success = GetPlatformImpl()->Speak(
211 utterance->id(),
212 utterance->text(),
213 utterance->lang(),
214 voice,
215 utterance->continuous_parameters());
216 if (!success)
217 current_utterance_ = NULL;
218
219 // If the native voice wasn't able to process this speech, see if
220 // the browser has built-in TTS that isn't loaded yet.
221 if (!success &&
222 GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile())) {
223 utterance_queue_.push(utterance);
224 return;
225 }
226
227 if (!success) {
228 utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
229 GetPlatformImpl()->error());
230 delete utterance;
231 return;
232 }
233 }
234 }
235
Stop()236 void TtsController::Stop() {
237 paused_ = false;
238 if (current_utterance_ && !current_utterance_->extension_id().empty()) {
239 #if !defined(OS_ANDROID)
240 ExtensionTtsEngineStop(current_utterance_);
241 #endif
242 } else {
243 GetPlatformImpl()->clear_error();
244 GetPlatformImpl()->StopSpeaking();
245 }
246
247 if (current_utterance_)
248 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
249 std::string());
250 FinishCurrentUtterance();
251 ClearUtteranceQueue(true); // Send events.
252 }
253
Pause()254 void TtsController::Pause() {
255 paused_ = true;
256 if (current_utterance_ && !current_utterance_->extension_id().empty()) {
257 #if !defined(OS_ANDROID)
258 ExtensionTtsEnginePause(current_utterance_);
259 #endif
260 } else if (current_utterance_) {
261 GetPlatformImpl()->clear_error();
262 GetPlatformImpl()->Pause();
263 }
264 }
265
Resume()266 void TtsController::Resume() {
267 paused_ = false;
268 if (current_utterance_ && !current_utterance_->extension_id().empty()) {
269 #if !defined(OS_ANDROID)
270 ExtensionTtsEngineResume(current_utterance_);
271 #endif
272 } else if (current_utterance_) {
273 GetPlatformImpl()->clear_error();
274 GetPlatformImpl()->Resume();
275 } else {
276 SpeakNextUtterance();
277 }
278 }
279
OnTtsEvent(int utterance_id,TtsEventType event_type,int char_index,const std::string & error_message)280 void TtsController::OnTtsEvent(int utterance_id,
281 TtsEventType event_type,
282 int char_index,
283 const std::string& error_message) {
284 // We may sometimes receive completion callbacks "late", after we've
285 // already finished the utterance (for example because another utterance
286 // interrupted or we got a call to Stop). This is normal and we can
287 // safely just ignore these events.
288 if (!current_utterance_ || utterance_id != current_utterance_->id()) {
289 return;
290 }
291 current_utterance_->OnTtsEvent(event_type, char_index, error_message);
292 if (current_utterance_->finished()) {
293 FinishCurrentUtterance();
294 SpeakNextUtterance();
295 }
296 }
297
GetVoices(Profile * profile,std::vector<VoiceData> * out_voices)298 void TtsController::GetVoices(Profile* profile,
299 std::vector<VoiceData>* out_voices) {
300 #if !defined(OS_ANDROID)
301 if (profile)
302 GetExtensionVoices(profile, out_voices);
303 #endif
304
305 TtsPlatformImpl* platform_impl = GetPlatformImpl();
306 if (platform_impl && platform_impl->PlatformImplAvailable())
307 platform_impl->GetVoices(out_voices);
308 }
309
IsSpeaking()310 bool TtsController::IsSpeaking() {
311 return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
312 }
313
FinishCurrentUtterance()314 void TtsController::FinishCurrentUtterance() {
315 if (current_utterance_) {
316 if (!current_utterance_->finished())
317 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
318 std::string());
319 delete current_utterance_;
320 current_utterance_ = NULL;
321 }
322 }
323
SpeakNextUtterance()324 void TtsController::SpeakNextUtterance() {
325 if (paused_)
326 return;
327
328 // Start speaking the next utterance in the queue. Keep trying in case
329 // one fails but there are still more in the queue to try.
330 while (!utterance_queue_.empty() && !current_utterance_) {
331 Utterance* utterance = utterance_queue_.front();
332 utterance_queue_.pop();
333 SpeakNow(utterance);
334 }
335 }
336
RetrySpeakingQueuedUtterances()337 void TtsController::RetrySpeakingQueuedUtterances() {
338 if (current_utterance_ == NULL && !utterance_queue_.empty())
339 SpeakNextUtterance();
340 }
341
ClearUtteranceQueue(bool send_events)342 void TtsController::ClearUtteranceQueue(bool send_events) {
343 while (!utterance_queue_.empty()) {
344 Utterance* utterance = utterance_queue_.front();
345 utterance_queue_.pop();
346 if (send_events)
347 utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
348 std::string());
349 else
350 utterance->Finish();
351 delete utterance;
352 }
353 }
354
SetPlatformImpl(TtsPlatformImpl * platform_impl)355 void TtsController::SetPlatformImpl(
356 TtsPlatformImpl* platform_impl) {
357 platform_impl_ = platform_impl;
358 }
359
QueueSize()360 int TtsController::QueueSize() {
361 return static_cast<int>(utterance_queue_.size());
362 }
363
GetPlatformImpl()364 TtsPlatformImpl* TtsController::GetPlatformImpl() {
365 if (!platform_impl_)
366 platform_impl_ = TtsPlatformImpl::GetInstance();
367 return platform_impl_;
368 }
369
GetMatchingVoice(const Utterance * utterance,std::vector<VoiceData> & voices)370 int TtsController::GetMatchingVoice(
371 const Utterance* utterance, std::vector<VoiceData>& voices) {
372 // Make two passes: the first time, do strict language matching
373 // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
374 // language matching ('fr-FR' matches 'fr' and 'fr-CA')
375 for (int pass = 0; pass < 2; ++pass) {
376 for (size_t i = 0; i < voices.size(); ++i) {
377 const VoiceData& voice = voices[i];
378
379 if (!utterance->extension_id().empty() &&
380 utterance->extension_id() != voice.extension_id) {
381 continue;
382 }
383
384 if (!voice.name.empty() &&
385 !utterance->voice_name().empty() &&
386 voice.name != utterance->voice_name()) {
387 continue;
388 }
389 if (!voice.lang.empty() && !utterance->lang().empty()) {
390 std::string voice_lang = voice.lang;
391 std::string utterance_lang = utterance->lang();
392 if (pass == 1) {
393 voice_lang = TrimLanguageCode(voice_lang);
394 utterance_lang = TrimLanguageCode(utterance_lang);
395 }
396 if (voice_lang != utterance_lang) {
397 continue;
398 }
399 }
400 if (voice.gender != TTS_GENDER_NONE &&
401 utterance->gender() != TTS_GENDER_NONE &&
402 voice.gender != utterance->gender()) {
403 continue;
404 }
405
406 if (utterance->required_event_types().size() > 0) {
407 bool has_all_required_event_types = true;
408 for (std::set<TtsEventType>::const_iterator iter =
409 utterance->required_event_types().begin();
410 iter != utterance->required_event_types().end();
411 ++iter) {
412 if (voice.events.find(*iter) == voice.events.end()) {
413 has_all_required_event_types = false;
414 break;
415 }
416 }
417 if (!has_all_required_event_types)
418 continue;
419 }
420
421 return static_cast<int>(i);
422 }
423 }
424
425 return -1;
426 }
427
VoicesChanged()428 void TtsController::VoicesChanged() {
429 for (std::set<VoicesChangedDelegate*>::iterator iter =
430 voices_changed_delegates_.begin();
431 iter != voices_changed_delegates_.end(); ++iter) {
432 (*iter)->OnVoicesChanged();
433 }
434 }
435
AddVoicesChangedDelegate(VoicesChangedDelegate * delegate)436 void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) {
437 voices_changed_delegates_.insert(delegate);
438 }
439
RemoveVoicesChangedDelegate(VoicesChangedDelegate * delegate)440 void TtsController::RemoveVoicesChangedDelegate(
441 VoicesChangedDelegate* delegate) {
442 voices_changed_delegates_.erase(delegate);
443 }
444