1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/speech/tts_controller.h"
6
7 #include <string>
8 #include <vector>
9
10 #include "base/float_util.h"
11 #include "base/values.h"
12 #include "chrome/browser/extensions/extension_system.h"
13 #include "chrome/browser/profiles/profile.h"
14 #include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
15 #include "chrome/browser/speech/extension_api/tts_extension_api.h"
16 #include "chrome/browser/speech/tts_platform.h"
17 #include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h"
18 #include "extensions/common/extension.h"
19
20 namespace {
// Sentinel passed as the char index of a TTS event when no character
// position is available. This file uses it for the interrupted, cancelled
// and error events that the controller generates itself.
const int kInvalidCharIndex = -1;
23
24 // Given a language/region code of the form 'fr-FR', returns just the basic
25 // language portion, e.g. 'fr'.
// Reduces a language/region code such as 'fr-FR' to its two-letter language
// part ('fr'). Codes shorter than five characters, or whose third character
// is not '-', are returned unchanged.
std::string TrimLanguageCode(std::string lang) {
  const bool has_region_suffix = lang.size() >= 5 && lang[2] == '-';
  return has_region_suffix ? lang.substr(0, 2) : lang;
}
32
33 } // namespace
34
IsFinalTtsEventType(TtsEventType event_type)35 bool IsFinalTtsEventType(TtsEventType event_type) {
36 return (event_type == TTS_EVENT_END ||
37 event_type == TTS_EVENT_INTERRUPTED ||
38 event_type == TTS_EVENT_CANCELLED ||
39 event_type == TTS_EVENT_ERROR);
40 }
41
42 //
43 // UtteranceContinuousParameters
44 //
45
46
// Initializes rate, pitch and volume to -1.
// NOTE(review): -1 presumably means "not specified by the caller"; confirm
// against the header and the platform implementations.
UtteranceContinuousParameters::UtteranceContinuousParameters()
    : rate(-1),
      pitch(-1),
      volume(-1) {}
51
52
53 //
54 // VoiceData
55 //
56
57
// Creates a voice with no gender and with both the |remote| and |native|
// flags cleared.
VoiceData::VoiceData()
    : gender(TTS_GENDER_NONE),
      remote(false),
      native(false) {}
62
// Empty out-of-line destructor.
VoiceData::~VoiceData() {}
64
65
66 //
67 // Utterance
68 //
69
// static
// Source of utterance ids: the Utterance constructor takes the current value
// as the new utterance's id and then increments it.
int Utterance::next_utterance_id_ = 0;
72
// Creates an unfinished utterance for |profile|. It receives the next
// sequential id, a source id of -1, no gender preference, enqueueing
// disabled, a char index of 0 and an empty options dictionary.
Utterance::Utterance(Profile* profile)
    : profile_(profile),
      id_(next_utterance_id_++),
      src_id_(-1),
      gender_(TTS_GENDER_NONE),
      can_enqueue_(false),
      char_index_(0),
      finished_(false) {
  // Start with an empty dictionary so that |options_| is never null.
  options_.reset(new DictionaryValue());
}
83
// An utterance must have been marked finished, either by a final event or by
// Finish(), before it is destroyed. The DCHECK enforces this.
Utterance::~Utterance() {
  DCHECK(finished_);
}
87
OnTtsEvent(TtsEventType event_type,int char_index,const std::string & error_message)88 void Utterance::OnTtsEvent(TtsEventType event_type,
89 int char_index,
90 const std::string& error_message) {
91 if (char_index >= 0)
92 char_index_ = char_index;
93 if (IsFinalTtsEventType(event_type))
94 finished_ = true;
95
96 if (event_delegate_)
97 event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
98 if (finished_)
99 event_delegate_.reset();
100 }
101
// Marks the utterance as finished without sending any event to the delegate.
void Utterance::Finish() {
  finished_ = true;
}
105
set_options(const Value * options)106 void Utterance::set_options(const Value* options) {
107 options_.reset(options->DeepCopy());
108 }
109
110 //
111 // TtsController
112 //
113
// static
// Returns the TtsController instance provided by Singleton<TtsController>.
TtsController* TtsController::GetInstance() {
  return Singleton<TtsController>::get();
}
118
// Creates an idle controller: no current utterance, not paused, and no
// platform implementation yet (GetPlatformImpl() fetches one on demand).
TtsController::TtsController()
    : current_utterance_(NULL),
      paused_(false),
      platform_impl_(NULL) {
}
124
~TtsController()125 TtsController::~TtsController() {
126 if (current_utterance_) {
127 current_utterance_->Finish();
128 delete current_utterance_;
129 }
130
131 // Clear any queued utterances too.
132 ClearUtteranceQueue(false); // Don't sent events.
133 }
134
SpeakOrEnqueue(Utterance * utterance)135 void TtsController::SpeakOrEnqueue(Utterance* utterance) {
136 // If we're paused and we get an utterance that can't be queued,
137 // flush the queue but stay in the paused state.
138 if (paused_ && !utterance->can_enqueue()) {
139 Stop();
140 paused_ = true;
141 return;
142 }
143
144 if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
145 utterance_queue_.push(utterance);
146 } else {
147 Stop();
148 SpeakNow(utterance);
149 }
150 }
151
SpeakNow(Utterance * utterance)152 void TtsController::SpeakNow(Utterance* utterance) {
153 // Get all available voices and try to find a matching voice.
154 std::vector<VoiceData> voices;
155 GetVoices(utterance->profile(), &voices);
156 int index = GetMatchingVoice(utterance, voices);
157
158 // Select the matching voice, but if none was found, initialize an
159 // empty VoiceData with native = true, which will give the native
160 // speech synthesizer a chance to try to synthesize the utterance
161 // anyway.
162 VoiceData voice;
163 if (index >= 0 && index < static_cast<int>(voices.size()))
164 voice = voices[index];
165 else
166 voice.native = true;
167
168 if (!voice.native) {
169 #if !defined(OS_ANDROID)
170 DCHECK(!voice.extension_id.empty());
171 current_utterance_ = utterance;
172 utterance->set_extension_id(voice.extension_id);
173 ExtensionTtsEngineSpeak(utterance, voice);
174 bool sends_end_event =
175 voice.events.find(TTS_EVENT_END) != voice.events.end();
176 if (!sends_end_event) {
177 utterance->Finish();
178 delete utterance;
179 current_utterance_ = NULL;
180 SpeakNextUtterance();
181 }
182 #endif
183 } else {
184 // It's possible for certain platforms to send start events immediately
185 // during |speak|.
186 current_utterance_ = utterance;
187 GetPlatformImpl()->clear_error();
188 bool success = GetPlatformImpl()->Speak(
189 utterance->id(),
190 utterance->text(),
191 utterance->lang(),
192 voice,
193 utterance->continuous_parameters());
194 if (!success)
195 current_utterance_ = NULL;
196
197 // If the native voice wasn't able to process this speech, see if
198 // the browser has built-in TTS that isn't loaded yet.
199 if (!success &&
200 GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile())) {
201 utterance_queue_.push(utterance);
202 return;
203 }
204
205 if (!success) {
206 utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
207 GetPlatformImpl()->error());
208 delete utterance;
209 return;
210 }
211 }
212 }
213
Stop()214 void TtsController::Stop() {
215 paused_ = false;
216 if (current_utterance_ && !current_utterance_->extension_id().empty()) {
217 #if !defined(OS_ANDROID)
218 ExtensionTtsEngineStop(current_utterance_);
219 #endif
220 } else {
221 GetPlatformImpl()->clear_error();
222 GetPlatformImpl()->StopSpeaking();
223 }
224
225 if (current_utterance_)
226 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
227 std::string());
228 FinishCurrentUtterance();
229 ClearUtteranceQueue(true); // Send events.
230 }
231
Pause()232 void TtsController::Pause() {
233 paused_ = true;
234 if (current_utterance_ && !current_utterance_->extension_id().empty()) {
235 #if !defined(OS_ANDROID)
236 ExtensionTtsEnginePause(current_utterance_);
237 #endif
238 } else if (current_utterance_) {
239 GetPlatformImpl()->clear_error();
240 GetPlatformImpl()->Pause();
241 }
242 }
243
Resume()244 void TtsController::Resume() {
245 paused_ = false;
246 if (current_utterance_ && !current_utterance_->extension_id().empty()) {
247 #if !defined(OS_ANDROID)
248 ExtensionTtsEngineResume(current_utterance_);
249 #endif
250 } else if (current_utterance_) {
251 GetPlatformImpl()->clear_error();
252 GetPlatformImpl()->Resume();
253 } else {
254 SpeakNextUtterance();
255 }
256 }
257
OnTtsEvent(int utterance_id,TtsEventType event_type,int char_index,const std::string & error_message)258 void TtsController::OnTtsEvent(int utterance_id,
259 TtsEventType event_type,
260 int char_index,
261 const std::string& error_message) {
262 // We may sometimes receive completion callbacks "late", after we've
263 // already finished the utterance (for example because another utterance
264 // interrupted or we got a call to Stop). This is normal and we can
265 // safely just ignore these events.
266 if (!current_utterance_ || utterance_id != current_utterance_->id()) {
267 return;
268 }
269 current_utterance_->OnTtsEvent(event_type, char_index, error_message);
270 if (current_utterance_->finished()) {
271 FinishCurrentUtterance();
272 SpeakNextUtterance();
273 }
274 }
275
GetVoices(Profile * profile,std::vector<VoiceData> * out_voices)276 void TtsController::GetVoices(Profile* profile,
277 std::vector<VoiceData>* out_voices) {
278 #if !defined(OS_ANDROID)
279 if (profile)
280 GetExtensionVoices(profile, out_voices);
281 #endif
282
283 TtsPlatformImpl* platform_impl = GetPlatformImpl();
284 if (platform_impl && platform_impl->PlatformImplAvailable())
285 platform_impl->GetVoices(out_voices);
286 }
287
IsSpeaking()288 bool TtsController::IsSpeaking() {
289 return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
290 }
291
FinishCurrentUtterance()292 void TtsController::FinishCurrentUtterance() {
293 if (current_utterance_) {
294 if (!current_utterance_->finished())
295 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
296 std::string());
297 delete current_utterance_;
298 current_utterance_ = NULL;
299 }
300 }
301
SpeakNextUtterance()302 void TtsController::SpeakNextUtterance() {
303 if (paused_)
304 return;
305
306 // Start speaking the next utterance in the queue. Keep trying in case
307 // one fails but there are still more in the queue to try.
308 while (!utterance_queue_.empty() && !current_utterance_) {
309 Utterance* utterance = utterance_queue_.front();
310 utterance_queue_.pop();
311 SpeakNow(utterance);
312 }
313 }
314
RetrySpeakingQueuedUtterances()315 void TtsController::RetrySpeakingQueuedUtterances() {
316 if (current_utterance_ == NULL && !utterance_queue_.empty())
317 SpeakNextUtterance();
318 }
319
ClearUtteranceQueue(bool send_events)320 void TtsController::ClearUtteranceQueue(bool send_events) {
321 while (!utterance_queue_.empty()) {
322 Utterance* utterance = utterance_queue_.front();
323 utterance_queue_.pop();
324 if (send_events)
325 utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
326 std::string());
327 else
328 utterance->Finish();
329 delete utterance;
330 }
331 }
332
// Replaces the platform implementation that GetPlatformImpl() returns. The
// pointer is stored as given.
// NOTE(review): presumably meant for injecting a test implementation; confirm
// against the callers.
void TtsController::SetPlatformImpl(
    TtsPlatformImpl* platform_impl) {
  platform_impl_ = platform_impl;
}
337
// Returns the number of utterances waiting in the queue. The current
// utterance is not stored in the queue and so is not counted.
int TtsController::QueueSize() {
  return static_cast<int>(utterance_queue_.size());
}
341
GetPlatformImpl()342 TtsPlatformImpl* TtsController::GetPlatformImpl() {
343 if (!platform_impl_)
344 platform_impl_ = TtsPlatformImpl::GetInstance();
345 return platform_impl_;
346 }
347
GetMatchingVoice(const Utterance * utterance,std::vector<VoiceData> & voices)348 int TtsController::GetMatchingVoice(
349 const Utterance* utterance, std::vector<VoiceData>& voices) {
350 // Make two passes: the first time, do strict language matching
351 // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
352 // language matching ('fr-FR' matches 'fr' and 'fr-CA')
353 for (int pass = 0; pass < 2; ++pass) {
354 for (size_t i = 0; i < voices.size(); ++i) {
355 const VoiceData& voice = voices[i];
356
357 if (!utterance->extension_id().empty() &&
358 utterance->extension_id() != voice.extension_id) {
359 continue;
360 }
361
362 if (!voice.name.empty() &&
363 !utterance->voice_name().empty() &&
364 voice.name != utterance->voice_name()) {
365 continue;
366 }
367 if (!voice.lang.empty() && !utterance->lang().empty()) {
368 std::string voice_lang = voice.lang;
369 std::string utterance_lang = utterance->lang();
370 if (pass == 1) {
371 voice_lang = TrimLanguageCode(voice_lang);
372 utterance_lang = TrimLanguageCode(utterance_lang);
373 }
374 if (voice_lang != utterance_lang) {
375 continue;
376 }
377 }
378 if (voice.gender != TTS_GENDER_NONE &&
379 utterance->gender() != TTS_GENDER_NONE &&
380 voice.gender != utterance->gender()) {
381 continue;
382 }
383
384 if (utterance->required_event_types().size() > 0) {
385 bool has_all_required_event_types = true;
386 for (std::set<TtsEventType>::const_iterator iter =
387 utterance->required_event_types().begin();
388 iter != utterance->required_event_types().end();
389 ++iter) {
390 if (voice.events.find(*iter) == voice.events.end()) {
391 has_all_required_event_types = false;
392 break;
393 }
394 }
395 if (!has_all_required_event_types)
396 continue;
397 }
398
399 return static_cast<int>(i);
400 }
401 }
402
403 return -1;
404 }
405
VoicesChanged()406 void TtsController::VoicesChanged() {
407 for (std::set<VoicesChangedDelegate*>::iterator iter =
408 voices_changed_delegates_.begin();
409 iter != voices_changed_delegates_.end(); ++iter) {
410 (*iter)->OnVoicesChanged();
411 }
412 }
413
// Registers |delegate| to be called from VoicesChanged(). Delegates are kept
// in a set, so registering the same pointer twice has no further effect.
void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) {
  voices_changed_delegates_.insert(delegate);
}
417
// Unregisters |delegate| so that VoicesChanged() no longer calls it. Removing
// a delegate that is not registered does nothing.
void TtsController::RemoveVoicesChangedDelegate(
    VoicesChangedDelegate* delegate) {
  voices_changed_delegates_.erase(delegate);
}
422