1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_ 6 #define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_ 7 8 #include <queue> 9 #include <set> 10 #include <string> 11 #include <vector> 12 13 #include "base/memory/scoped_ptr.h" 14 #include "base/memory/singleton.h" 15 #include "base/memory/weak_ptr.h" 16 #include "url/gurl.h" 17 18 class Utterance; 19 class TtsPlatformImpl; 20 class Profile; 21 22 namespace base { 23 class Value; 24 } 25 26 // Events sent back from the TTS engine indicating the progress. 27 enum TtsEventType { 28 TTS_EVENT_START, 29 TTS_EVENT_END, 30 TTS_EVENT_WORD, 31 TTS_EVENT_SENTENCE, 32 TTS_EVENT_MARKER, 33 TTS_EVENT_INTERRUPTED, 34 TTS_EVENT_CANCELLED, 35 TTS_EVENT_ERROR, 36 TTS_EVENT_PAUSE, 37 TTS_EVENT_RESUME 38 }; 39 40 enum TtsGenderType { 41 TTS_GENDER_NONE, 42 TTS_GENDER_MALE, 43 TTS_GENDER_FEMALE 44 }; 45 46 // Returns true if this event type is one that indicates an utterance 47 // is finished and can be destroyed. 48 bool IsFinalTtsEventType(TtsEventType event_type); 49 50 // The continuous parameters that apply to a given utterance. 51 struct UtteranceContinuousParameters { 52 UtteranceContinuousParameters(); 53 54 double rate; 55 double pitch; 56 double volume; 57 }; 58 59 // Information about one voice. 60 struct VoiceData { 61 VoiceData(); 62 ~VoiceData(); 63 64 std::string name; 65 std::string lang; 66 TtsGenderType gender; 67 std::string extension_id; 68 std::set<TtsEventType> events; 69 70 // If true, the synthesis engine is a remote network resource. 71 // It may be higher latency and may incur bandwidth costs. 72 bool remote; 73 74 // If true, this is implemented by this platform's subclass of 75 // TtsPlatformImpl. If false, this is implemented by an extension. 76 bool native; 77 std::string native_voice_identifier; 78 }; 79 80 // Class that wants to receive events on utterances. 81 class UtteranceEventDelegate { 82 public: ~UtteranceEventDelegate()83 virtual ~UtteranceEventDelegate() {} 84 virtual void OnTtsEvent(Utterance* utterance, 85 TtsEventType event_type, 86 int char_index, 87 const std::string& error_message) = 0; 88 }; 89 90 // Class that wants to be notified when the set of 91 // voices has changed. 92 class VoicesChangedDelegate { 93 public: ~VoicesChangedDelegate()94 virtual ~VoicesChangedDelegate() {} 95 virtual void OnVoicesChanged() = 0; 96 }; 97 98 // One speech utterance. 99 class Utterance { 100 public: 101 // Construct an utterance given a profile and a completion task to call 102 // when the utterance is done speaking. Before speaking this utterance, 103 // its other parameters like text, rate, pitch, etc. should all be set. 104 explicit Utterance(Profile* profile); 105 ~Utterance(); 106 107 // Sends an event to the delegate. If the event type is TTS_EVENT_END 108 // or TTS_EVENT_ERROR, deletes the utterance. If |char_index| is -1, 109 // uses the last good value. 110 void OnTtsEvent(TtsEventType event_type, 111 int char_index, 112 const std::string& error_message); 113 114 // Finish an utterance without sending an event to the delegate. 115 void Finish(); 116 117 // Getters and setters for the text to speak and other speech options. set_text(const std::string & text)118 void set_text(const std::string& text) { text_ = text; } text()119 const std::string& text() const { return text_; } 120 121 void set_options(const base::Value* options); options()122 const base::Value* options() const { return options_.get(); } 123 set_src_extension_id(const std::string & src_extension_id)124 void set_src_extension_id(const std::string& src_extension_id) { 125 src_extension_id_ = src_extension_id; 126 } src_extension_id()127 const std::string& src_extension_id() { return src_extension_id_; } 128 set_src_id(int src_id)129 void set_src_id(int src_id) { src_id_ = src_id; } src_id()130 int src_id() { return src_id_; } 131 set_src_url(const GURL & src_url)132 void set_src_url(const GURL& src_url) { src_url_ = src_url; } src_url()133 const GURL& src_url() { return src_url_; } 134 set_voice_name(const std::string & voice_name)135 void set_voice_name(const std::string& voice_name) { 136 voice_name_ = voice_name; 137 } voice_name()138 const std::string& voice_name() const { return voice_name_; } 139 set_lang(const std::string & lang)140 void set_lang(const std::string& lang) { 141 lang_ = lang; 142 } lang()143 const std::string& lang() const { return lang_; } 144 set_gender(TtsGenderType gender)145 void set_gender(TtsGenderType gender) { 146 gender_ = gender; 147 } gender()148 TtsGenderType gender() const { return gender_; } 149 set_continuous_parameters(const UtteranceContinuousParameters & params)150 void set_continuous_parameters(const UtteranceContinuousParameters& params) { 151 continuous_parameters_ = params; 152 } continuous_parameters()153 const UtteranceContinuousParameters& continuous_parameters() { 154 return continuous_parameters_; 155 } 156 set_can_enqueue(bool can_enqueue)157 void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; } can_enqueue()158 bool can_enqueue() const { return can_enqueue_; } 159 set_required_event_types(const std::set<TtsEventType> & types)160 void set_required_event_types(const std::set<TtsEventType>& types) { 161 required_event_types_ = types; 162 } required_event_types()163 const std::set<TtsEventType>& required_event_types() const { 164 return required_event_types_; 165 } 166 set_desired_event_types(const std::set<TtsEventType> & types)167 void set_desired_event_types(const std::set<TtsEventType>& types) { 168 desired_event_types_ = types; 169 } desired_event_types()170 const std::set<TtsEventType>& desired_event_types() const { 171 return desired_event_types_; 172 } 173 extension_id()174 const std::string& extension_id() const { return extension_id_; } set_extension_id(const std::string & extension_id)175 void set_extension_id(const std::string& extension_id) { 176 extension_id_ = extension_id; 177 } 178 event_delegate()179 UtteranceEventDelegate* event_delegate() const { 180 return event_delegate_.get(); 181 } set_event_delegate(base::WeakPtr<UtteranceEventDelegate> event_delegate)182 void set_event_delegate( 183 base::WeakPtr<UtteranceEventDelegate> event_delegate) { 184 event_delegate_ = event_delegate; 185 } 186 187 // Getters and setters for internal state. profile()188 Profile* profile() const { return profile_; } id()189 int id() const { return id_; } finished()190 bool finished() const { return finished_; } 191 192 private: 193 // The profile that initiated this utterance. 194 Profile* profile_; 195 196 // The extension ID of the extension providing TTS for this utterance, or 197 // empty if native TTS is being used. 198 std::string extension_id_; 199 200 // The unique ID of this utterance, used to associate callback functions 201 // with utterances. 202 int id_; 203 204 // The id of the next utterance, so we can associate requests with 205 // responses. 206 static int next_utterance_id_; 207 208 // The text to speak. 209 std::string text_; 210 211 // The full options arg passed to tts.speak, which may include fields 212 // other than the ones we explicitly parse, below. 213 scoped_ptr<base::Value> options_; 214 215 // The extension ID of the extension that called speak() and should 216 // receive events. 217 std::string src_extension_id_; 218 219 // The source extension's ID of this utterance, so that it can associate 220 // events with the appropriate callback. 221 int src_id_; 222 223 // The URL of the page where the source extension called speak. 224 GURL src_url_; 225 226 // The delegate to be called when an utterance event is fired. 227 base::WeakPtr<UtteranceEventDelegate> event_delegate_; 228 229 // The parsed options. 230 std::string voice_name_; 231 std::string lang_; 232 TtsGenderType gender_; 233 UtteranceContinuousParameters continuous_parameters_; 234 bool can_enqueue_; 235 std::set<TtsEventType> required_event_types_; 236 std::set<TtsEventType> desired_event_types_; 237 238 // The index of the current char being spoken. 239 int char_index_; 240 241 // True if this utterance received an event indicating it's done. 242 bool finished_; 243 }; 244 245 // Singleton class that manages text-to-speech for the TTS and TTS engine 246 // extension APIs, maintaining a queue of pending utterances and keeping 247 // track of all state. 248 class TtsController { 249 public: 250 // Get the single instance of this class. 251 static TtsController* GetInstance(); 252 253 // Returns true if we're currently speaking an utterance. 254 bool IsSpeaking(); 255 256 // Speak the given utterance. If the utterance's can_enqueue flag is true 257 // and another utterance is in progress, adds it to the end of the queue. 258 // Otherwise, interrupts any current utterance and speaks this one 259 // immediately. 260 void SpeakOrEnqueue(Utterance* utterance); 261 262 // Stop all utterances and flush the queue. Implies leaving pause mode 263 // as well. 264 void Stop(); 265 266 // Pause the speech queue. Some engines may support pausing in the middle 267 // of an utterance. 268 void Pause(); 269 270 // Resume speaking. 271 void Resume(); 272 273 // Handle events received from the speech engine. Events are forwarded to 274 // the callback function, and in addition, completion and error events 275 // trigger finishing the current utterance and starting the next one, if 276 // any. 277 void OnTtsEvent(int utterance_id, 278 TtsEventType event_type, 279 int char_index, 280 const std::string& error_message); 281 282 // Return a list of all available voices, including the native voice, 283 // if supported, and all voices registered by extensions. 284 void GetVoices(Profile* profile, std::vector<VoiceData>* out_voices); 285 286 // Called by TtsExtensionLoaderChromeOs::LoadTtsExtension when it 287 // finishes loading the built-in TTS component extension. 288 void RetrySpeakingQueuedUtterances(); 289 290 // Called by the extension system or platform implementation when the 291 // list of voices may have changed and should be re-queried. 292 void VoicesChanged(); 293 294 // Add a delegate that wants to be notified when the set of voices changes. 295 void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate); 296 297 // Remove delegate that wants to be notified when the set of voices changes. 298 void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate); 299 300 // For unit testing. 301 void SetPlatformImpl(TtsPlatformImpl* platform_impl); 302 int QueueSize(); 303 304 protected: 305 TtsController(); 306 virtual ~TtsController(); 307 308 private: 309 // Get the platform TTS implementation (or injected mock). 310 TtsPlatformImpl* GetPlatformImpl(); 311 312 // Start speaking the given utterance. Will either take ownership of 313 // |utterance| or delete it if there's an error. Returns true on success. 314 void SpeakNow(Utterance* utterance); 315 316 // Clear the utterance queue. If send_events is true, will send 317 // TTS_EVENT_CANCELLED events on each one. 318 void ClearUtteranceQueue(bool send_events); 319 320 // Finalize and delete the current utterance. 321 void FinishCurrentUtterance(); 322 323 // Start speaking the next utterance in the queue. 324 void SpeakNextUtterance(); 325 326 // Given an utterance and a vector of voices, return the 327 // index of the voice that best matches the utterance. 328 int GetMatchingVoice(const Utterance* utterance, 329 std::vector<VoiceData>& voices); 330 331 friend struct DefaultSingletonTraits<TtsController>; 332 333 // The current utterance being spoken. 334 Utterance* current_utterance_; 335 336 // Whether the queue is paused or not. 337 bool paused_; 338 339 // A queue of utterances to speak after the current one finishes. 340 std::queue<Utterance*> utterance_queue_; 341 342 // A set of delegates that want to be notified when the voices change. 343 std::set<VoicesChangedDelegate*> voices_changed_delegates_; 344 345 // A pointer to the platform implementation of text-to-speech, for 346 // dependency injection. 347 TtsPlatformImpl* platform_impl_; 348 349 DISALLOW_COPY_AND_ASSIGN(TtsController); 350 }; 351 352 #endif // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_ 353