• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 
5 #ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
6 #define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
7 
8 #include <queue>
9 #include <set>
10 #include <string>
11 #include <vector>
12 
13 #include "base/memory/scoped_ptr.h"
14 #include "base/memory/singleton.h"
15 #include "base/memory/weak_ptr.h"
16 #include "url/gurl.h"
17 
// Forward declarations. Utterance is defined later in this header;
// TtsPlatformImpl, Profile and base::Value are used below without
// their full definitions being included here.
class Utterance;
class TtsPlatformImpl;
class Profile;

namespace base {
class Value;
}  // namespace base
25 
// Events sent back from the TTS engine indicating the progress of an
// utterance. Use IsFinalTtsEventType() to find out whether a given event
// type means the utterance is finished.
//
// No explicit values are assigned, so the enumerators are numbered
// consecutively from 0 in declaration order.
enum TtsEventType {
  TTS_EVENT_START,
  TTS_EVENT_END,
  TTS_EVENT_WORD,
  TTS_EVENT_SENTENCE,
  TTS_EVENT_MARKER,
  TTS_EVENT_INTERRUPTED,
  TTS_EVENT_CANCELLED,
  TTS_EVENT_ERROR,
  TTS_EVENT_PAUSE,
  TTS_EVENT_RESUME
};
39 
// Gender attribute stored on a voice (VoiceData::gender) and on an
// utterance (Utterance::gender()). TTS_GENDER_NONE is the first
// enumerator and therefore has the value 0.
enum TtsGenderType {
  TTS_GENDER_NONE,
  TTS_GENDER_MALE,
  TTS_GENDER_FEMALE
};
45 
46 // Returns true if this event type is one that indicates an utterance
47 // is finished and can be destroyed.
48 bool IsFinalTtsEventType(TtsEventType event_type);
49 
// The continuous (numeric) parameters that apply to a given utterance.
struct UtteranceContinuousParameters {
  // Defined out of line; the initial values of the fields below are set
  // there and are not visible from this header.
  UtteranceContinuousParameters();

  // Speaking rate, pitch and volume requested for the utterance.
  // NOTE(review): units and valid ranges are not stated in this header --
  // confirm against the TTS API documentation before relying on them.
  double rate;
  double pitch;
  double volume;
};
58 
59 // Information about one voice.
60 struct VoiceData {
61   VoiceData();
62   ~VoiceData();
63 
64   std::string name;
65   std::string lang;
66   TtsGenderType gender;
67   std::string extension_id;
68   std::set<TtsEventType> events;
69 
70   // If true, the synthesis engine is a remote network resource.
71   // It may be higher latency and may incur bandwidth costs.
72   bool remote;
73 
74   // If true, this is implemented by this platform's subclass of
75   // TtsPlatformImpl. If false, this is implemented by an extension.
76   bool native;
77   std::string native_voice_identifier;
78 };
79 
80 // Class that wants to receive events on utterances.
81 class UtteranceEventDelegate {
82  public:
~UtteranceEventDelegate()83   virtual ~UtteranceEventDelegate() {}
84   virtual void OnTtsEvent(Utterance* utterance,
85                           TtsEventType event_type,
86                           int char_index,
87                           const std::string& error_message) = 0;
88 };
89 
// Interface for a class that wants to be notified when the set of
// voices has changed. Delegates are registered and unregistered with
// TtsController::AddVoicesChangedDelegate() and
// TtsController::RemoveVoicesChangedDelegate().
class VoicesChangedDelegate {
 public:
  virtual ~VoicesChangedDelegate() {}

  // Notification that the set of voices has changed.
  virtual void OnVoicesChanged() = 0;
};
97 
98 // One speech utterance.
99 class Utterance {
100  public:
101   // Construct an utterance given a profile and a completion task to call
102   // when the utterance is done speaking. Before speaking this utterance,
103   // its other parameters like text, rate, pitch, etc. should all be set.
104   explicit Utterance(Profile* profile);
105   ~Utterance();
106 
107   // Sends an event to the delegate. If the event type is TTS_EVENT_END
108   // or TTS_EVENT_ERROR, deletes the utterance. If |char_index| is -1,
109   // uses the last good value.
110   void OnTtsEvent(TtsEventType event_type,
111                   int char_index,
112                   const std::string& error_message);
113 
114   // Finish an utterance without sending an event to the delegate.
115   void Finish();
116 
117   // Getters and setters for the text to speak and other speech options.
set_text(const std::string & text)118   void set_text(const std::string& text) { text_ = text; }
text()119   const std::string& text() const { return text_; }
120 
121   void set_options(const base::Value* options);
options()122   const base::Value* options() const { return options_.get(); }
123 
set_src_extension_id(const std::string & src_extension_id)124   void set_src_extension_id(const std::string& src_extension_id) {
125     src_extension_id_ = src_extension_id;
126   }
src_extension_id()127   const std::string& src_extension_id() { return src_extension_id_; }
128 
set_src_id(int src_id)129   void set_src_id(int src_id) { src_id_ = src_id; }
src_id()130   int src_id() { return src_id_; }
131 
set_src_url(const GURL & src_url)132   void set_src_url(const GURL& src_url) { src_url_ = src_url; }
src_url()133   const GURL& src_url() { return src_url_; }
134 
set_voice_name(const std::string & voice_name)135   void set_voice_name(const std::string& voice_name) {
136     voice_name_ = voice_name;
137   }
voice_name()138   const std::string& voice_name() const { return voice_name_; }
139 
set_lang(const std::string & lang)140   void set_lang(const std::string& lang) {
141     lang_ = lang;
142   }
lang()143   const std::string& lang() const { return lang_; }
144 
set_gender(TtsGenderType gender)145   void set_gender(TtsGenderType gender) {
146     gender_ = gender;
147   }
gender()148   TtsGenderType gender() const { return gender_; }
149 
set_continuous_parameters(const UtteranceContinuousParameters & params)150   void set_continuous_parameters(const UtteranceContinuousParameters& params) {
151     continuous_parameters_ = params;
152   }
continuous_parameters()153   const UtteranceContinuousParameters& continuous_parameters() {
154     return continuous_parameters_;
155   }
156 
set_can_enqueue(bool can_enqueue)157   void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
can_enqueue()158   bool can_enqueue() const { return can_enqueue_; }
159 
set_required_event_types(const std::set<TtsEventType> & types)160   void set_required_event_types(const std::set<TtsEventType>& types) {
161     required_event_types_ = types;
162   }
required_event_types()163   const std::set<TtsEventType>& required_event_types() const {
164     return required_event_types_;
165   }
166 
set_desired_event_types(const std::set<TtsEventType> & types)167   void set_desired_event_types(const std::set<TtsEventType>& types) {
168     desired_event_types_ = types;
169   }
desired_event_types()170   const std::set<TtsEventType>& desired_event_types() const {
171     return desired_event_types_;
172   }
173 
extension_id()174   const std::string& extension_id() const { return extension_id_; }
set_extension_id(const std::string & extension_id)175   void set_extension_id(const std::string& extension_id) {
176     extension_id_ = extension_id;
177   }
178 
event_delegate()179   UtteranceEventDelegate* event_delegate() const {
180     return event_delegate_.get();
181   }
set_event_delegate(base::WeakPtr<UtteranceEventDelegate> event_delegate)182   void set_event_delegate(
183       base::WeakPtr<UtteranceEventDelegate> event_delegate) {
184     event_delegate_ = event_delegate;
185   }
186 
187   // Getters and setters for internal state.
profile()188   Profile* profile() const { return profile_; }
id()189   int id() const { return id_; }
finished()190   bool finished() const { return finished_; }
191 
192  private:
193   // The profile that initiated this utterance.
194   Profile* profile_;
195 
196   // The extension ID of the extension providing TTS for this utterance, or
197   // empty if native TTS is being used.
198   std::string extension_id_;
199 
200   // The unique ID of this utterance, used to associate callback functions
201   // with utterances.
202   int id_;
203 
204   // The id of the next utterance, so we can associate requests with
205   // responses.
206   static int next_utterance_id_;
207 
208   // The text to speak.
209   std::string text_;
210 
211   // The full options arg passed to tts.speak, which may include fields
212   // other than the ones we explicitly parse, below.
213   scoped_ptr<base::Value> options_;
214 
215   // The extension ID of the extension that called speak() and should
216   // receive events.
217   std::string src_extension_id_;
218 
219   // The source extension's ID of this utterance, so that it can associate
220   // events with the appropriate callback.
221   int src_id_;
222 
223   // The URL of the page where the source extension called speak.
224   GURL src_url_;
225 
226   // The delegate to be called when an utterance event is fired.
227   base::WeakPtr<UtteranceEventDelegate> event_delegate_;
228 
229   // The parsed options.
230   std::string voice_name_;
231   std::string lang_;
232   TtsGenderType gender_;
233   UtteranceContinuousParameters continuous_parameters_;
234   bool can_enqueue_;
235   std::set<TtsEventType> required_event_types_;
236   std::set<TtsEventType> desired_event_types_;
237 
238   // The index of the current char being spoken.
239   int char_index_;
240 
241   // True if this utterance received an event indicating it's done.
242   bool finished_;
243 };
244 
245 // Singleton class that manages text-to-speech for the TTS and TTS engine
246 // extension APIs, maintaining a queue of pending utterances and keeping
247 // track of all state.
248 class TtsController {
249  public:
250   // Get the single instance of this class.
251   static TtsController* GetInstance();
252 
253   // Returns true if we're currently speaking an utterance.
254   bool IsSpeaking();
255 
256   // Speak the given utterance. If the utterance's can_enqueue flag is true
257   // and another utterance is in progress, adds it to the end of the queue.
258   // Otherwise, interrupts any current utterance and speaks this one
259   // immediately.
260   void SpeakOrEnqueue(Utterance* utterance);
261 
262   // Stop all utterances and flush the queue. Implies leaving pause mode
263   // as well.
264   void Stop();
265 
266   // Pause the speech queue. Some engines may support pausing in the middle
267   // of an utterance.
268   void Pause();
269 
270   // Resume speaking.
271   void Resume();
272 
273   // Handle events received from the speech engine. Events are forwarded to
274   // the callback function, and in addition, completion and error events
275   // trigger finishing the current utterance and starting the next one, if
276   // any.
277   void OnTtsEvent(int utterance_id,
278                   TtsEventType event_type,
279                   int char_index,
280                   const std::string& error_message);
281 
282   // Return a list of all available voices, including the native voice,
283   // if supported, and all voices registered by extensions.
284   void GetVoices(Profile* profile, std::vector<VoiceData>* out_voices);
285 
286   // Called by TtsExtensionLoaderChromeOs::LoadTtsExtension when it
287   // finishes loading the built-in TTS component extension.
288   void RetrySpeakingQueuedUtterances();
289 
290   // Called by the extension system or platform implementation when the
291   // list of voices may have changed and should be re-queried.
292   void VoicesChanged();
293 
294   // Add a delegate that wants to be notified when the set of voices changes.
295   void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate);
296 
297   // Remove delegate that wants to be notified when the set of voices changes.
298   void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate);
299 
300   // For unit testing.
301   void SetPlatformImpl(TtsPlatformImpl* platform_impl);
302   int QueueSize();
303 
304  protected:
305   TtsController();
306   virtual ~TtsController();
307 
308  private:
309   // Get the platform TTS implementation (or injected mock).
310   TtsPlatformImpl* GetPlatformImpl();
311 
312   // Start speaking the given utterance. Will either take ownership of
313   // |utterance| or delete it if there's an error. Returns true on success.
314   void SpeakNow(Utterance* utterance);
315 
316   // Clear the utterance queue. If send_events is true, will send
317   // TTS_EVENT_CANCELLED events on each one.
318   void ClearUtteranceQueue(bool send_events);
319 
320   // Finalize and delete the current utterance.
321   void FinishCurrentUtterance();
322 
323   // Start speaking the next utterance in the queue.
324   void SpeakNextUtterance();
325 
326   // Given an utterance and a vector of voices, return the
327   // index of the voice that best matches the utterance.
328   int GetMatchingVoice(const Utterance* utterance,
329                        std::vector<VoiceData>& voices);
330 
331   friend struct DefaultSingletonTraits<TtsController>;
332 
333   // The current utterance being spoken.
334   Utterance* current_utterance_;
335 
336   // Whether the queue is paused or not.
337   bool paused_;
338 
339   // A queue of utterances to speak after the current one finishes.
340   std::queue<Utterance*> utterance_queue_;
341 
342   // A set of delegates that want to be notified when the voices change.
343   std::set<VoicesChangedDelegate*> voices_changed_delegates_;
344 
345   // A pointer to the platform implementation of text-to-speech, for
346   // dependency injection.
347   TtsPlatformImpl* platform_impl_;
348 
349   DISALLOW_COPY_AND_ASSIGN(TtsController);
350 };
351 
352 #endif  // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
353