• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <math.h>
6 #include <sapi.h>
7 
8 #include "base/memory/singleton.h"
9 #include "base/strings/string_number_conversions.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "base/values.h"
12 #include "base/win/scoped_comptr.h"
13 #include "chrome/browser/speech/tts_controller.h"
14 #include "chrome/browser/speech/tts_platform.h"
15 
16 class TtsPlatformImplWin : public TtsPlatformImpl {
17  public:
PlatformImplAvailable()18   virtual bool PlatformImplAvailable() {
19     return true;
20   }
21 
22   virtual bool Speak(
23       int utterance_id,
24       const std::string& utterance,
25       const std::string& lang,
26       const VoiceData& voice,
27       const UtteranceContinuousParameters& params);
28 
29   virtual bool StopSpeaking();
30 
31   virtual void Pause();
32 
33   virtual void Resume();
34 
35   virtual bool IsSpeaking();
36 
37   virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;
38 
39   // Get the single instance of this class.
40   static TtsPlatformImplWin* GetInstance();
41 
42   static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);
43 
44  private:
45   TtsPlatformImplWin();
~TtsPlatformImplWin()46   virtual ~TtsPlatformImplWin() {}
47 
48   void OnSpeechEvent();
49 
50   base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;
51 
52   // These apply to the current utterance only.
53   std::wstring utterance_;
54   int utterance_id_;
55   int prefix_len_;
56   ULONG stream_number_;
57   int char_position_;
58   bool paused_;
59 
60   friend struct DefaultSingletonTraits<TtsPlatformImplWin>;
61 
62   DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin);
63 };
64 
65 // static
GetInstance()66 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
67   return TtsPlatformImplWin::GetInstance();
68 }
69 
Speak(int utterance_id,const std::string & src_utterance,const std::string & lang,const VoiceData & voice,const UtteranceContinuousParameters & params)70 bool TtsPlatformImplWin::Speak(
71     int utterance_id,
72     const std::string& src_utterance,
73     const std::string& lang,
74     const VoiceData& voice,
75     const UtteranceContinuousParameters& params) {
76   std::wstring prefix;
77   std::wstring suffix;
78 
79   if (!speech_synthesizer_.get())
80     return false;
81 
82   // TODO(dmazzoni): support languages other than the default: crbug.com/88059
83 
84   if (params.rate >= 0.0) {
85     // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's
86     // linear range of -10 to 10:
87     //   0.1 -> -10
88     //   1.0 -> 0
89     //  10.0 -> 10
90     speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate)));
91   }
92 
93   if (params.pitch >= 0.0) {
94     // The TTS api allows a range of -10 to 10 for speech pitch.
95     // TODO(dtseng): cleanup if we ever use any other properties that
96     // require xml.
97     std::wstring pitch_value =
98         base::IntToString16(static_cast<int>(params.pitch * 10 - 10));
99     prefix = L"<pitch absmiddle=\"" + pitch_value + L"\">";
100     suffix = L"</pitch>";
101   }
102 
103   if (params.volume >= 0.0) {
104     // The TTS api allows a range of 0 to 100 for speech volume.
105     speech_synthesizer_->SetVolume(static_cast<uint16>(params.volume * 100));
106   }
107 
108   // TODO(dmazzoni): convert SSML to SAPI xml. http://crbug.com/88072
109 
110   utterance_ = base::UTF8ToWide(src_utterance);
111   utterance_id_ = utterance_id;
112   char_position_ = 0;
113   std::wstring merged_utterance = prefix + utterance_ + suffix;
114   prefix_len_ = prefix.size();
115 
116   HRESULT result = speech_synthesizer_->Speak(
117       merged_utterance.c_str(),
118       SPF_ASYNC,
119       &stream_number_);
120   return (result == S_OK);
121 }
122 
StopSpeaking()123 bool TtsPlatformImplWin::StopSpeaking() {
124   if (speech_synthesizer_.get()) {
125     // Clear the stream number so that any further events relating to this
126     // utterance are ignored.
127     stream_number_ = 0;
128 
129     if (IsSpeaking()) {
130       // Stop speech by speaking the empty string with the purge flag.
131       speech_synthesizer_->Speak(L"", SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL);
132     }
133     if (paused_) {
134       speech_synthesizer_->Resume();
135       paused_ = false;
136     }
137   }
138   return true;
139 }
140 
Pause()141 void TtsPlatformImplWin::Pause() {
142   if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
143     speech_synthesizer_->Pause();
144     paused_ = true;
145     TtsController::GetInstance()->OnTtsEvent(
146         utterance_id_, TTS_EVENT_PAUSE, char_position_, "");
147   }
148 }
149 
Resume()150 void TtsPlatformImplWin::Resume() {
151   if (speech_synthesizer_.get() && utterance_id_ && paused_) {
152     speech_synthesizer_->Resume();
153     paused_ = false;
154     TtsController::GetInstance()->OnTtsEvent(
155         utterance_id_, TTS_EVENT_RESUME, char_position_, "");
156   }
157 }
158 
IsSpeaking()159 bool TtsPlatformImplWin::IsSpeaking() {
160   if (speech_synthesizer_.get()) {
161     SPVOICESTATUS status;
162     HRESULT result = speech_synthesizer_->GetStatus(&status, NULL);
163     if (result == S_OK) {
164       if (status.dwRunningState == 0 ||  // 0 == waiting to speak
165           status.dwRunningState == SPRS_IS_SPEAKING) {
166         return true;
167       }
168     }
169   }
170   return false;
171 }
172 
GetVoices(std::vector<VoiceData> * out_voices)173 void TtsPlatformImplWin::GetVoices(
174     std::vector<VoiceData>* out_voices) {
175   // TODO: get all voices, not just default voice.
176   // http://crbug.com/88059
177   out_voices->push_back(VoiceData());
178   VoiceData& voice = out_voices->back();
179   voice.native = true;
180   voice.name = "native";
181   voice.events.insert(TTS_EVENT_START);
182   voice.events.insert(TTS_EVENT_END);
183   voice.events.insert(TTS_EVENT_MARKER);
184   voice.events.insert(TTS_EVENT_WORD);
185   voice.events.insert(TTS_EVENT_SENTENCE);
186   voice.events.insert(TTS_EVENT_PAUSE);
187   voice.events.insert(TTS_EVENT_RESUME);
188 }
189 
OnSpeechEvent()190 void TtsPlatformImplWin::OnSpeechEvent() {
191   TtsController* controller = TtsController::GetInstance();
192   SPEVENT event;
193   while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {
194     if (event.ulStreamNum != stream_number_)
195       continue;
196 
197     switch (event.eEventId) {
198     case SPEI_START_INPUT_STREAM:
199       controller->OnTtsEvent(
200           utterance_id_, TTS_EVENT_START, 0, std::string());
201       break;
202     case SPEI_END_INPUT_STREAM:
203       char_position_ = utterance_.size();
204       controller->OnTtsEvent(
205           utterance_id_, TTS_EVENT_END, char_position_, std::string());
206       break;
207     case SPEI_TTS_BOOKMARK:
208       controller->OnTtsEvent(
209           utterance_id_, TTS_EVENT_MARKER, char_position_, std::string());
210       break;
211     case SPEI_WORD_BOUNDARY:
212       char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
213       controller->OnTtsEvent(
214           utterance_id_, TTS_EVENT_WORD, char_position_,
215           std::string());
216       break;
217     case SPEI_SENTENCE_BOUNDARY:
218       char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
219       controller->OnTtsEvent(
220           utterance_id_, TTS_EVENT_SENTENCE, char_position_,
221           std::string());
222       break;
223     }
224   }
225 }
226 
TtsPlatformImplWin()227 TtsPlatformImplWin::TtsPlatformImplWin()
228   : utterance_id_(0),
229     prefix_len_(0),
230     stream_number_(0),
231     char_position_(0),
232     paused_(false) {
233   speech_synthesizer_.CreateInstance(CLSID_SpVoice);
234   if (speech_synthesizer_.get()) {
235     ULONGLONG event_mask =
236         SPFEI(SPEI_START_INPUT_STREAM) |
237         SPFEI(SPEI_TTS_BOOKMARK) |
238         SPFEI(SPEI_WORD_BOUNDARY) |
239         SPFEI(SPEI_SENTENCE_BOUNDARY) |
240         SPFEI(SPEI_END_INPUT_STREAM);
241     speech_synthesizer_->SetInterest(event_mask, event_mask);
242     speech_synthesizer_->SetNotifyCallbackFunction(
243         TtsPlatformImplWin::SpeechEventCallback, 0, 0);
244   }
245 }
246 
247 // static
GetInstance()248 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {
249   return Singleton<TtsPlatformImplWin,
250                    LeakySingletonTraits<TtsPlatformImplWin> >::get();
251 }
252 
253 // static
SpeechEventCallback(WPARAM w_param,LPARAM l_param)254 void TtsPlatformImplWin::SpeechEventCallback(
255     WPARAM w_param, LPARAM l_param) {
256   GetInstance()->OnSpeechEvent();
257 }
258