Implement text-to-speech support on Android, iOS, HTML5, Linux, macOS and Windows.

Implement TextServer word break method.
This commit is contained in:
bruvzg 2021-11-04 14:33:37 +02:00
parent 3e1b824c05
commit 6ab672d1ef
54 changed files with 3962 additions and 2 deletions

View file

@ -274,6 +274,90 @@ const char *DisplayServerJavaScript::godot2dom_cursor(DisplayServer::CursorShape
}
}
// Reports whether speech is currently in progress, as returned by the
// godot_js_tts_is_speaking() JavaScript binding.
bool DisplayServerJavaScript::tts_is_speaking() const {
	const int speaking = godot_js_tts_is_speaking();
	return speaking != 0;
}
// Reports whether speech is currently paused, as returned by the
// godot_js_tts_is_paused() JavaScript binding.
bool DisplayServerJavaScript::tts_is_paused() const {
	const int paused = godot_js_tts_is_paused();
	return paused != 0;
}
void DisplayServerJavaScript::update_voices_callback(int p_size, const char **p_voice) {
get_singleton()->voices.clear();
for (int i = 0; i < p_size; i++) {
Vector<String> tokens = String::utf8(p_voice[i]).split(";", true, 2);
if (tokens.size() == 2) {
Dictionary voice_d;
voice_d["name"] = tokens[1];
voice_d["id"] = tokens[1];
voice_d["language"] = tokens[0];
get_singleton()->voices.push_back(voice_d);
}
}
}
// Returns the cached list of voices (Dictionaries with "name", "id" and
// "language" keys) after asking the JavaScript side to refresh it through
// update_voices_callback.
Array DisplayServerJavaScript::tts_get_voices() const {
	godot_js_tts_get_voices(&DisplayServerJavaScript::update_voices_callback);
	return voices;
}
void DisplayServerJavaScript::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) {
if (p_interrupt) {
tts_stop();
}
if (p_text.is_empty()) {
tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, p_utterance_id);
return;
}
CharString string = p_text.utf8();
utterance_ids[p_utterance_id] = string;
godot_js_tts_speak(string.get_data(), p_voice.utf8().get_data(), CLAMP(p_volume, 0, 100), CLAMP(p_pitch, 0.f, 2.f), CLAMP(p_rate, 0.1f, 10.f), p_utterance_id, DisplayServerJavaScript::_js_utterance_callback);
}
// Pauses speech by forwarding to the godot_js_tts_pause() JavaScript binding.
void DisplayServerJavaScript::tts_pause() {
godot_js_tts_pause();
}
// Resumes speech by forwarding to the godot_js_tts_resume() JavaScript binding.
void DisplayServerJavaScript::tts_resume() {
godot_js_tts_resume();
}
void DisplayServerJavaScript::tts_stop() {
for (Map<int, CharString>::Element *E = utterance_ids.front(); E; E = E->next()) {
tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, E->key());
}
utterance_ids.clear();
godot_js_tts_stop();
}
void DisplayServerJavaScript::_js_utterance_callback(int p_event, int p_id, int p_pos) {
DisplayServerJavaScript *ds = (DisplayServerJavaScript *)DisplayServer::get_singleton();
if (ds->utterance_ids.has(p_id)) {
int pos = 0;
if ((TTSUtteranceEvent)p_event == DisplayServer::TTS_UTTERANCE_BOUNDARY) {
// Convert position from UTF-8 to UTF-32.
const CharString &string = ds->utterance_ids[p_id];
for (int i = 0; i < MIN(p_pos, string.length()); i++) {
uint8_t c = string[i];
if ((c & 0xe0) == 0xc0) {
i += 1;
} else if ((c & 0xf0) == 0xe0) {
i += 2;
} else if ((c & 0xf8) == 0xf0) {
i += 3;
}
pos++;
}
} else if ((TTSUtteranceEvent)p_event != DisplayServer::TTS_UTTERANCE_STARTED) {
ds->utterance_ids.erase(p_id);
}
ds->tts_post_utterance_event((TTSUtteranceEvent)p_event, p_id, pos);
}
}
void DisplayServerJavaScript::cursor_set_shape(CursorShape p_shape) {
ERR_FAIL_INDEX(p_shape, CURSOR_MAX);
if (cursor_shape == p_shape) {
@ -755,6 +839,8 @@ bool DisplayServerJavaScript::has_feature(Feature p_feature) const {
//case FEATURE_ORIENTATION:
case FEATURE_VIRTUAL_KEYBOARD:
return godot_js_display_vk_available() != 0;
case FEATURE_TEXT_TO_SPEECH:
return godot_js_display_tts_available() != 0;
default:
return false;
}

View file

@ -55,6 +55,8 @@ private:
EMSCRIPTEN_WEBGL_CONTEXT_HANDLE webgl_ctx = 0;
#endif
Map<int, CharString> utterance_ids;
WindowMode window_mode = WINDOW_MODE_WINDOWED;
ObjectID window_attached_instance_id = {};
@ -66,6 +68,8 @@ private:
String clipboard;
Point2 touches[32];
Array voices;
char canvas_id[256] = { 0 };
bool cursor_inside_canvas = true;
CursorShape cursor_shape = CURSOR_ARROW;
@ -89,6 +93,7 @@ private:
static void vk_input_text_callback(const char *p_text, int p_cursor);
static void gamepad_callback(int p_index, int p_connected, const char *p_id, const char *p_guid);
void process_joypads();
static void _js_utterance_callback(int p_event, int p_id, int p_pos);
static Vector<String> get_rendering_drivers_func();
static DisplayServer *create_func(const String &p_rendering_driver, WindowMode p_window_mode, VSyncMode p_vsync_mode, uint32_t p_flags, const Vector2i &p_resolution, Error &r_error);
@ -97,6 +102,7 @@ private:
static void request_quit_callback();
static void window_blur_callback();
static void update_voices_callback(int p_size, const char **p_voice);
static void update_clipboard_callback(const char *p_text);
static void send_window_event_callback(int p_notification);
static void drop_files_js_callback(char **p_filev, int p_filec);
@ -115,6 +121,16 @@ public:
virtual bool has_feature(Feature p_feature) const override;
virtual String get_name() const override;
// tts
// Text-to-speech overrides, implemented on top of the godot_js_tts_* bindings.
// Query whether speech is in progress / paused.
virtual bool tts_is_speaking() const override;
virtual bool tts_is_paused() const override;
// Returns an Array of Dictionaries with "name", "id" and "language" keys.
virtual Array tts_get_voices() const override;
// Speaks p_text; the implementation clamps volume to [0, 100], pitch to [0, 2] and rate to [0.1, 10].
virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false) override;
virtual void tts_pause() override;
virtual void tts_resume() override;
// Posts a "canceled" event for every tracked utterance, then stops speech.
virtual void tts_stop() override;
// cursor
virtual void cursor_set_shape(CursorShape p_shape) override;
virtual CursorShape cursor_get_shape() const override;

View file

@ -67,6 +67,15 @@ extern int godot_js_input_gamepad_sample_get(int p_idx, float r_btns[16], int32_
extern void godot_js_input_paste_cb(void (*p_callback)(const char *p_text));
extern void godot_js_input_drop_files_cb(void (*p_callback)(char **p_filev, int p_filec));
// TTS
// Bindings implemented in the JavaScript library on top of `window.speechSynthesis`.
extern int godot_js_tts_is_speaking();
extern int godot_js_tts_is_paused();
// Invokes p_callback with the number of voices and an array of "<language>;<name>" strings.
// NOTE(review): declared as returning int, while the JavaScript side uses the 'vi' signature and returns nothing - confirm which is intended.
extern int godot_js_tts_get_voices(void (*p_callback)(int p_size, const char **p_voices));
// p_volume is divided by 100 on the JavaScript side; p_callback receives (event code, utterance id, position).
extern void godot_js_tts_speak(const char *p_text, const char *p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, void (*p_callback)(int p_event, int p_id, int p_pos));
extern void godot_js_tts_pause();
extern void godot_js_tts_resume();
extern void godot_js_tts_stop();
// Display
extern int godot_js_display_screen_dpi_get();
extern double godot_js_display_pixel_ratio_get();
@ -109,6 +118,7 @@ extern void godot_js_display_notification_cb(void (*p_callback)(int p_notificati
// Display Virtual Keyboard
extern int godot_js_display_vk_available();
// Text-to-speech availability probe (not virtual-keyboard related): reports whether `speechSynthesis` is present in `window`.
extern int godot_js_display_tts_available();
extern void godot_js_display_vk_cb(void (*p_input)(const char *p_text, int p_cursor));
extern void godot_js_display_vk_show(const char *p_text, int p_multiline, int p_start, int p_end);
extern void godot_js_display_vk_hide();

View file

@ -330,6 +330,91 @@ const GodotDisplay = {
return 0;
},
// Returns the `speaking` flag of the browser speech synthesizer.
godot_js_tts_is_speaking__sig: 'i',
godot_js_tts_is_speaking: function () {
return window.speechSynthesis.speaking;
},
// Returns the `paused` flag of the browser speech synthesizer.
godot_js_tts_is_paused__sig: 'i',
godot_js_tts_is_paused: function () {
return window.speechSynthesis.paused;
},
godot_js_tts_get_voices__sig: 'vi',
godot_js_tts_get_voices: function (p_callback) {
const func = GodotRuntime.get_func(p_callback);
try {
const arr = [];
const voices = window.speechSynthesis.getVoices();
for (let i = 0; i < voices.length; i++) {
arr.push(`${voices[i].lang};${voices[i].name}`);
}
const c_ptr = GodotRuntime.allocStringArray(arr);
func(arr.length, c_ptr);
GodotRuntime.freeStringArray(c_ptr, arr.length);
} catch (e) {
// Fail graciously.
}
},
godot_js_tts_speak__sig: 'viiiffii',
godot_js_tts_speak: function (p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_callback) {
const func = GodotRuntime.get_func(p_callback);
function listener_end(evt) {
evt.currentTarget.cb(1 /*TTS_UTTERANCE_ENDED*/, evt.currentTarget.id, 0);
}
function listener_start(evt) {
evt.currentTarget.cb(0 /*TTS_UTTERANCE_STARTED*/, evt.currentTarget.id, 0);
}
function listener_error(evt) {
evt.currentTarget.cb(2 /*TTS_UTTERANCE_CANCELED*/, evt.currentTarget.id, 0);
}
function listener_bound(evt) {
evt.currentTarget.cb(3 /*TTS_UTTERANCE_BOUNDARY*/, evt.currentTarget.id, evt.charIndex);
}
const utterance = new SpeechSynthesisUtterance(GodotRuntime.parseString(p_text));
utterance.rate = p_rate;
utterance.pitch = p_pitch;
utterance.volume = p_volume / 100.0;
utterance.addEventListener('end', listener_end);
utterance.addEventListener('start', listener_start);
utterance.addEventListener('error', listener_error);
utterance.addEventListener('boundary', listener_bound);
utterance.id = p_utterance_id;
utterance.cb = func;
const voice = GodotRuntime.parseString(p_voice);
const voices = window.speechSynthesis.getVoices();
for (let i = 0; i < voices.length; i++) {
if (voices[i].name === voice) {
utterance.voice = voices[i];
break;
}
}
window.speechSynthesis.resume();
window.speechSynthesis.speak(utterance);
},
// Pauses the browser speech synthesizer.
godot_js_tts_pause__sig: 'v',
godot_js_tts_pause: function () {
window.speechSynthesis.pause();
},
// Resumes the browser speech synthesizer.
godot_js_tts_resume__sig: 'v',
godot_js_tts_resume: function () {
window.speechSynthesis.resume();
},
// Cancels all queued utterances, then calls resume().
// NOTE(review): the resume() after cancel() presumably clears a leftover paused state - confirm.
godot_js_tts_stop__sig: 'v',
godot_js_tts_stop: function () {
window.speechSynthesis.cancel();
window.speechSynthesis.resume();
},
godot_js_display_alert__sig: 'vi',
godot_js_display_alert: function (p_text) {
window.alert(GodotRuntime.parseString(p_text)); // eslint-disable-line no-alert
@ -625,6 +710,11 @@ const GodotDisplay = {
return GodotDisplayVK.available();
},
// Reports whether the `speechSynthesis` property exists on `window`.
godot_js_display_tts_available__sig: 'i',
godot_js_display_tts_available: function () {
return 'speechSynthesis' in window;
},
godot_js_display_vk_cb__sig: 'vi',
godot_js_display_vk_cb: function (p_input_cb) {
const input_cb = GodotRuntime.get_func(p_input_cb);