Add text to speech module

This commit is contained in:
Heiner Lohaus 2024-04-09 05:54:47 +02:00
parent 24345bc07b
commit d4a92bb8df
7 changed files with 114 additions and 15 deletions

View File

@ -21,6 +21,7 @@ from .ChatgptFree import ChatgptFree
from .ChatgptNext import ChatgptNext
from .ChatgptX import ChatgptX
from .DeepInfra import DeepInfra
from .DuckDuckGo import DuckDuckGo
from .FlowGpt import FlowGpt
from .FreeChatgpt import FreeChatgpt
from .FreeGpt import FreeGpt

View File

@ -37,6 +37,7 @@
import llamaTokenizer from "llama-tokenizer-js"
</script>
<script src="https://unpkg.com/gpt-tokenizer/dist/cl100k_base.js" async></script>
<script src="/static/js/text_to_speech/index.js" async></script>
<script>
const user_image = '<img src="/static/img/user.png" alt="your avatar">';
const gpt_image = '<img src="/static/img/gpt.png" alt="your avatar">';

View File

@ -315,6 +315,7 @@ body {
display: flex;
flex-direction: column;
gap: 10px;
flex-wrap: wrap;
}
.message .content,
@ -332,6 +333,10 @@ body {
max-width: 400px;
}
/* Wrapper <div class="audio"> that holds a generated speech <audio> player
   inside a message's content area; laid out as a flex container. */
.message .content .audio{
display: flex;
}
.message .user i {
position: absolute;
bottom: -6px;
@ -357,7 +362,8 @@ body {
cursor: pointer;
}
.message .count .fa-clipboard {
.message .count .fa-clipboard,
.message .count .fa-volume-high {
z-index: 1000;
cursor: pointer;
}
@ -374,6 +380,10 @@ body {
color: var(--accent);
}
/* Speaker icon carrying the "active" class is drawn in the accent colour. */
.message .count .fa-volume-high.active {
color: var(--accent);
}
.message .assistant:hover .fa-xmark,
.message .user:hover .fa-xmark {
display: block;
@ -1136,4 +1146,14 @@ a:-webkit-any-link {
/* Utility class: removes the element from rendering entirely. */
.hidden {
display: none;
}
/* Endless one-second blink. "step-start" makes the element switch between
   visible and invisible abruptly instead of fading. */
.blink {
animation: blinker 1s step-start infinite;
}
/* Keyframes for .blink: fully transparent at the halfway point of each cycle. */
@keyframes blinker {
50% {
opacity: 0;
}
}

View File

@ -64,6 +64,7 @@ const highlight = (container) => {
);
}
let stopped = false;
const register_message_buttons = async () => {
document.querySelectorAll(".message .fa-xmark").forEach(async (el) => {
if (!("click" in el.dataset)) {
@ -90,6 +91,72 @@ const register_message_buttons = async () => {
})
}
});
// Wire up the "read aloud" (speaker) icon of every message.
//
// First click: the message text is fetched, stripped of markdown noise, split
// into short lines and sent line by line to window.handleGenerateSpeech. Each
// finished line arrives through window.onSpeechResponse as an audio URL and is
// appended to the message as an <audio> player.
// Click while a read-out is running: requests a stop, which takes effect when
// the line currently being synthesized comes back.
document.querySelectorAll(".message .fa-volume-high").forEach((el) => {
    // dataset.click marks icons that already have a listener, so calling this
    // registration again does not attach duplicates.
    if ("click" in el.dataset) {
        return;
    }
    el.dataset.click = "true";
    el.addEventListener("click", async () => {
        // classList is a DOMTokenList: the `in` operator only looks at property
        // names, so class membership has to be tested via the DOM instead.
        const running = document.querySelector(".message .fa-volume-high.active");
        if (running || window.doSpeech) {
            stopped = true;
            return;
        }
        // The speech script is loaded with `async` and may be missing.
        if (typeof window.handleGenerateSpeech !== "function") {
            console.error("Text to speech module is not available");
            return;
        }
        // A stop request from an earlier read-out must not block this one.
        stopped = false;
        el.classList.add("blink");
        el.classList.add("active");

        // Restores the idle state of the icon and clears a pending stop request.
        const finish = () => {
            el.classList.remove("blink");
            el.classList.remove("active");
            stopped = false;
        };

        const message_el = el.parentElement.parentElement.parentElement;
        const content_el = el.parentElement.parentElement;

        let speechText;
        try {
            speechText = (await get_message(window.conversation_id, message_el.dataset.index)) || "";
        } catch (error) {
            // Do not leave the icon blinking when the message cannot be loaded.
            finish();
            throw error;
        }
        // The user may have clicked again while the message was loading.
        if (stopped) {
            finish();
            return;
        }

        // Non-greedy patterns: with several links or image sections in one
        // message only the markup itself is removed, not the text in between.
        const lines = speechText
            .replaceAll(/<!-- generated images start -->[\s\S]+?<!-- generated images end -->/g, "")
            .replaceAll(/\[([^\]\n]+)\]\([^)\n]+\)/g, "($1)")
            .replaceAll(/\(http[^)\n]+\)/g, "")
            .replaceAll("`", "")
            .replaceAll("#", "")
            .trim()
            .split(/\n|\.|;/);

        // A lone digit is what remains of list numbering such as "1." after
        // splitting on "."; it is not worth speaking.
        const single_digit = /^[0-9]$/;
        // Returns the next line worth speaking, or undefined when none is left.
        const next_line = () => {
            while (lines.length > 0) {
                const line = lines.shift().trim();
                if (line && !single_digit.test(line)) {
                    return line;
                }
            }
            return undefined;
        };

        // True while no generated clip is playing; only then does a newly
        // arrived clip start automatically.
        let ended = true;
        window.onSpeechResponse = (url) => {
            // url is empty when synthesis of a line failed; that line is skipped.
            if (url) {
                const sound = document.createElement("audio");
                sound.controls = true;
                sound.src = url;
                if (ended) {
                    sound.autoplay = true;
                }
                sound.onended = () => {
                    ended = true;
                };
                sound.onplay = () => {
                    ended = false;
                };
                const container = document.createElement("div");
                container.classList.add("audio");
                container.appendChild(sound);
                content_el.appendChild(container);
            }
            const line = stopped ? undefined : next_line();
            if (line === undefined) {
                finish();
                return;
            }
            return handleGenerateSpeech(line);
        };

        const first_line = next_line();
        if (first_line === undefined) {
            // Nothing speakable in this message.
            finish();
            return;
        }
        return handleGenerateSpeech(first_line);
    });
});
}
const delete_conversations = async () => {
@ -145,7 +212,11 @@ const handle_ask = async () => {
: ''
}
</div>
<div class="count">${count_words_and_tokens(message, get_selected_model())} <i class="fa-regular fa-clipboard"></i></div>
<div class="count">
${count_words_and_tokens(message, get_selected_model())}
<i class="fa-solid fa-volume-high"></i>
<i class="fa-regular fa-clipboard"></i>
</div>
</div>
</div>
`;
@ -479,7 +550,11 @@ const load_conversation = async (conversation_id, scroll=true) => {
<div class="content">
${provider}
<div class="content_inner">${markdown_render(item.content)}</div>
<div class="count">${count_words_and_tokens(item.content, next_provider?.model)} <i class="fa-regular fa-clipboard"></i></div>
<div class="count">
${count_words_and_tokens(item.content, next_provider?.model)}
<i class="fa-solid fa-volume-high"></i>
<i class="fa-regular fa-clipboard"></i>
</div>
</div>
</div>
`;
@ -1149,10 +1224,12 @@ if (SpeechRecognition) {
}
let startValue;
let lastValue;
let timeoutHandle;
// Speech recognition has started: remember the text already in the input,
// forget the last recognised phrase, mark the microphone label as recording
// and schedule may_stop to run after 8 seconds.
recognition.onstart = () => {
    lastValue = "";
    startValue = messageInput.value;
    microLabel.classList.add("recognition");
    timeoutHandle = window.setTimeout(may_stop, 8000);
};
recognition.onend = function() {
@ -1163,25 +1240,22 @@ if (SpeechRecognition) {
return;
}
window.clearTimeout(timeoutHandle);
let notFinal = "";
event.results.forEach((result) => {
const newText = result[0].transcript;
if (newText) {
let newText;
Array.from(event.results).forEach((result) => {
newText = result[0].transcript;
if (newText && newText != lastValue) {
messageInput.value = `${startValue ? startValue+"\n" : ""}${newText.trim()}`;
if (result.isFinal) {
messageInput.value = `${startValue ? startValue+"\n" : ""}${newText.trim()}`;
lastValue = newText;
startValue = messageInput.value;
notFinal = "";
messageInput.focus();
} else {
notFinal += newText;
messageInput.value = `${startValue ? startValue+"\n" : ""}${notFinal.trim()}`;
}
messageInput.style.height = messageInput.scrollHeight + "px";
messageInput.scrollTop = messageInput.scrollHeight;
}
});
window.clearTimeout(timeoutHandle);
timeoutHandle = window.setTimeout(may_stop, notFinal ? 5000 : 8000);
timeoutHandle = window.setTimeout(may_stop, newText ? 8000 : 5000);
};
microLabel.addEventListener("click", () => {
@ -1189,8 +1263,8 @@ if (SpeechRecognition) {
window.clearTimeout(timeoutHandle);
recognition.stop();
} else {
const lang = document.getElementById("recognition-language")?.value || navigator.language;
recognition.lang = lang;
const lang = document.getElementById("recognition-language")?.value;
recognition.lang = lang || navigator.language;
recognition.start();
}
});

View File

@ -0,0 +1 @@
/*
 * Web worker of the text-to-speech module (bundled build output).
 *
 * Protocol, as implemented by module 896 below:
 *   in : { text, speaker_id }
 *   out: progress objects from the model loader, { status: "ready" },
 *        { status: "complete", output: Blob("audio/wav") } or
 *        { status: "error", exception }.
 *
 * NOTE(review): this looks like generated bundler output; the fixes made here
 * (see comments in module 896) should be mirrored in the original sources.
 * The bundler runtime after module 896 is reproduced unchanged, only split
 * across lines at comma boundaries.
 */
(()=>{var e,t,r,n,a={896:(e,t,r)=>{
    "use strict";
    // Module 900 lives in a separate chunk that is not part of this file.
    // Presumably it is the transformers.js library (the model ids below point
    // to it) - TODO confirm against the build configuration.
    var n = r(900);

    // Writes the characters of `text` as single bytes into `view` at `offset`.
    function writeString(view, offset, text) {
        for (let i = 0; i < text.length; ++i) {
            view.setUint8(offset + i, text.charCodeAt(i));
        }
    }

    // Builds a WAV file around `samples`: a 44-byte header followed by the
    // samples as little-endian 32-bit floats. The header declares format tag 3,
    // 1 channel, 16000 samples per second, 64000 bytes per second, 4 bytes per
    // block and 32 bits per sample. Returns the ArrayBuffer.
    function encodeWav(samples) {
        let offset = 44;
        const buffer = new ArrayBuffer(offset + 4 * samples.length);
        const view = new DataView(buffer);
        writeString(view, 0, "RIFF");
        view.setUint32(4, 36 + 4 * samples.length, true);
        writeString(view, 8, "WAVE");
        writeString(view, 12, "fmt ");
        view.setUint32(16, 16, true);
        view.setUint16(20, 3, true);
        view.setUint16(22, 1, true);
        view.setUint32(24, 16e3, true);
        view.setUint32(28, 64e3, true);
        view.setUint16(32, 4, true);
        view.setUint16(34, 32, true);
        writeString(view, 36, "data");
        view.setUint32(40, 4 * samples.length, true);
        for (let i = 0; i < samples.length; ++i, offset += 4) {
            view.setFloat32(offset, samples[i], true);
        }
        // `return` and its value must stay on one line: a line break in between
        // makes the function return undefined (automatic semicolon insertion).
        return buffer;
    }

    n._K2.allowLocalModels = false;

    // Lazily loaded tokenizer, model and vocoder shared by all requests.
    class SpeechPipeline {
        static BASE_URL = "https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/";
        static model_id = "Xenova/speecht5_tts";
        static vocoder_id = "Xenova/speecht5_hifigan";
        static tokenizer_instance = null;
        static model_instance = null;
        static vocoder_instance = null;

        // Starts each download at most once and resolves to
        // [tokenizer, model, vocoder]. Posts { status: "ready" } once all three
        // are available. Rejects if any of them fails to load.
        static async getInstance(progress_callback = null) {
            // The promise is stored in the same field that is tested for null,
            // so the tokenizer is not created again on every request.
            if (this.tokenizer_instance === null) {
                this.tokenizer_instance = n.v6I.from_pretrained(this.model_id, { progress_callback });
            }
            if (this.model_instance === null) {
                this.model_instance = n.fqH.from_pretrained(this.model_id, { quantized: false, progress_callback });
            }
            if (this.vocoder_instance === null) {
                this.vocoder_instance = n.oJL.from_pretrained(this.vocoder_id, { quantized: false, progress_callback });
            }
            // Awaited directly (no `new Promise(async ...)` wrapper) so that a
            // failed download rejects instead of leaving the caller waiting.
            const instances = await Promise.all([
                this.tokenizer_instance,
                this.model_instance,
                this.vocoder_instance,
            ]);
            self.postMessage({ status: "ready" });
            return instances;
        }

        // Downloads `${BASE_URL}${speaker_id}.bin` and wraps its content as a
        // float32 value of shape [1, 512]. Throws on an HTTP error status.
        static async getSpeakerEmbeddings(speaker_id) {
            const url = `${this.BASE_URL}${speaker_id}.bin`;
            const response = await fetch(url);
            if (!response.ok) {
                throw new Error(`Failed to load speaker embeddings (${response.status}): ${url}`);
            }
            return new n.qYS("float32", new Float32Array(await response.arrayBuffer()), [1, 512]);
        }
    }

    // Speaker embeddings that were already downloaded, keyed by speaker id.
    const speakerCache = new Map();

    self.addEventListener("message", async (event) => {
        let result;
        try {
            // Loading and embedding failures are reported to the page as well,
            // not only failures of the synthesis itself.
            const [tokenizer, model, vocoder] = await SpeechPipeline.getInstance((progress) => {
                self.postMessage(progress);
            });
            const { input_ids } = tokenizer(event.data.text);
            let embeddings = speakerCache.get(event.data.speaker_id);
            if (embeddings === undefined) {
                embeddings = await SpeechPipeline.getSpeakerEmbeddings(event.data.speaker_id);
                speakerCache.set(event.data.speaker_id, embeddings);
            }
            result = await model.generate_speech(input_ids, embeddings, { vocoder });
        } catch (error) {
            self.postMessage({ status: "error", exception: error });
            throw error;
        }
        const wav = encodeWav(result.waveform.data);
        self.postMessage({ status: "complete", output: new Blob([wav], { type: "audio/wav" }) });
    });
},52:()=>{},143:()=>{},603:()=>{},806:()=>{},853:()=>{},9:()=>{},837:()=>{},499:()=>{}},s={};
function o(e){var t=s[e];if(void 0!==t)return t.exports;var r=s[e]={exports:{}};return a[e](r,r.exports,o),r.exports}
o.m=a,
o.x=()=>{var e=o.O(void 0,[900],(()=>o(896)));return o.O(e)},
e=[],
o.O=(t,r,n,a)=>{if(!r){var s=1/0;for(d=0;d<e.length;d++){for(var[r,n,a]=e[d],i=!0,c=0;c<r.length;c++)(!1&a||s>=a)&&Object.keys(o.O).every((e=>o.O[e](r[c])))?r.splice(c--,1):(i=!1,a<s&&(s=a));if(i){e.splice(d--,1);var l=n();void 0!==l&&(t=l)}}return t}a=a||0;for(var d=e.length;d>0&&e[d-1][2]>a;d--)e[d]=e[d-1];e[d]=[r,n,a]},
r=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,
o.t=function(e,n){if(1&n&&(e=this(e)),8&n)return e;if("object"==typeof e&&e){if(4&n&&e.__esModule)return e;if(16&n&&"function"==typeof e.then)return e}var a=Object.create(null);o.r(a);var s={};t=t||[null,r({}),r([]),r(r)];for(var i=2&n&&e;"object"==typeof i&&!~t.indexOf(i);i=r(i))Object.getOwnPropertyNames(i).forEach((t=>s[t]=()=>e[t]));return s.default=()=>e,o.d(a,s),a},
o.d=(e,t)=>{for(var r in t)o.o(t,r)&&!o.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:t[r]})},
o.f={},
o.e=e=>Promise.all(Object.keys(o.f).reduce(((t,r)=>(o.f[r](e,t),t)),[])),
o.u=e=>e+".index.js",
o.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),
o.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),
o.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},
(()=>{var e;o.g.importScripts&&(e=o.g.location+"");var t=o.g.document;if(!e&&t&&(t.currentScript&&(e=t.currentScript.src),!e)){var r=t.getElementsByTagName("script");if(r.length)for(var n=r.length-1;n>-1&&(!e||!/^http(s?):/.test(e));)e=r[n--].src}if(!e)throw new Error("Automatic publicPath is not supported in this browser");e=e.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/"),o.p=e})(),
(()=>{var e={630:1};o.f.i=(t,r)=>{e[t]||importScripts(o.p+o.u(t))};var t=self.webpackChunk=self.webpackChunk||[],r=t.push.bind(t);t.push=t=>{var[n,a,s]=t;for(var i in a)o.o(a,i)&&(o.m[i]=a[i]);for(s&&s(o);n.length;)e[n.pop()]=1;r(t)}})(),
n=o.x,
o.x=()=>o.e(900).then(n),
o.x()})();

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
/*
 * Page-side entry point of the text-to-speech module (bundled build output).
 *
 * Creates the speech worker and exposes on `window`:
 *   - doSpeech: true while a synthesis request is being processed
 *   - SPEAKERS: display name -> speaker id accepted by handleGenerateSpeech
 *   - handleGenerateSpeech(text, speaker_id): sends a request to the worker
 *   - onSpeechResponse(url): callback invoked with an object URL of the audio,
 *     or null on failure; the default only logs and is meant to be replaced.
 *
 * NOTE(review): this looks like generated bundler output; the fix below should
 * be mirrored in the original sources.
 */
(() => {
    "use strict";
    // Small bundler runtime: chunk file naming (u), global object (g),
    // own-property test (o), public path (p) and base URL (b).
    const runtime = { m: {}, u: (chunkId) => chunkId + ".index.js" };
    runtime.g = (function () {
        if ("object" == typeof globalThis) return globalThis;
        try {
            // Fallback for environments without globalThis.
            return this || new Function("return this")();
        } catch (e) {
            if ("object" == typeof window) return window;
        }
    })();
    runtime.o = (obj, prop) => Object.prototype.hasOwnProperty.call(obj, prop);

    // Derive the directory this script was loaded from; the worker chunk is
    // resolved relative to it.
    (() => {
        let scriptUrl;
        if (runtime.g.importScripts) scriptUrl = runtime.g.location + "";
        const doc = runtime.g.document;
        if (!scriptUrl && doc) {
            if (doc.currentScript) scriptUrl = doc.currentScript.src;
            if (!scriptUrl) {
                const scripts = doc.getElementsByTagName("script");
                if (scripts.length) {
                    let i = scripts.length - 1;
                    while (i > -1 && (!scriptUrl || !/^http(s?):/.test(scriptUrl))) {
                        scriptUrl = scripts[i--].src;
                    }
                }
            }
        }
        if (!scriptUrl) throw new Error("Automatic publicPath is not supported in this browser");
        scriptUrl = scriptUrl.replace(/#.*$/, "").replace(/\?.*$/, "").replace(/\/[^\/]+$/, "/");
        runtime.p = scriptUrl;
    })();
    runtime.b = document.baseURI || self.location.href;

    const worker = {};
    if (!worker.current) {
        worker.current = new Worker(new URL(runtime.p + runtime.u(630), runtime.b), { type: void 0 });
    }

    window.doSpeech = false;
    worker.current.addEventListener("message", (event) => {
        // The busy flag is cleared BEFORE the callback runs: the callback may
        // call handleGenerateSpeech for the next piece of text, which sets the
        // flag to true again. Clearing it afterwards would overwrite that and
        // report "idle" while the worker is still busy (and would leave the
        // flag stuck at true if the callback threw).
        switch (event.data.status) {
            case "error":
                window.doSpeech = false;
                window.onSpeechResponse(null);
                break;
            case "complete": {
                const url = URL.createObjectURL(event.data.output);
                window.doSpeech = false;
                window.onSpeechResponse(url);
                break;
            }
            // Any other status (e.g. loader progress) is ignored here.
        }
    });

    window.SPEAKERS = {
        "US female 1": "cmu_us_slt_arctic-wav-arctic_a0001",
        "US female 2": "cmu_us_clb_arctic-wav-arctic_a0001",
        "US male 1": "cmu_us_bdl_arctic-wav-arctic_a0003",
        "US male 2": "cmu_us_rms_arctic-wav-arctic_a0003",
        "Canadian male": "cmu_us_jmk_arctic-wav-arctic_a0002",
        "Scottish male": "cmu_us_awb_arctic-wav-arctic_b0002",
        "Indian male": "cmu_us_ksp_arctic-wav-arctic_a0007",
    };

    // Marks the module as busy and asks the worker to synthesize `text` with
    // the given speaker (defaults to "US female 1").
    window.handleGenerateSpeech = (text, speaker_id = "cmu_us_slt_arctic-wav-arctic_a0001") => {
        window.doSpeech = true;
        worker.current.postMessage({ text: text, speaker_id: speaker_id });
    };

    // Default response handler; callers are expected to replace it.
    window.onSpeechResponse = (url) => console.log(url);
})();