mirror of https://github.com/xtekky/gpt4free.git
Add text to speech module
This commit is contained in:
parent
24345bc07b
commit
d4a92bb8df
|
@ -21,6 +21,7 @@ from .ChatgptFree import ChatgptFree
|
|||
from .ChatgptNext import ChatgptNext
|
||||
from .ChatgptX import ChatgptX
|
||||
from .DeepInfra import DeepInfra
|
||||
from .DuckDuckGo import DuckDuckGo
|
||||
from .FlowGpt import FlowGpt
|
||||
from .FreeChatgpt import FreeChatgpt
|
||||
from .FreeGpt import FreeGpt
|
||||
|
|
|
@ -37,6 +37,7 @@
|
|||
import llamaTokenizer from "llama-tokenizer-js"
|
||||
</script>
|
||||
<script src="https://unpkg.com/gpt-tokenizer/dist/cl100k_base.js" async></script>
|
||||
<script src="/static/js/text_to_speech/index.js" async></script>
|
||||
<script>
|
||||
const user_image = '<img src="/static/img/user.png" alt="your avatar">';
|
||||
const gpt_image = '<img src="/static/img/gpt.png" alt="your avatar">';
|
||||
|
|
|
@ -315,6 +315,7 @@ body {
|
|||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 10px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.message .content,
|
||||
|
@ -332,6 +333,10 @@ body {
|
|||
max-width: 400px;
|
||||
}
|
||||
|
||||
.message .content .audio{
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.message .user i {
|
||||
position: absolute;
|
||||
bottom: -6px;
|
||||
|
@ -357,7 +362,8 @@ body {
|
|||
cursor: pointer;
|
||||
}
|
||||
|
||||
.message .count .fa-clipboard {
|
||||
.message .count .fa-clipboard,
|
||||
.message .count .fa-volume-high {
|
||||
z-index: 1000;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
@ -374,6 +380,10 @@ body {
|
|||
color: var(--accent);
|
||||
}
|
||||
|
||||
.message .count .fa-volume-high.active {
|
||||
color: var(--accent);
|
||||
}
|
||||
|
||||
.message .assistant:hover .fa-xmark,
|
||||
.message .user:hover .fa-xmark {
|
||||
display: block;
|
||||
|
@ -1136,4 +1146,14 @@ a:-webkit-any-link {
|
|||
|
||||
.hidden {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.blink {
|
||||
animation: blinker 1s step-start infinite;
|
||||
}
|
||||
|
||||
@keyframes blinker {
|
||||
50% {
|
||||
opacity: 0;
|
||||
}
|
||||
}
|
|
@ -64,6 +64,7 @@ const highlight = (container) => {
|
|||
);
|
||||
}
|
||||
|
||||
let stopped = false;
|
||||
const register_message_buttons = async () => {
|
||||
document.querySelectorAll(".message .fa-xmark").forEach(async (el) => {
|
||||
if (!("click" in el.dataset)) {
|
||||
|
@ -90,6 +91,72 @@ const register_message_buttons = async () => {
|
|||
})
|
||||
}
|
||||
});
|
||||
// Wire up the "read aloud" (volume) icon of every rendered message.
// Clicking an idle icon fetches the message text, strips markdown noise, splits
// it into sentence-like lines and feeds them one at a time to
// handleGenerateSpeech(); each answer arrives through window.onSpeechResponse
// and is appended to the message as an <audio> element.
// Clicking again while speech is being generated requests a stop.
document.querySelectorAll(".message .fa-volume-high").forEach((el) => {
    // dataset.click marks icons that already carry a listener, so running this
    // registration again does not attach a second one.
    if ("click" in el.dataset) {
        return;
    }
    el.dataset.click = "true";
    el.addEventListener("click", async () => {
        // A click while this icon is active, while a generation request is in
        // flight, or while a stop is already pending counts as a stop request.
        // classList is a DOMTokenList: token membership needs contains(); the
        // `in` operator only inspects property names and was always false.
        if (el.classList.contains("active") || window.doSpeech || stopped) {
            stopped = true;
            return;
        }
        // The speech script is loaded separately; without it there is nothing
        // to call, so bail out before turning the indicator on.
        if (typeof window.handleGenerateSpeech !== "function") {
            console.error("Text to speech module is not loaded");
            return;
        }

        const clear_indicator = () => el.classList.remove("blink", "active");
        el.classList.add("blink", "active");

        const message_el = el.parentElement.parentElement.parentElement;
        const content_el = el.parentElement.parentElement;

        let speechText;
        try {
            speechText = await get_message(window.conversation_id, message_el.dataset.index);
        } catch (error) {
            // Do not leave the icon blinking when the message cannot be loaded.
            clear_indicator();
            throw error;
        }
        // A stop may have been requested while the message was loading.
        if (stopped) {
            stopped = false;
            clear_indicator();
            return;
        }

        // Markdown links become "(label)"; lazy quantifiers keep two links on
        // one line from being merged into a single match.
        speechText = speechText.replaceAll(/\[(.+?)\]\(.+?\)/g, "($1)");
        // Drop bare URLs in parentheses.
        speechText = speechText.replaceAll(/\(http.+?\)/g, "");
        speechText = speechText.replaceAll("`", "").replaceAll("#", "");
        // Drop generated-image sections, one section at a time.
        speechText = speechText.replaceAll(
            /<!-- generated images start -->[\s\S]+?<!-- generated images end -->/g,
            ""
        );

        const lines = speechText.trim().split(/\n|\.|;/);
        const single_digit = /^[0-9]$/;

        // Returns the next line worth speaking, or null when none is left.
        // Empty lines and lone digits (list numbers cut off by the "." split)
        // are skipped.
        const next_line = () => {
            while (lines.length > 0) {
                const candidate = lines.shift().trim();
                if (candidate && !single_digit.test(candidate)) {
                    return candidate;
                }
            }
            return null;
        };

        // True while no appended clip is playing; only then may a new clip
        // start on its own.
        let ended = true;

        // Appends one generated clip below the message content.
        const append_audio = (url) => {
            const sound = document.createElement("audio");
            sound.controls = true;
            sound.src = url;
            sound.type = "audio/wav";
            if (ended) {
                sound.autoplay = true;
            }
            sound.onended = () => {
                ended = true;
            };
            sound.onplay = () => {
                ended = false;
            };
            const container = document.createElement("div");
            container.classList.add("audio");
            container.appendChild(sound);
            content_el.appendChild(container);
        };

        // Called by the speech script with an object URL, or with a falsy value
        // when generation of the previous line failed.
        window.onSpeechResponse = (url) => {
            if (url) {
                append_audio(url);
            }
            if (stopped) {
                // Consume the stop request so the next click can start again.
                stopped = false;
                clear_indicator();
                return;
            }
            const line = next_line();
            if (line === null) {
                // Everything has been spoken: always switch the indicator off.
                clear_indicator();
                return;
            }
            return handleGenerateSpeech(line);
        };

        const first_line = next_line();
        if (first_line === null) {
            clear_indicator();
            return;
        }
        return handleGenerateSpeech(first_line);
    });
});
|
||||
}
|
||||
|
||||
const delete_conversations = async () => {
|
||||
|
@ -145,7 +212,11 @@ const handle_ask = async () => {
|
|||
: ''
|
||||
}
|
||||
</div>
|
||||
<div class="count">${count_words_and_tokens(message, get_selected_model())} <i class="fa-regular fa-clipboard"></i></div>
|
||||
<div class="count">
|
||||
${count_words_and_tokens(message, get_selected_model())}
|
||||
<i class="fa-solid fa-volume-high"></i>
|
||||
<i class="fa-regular fa-clipboard"></i>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
`;
|
||||
|
@ -479,7 +550,11 @@ const load_conversation = async (conversation_id, scroll=true) => {
|
|||
<div class="content">
|
||||
${provider}
|
||||
<div class="content_inner">${markdown_render(item.content)}</div>
|
||||
<div class="count">${count_words_and_tokens(item.content, next_provider?.model)} <i class="fa-regular fa-clipboard"></i></div>
|
||||
<div class="count">
|
||||
${count_words_and_tokens(item.content, next_provider?.model)}
|
||||
<i class="fa-solid fa-volume-high"></i>
|
||||
<i class="fa-regular fa-clipboard"></i>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
`;
|
||||
|
@ -1149,10 +1224,12 @@ if (SpeechRecognition) {
|
|||
}
|
||||
|
||||
let startValue;
|
||||
let lastValue;
|
||||
let timeoutHandle;
|
||||
// Recognition has started: flag the microphone label, snapshot the current
// input text (later results are written after it), clear the last recognised
// text and schedule may_stop to run after 8000 ms.
recognition.onstart = () => {
    microLabel.classList.add("recognition");
    startValue = messageInput.value;
    lastValue = "";
    timeoutHandle = window.setTimeout(may_stop, 8000);
};
|
||||
recognition.onend = function() {
|
||||
|
@ -1163,25 +1240,22 @@ if (SpeechRecognition) {
|
|||
return;
|
||||
}
|
||||
window.clearTimeout(timeoutHandle);
|
||||
let notFinal = "";
|
||||
event.results.forEach((result) => {
|
||||
const newText = result[0].transcript;
|
||||
if (newText) {
|
||||
let newText;
|
||||
Array.from(event.results).forEach((result) => {
|
||||
newText = result[0].transcript;
|
||||
if (newText && newText != lastValue) {
|
||||
messageInput.value = `${startValue ? startValue+"\n" : ""}${newText.trim()}`;
|
||||
if (result.isFinal) {
|
||||
messageInput.value = `${startValue ? startValue+"\n" : ""}${newText.trim()}`;
|
||||
lastValue = newText;
|
||||
startValue = messageInput.value;
|
||||
notFinal = "";
|
||||
messageInput.focus();
|
||||
} else {
|
||||
notFinal += newText;
|
||||
messageInput.value = `${startValue ? startValue+"\n" : ""}${notFinal.trim()}`;
|
||||
}
|
||||
messageInput.style.height = messageInput.scrollHeight + "px";
|
||||
messageInput.scrollTop = messageInput.scrollHeight;
|
||||
}
|
||||
});
|
||||
window.clearTimeout(timeoutHandle);
|
||||
timeoutHandle = window.setTimeout(may_stop, notFinal ? 5000 : 8000);
|
||||
timeoutHandle = window.setTimeout(may_stop, newText ? 8000 : 5000);
|
||||
};
|
||||
|
||||
microLabel.addEventListener("click", () => {
|
||||
|
@ -1189,8 +1263,8 @@ if (SpeechRecognition) {
|
|||
window.clearTimeout(timeoutHandle);
|
||||
recognition.stop();
|
||||
} else {
|
||||
const lang = document.getElementById("recognition-language")?.value || navigator.language;
|
||||
recognition.lang = lang;
|
||||
const lang = document.getElementById("recognition-language")?.value;
|
||||
recognition.lang = lang || navigator.language;
|
||||
recognition.start();
|
||||
}
|
||||
});
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
/*
 * Text-to-speech web worker (minified webpack bundle, kept on ONE physical line).
 *
 * Module 896 is the worker entry point:
 *   - requires module 900, which the runtime loads as a separate chunk
 *     ("900.index.js") through importScripts (presumably the transformers
 *     library - confirm against the build), and sets allowLocalModels to false;
 *   - class `s` lazily creates the tokenizer, the "Xenova/speecht5_tts" model and
 *     the "Xenova/speecht5_hifigan" vocoder, posting {status: "ready"} once all
 *     three are resolved, and fetches speaker embeddings (`<speaker_id>.bin`)
 *     from BASE_URL as a [1, 512] float32 tensor, cached per speaker in a Map;
 *   - on every "message" event ({text, speaker_id}) it generates speech, wraps
 *     the waveform in a 44-byte RIFF/WAVE header (format tag 3, 1 channel,
 *     16000 Hz, 32 bits per sample) and posts {status: "complete", output: Blob};
 *     a failure inside generate_speech posts {status: "error", exception}.
 * Everything after module 896 is the webpack runtime (module cache, chunk
 * loading, public path detection).
 *
 * Two fixes compared with the previous text of this file:
 *   1. The bundle is a single line again. It used to be broken right after
 *      `return ` inside the WAV encoder; a line terminator after `return`
 *      triggers automatic semicolon insertion, so the encoder returned
 *      undefined instead of the ArrayBuffer.
 *   2. getInstance() now stores the tokenizer promise in `tokenizer_instance`,
 *      the field its null check reads; before, the check never became false
 *      and the tokenizer was requested again on every message.
 *
 * NOTE(review): this is a build artifact - apply the same tokenizer fix to the
 * un-minified worker source so the next build does not undo it.
 */
(()=>{var e,t,r,n,a={896:(e,t,r)=>{"use strict";var n=r(900);function a(e,t,r){for(let n=0;n<r.length;++n)e.setUint8(t+n,r.charCodeAt(n))}n._K2.allowLocalModels=!1;class s{static BASE_URL="https://huggingface.co/datasets/Xenova/cmu-arctic-xvectors-extracted/resolve/main/";static model_id="Xenova/speecht5_tts";static vocoder_id="Xenova/speecht5_hifigan";static tokenizer_instance=null;static model_instance=null;static vocoder_instance=null;static async getInstance(e=null){return null===this.tokenizer_instance&&(this.tokenizer_instance=n.v6I.from_pretrained(this.model_id,{progress_callback:e})),null===this.model_instance&&(this.model_instance=n.fqH.from_pretrained(this.model_id,{quantized:!1,progress_callback:e})),null===this.vocoder_instance&&(this.vocoder_instance=n.oJL.from_pretrained(this.vocoder_id,{quantized:!1,progress_callback:e})),new Promise((async(e,t)=>{const r=await Promise.all([this.tokenizer_instance,this.model_instance,this.vocoder_instance]);self.postMessage({status:"ready"}),e(r)}))}static async getSpeakerEmbeddings(e){const t=`${this.BASE_URL}${e}.bin`;return new n.qYS("float32",new Float32Array(await(await fetch(t)).arrayBuffer()),[1,512])}}const o=new Map;self.addEventListener("message",(async e=>{const[t,r,n]=await s.getInstance((e=>{self.postMessage(e)})),{input_ids:i}=t(e.data.text);let c,l=o.get(e.data.speaker_id);void 0===l&&(l=await s.getSpeakerEmbeddings(e.data.speaker_id),o.set(e.data.speaker_id,l));try{c=await r.generate_speech(i,l,{vocoder:n})}catch(e){throw self.postMessage({status:"error",exception:e}),e}const{waveform:d}=c,p=function(e){let t=44;const r=new ArrayBuffer(t+4*e.length),n=new DataView(r);a(n,0,"RIFF"),n.setUint32(4,36+4*e.length,!0),a(n,8,"WAVE"),a(n,12,"fmt "),n.setUint32(16,16,!0),n.setUint16(20,3,!0),n.setUint16(22,1,!0),n.setUint32(24,16e3,!0),n.setUint32(28,64e3,!0),n.setUint16(32,4,!0),n.setUint16(34,32,!0),a(n,36,"data"),n.setUint32(40,4*e.length,!0);for(let r=0;r<e.length;++r,t+=4)n.setFloat32(t,e[r],!0);return r}(d.data);self.postMessage({status:"complete",output:new Blob([p],{type:"audio/wav"})})}))},52:()=>{},143:()=>{},603:()=>{},806:()=>{},853:()=>{},9:()=>{},837:()=>{},499:()=>{}},s={};function o(e){var t=s[e];if(void 0!==t)return t.exports;var r=s[e]={exports:{}};return a[e](r,r.exports,o),r.exports}o.m=a,o.x=()=>{var e=o.O(void 0,[900],(()=>o(896)));return o.O(e)},e=[],o.O=(t,r,n,a)=>{if(!r){var s=1/0;for(d=0;d<e.length;d++){for(var[r,n,a]=e[d],i=!0,c=0;c<r.length;c++)(!1&a||s>=a)&&Object.keys(o.O).every((e=>o.O[e](r[c])))?r.splice(c--,1):(i=!1,a<s&&(s=a));if(i){e.splice(d--,1);var l=n();void 0!==l&&(t=l)}}return t}a=a||0;for(var d=e.length;d>0&&e[d-1][2]>a;d--)e[d]=e[d-1];e[d]=[r,n,a]},r=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,o.t=function(e,n){if(1&n&&(e=this(e)),8&n)return e;if("object"==typeof e&&e){if(4&n&&e.__esModule)return e;if(16&n&&"function"==typeof e.then)return e}var a=Object.create(null);o.r(a);var s={};t=t||[null,r({}),r([]),r(r)];for(var i=2&n&&e;"object"==typeof i&&!~t.indexOf(i);i=r(i))Object.getOwnPropertyNames(i).forEach((t=>s[t]=()=>e[t]));return s.default=()=>e,o.d(a,s),a},o.d=(e,t)=>{for(var r in t)o.o(t,r)&&!o.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:t[r]})},o.f={},o.e=e=>Promise.all(Object.keys(o.f).reduce(((t,r)=>(o.f[r](e,t),t)),[])),o.u=e=>e+".index.js",o.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),o.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),o.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},(()=>{var e;o.g.importScripts&&(e=o.g.location+"");var t=o.g.document;if(!e&&t&&(t.currentScript&&(e=t.currentScript.src),!e)){var r=t.getElementsByTagName("script");if(r.length)for(var n=r.length-1;n>-1&&(!e||!/^http(s?):/.test(e));)e=r[n--].src}if(!e)throw new Error("Automatic publicPath is not supported in this browser");e=e.replace(/#.*$/,"").replace(/\?.*$/,"").replace(/\/[^\/]+$/,"/"),o.p=e})(),(()=>{var e={630:1};o.f.i=(t,r)=>{e[t]||importScripts(o.p+o.u(t))};var t=self.webpackChunk=self.webpackChunk||[],r=t.push.bind(t);t.push=t=>{var[n,a,s]=t;for(var i in a)o.o(a,i)&&(o.m[i]=a[i]);for(s&&s(o);n.length;)e[n.pop()]=1;r(t)}})(),n=o.x,o.x=()=>o.e(900).then(n),o.x()})();
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
|||
/*
 * Page-side half of the text-to-speech module (webpack bundle, reformatted for
 * readability).
 *
 * It starts the speech worker ("630.index.js", resolved next to this script)
 * and exposes four globals:
 *   window.SPEAKERS              display name -> speaker id
 *   window.doSpeech              true while a request is waiting for the worker
 *   window.handleGenerateSpeech  (text, speaker_id?) posts a request to the worker
 *   window.onSpeechResponse      callback invoked with an object URL of the
 *                                generated audio, or with null on error; pages
 *                                overwrite the default, which only logs
 *
 * Fix: doSpeech is cleared BEFORE onSpeechResponse runs. The callback may call
 * handleGenerateSpeech() for the next sentence right away; clearing the flag
 * afterwards reset it while that new request was still in flight.
 */
(() => {
    "use strict";

    // Minimal webpack runtime object: module table, chunk file naming, helpers.
    const runtime = { m: {}, u: (chunkId) => chunkId + ".index.js" };

    // Global object lookup with fallbacks for environments without globalThis.
    runtime.g = (function () {
        if (typeof globalThis === "object") {
            return globalThis;
        }
        try {
            return this || new Function("return this")();
        } catch (error) {
            if (typeof window === "object") {
                return window;
            }
        }
    })();

    runtime.o = (object, key) => Object.prototype.hasOwnProperty.call(object, key);

    // Derive the public path (directory this script was served from) so the
    // worker chunk can be located relative to it.
    (() => {
        let scriptUrl;
        if (runtime.g.importScripts) {
            scriptUrl = runtime.g.location + "";
        }
        const doc = runtime.g.document;
        if (!scriptUrl && doc) {
            if (doc.currentScript) {
                scriptUrl = doc.currentScript.src;
            }
            if (!scriptUrl) {
                // Fall back to the last <script> element with an http(s) source.
                const scripts = doc.getElementsByTagName("script");
                if (scripts.length) {
                    let index = scripts.length - 1;
                    while (index > -1 && (!scriptUrl || !/^http(s?):/.test(scriptUrl))) {
                        scriptUrl = scripts[index--].src;
                    }
                }
            }
        }
        if (!scriptUrl) {
            throw new Error("Automatic publicPath is not supported in this browser");
        }
        // Strip fragment, query string and file name, keeping the directory.
        scriptUrl = scriptUrl.replace(/#.*$/, "").replace(/\?.*$/, "").replace(/\/[^\/]+$/, "/");
        runtime.p = scriptUrl;
    })();

    runtime.b = document.baseURI || self.location.href;

    const worker = new Worker(new URL(runtime.p + runtime.u(630), runtime.b), { type: undefined });

    window.doSpeech = false;

    worker.addEventListener("message", (event) => {
        switch (event.data.status) {
            case "error":
                // Clear the flag first: the callback may start the next request.
                window.doSpeech = false;
                window.onSpeechResponse(null);
                break;
            case "complete": {
                const url = URL.createObjectURL(event.data.output);
                window.doSpeech = false;
                window.onSpeechResponse(url);
                break;
            }
        }
    });

    window.SPEAKERS = {
        "US female 1": "cmu_us_slt_arctic-wav-arctic_a0001",
        "US female 2": "cmu_us_clb_arctic-wav-arctic_a0001",
        "US male 1": "cmu_us_bdl_arctic-wav-arctic_a0003",
        "US male 2": "cmu_us_rms_arctic-wav-arctic_a0003",
        "Canadian male": "cmu_us_jmk_arctic-wav-arctic_a0002",
        "Scottish male": "cmu_us_awb_arctic-wav-arctic_b0002",
        "Indian male": "cmu_us_ksp_arctic-wav-arctic_a0007",
    };

    window.handleGenerateSpeech = (text, speaker_id = "cmu_us_slt_arctic-wav-arctic_a0001") => {
        window.doSpeech = true;
        worker.postMessage({ text: text, speaker_id: speaker_id });
    };

    // Default response handler; pages replace it with their own callback.
    window.onSpeechResponse = (url) => console.log(url);
})();
|
Loading…
Reference in New Issue