import { generateTextFromAudio } from 'backend/speech-to-text';
import { generateAudio } from 'backend/text-to-speech';
import { getAnswerfromAI } from 'backend/open-ai';

$w.onReady(function () {
    $w("#audioRecorder1").onSave(async (event) => {
        console.log(event);
        const { url } = event.data;
        await askQuestion(url);
    });
    //$w("#sendButton").onClick(createAudio);
});
async function askQuestion(url) {
    try {
        $w("#loadingAnimation1").show(); // show the loading animation while transcribing
        const textResponse = await generateTextFromAudio(url);
        console.log(textResponse);
        $w("#text34").text = textResponse.text; // display the transcribed audio
        $w("#loadingAnimation1").hide();
        $w("#text34").show();
        await createAudio(textResponse.text);
    } catch (error) {
        console.log(error);
        $w("#loadingAnimation1").hide();
        $w("#text34").text = "There was an error, please try again.";
    }
}
// Above: speech to text
// Below: text to speech
async function createAudio(voicePrompt) {
    //const prompt = $w("#userInput").value;
    const answer = await getAnswerfromAI(voicePrompt);
    $w("#text35").text = answer;
    $w("#text35").show();
    const generatedAudioData = await generateAudio(answer);
    const duration = estimateMP3Duration(generatedAudioData.sizeInBytes);
    $w("#audioPlayer").src = fixAudioURL(generatedAudioData.fileUrl, duration);
    $w("#audioPlayer").expand();
}
function estimateMP3Duration(fileSizeInBytes) {
    const averageBitrateKbps = 128; // assume an average bitrate of 128 kbps
    const fileSizeInBits = fileSizeInBytes * 8;
    const estimatedDurationInSeconds = fileSizeInBits / (averageBitrateKbps * 1000);
    return Math.ceil(estimatedDurationInSeconds);
}
function fixAudioURL(audioURL, estimatedDuration) {
    // Check if the URL contains "#duration=undefined"
    if (audioURL.includes('#duration=undefined')) {
        audioURL = audioURL.replace('#duration=undefined', `#duration=${estimatedDuration}`);
    }
    return audioURL;
}
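(For reference, the duration estimate just converts the file size to bits and divides by the assumed 128 kbps bitrate, so a 480,000-byte file, for example, works out to 480,000 × 8 / 128,000 = 30 seconds.)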
Backend text-to-speech file:
import { fetch } from 'wix-fetch';
import { getSecret } from 'wix-secrets-backend';
import axios from 'axios';
import { mediaManager } from 'wix-media-backend';

export async function generateAudio(input) {
    const endpoint = "https://api.openai.com/v1/audio/speech";
    const apiKey = await getSecret("OPENAI-API-KEY1");
    const body = {
        model: "tts-1",
        input,
        voice: "alloy",
    };
    const headers = {
        "Authorization": `Bearer ${apiKey}`,
        "Content-Type": "application/json",
    };
    try {
        const { data } = await axios.post(endpoint, body, { headers, responseType: 'arraybuffer' });
        const buffer = Buffer.from(data);
        const fileName = input.substring(0, 20).replace(/ /g, "-"); // use the first 20 characters of the answer as the file name
        return await uploadToWix(buffer, fileName);
    } catch (error) {
        console.log("Error generating audio", error);
        // Note: nothing is returned here, so the caller receives undefined when this fails.
    }
}
async function uploadToWix(buffer, fileName) {
    return mediaManager.upload(
        "/uploads/audio",
        buffer,
        `${fileName}.mp3`, {
            "mediaOptions": {
                "mimeType": "audio/mpeg",
                "mediaType": "audio"
            },
            "metadataOptions": {
                "isPrivate": false,
                "isVisitorUpload": false,
            }
        }
    );
}
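For context: the generatedAudioData object used in createAudio is whatever mediaManager.upload resolves to, which is where fileUrl and sizeInBytes come from.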
Error:
TypeError: Cannot read properties of undefined (reading 'sizeInBytes')
at eval (j9kpd.js:46:61)
at l (make namespace object:7:1)
at Generator.eval [as _invoke] (make namespace object:7:1)
at Generator.eval [as next] (make namespace object:7:1)
at c (make namespace object:7:1)
at a (make namespace object:7:1)
Interestingly, when the text is short it usually generates the speech, but when it is long I get this error. In the screenshot, the texts are my questions to OpenAI: when the OpenAI response is long I get the error, and when it is short it works most of the time.
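From reading the code, the TypeError itself seems to happen because generateAudio only logs inside its catch block and returns nothing, so createAudio ends up reading sizeInBytes on undefined whenever the OpenAI call or the upload fails. A guard like the following (just a sketch) would avoid the crash in createAudio, but it does not explain why long responses fail in the first place:

const generatedAudioData = await generateAudio(answer);
if (!generatedAudioData) {
    $w("#text35").text = "Audio generation failed, please try again.";
    return;
}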
Best,
Tunahan