import { generateTextFromAudio } from 'backend/speech-to-text';
import { generateAudio } from 'backend/text-to-speech';
import { getAnswerfromAI } from 'backend/open-ai';

$w.onReady(function () {
    $w("#audioRecorder1").onSave(async (event) => {
        console.log(event);
        const { url } = event.data;
        await askQuestion(url);
    });
    //$w("#sendButton").onClick(createAudio);
});
async function askQuestion(url) {
    try {
        $w("#loadingAnimation1").show(); // show the loading animation while transcribing
        const textResponse = await generateTextFromAudio(url);
        console.log(textResponse);
        $w("#text34").text = textResponse.text; // display the transcribed audio
        $w("#loadingAnimation1").hide();
        $w("#text34").show();
        await createAudio(textResponse.text);
    } catch (error) {
        console.log(error);
        $w("#loadingAnimation1").hide();
        $w("#text34").text = "There was an error, please try again.";
    }
}
// Above: speech to text
// Below: text to speech
async function createAudio(voicePrompt) {
    //const prompt = $w("#userInput").value;
    const answer = await getAnswerfromAI(voicePrompt);
    $w("#text35").text = answer;
    $w("#text35").show();
    const generatedAudioData = await generateAudio(answer);
    const duration = estimateMP3Duration(generatedAudioData.sizeInBytes);
    $w("#audioPlayer").src = fixAudioURL(generatedAudioData.fileUrl, duration);
    $w("#audioPlayer").expand();
}
function estimateMP3Duration(fileSizeInBytes) {
    const averageBitrateKbps = 128; // assume an average bitrate of 128 kbps
    const fileSizeInBits = fileSizeInBytes * 8;
    const estimatedDurationInSeconds = fileSizeInBits / (averageBitrateKbps * 1000);
    return Math.ceil(estimatedDurationInSeconds);
}
function fixAudioURL(audioURL, estimatedDuration) {
    // Check if the URL contains "#duration=undefined"
    if (audioURL.includes('#duration=undefined')) {
        audioURL = audioURL.replace('#duration=undefined', `#duration=${estimatedDuration}`);
    }
    return audioURL;
}
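(For reference, the duration estimate just converts the file size to bits and divides by the assumed 128 kbps bitrate, so a 480,000-byte file, for example, works out to 480,000 × 8 / 128,000 = 30 seconds.)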
Backend text-to-speech file:
import { fetch } from 'wix-fetch';
import { getSecret } from 'wix-secrets-backend';
import axios from 'axios';
import { mediaManager } from 'wix-media-backend';

export async function generateAudio(input) {
    const endpoint = "https://api.openai.com/v1/audio/speech";
    const apiKey = await getSecret("OPENAI-API-KEY1");
    const body = {
        model: "tts-1",
        input,
        voice: "alloy",
    };
    const headers = {
        "Authorization": `Bearer ${apiKey}`,
        "Content-Type": "application/json",
    };
    try {
        const { data } = await axios.post(endpoint, body, { headers, responseType: 'arraybuffer' });
        const buffer = Buffer.from(data);
        const fileName = input.substring(0, 20).replace(/ /g, "-"); // use the first 20 characters of the answer as the file name
        return await uploadToWix(buffer, fileName);
    } catch (error) {
        console.log("Error generating audio", error);
        // Note: nothing is returned here, so the caller receives undefined when this fails.
    }
}
async function uploadToWix(buffer, fileName) {
    return mediaManager.upload(
        "/uploads/audio",
        buffer,
        `${fileName}.mp3`, {
            "mediaOptions": {
                "mimeType": "audio/mpeg",
                "mediaType": "audio"
            },
            "metadataOptions": {
                "isPrivate": false,
                "isVisitorUpload": false,
            }
        }
    );
}
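For context: the generatedAudioData object used in createAudio is whatever mediaManager.upload resolves to, which is where fileUrl and sizeInBytes come from.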
Error:
TypeError: Cannot read properties of undefined (reading 'sizeInBytes')
at eval (j9kpd.js:46:61)
at l (make namespace object:7:1)
at Generator.eval [as _invoke] (make namespace object:7:1)
at Generator.eval [as next] (make namespace object:7:1)
at c (make namespace object:7:1)
at a (make namespace object:7:1)
Interestingly, when the text is short it usually generates the speech, but when it is long I get this error. In the screenshot, the texts are my questions to OpenAI: when the OpenAI response is long I get the error, and when it is short it works most of the time.
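From reading the code, the TypeError itself seems to happen because generateAudio only logs inside its catch block and returns nothing, so createAudio ends up reading sizeInBytes on undefined whenever the OpenAI call or the upload fails. A guard like the following (just a sketch) would avoid the crash in createAudio, but it does not explain why long responses fail in the first place:

const generatedAudioData = await generateAudio(answer);
if (!generatedAudioData) {
    $w("#text35").text = "Audio generation failed, please try again.";
    return;
}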
Best,
Tunahan