human (Oct 27, 2024, 08:53 PM)
Update this code so that, instead of doing the thing with the gist URLs and the fragment hash, it provides a textarea I can use to prompt the model, and when the submit button is pressed it uses this function:
async function submitToAPI() {
    const apiKey = getAPIKey();
    if (!apiKey) {
        alert('API Key is required.');
        return;
    }
    const voice = 'alloy'; // But actually get this from a select box
    submitButton.textContent = 'Processing prompt...';
    submitButton.disabled = true;
    const prompt = promptTextarea.value;
    const payload = {
        "model": "gpt-4o-audio-preview",
        "modalities": [
            "text",
            "audio"
        ],
        "audio": {
            "voice": voice,
            "format": "wav"
        },
        "messages": [
            {
                "role": "user",
                "content": prompt
            }
        ]
    };
    try {
        const response = await fetch('https://api.openai.com/v1/chat/completions', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'Authorization': `Bearer ${apiKey}`
            },
            body: JSON.stringify(payload)
        });
        const data = await response.json();
        displayFormattedResponse(data);
    } catch (error) {
        console.error('Error:', error);
        formattedResponse.innerHTML = marked(`Error: ${error.message}`);
        jsonResponse.textContent = '';
    } finally {
        submitButton.textContent = 'Submit to API';
        submitButton.disabled = false;
    }
}
Add a select box for picking the voice - default to Alloy, but other options are: alloy, echo, fable, onyx, nova, and shimmer.
Here's the other code you need:
function getAPIKey() {
    let apiKey = localStorage.getItem('openai_api_key');
    if (!apiKey) {
        apiKey = prompt('Please enter your OpenAI API Key:');
        if (apiKey) {
            localStorage.setItem('openai_api_key', apiKey);
        }
    }
    return apiKey;
}
paste.txt
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Gist Audio Player</title>
    <style>
        body {
            font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            max-width: 800px;
            margin: 20px auto;
            padding: 0 20px;
            line-height: 1.6;
        }
        .info {
            background: #e8f4ff;
            padding: 15px;
            border-radius: 4px;
            margin: 20px 0;
            border-left: 4px solid #0066cc;
        }
        .input-group {
            display: flex;
            gap: 8px;
            margin-bottom: 20px;
        }
        input {
            flex: 1;
            padding: 8px 12px;
            font-size: 16px;
            border: 1px solid #ccc;
            border-radius: 4px;
        }
        button {
            padding: 8px 16px;
            font-size: 16px;
            background: #0066cc;
            color: white;
            border: none;
            border-radius: 4px;
            cursor: pointer;
        }
        button:disabled {
            background: #cccccc;
        }
        button:hover:not(:disabled) {
            background: #0055aa;
        }
        .error {
            color: #cc0000;
            margin: 10px 0;
        }
        .player-container {
            margin: 20px 0;
        }
        audio {
            width: 100%;
            margin: 10px 0;
        }
        .transcript {
            background: #f5f5f5;
            padding: 15px;
            border-radius: 4px;
            margin: 10px 0;
        }
        .loading {
            color: #666;
            font-style: italic;
        }
    </style>
</head>
<body>
    <div class="info">
        Note: This player expects GitHub Gists containing JSON responses from the OpenAI GPT-4 with audio preview model
        (<code>gpt-4o-audio-preview</code>). The JSON should include an audio response with base64-encoded WAV data.
    </div>
    <div class="input-group">
        <input type="url" id="gistUrl" placeholder="Enter Gist URL" aria-label="Gist URL">
        <button id="fetchBtn">Fetch</button>
    </div>
    <div id="error" class="error" style="display: none;"></div>
    <div id="playerContainer" class="player-container" style="display: none;">
        <audio id="audioPlayer" controls></audio>
        <button id="downloadBtn">Download Audio</button>
        <div id="transcript" class="transcript"></div>
    </div>
    <script>
        const gistInput = document.getElementById('gistUrl');
        const fetchBtn = document.getElementById('fetchBtn');
        const errorDiv = document.getElementById('error');
        const playerContainer = document.getElementById('playerContainer');
        const audioPlayer = document.getElementById('audioPlayer');
        const downloadBtn = document.getElementById('downloadBtn');
        const transcriptDiv = document.getElementById('transcript');

        function showError(message) {
            errorDiv.textContent = message;
            errorDiv.style.display = 'block';
            playerContainer.style.display = 'none';
        }

        function clearError() {
            errorDiv.style.display = 'none';
        }

        function extractGistId(url) {
            if (!url) return null;
            // Handle both full URLs and just IDs
            const parts = url.split('/');
            return parts[parts.length - 1];
        }

        function updateURL(gistId) {
            const newUrl = new URL(window.location);
            newUrl.searchParams.set('gist', gistId);
            history.pushState({}, '', newUrl);
        }

        async function processGistUrl(url) {
            try {
                fetchBtn.disabled = true;
                const gistId = extractGistId(url);
                if (!gistId) throw new Error('Invalid Gist URL');
                // Update URL with gist ID
                updateURL(gistId);
                // Update input field if it doesn't match
                if (gistInput.value !== url) {
                    gistInput.value = url;
                }
                // Fetch gist data
                const gistResponse = await fetch(`https://api.github.com/gists/${gistId}`);
                if (!gistResponse.ok) throw new Error('Failed to fetch Gist');
                const gistData = await gistResponse.json();
                const files = gistData.files;
                if (!files || Object.keys(files).length === 0) {
                    throw new Error('No files found in Gist');
                }
                // Get content from first file
                const firstFile = Object.values(files)[0];
                const content = JSON.parse(firstFile.content);
                // Verify this is an audio response
                if (!content?.choices?.[0]?.message?.audio?.data) {
                    throw new Error('This Gist does not contain a valid GPT-4 audio response');
                }
                // Extract audio data and transcript
                const audioData = content.choices[0].message.audio.data;
                const transcript = content.choices[0].message.audio.transcript;
                // Create audio blob and URL
                const binaryData = atob(audioData);
                const arrayBuffer = new ArrayBuffer(binaryData.length);
                const uint8Array = new Uint8Array(arrayBuffer);
                for (let i = 0; i < binaryData.length; i++) {
                    uint8Array[i] = binaryData.charCodeAt(i);
                }
                const blob = new Blob([uint8Array], { type: 'audio/wav' });
                const audioUrl = URL.createObjectURL(blob);
                // Update UI
                audioPlayer.src = audioUrl;
                transcriptDiv.textContent = transcript;
                playerContainer.style.display = 'block';
                clearError();
                // Set up download button
                downloadBtn.onclick = () => {
                    const a = document.createElement('a');
                    a.href = audioUrl;
                    a.download = 'audio.wav';
                    document.body.appendChild(a);
                    a.click();
                    document.body.removeChild(a);
                };
            } catch (error) {
                showError(error.message || 'An error occurred');
            } finally {
                fetchBtn.disabled = false;
            }
        }

        // Handle form submission
        fetchBtn.addEventListener('click', () => {
            const url = gistInput.value.trim();
            if (!url) {
                showError('Please enter a Gist URL');
                return;
            }
            processGistUrl(url);
        });

        gistInput.addEventListener('keypress', (e) => {
            if (e.key === 'Enter') {
                fetchBtn.click();
            }
        });

        // Check for gist ID in URL on page load
        window.addEventListener('load', () => {
            const params = new URLSearchParams(window.location.search);
            const gistId = params.get('gist');
            if (gistId) {
                // If it's just an ID, construct the full URL
                const fullUrl = `https://gist.github.com/${gistId}`;
                gistInput.value = fullUrl;
                processGistUrl(fullUrl);
            }
        });

        // Handle browser back/forward
        window.addEventListener('popstate', () => {
            const params = new URLSearchParams(window.location.search);
            const gistId = params.get('gist');
            if (gistId) {
                const fullUrl = `https://gist.github.com/${gistId}`;
                processGistUrl(fullUrl);
            } else {
                // Clear everything if there's no gist ID
                gistInput.value = '';
                playerContainer.style.display = 'none';
                clearError();
            }
        });
    </script>
</body>
</html>
assistant (Oct 27, 2024, 08:53 PM)
I'll help modify the code to create a text-to-speech interface with voice selection. Here's an artifact with the updated HTML and JavaScript code.
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>OpenAI Text-to-Speech</title>
    <style>
        body {
            font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            max-width: 800px;
            margin: 20px auto;
            padding: 0 20px;
            line-height: 1.6;
        }
        .info {
            background: #e8f4ff;
            padding: 15px;
            border-radius: 4px;
            margin: 20px 0;
            border-left: 4px solid #0066cc;
        }
        .input-group {
            display: flex;
            flex-direction: column;
            gap: 12px;
            margin-bottom: 20px;
        }
        textarea {
            width: 100%;
            min-height: 150px;
            padding: 12px;
            font-size: 16px;
            border: 1px solid #ccc;
            border-radius: 4px;
            resize: vertical;
        }
        select {
            padding: 8px 12px;
            font-size: 16px;
            border: 1px solid #ccc;
            border-radius: 4px;
            max-width: 200px;
        }
        button {
            padding: 8px 16px;
            font-size: 16px;
            background: #0066cc;
            color: white;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            max-width: 200px;
        }
        button:disabled {
            background: #cccccc;
        }
        button:hover:not(:disabled) {
            background: #0055aa;
        }
        .error {
            color: #cc0000;
            margin: 10px 0;
        }
        .player-container {
            margin: 20px 0;
        }
        audio {
            width: 100%;
            margin: 10px 0;
        }
        .transcript {
            background: #f5f5f5;
            padding: 15px;
            border-radius: 4px;
            margin: 10px 0;
        }
        .loading {
            color: #666;
            font-style: italic;
        }
    </style>
</head>
<body>
    <div class="info">
        Enter your text below to convert it to speech using OpenAI's text-to-speech API.
        Choose from different voice options and listen to the result.
    </div>
    <div class="input-group">
        <textarea id="promptInput" placeholder="Enter your text here..." aria-label="Input text"></textarea>
        <select id="voiceSelect" aria-label="Voice selection">
            <option value="alloy">Alloy</option>
            <option value="echo">Echo</option>
            <option value="fable">Fable</option>
            <option value="onyx">Onyx</option>
            <option value="nova">Nova</option>
            <option value="shimmer">Shimmer</option>
        </select>
        <button id="submitBtn">Generate Speech</button>
    </div>
    <div id="error" class="error" style="display: none;"></div>
    <div id="playerContainer" class="player-container" style="display: none;">
        <audio id="audioPlayer" controls></audio>
        <button id="downloadBtn">Download Audio</button>
        <div id="transcript" class="transcript"></div>
    </div>
    <script>
        const promptInput = document.getElementById('promptInput');
        const voiceSelect = document.getElementById('voiceSelect');
        const submitBtn = document.getElementById('submitBtn');
        const errorDiv = document.getElementById('error');
        const playerContainer = document.getElementById('playerContainer');
        const audioPlayer = document.getElementById('audioPlayer');
        const downloadBtn = document.getElementById('downloadBtn');
        const transcriptDiv = document.getElementById('transcript');

        function showError(message) {
            errorDiv.textContent = message;
            errorDiv.style.display = 'block';
            playerContainer.style.display = 'none';
        }

        function clearError() {
            errorDiv.style.display = 'none';
        }

        function getAPIKey() {
            let apiKey = localStorage.getItem('openai_api_key');
            if (!apiKey) {
                apiKey = prompt('Please enter your OpenAI API Key:');
                if (apiKey) {
                    localStorage.setItem('openai_api_key', apiKey);
                }
            }
            return apiKey;
        }

        async function submitToAPI() {
            const apiKey = getAPIKey();
            if (!apiKey) {
                alert('API Key is required.');
                return;
            }
            const voice = voiceSelect.value;
            submitBtn.textContent = 'Processing...';
            submitBtn.disabled = true;
            const prompt = promptInput.value;
            const payload = {
                "model": "gpt-4o-audio-preview",
                "modalities": [
                    "text",
                    "audio"
                ],
                "audio": {
                    "voice": voice,
                    "format": "wav"
                },
                "messages": [
                    {
                        "role": "user",
                        "content": prompt
                    }
                ]
            };
            try {
                const response = await fetch('https://api.openai.com/v1/chat/completions', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                        'Authorization': `Bearer ${apiKey}`
                    },
                    body: JSON.stringify(payload)
                });
                const data = await response.json();
                if (!response.ok) {
                    throw new Error(data.error?.message || 'API request failed');
                }
                // Guard against responses that don't include an audio part
                if (!data.choices?.[0]?.message?.audio?.data) {
                    throw new Error('The response did not contain audio data');
                }
                // Extract audio data and transcript
                const audioData = data.choices[0].message.audio.data;
                const transcript = data.choices[0].message.audio.transcript;
                // Create audio blob and URL
                const binaryData = atob(audioData);
                const arrayBuffer = new ArrayBuffer(binaryData.length);
                const uint8Array = new Uint8Array(arrayBuffer);
                for (let i = 0; i < binaryData.length; i++) {
                    uint8Array[i] = binaryData.charCodeAt(i);
                }
                const blob = new Blob([uint8Array], { type: 'audio/wav' });
                const audioUrl = URL.createObjectURL(blob);
                // Update UI
                audioPlayer.src = audioUrl;
                transcriptDiv.textContent = transcript;
                playerContainer.style.display = 'block';
                clearError();
                // Set up download button
                downloadBtn.onclick = () => {
                    const a = document.createElement('a');
                    a.href = audioUrl;
                    a.download = 'speech.wav';
                    document.body.appendChild(a);
                    a.click();
                    document.body.removeChild(a);
                };
            } catch (error) {
                console.error('Error:', error);
                showError(error.message || 'An error occurred');
            } finally {
                submitBtn.textContent = 'Generate Speech';
                submitBtn.disabled = false;
            }
        }

        // Handle form submission
        submitBtn.addEventListener('click', submitToAPI);

        // keydown rather than the deprecated keypress event, so Ctrl+Enter is detected reliably
        promptInput.addEventListener('keydown', (e) => {
            if (e.key === 'Enter' && e.ctrlKey) {
                submitToAPI();
            }
        });
    </script>
</body>
</html>
I've created a new interface that:
- Replaces the Gist URL input with a textarea for entering the text to be converted to speech
- Adds a select box for choosing the voice, with all the requested options (alloy, echo, fable, onyx, nova, and shimmer)
- Integrates the provided API submission code and API key management
- Maintains the audio playback and download functionality
- Adds error handling and loading states
Key features:
- Large textarea for entering text
- Voice selection dropdown with all available options
- Submit button that shows loading state while processing
- Audio player with download button
- Transcript display below the audio player
- Error messages when something goes wrong
- API key storage in localStorage
- Ctrl+Enter shortcut for submitting
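For reference, the page only relies on two fields nested under the first choice of the chat completions response. A minimal sketch of that shape, based on the extraction code above (the real response contains additional fields, which this page ignores, and the sample values are illustrative):

```javascript
// Minimal sketch of the response fields the page reads.
// Only `data` and `transcript` are used; everything else is ignored.
const exampleResponse = {
    choices: [
        {
            message: {
                audio: {
                    data: "UklGR...",          // base64-encoded WAV bytes (truncated here)
                    transcript: "Hello there!" // text rendering of the spoken audio
                }
            }
        }
    ]
};
```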
To use it:
- Enter your text in the textarea
- Select your preferred voice from the dropdown
- Click "Generate Speech" or press Ctrl+Enter
- The first time you use it, it will prompt for your OpenAI API key (see below for resetting a stored key)
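One note on the stored key: the page doesn't expose a way to change it once saved. If you ever need to switch keys, one option is a quick console one-liner (this isn't part of the page; it just removes the same localStorage entry the code above writes):

```javascript
// Remove the cached key so getAPIKey() prompts again on the next submit
localStorage.removeItem('openai_api_key');
```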
Let me know if you'd like any adjustments to the interface or functionality!