human (Dec 17, 2024, 02:02 PM)
Turn this example code into a full artifact
async function createRealtimeSession(inStream, outEl, token) {
  const pc = new RTCPeerConnection();
  pc.ontrack = e => outEl.srcObject = e.streams[0];
  pc.addTrack(inStream.getTracks()[0]);
  const offer = await pc.createOffer();
  await pc.setLocalDescription(offer);
  const headers = {
    Authorization: `Bearer ${token}`,
    'Content-Type': 'application/sdp'
  };
  const opts = { method: 'POST', body: offer.sdp, headers };
  const resp = await fetch('https://api.openai.com/v1/realtime', opts);
  await pc.setRemoteDescription({ type: 'answer', sdp: await resp.text() });
  return pc;
}
assistant (Dec 17, 2024, 02:02 PM)
I'll help create a complete artifact with this WebRTC code using vanilla HTML, JavaScript, and CSS.
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Real-time Session</title>
<style>
* {
box-sizing: border-box;
}
body {
font-family: Helvetica, Arial, sans-serif;
margin: 0;
padding: 20px;
background: #f5f5f5;
}
.container {
max-width: 800px;
margin: 0 auto;
}
.video-container {
position: relative;
width: 100%;
background: #000;
border-radius: 8px;
overflow: hidden;
margin: 20px 0;
}
video {
width: 100%;
display: block;
}
.controls {
margin: 20px 0;
}
input {
width: 100%;
padding: 8px;
font-size: 16px;
border: 1px solid #ddd;
border-radius: 4px;
margin-bottom: 10px;
}
button {
background: #007bff;
color: white;
border: none;
padding: 10px 20px;
font-size: 16px;
border-radius: 4px;
cursor: pointer;
}
button:disabled {
background: #ccc;
cursor: not-allowed;
}
.status {
margin-top: 10px;
padding: 10px;
border-radius: 4px;
}
.error {
background: #fee;
color: #c00;
}
.success {
background: #efe;
color: #0a0;
}
</style>
</head>
<body>
<div class="container">
<h1>Real-time Session</h1>
<div class="video-container">
<video id="localVideo" autoplay playsinline muted></video>
</div>
<div class="controls">
<input type="text" id="tokenInput" placeholder="Enter your API token">
<button id="startButton">Start Session</button>
</div>
<div id="status" class="status"></div>
</div>
<script type="module">
async function createRealtimeSession(inStream, outEl, token) {
const pc = new RTCPeerConnection()
pc.ontrack = e => outEl.srcObject = e.streams[0]
pc.addTrack(inStream.getTracks()[0])
const offer = await pc.createOffer()
await pc.setLocalDescription(offer)
const headers = {
Authorization: `Bearer ${token}`,
'Content-Type': 'application/sdp'
}
const opts = {
method: 'POST',
body: offer.sdp,
headers
}
const resp = await fetch('https://api.openai.com/v1/realtime', opts)
if (!resp.ok) {
  throw new Error(`Realtime API returned ${resp.status}`)
}
await pc.setRemoteDescription({
type: 'answer',
sdp: await resp.text()
})
return pc
}
const startButton = document.getElementById('startButton')
const tokenInput = document.getElementById('tokenInput')
const localVideo = document.getElementById('localVideo')
const status = document.getElementById('status')
let peerConnection = null
async function startSession() {
try {
status.className = 'status'
status.textContent = 'Requesting camera access...'
const stream = await navigator.mediaDevices.getUserMedia({
video: true,
audio: true
})
localVideo.srcObject = stream
status.textContent = 'Establishing connection...'
peerConnection = await createRealtimeSession(
stream,
localVideo,
tokenInput.value
)
status.className = 'status success'
status.textContent = 'Session established successfully!'
startButton.textContent = 'Stop Session'
} catch (err) {
status.className = 'status error'
status.textContent = `Error: ${err.message}`
console.error('Session error:', err)
stopSession()
}
}
function stopSession() {
if (peerConnection) {
peerConnection.close()
peerConnection = null
}
if (localVideo.srcObject) {
localVideo.srcObject.getTracks().forEach(track => track.stop())
localVideo.srcObject = null
}
startButton.textContent = 'Start Session'
}
startButton.addEventListener('click', () => {
if (peerConnection) {
stopSession()
} else {
if (!tokenInput.value) {
status.className = 'status error'
status.textContent = 'Please enter an API token'
return
}
startSession()
}
})
// Cleanup on page unload
window.addEventListener('beforeunload', stopSession)
</script>
</body>
</html>
I've created a complete HTML artifact that:
- Includes a clean, responsive layout with proper styling
- Has error handling and status messages
- Manages the WebRTC lifecycle properly (an optional connection-state sketch follows this list)
- Includes cleanup on page unload
- Has proper camera/mic permission handling
- Provides visual feedback about the connection state
- Allows stopping/starting the session
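As an optional hardening step (not included in the artifact above), createRealtimeSession could also watch the peer connection's state and tear the session down if the link drops. A sketch, reusing the pc, status, and stopSession names from the artifact:
pc.onconnectionstatechange = () => {
  // 'disconnected' can be transient; treating it as fatal here is a simplification
  if (pc.connectionState === 'failed' || pc.connectionState === 'disconnected') {
    status.className = 'status error'
    status.textContent = `Connection ${pc.connectionState}`
    stopSession()
  }
}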
The code follows the requested formatting:
- CSS uses 2-space indentation and starts with the box-sizing reset
- JavaScript uses 2-space indentation and is in a module script tag
- Font sizes for inputs are 16px
- Helvetica is the preferred font
- Uses vanilla HTML/JS/CSS with no external dependencies
You can test it by:
- Entering an API token
- Clicking "Start Session" to begin (note the secure-context caveat after this list)
- Watching the local video feed appear
- Checking the connection status shown below
- Clicking "Stop Session" to end the connection and clean up resources
Would you like me to make any adjustments to the implementation?
human (Dec 17, 2024, 02:04 PM)
This is for audio only. Add a thing where any text messages coming in on the stream are shown in an auto-scrolling textarea. Audio returned by that stream should play too
assistant (Dec 17, 2024, 02:04 PM)
I'll modify the artifact to handle audio-only streaming and add a text message display area.
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Real-time Audio Session</title>
<style>
* {
box-sizing: border-box;
}
body {
font-family: Helvetica, Arial, sans-serif;
margin: 0;
padding: 20px;
background: #f5f5f5;
}
.container {
max-width: 800px;
margin: 0 auto;
}
.messages {
width: 100%;
height: 300px;
background: white;
border: 1px solid #ddd;
border-radius: 4px;
padding: 10px;
margin: 20px 0;
font-size: 16px;
font-family: monospace;
resize: vertical;
overflow-y: scroll;
}
.audio-indicator {
display: inline-block;
width: 20px;
height: 20px;
border-radius: 50%;
background: #ccc;
margin-right: 10px;
vertical-align: middle;
}
.audio-indicator.active {
background: #4CAF50;
animation: pulse 1s infinite;
}
@keyframes pulse {
0% { opacity: 1; }
50% { opacity: 0.5; }
100% { opacity: 1; }
}
.controls {
margin: 20px 0;
}
input {
width: 100%;
padding: 8px;
font-size: 16px;
border: 1px solid #ddd;
border-radius: 4px;
margin-bottom: 10px;
}
button {
background: #007bff;
color: white;
border: none;
padding: 10px 20px;
font-size: 16px;
border-radius: 4px;
cursor: pointer;
}
button:disabled {
background: #ccc;
cursor: not-allowed;
}
.status {
margin-top: 10px;
padding: 10px;
border-radius: 4px;
}
.error {
background: #fee;
color: #c00;
}
.success {
background: #efe;
color: #0a0;
}
</style>
</head>
<body>
<div class="container">
<h1>
<span id="audioIndicator" class="audio-indicator"></span>
Real-time Audio Session
</h1>
<div class="messages" id="messages"></div>
<div class="controls">
<input type="text" id="tokenInput" placeholder="Enter your API token">
<button id="startButton">Start Session</button>
</div>
<div id="status" class="status"></div>
</div>
<script type="module">
// Text decoder for incoming messages
const decoder = new TextDecoder()
async function createRealtimeSession(inStream, token) {
const pc = new RTCPeerConnection()
// Handle incoming audio
pc.ontrack = e => {
  const audio = new Audio()
  audio.srcObject = e.streams[0]
  // play() returns a promise; browsers may block autoplay until the user interacts
  audio.play().catch(err => console.error('Audio playback failed:', err))
}
// Handle incoming text messages via data channel
pc.ondatachannel = event => {
const channel = event.channel
channel.onmessage = msg => {
  // data channel text messages arrive as strings; only binary payloads need decoding
  const text = typeof msg.data === 'string' ? msg.data : decoder.decode(msg.data)
  appendMessage(text)
}
}
pc.addTrack(inStream.getTracks()[0])
const offer = await pc.createOffer()
await pc.setLocalDescription(offer)
const headers = {
Authorization: `Bearer ${token}`,
'Content-Type': 'application/sdp'
}
const opts = {
method: 'POST',
body: offer.sdp,
headers
}
const resp = await fetch('https://api.openai.com/v1/realtime', opts)
if (!resp.ok) {
  throw new Error(`Realtime API returned ${resp.status}`)
}
await pc.setRemoteDescription({
type: 'answer',
sdp: await resp.text()
})
return pc
}
const startButton = document.getElementById('startButton')
const tokenInput = document.getElementById('tokenInput')
const messages = document.getElementById('messages')
const status = document.getElementById('status')
const audioIndicator = document.getElementById('audioIndicator')
let peerConnection = null
let audioContext = null
let audioStream = null
function appendMessage(text) {
const div = document.createElement('div')
div.textContent = text
messages.appendChild(div)
messages.scrollTop = messages.scrollHeight
}
// Audio visualization
function setupAudioVisualization(stream) {
audioContext = new AudioContext()
const source = audioContext.createMediaStreamSource(stream)
const analyzer = audioContext.createAnalyser()
analyzer.fftSize = 256
source.connect(analyzer)
const bufferLength = analyzer.frequencyBinCount
const dataArray = new Uint8Array(bufferLength)
function updateIndicator() {
if (!audioContext) return
analyzer.getByteFrequencyData(dataArray)
const average = dataArray.reduce((a, b) => a + b) / bufferLength
audioIndicator.classList.toggle('active', average > 30)
requestAnimationFrame(updateIndicator)
}
updateIndicator()
}
async function startSession() {
try {
status.className = 'status'
status.textContent = 'Requesting microphone access...'
audioStream = await navigator.mediaDevices.getUserMedia({
audio: true,
video: false
})
setupAudioVisualization(audioStream)
status.textContent = 'Establishing connection...'
peerConnection = await createRealtimeSession(
audioStream,
tokenInput.value
)
status.className = 'status success'
status.textContent = 'Session established successfully!'
startButton.textContent = 'Stop Session'
} catch (err) {
status.className = 'status error'
status.textContent = `Error: ${err.message}`
console.error('Session error:', err)
stopSession()
}
}
function stopSession() {
if (peerConnection) {
peerConnection.close()
peerConnection = null
}
if (audioContext) {
audioContext.close()
audioContext = null
}
if (audioStream) {
audioStream.getTracks().forEach(track => track.stop())
audioStream = null
}
audioIndicator.classList.remove('active')
startButton.textContent = 'Start Session'
}
startButton.addEventListener('click', () => {
if (peerConnection) {
stopSession()
} else {
if (!tokenInput.value) {
status.className = 'status error'
status.textContent = 'Please enter an API token'
return
}
startSession()
}
})
// Cleanup on page unload
window.addEventListener('beforeunload', stopSession)
</script>
</body>
</html>
I've updated the artifact with these key changes:
- Removed video elements and made it audio-only
- Added a scrolling message display area that automatically scrolls to new messages
- Added an audio visualization indicator that pulses when sound is detected
- Added handling for incoming text messages via WebRTC data channel (see the caveat after this list)
- Set up proper audio output routing for the remote audio stream
- Added proper cleanup for audio context and streams
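A hedge on the data channel handling: some realtime services expect the client to open the data channel itself, before createOffer, so it is negotiated in the SDP; in that case ondatachannel never fires. If that applies here, a sketch of the client-created variant (the 'oai-events' label follows OpenAI's WebRTC examples, but treat both the label and the event format as assumptions):
const channel = pc.createDataChannel('oai-events')
channel.onmessage = msg => {
  // messages here are typically JSON event strings
  appendMessage(typeof msg.data === 'string' ? msg.data : decoder.decode(msg.data))
}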
Key features:
- Messages appear in a scrolling div with monospace font for better readability
- Audio activity is shown with a pulsing indicator dot
- Incoming audio plays automatically, subject to browser autoplay policies (see the playback note after this list)
- All audio resources are properly cleaned up on stop/unload
- Messages persist until the session is stopped
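A design note on playback: the artifact creates a fresh Audio() element for every incoming track. If the session ever renegotiates and delivers more than one track, routing everything through a single persistent element avoids stacking hidden players; a hypothetical variant of the ontrack handler:
const remoteAudio = document.createElement('audio')
remoteAudio.autoplay = true
document.body.appendChild(remoteAudio)
pc.ontrack = e => {
  // reuse one element; the newest stream replaces the old one
  remoteAudio.srcObject = e.streams[0]
}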
The interface includes:
- A pulsing indicator to show audio activity
- A scrollable message area showing incoming text
- Token input and start/stop controls
- Status messages for connection state
Would you like me to modify anything about how the messages or audio are handled?