CARVIEW |
Select Language
HTTP/2 200
server: GitHub.com
content-type: text/html; charset=utf-8
last-modified: Mon, 22 Sep 2025 15:25:21 GMT
access-control-allow-origin: *
etag: W/"68d16a61-14c0"
expires: Fri, 10 Oct 2025 20:10:11 GMT
cache-control: max-age=600
content-encoding: gzip
x-proxy-cache: MISS
x-github-request-id: AFD2:3FE33C:AFAA:D7E1:68E965BE
accept-ranges: bytes
age: 0
date: Fri, 10 Oct 2025 20:00:11 GMT
via: 1.1 varnish
x-served-by: cache-bom-vanm7210085-BOM
x-cache: MISS
x-cache-hits: 0
x-timer: S1760126412.597568,VS0,VE324
vary: Accept-Encoding
x-fastly-request-id: a1b9440c49ee61b3f292f27b7ad00cffb6fb7a3d
content-length: 1906
SSML JSON Schema
The Pronunciation Task Force develops specifications for hypertext markup language (HTML) author control of text-to-speech (TTS) presentation.
Appendix A. SSML JSON Schema
The JSON schema defines the specific SSML functions, properties, and values recommended for implementation in this proposal.
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://ets-research.org/ia11ylab/ia/json/ssml-json-schema-w3cptf.json",
"title": "SSML as a single attribute for inclusion in HTML",
"description": "JSON structure representing each SSML element as a JSON object. The SSML properties are derived from https://www.w3.org/TR/speech-synthesis11/. Several elements are excluded: mark, speak, p, w and the desc attribute. Author: M. Hakkinen - ETS",
"type": "object",
"properties": {
"say-as": {
  "description": "The say-as function: how the contained text is to be interpreted and spoken",
  "type": "object",
  "properties": {
    "interpret-as": {
      "type": "string",
      "enum": ["date", "time", "telephone", "characters", "cardinal", "ordinal"]
    },
    "format": { "type": "string" },
    "detail": { "type": "string" }
  }
},
"phoneme": {
  "type": "object",
  "description": "The Phoneme Function",
  "properties": {
    "ph": {
      "type": "string"
    },
    "alphabet": {
      "type": "string",
      "enum": [
        "ipa",
        "x-sampa"
      ]
    }
  }
},
"sub": {
  "type": "object",
  "description": "sub function",
  "properties": {
    "alias": {
      "type": "string"
    }
  }
},
"voice": {
  "type": "object",
  "description": "voice function",
  "properties": {
    "gender": {
      "type": "string",
      "enum": [
        "female",
        "male",
        "neutral"
      ]
    },
    "age": {
      "type": "integer"
    },
    "variant": {
      "type": "string"
    },
    "name": {
      "type": "string"
    },
    "languages": {
      "type": "string"
    }
  }
},
"emphasis": {
  "description": "speech emphasis level",
  "type": "object",
  "properties": {
    "level": {
      "type": "string",
      "enum": ["strong", "moderate", "none", "reduced"]
    },
    "time": {
      "type": "string",
      "pattern": "^-?(0|[1-9]\\d*)?(\\.\\d+)?(?<=\\d)(ms|s)$"
    }
  }
},
"prosody": {
  "description": "speech prosody",
  "type": "object",
  "properties": {
    "pitch": {
      "type": "string",
      "pattern": "^(x-low|low|medium|high|x-high|default|-?(0|[1-9]\\d*)?(\\.\\d+)?(?<=\\d)Hz)$"
    },
    "contour": { "type": "string" },
    "range": {
      "type": "string",
      "pattern": "^(x-low|low|medium|high|x-high|default|-?(0|[1-9]\\d*)?(\\.\\d+)?(?<=\\d)Hz)$"
    },
    "rate": {
      "type": "string",
      "pattern": "^(x-slow|slow|medium|fast|x-fast|default|-?(0|[1-9]\\d*)?(\\.\\d+)?(?<=\\d)%)$"
    },
    "duration": {
      "type": "string",
      "pattern": "^-?(0|[1-9]\\d*)?(\\.\\d+)?(?<=\\d)(ms|s)$"
    },
    "volume": {
      "type": "string",
      "pattern": "^(silent|x-soft|soft|medium|loud|x-loud|default|[+-]?(0|[1-9]\\d*)?(\\.\\d+)?(?<=\\d)dB)$"
    }
  }
},
"break": {
  "description": "break - insert a timed pause",
  "type": "object",
  "properties": {
    "strength": {
      "type": "string",
      "enum": ["none", "x-weak", "weak", "medium", "strong", "x-strong"]
    },
    "time": {
      "type": "string",
      "pattern": "^-?(0|[1-9]\\d*)?(\\.\\d+)?(?<=\\d)(ms|s)$"
    }
  }
},
"audio": {
  "description": "audio element used to insert audio file into speech stream",
  "type": "object",
  "properties": {
    "src": {
      "type": "string",
      "format": "uri-reference"
    },
    "fetchtimeout": {
      "type": "string",
      "pattern": "^-?(0|[1-9]\\d*)?(\\.\\d+)?(?<=\\d)(ms|s)$"
    },
    "fetchhint": {
      "type": "string",
      "enum": ["safe", "prefetch"]
    },
    "maxage": { "type": "string" },
    "maxstale": { "type": "string" },
    "clipBegin": {
      "type": "string",
      "pattern": "^-?(0|[1-9]\\d*)?(\\.\\d+)?(?<=\\d)(ms|s)$"
    },
    "clipEnd": {
      "type": "string",
      "pattern": "^-?(0|[1-9]\\d*)?(\\.\\d+)?(?<=\\d)(ms|s)$"
    },
    "repeatCount": { "type": "integer" },
    "repeatDur": {
      "type": "string",
      "pattern": "^-?(0|[1-9]\\d*)?(\\.\\d+)?(?<=\\d)(ms|s)$"
    },
    "soundLevel": {
      "type": "string",
      "pattern": "^[+-]?(0|[1-9]\\d*)?(\\.\\d+)?(?<=\\d)dB$"
    },
    "speed": {
      "type": "string",
      "pattern": "^((0|[1-9]\\d*)?(\\.\\d+)?(?<=\\d)%)$"
    }
  }
}
}
}