Model Loader API
- This version:
- https://webmachinelearning.github.io/model-loader/
- Issue Tracking:
- GitHub
- Editor:
- Jonathan Bingham (Google Inc.)
- Explainer:
- explainer.md
Copyright © 2024 the Contributors to the Model Loader API Specification, published by the Web Machine Learning Community Group under the W3C Community Contributor License Agreement (CLA). A human-readable summary is available.
Abstract
This document describes an API to load a custom pre-trained machine learning model.
Status of this document
This specification was published by the Web Machine Learning Community Group. It is not a W3C Standard nor is it on the W3C Standards Track. Please note that under the W3C Community Contributor License Agreement (CLA) there is a limited opt-out and other conditions apply. Learn more about W3C Community and Business Groups.
This incubation is on pause; see the discussion for the latest updates.
1. Introduction
For the introduction and use cases, please see the explainer.md.
For illustration purposes, the API and examples use the TF Lite flatbuffer format.
2. API
enum MLModelFormat {
  // Tensorflow-lite flatbuffer.
  "tflite"
};

enum MLDevicePreference {
  // Let the backend select the most suitable device.
  "auto",
  // The backend will use the GPU to do model inference. If some operator is not
  // supported by the GPU, it will fall back to the CPU.
  "gpu",
  // The backend will use the CPU to do model inference.
  "cpu"
};

enum MLPowerPreference {
  // Let the backend select the most suitable behavior.
  "auto",
  // Prioritizes execution speed over power consumption.
  "high-performance",
  // Prioritizes power consumption over other considerations such as execution
  // speed.
  "low-power",
};

dictionary MLContextOptions {
  // Preferred kind of device to use.
  MLDevicePreference devicePreference = "auto";
  // Preference as related to power consumption.
  MLPowerPreference powerPreference = "auto";
  // Model format for the model loader API.
  MLModelFormat modelFormat = "tflite";
  // Number of threads to use.
  // "0" means the backend can determine it automatically.
  unsigned long numThreads = 0;
};

[Exposed=Window]
interface ML {
  Promise<MLContext> createContext(optional MLContextOptions options = {});
};

enum MLDataType {
  // "Unknown" doesn’t mean "unsupported". The backend can support more types
  // than are explicitly listed here (e.g., TFLite has complex numbers). We
  // treat them as "unknown" to avoid exposing too many details of the
  // backends from the beginning.
  "unknown",
  "int64",
  "uint64",
  "float64",
  "int32",
  "uint32",
  "float32",
  "int16",
  "uint16",
  "float16",
  "int8",
  "uint8",
  "bool",
};

dictionary MLTensor {
  required ArrayBufferView data;
  required sequence<unsigned long> dimensions;
};

dictionary MLTensorInfo {
  required DOMString name;
  required MLDataType type;
  required sequence<unsigned long> dimensions;
};

[SecureContext, Exposed=Window]
interface MLModel {
  Promise<record<DOMString, MLTensor>> compute(record<DOMString, MLTensor> inputs);
  sequence<MLTensorInfo> inputs();
  sequence<MLTensorInfo> outputs();
};

[Exposed=Window]
interface MLModelLoader {
  constructor(MLContext context);
  Promise<MLModel> load(ArrayBuffer modelBuffer);
};
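The following non-normative sketch illustrates how the inputs() and outputs() metadata accessors defined above might be used to discover a model's expected tensors before calling compute(). The model URL is hypothetical.

// Non-normative sketch: inspect a loaded model's tensor metadata.
const context = await navigator.ml.createContext({ modelFormat: "tflite" });
const loader = new MLModelLoader(context);
const modelBuffer = await fetch('https://path/to/model/file')
    .then(response => response.arrayBuffer());
const model = await loader.load(modelBuffer);

// inputs() and outputs() each return a sequence of MLTensorInfo, so the page
// can discover tensor names, data types, and shapes before computing.
for (const info of model.inputs()) {
  console.log(`input "${info.name}": ${info.type}, shape [${info.dimensions}]`);
}
for (const info of model.outputs()) {
  console.log(`output "${info.name}": ${info.type}, shape [${info.dimensions}]`);
}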
3. Examples
// First, create an MLContext. This is consistent with the WebNN API, and we
// add two new fields, "numThreads" and "modelFormat".
const context = await navigator.ml.createContext({
  devicePreference: "cpu",
  powerPreference: "low-power",
  numThreads: 0,        // The default 0 means "decide automatically".
  modelFormat: "tflite"
});

// Then create the model loader using the ML context.
loader = new MLModelLoader(context);

// In the first version, we only support loading models from ArrayBuffers. We
// believe this covers most of the use cases. Web developers can download the
// model, e.g., by the fetch API. We can add new "load" functions in the future
// if they are really needed.
const modelUrl = 'https://path/to/model/file';
const modelBuffer = await fetch(modelUrl)
    .then(response => response.arrayBuffer());

// Load the model.
model = await loader.load(modelBuffer);

// Use the `model.compute` function to get the output of the model from some
// inputs. Example ways of using this function include:
//
// 1. When there is only one input tensor of the model, one can simply input the
// tensor, without specifying its name (the user can still designate this input
// tensor by name if they like).
z = await model.compute({ data: new Float32Array([10]),
                          dimensions: [1] });

// 2. When there are multiple input tensors, the user has to designate the
// input tensors by their names.
z = await model.compute({ x: { data: new Float32Array([10]),
                               dimensions: [1] },
                          y: { data: new Float32Array([20]),
                               dimensions: [1] } });

// 3. The caller can also specify the output tensor. This is consistent with the
// WebNN API and can be useful, e.g., when the output tensor is a GPU buffer. In
// this case, the function returns an empty promise. The dimensions of the
// specified output tensor must match the dimensions of the output tensor of the
// model.
z_buffer = ml.tensor({ data: new Float64Array(1),
                       dimensions: [1] });
await model.compute({ data: new Float32Array([10]),
                      dimensions: [1] },
                    z_buffer);

// For the output tensor(s):
// Similar to the input arguments, if there is only one output tensor, the
// `compute` function returns a tensor in cases 1 and 2, and there is no need to
// specify the name of the output tensor in case 3. But if there are multiple
// output tensors, the output in cases 1 and 2 will be a map from tensor names
// to tensors, and in case 3, the output argument must be a map from tensor
// names to tensors too.
//
// For cases 1 and 2, where the actual output data is located depends on the
// context: if it is a CPU context, the output tensors' buffers will be RAM
// buffer(s), and if it is a GPU context, the output tensors' buffers will be
// GPU buffer(s).
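For the multiple-output case described in the comments above, compute() resolves with a record keyed by output tensor name. The following non-normative sketch shows reading two named outputs; the tensor names "scores" and "boxes" are hypothetical.

// Non-normative sketch: `compute` resolves with a record mapping each output
// tensor's name to its MLTensor when the model has multiple outputs.
const results = await model.compute({
  x: { data: new Float32Array([10]), dimensions: [1] }
});
const scores = results["scores"];  // an MLTensor: { data, dimensions }
const boxes = results["boxes"];
console.log(scores.dimensions, new Float32Array(scores.data.buffer));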
Conformance
Document conventions
Conformance requirements are expressed with a combination of descriptive assertions and RFC 2119 terminology. The key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”, “SHOULD NOT”, “RECOMMENDED”, “MAY”, and “OPTIONAL” in the normative parts of this document are to be interpreted as described in RFC 2119. However, for readability, these words do not appear in all uppercase letters in this specification.
All of the text of this specification is normative except sections explicitly marked as non-normative, examples, and notes. [RFC2119]
Examples in this specification are introduced with the words “for example” or are set apart from the normative text with class="example", like this:

This is an example of an informative example.
Informative notes begin with the word “Note” and are set apart from the normative text with class="note", like this:

Note, this is an informative note.
Index
Terms defined by this specification
- "auto"
  - enum-value for MLDevicePreference, in § 2
  - enum-value for MLPowerPreference, in § 2
- "bool", in § 2
- compute(inputs), in § 2
- constructor(context), in § 2
- "cpu", in § 2
- createContext(), in § 2
- createContext(options), in § 2
- data, in § 2
- devicePreference, in § 2
- dimensions
  - dict-member for MLTensor, in § 2
  - dict-member for MLTensorInfo, in § 2
- "float16", in § 2
- "float32", in § 2
- "float64", in § 2
- "gpu", in § 2
- "high-performance", in § 2
- inputs(), in § 2
- "int16", in § 2
- "int32", in § 2
- "int64", in § 2
- "int8", in § 2
- load(modelBuffer), in § 2
- "low-power", in § 2
- ML, in § 2
- MLContextOptions, in § 2
- MLDataType, in § 2
- MLDevicePreference, in § 2
- MLModel, in § 2
- MLModelFormat, in § 2
- MLModelLoader, in § 2
- MLModelLoader(context), in § 2
- MLPowerPreference, in § 2
- MLTensor, in § 2
- MLTensorInfo, in § 2
- modelFormat, in § 2
- name, in § 2
- numThreads, in § 2
- outputs(), in § 2
- powerPreference, in § 2
- "tflite", in § 2
- type, in § 2
- "uint16", in § 2
- "uint32", in § 2
- "uint64", in § 2
- "uint8", in § 2
- "unknown", in § 2
Terms defined by reference
- [WEBIDL] defines the following terms:
  - ArrayBuffer
  - ArrayBufferView
  - DOMString
  - Exposed
  - Promise
  - SecureContext
  - record
  - sequence
  - unsigned long
- [WEBNN] defines the following terms:
  - MLContext
References
Normative References
- [RFC2119]
- S. Bradner. Key words for use in RFCs to Indicate Requirement Levels. March 1997. Best Current Practice. URL: https://datatracker.ietf.org/doc/html/rfc2119
- [WEBIDL]
- Edgar Chen; Timothy Gu. Web IDL Standard. Living Standard. URL: https://webidl.spec.whatwg.org/
- [WEBNN]
- Ningxin Hu; Chai Chaoweeraprasit. Web Neural Network API. URL: https://webmachinelearning.github.io/webnn/
IDL Index
enum MLModelFormat {
  // Tensorflow-lite flatbuffer.
  "tflite"
};

enum MLDevicePreference {
  // Let the backend select the most suitable device.
  "auto",
  // The backend will use the GPU to do model inference. If some operator is not
  // supported by the GPU, it will fall back to the CPU.
  "gpu",
  // The backend will use the CPU to do model inference.
  "cpu"
};

enum MLPowerPreference {
  // Let the backend select the most suitable behavior.
  "auto",
  // Prioritizes execution speed over power consumption.
  "high-performance",
  // Prioritizes power consumption over other considerations such as execution
  // speed.
  "low-power",
};

dictionary MLContextOptions {
  // Preferred kind of device to use.
  MLDevicePreference devicePreference = "auto";
  // Preference as related to power consumption.
  MLPowerPreference powerPreference = "auto";
  // Model format for the model loader API.
  MLModelFormat modelFormat = "tflite";
  // Number of threads to use.
  // "0" means the backend can determine it automatically.
  unsigned long numThreads = 0;
};

[Exposed=Window]
interface ML {
  Promise<MLContext> createContext(optional MLContextOptions options = {});
};

enum MLDataType {
  // "Unknown" doesn’t mean "unsupported". The backend can support more types
  // than are explicitly listed here (e.g., TFLite has complex numbers). We
  // treat them as "unknown" to avoid exposing too many details of the
  // backends from the beginning.
  "unknown",
  "int64",
  "uint64",
  "float64",
  "int32",
  "uint32",
  "float32",
  "int16",
  "uint16",
  "float16",
  "int8",
  "uint8",
  "bool",
};

dictionary MLTensor {
  required ArrayBufferView data;
  required sequence<unsigned long> dimensions;
};

dictionary MLTensorInfo {
  required DOMString name;
  required MLDataType type;
  required sequence<unsigned long> dimensions;
};

[SecureContext, Exposed=Window]
interface MLModel {
  Promise<record<DOMString, MLTensor>> compute(record<DOMString, MLTensor> inputs);
  sequence<MLTensorInfo> inputs();
  sequence<MLTensorInfo> outputs();
};

[Exposed=Window]
interface MLModelLoader {
  constructor(MLContext context);
  Promise<MLModel> load(ArrayBuffer modelBuffer);
};