HTTP/2 200
content-type: application/octet-stream
x-guploader-uploadid: ABgVH89tyd8XFXziDN2QAeLc264NrInmYnQ8Txm3mkhfYg43ciKn4UQDqFHBwFYr4t86h3Kpz7uUIKM
expires: Fri, 25 Jul 2025 02:09:15 GMT
date: Fri, 25 Jul 2025 01:09:15 GMT
cache-control: public, max-age=3600
last-modified: Sat, 12 Feb 2022 02:16:43 GMT
etag: "499e25f57da3b2880da9ea220c2af2de"
x-goog-generation: 1644632203337222
x-goog-metageneration: 1
x-goog-stored-content-encoding: identity
x-goog-stored-content-length: 31343
x-goog-hash: crc32c=vdkd5g==
x-goog-hash: md5=SZ4l9X2jsogNqeoiDCry3g==
x-goog-storage-class: MULTI_REGIONAL
accept-ranges: bytes
content-length: 31343
server: UploadServer
alt-svc: h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "Tce3stUlHN0L"
},
"source": [
"##### Copyright 2021 The TensorFlow Authors."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"cellView": "form",
"execution": {
"iopub.execute_input": "2021-07-30T12:26:32.358174Z",
"iopub.status.busy": "2021-07-30T12:26:32.357541Z",
"iopub.status.idle": "2021-07-30T12:26:32.360847Z",
"shell.execute_reply": "2021-07-30T12:26:32.360282Z"
},
"id": "tuOe1ymfHZPu"
},
"outputs": [],
"source": [
"#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
"# you may not use this file except in compliance with the License.\n",
"# You may obtain a copy of the License at\n",
"#\n",
"# https://www.apache.org/licenses/LICENSE-2.0\n",
"#\n",
"# Unless required by applicable law or agreed to in writing, software\n",
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
"# See the License for the specific language governing permissions and\n",
"# limitations under the License."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "qFdPvlXBOdUN"
},
"source": [
"# Apache ORC Reader"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "MfBg1C5NB3X0"
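},
"source": [
"<table class=\"tfo-notebook-buttons\" align=\"left\">\n",
"  <td>\n",
"    <a target=\"_blank\" href=\"https://www.tensorflow.org/io/tutorials/orc\">View on TensorFlow.org</a>\n",
"  </td>\n",
"  <td>\n",
"    <a target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/io/blob/master/docs/tutorials/orc.ipynb\">Run in Google Colab</a>\n",
"  </td>\n",
"  <td>\n",
"    <a target=\"_blank\" href=\"https://github.com/tensorflow/io/blob/master/docs/tutorials/orc.ipynb\">View source on GitHub</a>\n",
"  </td>\n",
"  <td>\n",
"    <a href=\"https://storage.googleapis.com/tensorflow_docs/io/docs/tutorials/orc.ipynb\">Download notebook</a>\n",
"  </td>\n",
"</table>"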
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "xHxb-dlhMIzW"
},
"source": [
"## Overview\n",
"\n",
"[Apache ORC](https://orc.apache.org/) is a popular columnar storage format. The `tensorflow-io` package provides a default implementation for reading Apache ORC files."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "MUXex9ctTuDB"
},
"source": [
"## Setup"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "1Eh-iCRVBm0p"
},
"source": [
"Install the required packages, then restart the runtime.\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2021-07-30T12:26:32.369396Z",
"iopub.status.busy": "2021-07-30T12:26:32.368790Z",
"iopub.status.idle": "2021-07-30T12:26:35.338226Z",
"shell.execute_reply": "2021-07-30T12:26:35.338655Z"
},
"id": "g7cxbf1-skn6"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting tensorflow-io\r\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" Using cached tensorflow_io-0.19.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (22.7 MB)\r\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: tensorflow<2.6.0,>=2.5.0 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow-io) (2.5.0)\r\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting tensorflow-io-gcs-filesystem==0.19.1\r\n",
" Using cached tensorflow_io_gcs_filesystem-0.19.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (2.3 MB)\r\n",
"Requirement already satisfied: opt-einsum~=3.3.0 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (3.3.0)\r\n",
"Requirement already satisfied: wrapt~=1.12.1 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (1.12.1)\r\n",
"Requirement already satisfied: h5py~=3.1.0 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (3.1.0)\r\n",
"Requirement already satisfied: typing-extensions~=3.7.4 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (3.7.4.3)\r\n",
"Requirement already satisfied: keras-nightly~=2.5.0.dev in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (2.5.0.dev2021032900)\r\n",
"Requirement already satisfied: google-pasta~=0.2 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (0.2.0)\r\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: wheel~=0.35 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (0.36.2)\r\n",
"Requirement already satisfied: absl-py~=0.10 in /home/kbuilder/.local/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (0.12.0)\r\n",
"Requirement already satisfied: keras-preprocessing~=1.1.2 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (1.1.2)\r\n",
"Requirement already satisfied: flatbuffers~=1.12.0 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (1.12)\r\n",
"Requirement already satisfied: astunparse~=1.6.3 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (1.6.3)\r\n",
"Requirement already satisfied: grpcio~=1.34.0 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (1.34.1)\r\n",
"Requirement already satisfied: numpy~=1.19.2 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (1.19.5)\r\n",
"Requirement already satisfied: termcolor~=1.1.0 in /home/kbuilder/.local/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (1.1.0)\r\n",
"Requirement already satisfied: tensorflow-estimator<2.6.0,>=2.5.0rc0 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (2.5.0)\r\n",
"Requirement already satisfied: gast==0.4.0 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (0.4.0)\r\n",
"Requirement already satisfied: six~=1.15.0 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (1.15.0)\r\n",
"Requirement already satisfied: tensorboard~=2.5 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (2.5.0)\r\n",
"Requirement already satisfied: protobuf>=3.9.2 in /home/kbuilder/.local/lib/python3.7/site-packages (from tensorflow<2.6.0,>=2.5.0->tensorflow-io) (3.17.3)\r\n",
"Requirement already satisfied: cached-property in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from h5py~=3.1.0->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (1.5.2)\r\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: markdown>=2.6.8 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (3.3.4)\r\n",
"Requirement already satisfied: werkzeug>=0.11.15 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (2.0.1)\r\n",
"Requirement already satisfied: google-auth<2,>=1.6.3 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (1.34.0)\r\n",
"Requirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (0.6.1)\r\n",
"Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (0.4.5)\r\n",
"Requirement already satisfied: setuptools>=41.0.0 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (57.1.0)\r\n",
"Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (1.8.0)\r\n",
"Requirement already satisfied: requests<3,>=2.21.0 in /home/kbuilder/.local/lib/python3.7/site-packages (from tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (2.26.0)\r\n",
"Requirement already satisfied: cachetools<5.0,>=2.0.0 in /home/kbuilder/.local/lib/python3.7/site-packages (from google-auth<2,>=1.6.3->tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (4.2.2)\r\n",
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/lib/python3/dist-packages (from google-auth<2,>=1.6.3->tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (0.2.1)\r\n",
"Requirement already satisfied: rsa<5,>=3.1.4 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from google-auth<2,>=1.6.3->tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (4.7.2)\r\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: requests-oauthlib>=0.7.0 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (1.3.0)\r\n",
"Requirement already satisfied: importlib-metadata in /home/kbuilder/.local/lib/python3.7/site-packages (from markdown>=2.6.8->tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (3.10.1)\r\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests<3,>=2.21.0->tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (2018.1.18)\r\n",
"Requirement already satisfied: charset-normalizer~=2.0.0 in /home/kbuilder/.local/lib/python3.7/site-packages (from requests<3,>=2.21.0->tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (2.0.3)\r\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/lib/python3/dist-packages (from requests<3,>=2.21.0->tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (1.22)\r\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests<3,>=2.21.0->tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (2.6)\r\n",
"Requirement already satisfied: oauthlib>=3.0.0 in /tmpfs/src/tf_docs_env/lib/python3.7/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (3.1.1)\r\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pyasn1>=0.1.3 in /usr/lib/python3/dist-packages (from rsa<5,>=3.1.4->google-auth<2,>=1.6.3->tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (0.4.2)\r\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: zipp>=0.5 in /home/kbuilder/.local/lib/python3.7/site-packages (from importlib-metadata->markdown>=2.6.8->tensorboard~=2.5->tensorflow<2.6.0,>=2.5.0->tensorflow-io) (3.5.0)\r\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Installing collected packages: tensorflow-io-gcs-filesystem, tensorflow-io\r\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Successfully installed tensorflow-io-0.19.1 tensorflow-io-gcs-filesystem-0.19.1\r\n",
"\u001b[33mWARNING: You are using pip version 21.1.3; however, version 21.2.1 is available.\r\n",
"You should consider upgrading via the '/tmpfs/src/tf_docs_env/bin/python -m pip install --upgrade pip' command.\u001b[0m\r\n"
]
}
],
"source": [
"!pip install tensorflow-io"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2021-07-30T12:26:35.343579Z",
"iopub.status.busy": "2021-07-30T12:26:35.342936Z",
"iopub.status.idle": "2021-07-30T12:26:37.026090Z",
"shell.execute_reply": "2021-07-30T12:26:37.026501Z"
},
"id": "IqR2PQG4ZaZ0"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2021-07-30 12:26:35.624072: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n"
]
}
],
"source": [
"import tensorflow as tf\n",
"import tensorflow_io as tfio"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "EyHfC3nEzseN"
},
"source": [
"### Download a sample dataset file in ORC"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ZjEeF6Fva8UO"
},
"source": [
"The dataset you will use here is the [Iris Data Set](https://archive.ics.uci.edu/ml/datasets/iris) from UCI. The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. It has 4 attributes: (1) sepal length, (2) sepal width, (3) petal length, (4) petal width, and the last column contains the class label."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2021-07-30T12:26:37.038817Z",
"iopub.status.busy": "2021-07-30T12:26:37.032009Z",
"iopub.status.idle": "2021-07-30T12:26:37.525067Z",
"shell.execute_reply": "2021-07-30T12:26:37.524493Z"
},
"id": "zaiXjZiXzrHs"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" % Total % Received % Xferd Average Speed Time Time Time Current\r\n",
" Dload Upload Total Spent Left Speed\r\n",
"\r",
" 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"100 144 100 144 0 0 1180 0 --:--:-- --:--:-- --:--:-- 1180\r\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
"100 3328 100 3328 0 0 13419 0 --:--:-- --:--:-- --:--:-- 13419\r",
"100 3328 100 3328 0 0 13419 0 --:--:-- --:--:-- --:--:-- 0\r\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"-rw-rw-r-- 1 kbuilder kokoro 3328 Jul 30 12:26 iris.orc\r\n"
]
}
],
"source": [
"!curl -OL https://github.com/tensorflow/io/raw/master/tests/test_orc/iris.orc\n",
"!ls -l iris.orc"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7DG9JTJ0-bzg"
},
"source": [
"## Create a dataset from the file"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2021-07-30T12:26:37.793906Z",
"iopub.status.busy": "2021-07-30T12:26:37.793137Z",
"iopub.status.idle": "2021-07-30T12:26:38.147473Z",
"shell.execute_reply": "2021-07-30T12:26:38.146862Z"
},
"id": "ppFAjXAYsj-z"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2021-07-30 12:26:37.779732: I tensorflow_io/core/kernels/cpu_check.cc:128] Your CPU supports instructions that this TensorFlow IO binary was not compiled to use: AVX2 AVX512F FMA\n",
"2021-07-30 12:26:37.887808: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
"2021-07-30 12:26:37.979733: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected\n",
"2021-07-30 12:26:37.979781: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (kokoro-gcp-ubuntu-prod-1874323723): /proc/driver/nvidia/version does not exist\n",
"2021-07-30 12:26:37.980766: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2021-07-30 12:26:37.984832: I tensorflow_io/core/kernels/orc/orc_kernels.cc:49] ORC file schema:struct\n"
]
}
],
"source": [
"dataset = tfio.IODataset.from_orc(\"iris.orc\", capacity=15).batch(1)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4xPr3f4LVdeN"
},
"source": [
"Examine the dataset:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2021-07-30T12:26:38.153526Z",
"iopub.status.busy": "2021-07-30T12:26:38.152683Z",
"iopub.status.idle": "2021-07-30T12:26:38.218262Z",
"shell.execute_reply": "2021-07-30T12:26:38.218679Z"
},
"id": "9B1QUKG70Lzs"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(, , , , )\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2021-07-30 12:26:38.167628: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
"2021-07-30 12:26:38.168103: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 2000170000 Hz\n"
]
}
],
"source": [
"for item in dataset.take(1):\n",
" print(item)\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "03qncHJPVNK3"
},
"source": [
"Let's walk through an end-to-end example of training a `tf.keras` model on an ORC dataset built from the Iris data."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "tDkpKRMVcPfb"
},
"source": [
"### Data preprocessing"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "nDgkfWFRVjKz"
},
"source": [
"Configure which columns are features and which column is the label:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2021-07-30T12:26:38.224868Z",
"iopub.status.busy": "2021-07-30T12:26:38.224188Z",
"iopub.status.idle": "2021-07-30T12:26:38.307753Z",
"shell.execute_reply": "2021-07-30T12:26:38.307155Z"
},
"id": "R1OYAybz07dr"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2021-07-30 12:26:38.222712: I tensorflow_io/core/kernels/orc/orc_kernels.cc:49] ORC file schema:struct\n",
"2021-07-30 12:26:38.286470: I tensorflow_io/core/kernels/orc/orc_kernels.cc:49] ORC file schema:struct\n"
]
}
],
"source": [
"feature_cols = [\"sepal_length\", \"sepal_width\", \"petal_length\", \"petal_width\"]\n",
"label_cols = [\"species\"]\n",
"\n",
"# select feature columns\n",
"feature_dataset = tfio.IODataset.from_orc(\"iris.orc\", columns=feature_cols)\n",
"# select label columns\n",
"label_dataset = tfio.IODataset.from_orc(\"iris.orc\", columns=label_cols)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "GSYMP48vVvV0"
},
"source": [
"A lookup table that maps species names to integer class ids for model training:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"execution": {
"iopub.execute_input": "2021-07-30T12:26:38.313426Z",
"iopub.status.busy": "2021-07-30T12:26:38.312803Z",
"iopub.status.idle": "2021-07-30T12:26:38.315434Z",
"shell.execute_reply": "2021-07-30T12:26:38.315833Z"
},
"id": "TQvuE7OgVs1q"
},
"outputs": [],
"source": [
"vocab_init = tf.lookup.KeyValueTensorInitializer(\n",
" keys=tf.constant([\"virginica\", \"versicolor\", \"setosa\"]),\n",
" values=tf.constant([0, 1, 2], dtype=tf.int64))\n",
"vocab_table = tf.lookup.StaticVocabularyTable(\n",
" vocab_init,\n",
" num_oov_buckets=4)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"execution": {
"iopub.execute_input": "2021-07-30T12:26:38.325313Z",
"iopub.status.busy": "2021-07-30T12:26:38.322586Z",
"iopub.status.idle": "2021-07-30T12:26:38.462328Z",
"shell.execute_reply": "2021-07-30T12:26:38.461757Z"
},
"id": "lpf0w41iWAZ4"
},
"outputs": [],
"source": [
"label_dataset = label_dataset.map(vocab_table.lookup)\n",
"dataset = tf.data.Dataset.zip((feature_dataset, label_dataset))\n",
"dataset = dataset.batch(1)\n",
"\n",
"def pack_features_vector(features, labels):\n",
" \"\"\"Pack the features into a single array.\"\"\"\n",
" features = tf.stack(list(features), axis=1)\n",
" return features, labels\n",
"\n",
"dataset = dataset.map(pack_features_vector)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "R1Tyf3AodC2Y"
},
"source": [
"## Build, compile and train the model"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "oVB9Q0B-WDn4"
},
"source": [
"Finally, you are ready to build the model and train it! You will build a 3-layer Keras model to predict the class of the iris plant from the dataset you just processed."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"execution": {
"iopub.execute_input": "2021-07-30T12:26:38.474620Z",
"iopub.status.busy": "2021-07-30T12:26:38.467959Z",
"iopub.status.idle": "2021-07-30T12:26:40.183720Z",
"shell.execute_reply": "2021-07-30T12:26:40.184097Z"
},
"id": "tToy0FoOWG-9"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/5\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\r",
" 1/Unknown - 0s 305ms/step - loss: 3.8325 - accuracy: 0.0000e+00"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
" 49/Unknown - 0s 1ms/step - loss: 2.3545 - accuracy: 0.0000e+00 "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
" 102/Unknown - 0s 999us/step - loss: 1.3137 - accuracy: 0.4902 "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
"150/150 [==============================] - 0s 1ms/step - loss: 1.3479 - accuracy: 0.4800\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 2/5\n",
"\r",
" 1/150 [..............................] - ETA: 0s - loss: 1.5865 - accuracy: 0.0000e+00"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
" 56/150 [==========>...................] - ETA: 0s - loss: 1.0680 - accuracy: 0.4286 "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
"111/150 [=====================>........] - ETA: 0s - loss: 0.9465 - accuracy: 0.5495"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
"150/150 [==============================] - 0s 920us/step - loss: 0.8355 - accuracy: 0.6000\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 3/5\n",
"\r",
" 1/150 [..............................] - ETA: 0s - loss: 0.9062 - accuracy: 1.0000"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
" 55/150 [==========>...................] - ETA: 0s - loss: 0.6422 - accuracy: 0.9091"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
"108/150 [====================>.........] - ETA: 0s - loss: 0.7341 - accuracy: 0.7685"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
"150/150 [==============================] - 0s 951us/step - loss: 0.6370 - accuracy: 0.7733\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 4/5\n",
"\r",
" 1/150 [..............................] - ETA: 0s - loss: 0.4476 - accuracy: 1.0000"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
" 53/150 [=========>....................] - ETA: 0s - loss: 0.3792 - accuracy: 0.9434"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
"106/150 [====================>.........] - ETA: 0s - loss: 0.5916 - accuracy: 0.7830"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
"150/150 [==============================] - 0s 954us/step - loss: 0.5276 - accuracy: 0.7933\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 5/5\n",
"\r",
" 1/150 [..............................] - ETA: 0s - loss: 0.1860 - accuracy: 1.0000"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
" 54/150 [=========>....................] - ETA: 0s - loss: 0.2919 - accuracy: 0.9259"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
"108/150 [====================>.........] - ETA: 0s - loss: 0.5303 - accuracy: 0.7685"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
"150/150 [==============================] - 0s 940us/step - loss: 0.4766 - accuracy: 0.7933\n"
]
},
{
"data": {
"text/plain": [
""
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = tf.keras.Sequential(\n",
" [\n",
" tf.keras.layers.Dense(\n",
" 10, activation=tf.nn.relu, input_shape=(4,)\n",
" ),\n",
" tf.keras.layers.Dense(10, activation=tf.nn.relu),\n",
" tf.keras.layers.Dense(3),\n",
" ]\n",
")\n",
"\n",
"model.compile(optimizer=\"adam\", loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=[\"accuracy\"])\n",
"model.fit(dataset, epochs=5)"
]
}
],
"metadata": {
"colab": {
"collapsed_sections": [
"Tce3stUlHN0L"
],
"name": "orc.ipynb",
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 0
}