diff --git a/docs/source/toc.yaml b/docs/source/toc.yaml
index 3140d57c0..cc57dc390 100644
--- a/docs/source/toc.yaml
+++ b/docs/source/toc.yaml
@@ -4,6 +4,20 @@ subtrees:
entries:
- file: README.md
title: Introduction
+ - file: examples/tutorials/index.md
+ title: Tutorials
+ entries:
+ - file: examples/tutorials/basic/index.md
+ title: Basic
+ - file: examples/tutorials/tf/index.md
+ title: TensorFlow
+ entries:
+ - file: examples/tutorials/tf/DLRM-Ranking-Model.ipynb
+ title: DLRM Ranking Model
+# - file: examples/tutorials/tf/TwoTower-Retrieval-Model.ipynb
+# title: TwoTower Retrieval Model
+ - file: examples/tutorials/pytorch/index.md
+ title: PyTorch
- file: guide/recommender_system_guide.rst
title: Recommender System Guide
- file: examples/index
diff --git a/examples/tutorials/README.md b/examples/tutorials/README.md
new file mode 100644
index 000000000..a65758ec0
--- /dev/null
+++ b/examples/tutorials/README.md
@@ -0,0 +1,3 @@
+# Tutorials
+
+
diff --git a/examples/tutorials/basic/README.md b/examples/tutorials/basic/README.md
new file mode 100644
index 000000000..c6d2eb525
--- /dev/null
+++ b/examples/tutorials/basic/README.md
@@ -0,0 +1 @@
+# Basic
\ No newline at end of file
diff --git a/examples/tutorials/pytorch/README.md b/examples/tutorials/pytorch/README.md
new file mode 100644
index 000000000..91d4e61bc
--- /dev/null
+++ b/examples/tutorials/pytorch/README.md
@@ -0,0 +1 @@
+# PyTorch
\ No newline at end of file
diff --git a/examples/tutorials/tf/DLRM-Ranking-Model.ipynb b/examples/tutorials/tf/DLRM-Ranking-Model.ipynb
new file mode 100644
index 000000000..40c649a90
--- /dev/null
+++ b/examples/tutorials/tf/DLRM-Ranking-Model.ipynb
@@ -0,0 +1,1077 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "a22c62bb",
+ "metadata": {},
+ "source": [
+ "# DLRM Ranking Model"
+ ]
+ },
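+  {
+   "cell_type": "markdown",
+   "id": "overview-note",
+   "metadata": {},
+   "source": [
+    "In this tutorial we download the MovieLens-1M dataset with `merlin.datasets`, preprocess it into a binary ranking dataset with NVTabular, train a DLRM model with Merlin Models (TensorFlow), and finally export the preprocessing workflow together with the trained model as an ensemble that can be served with Triton Inference Server."
+   ]
+  },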
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "efd42658",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2023-07-05 10:26:20.573752: I tensorflow/core/platform/cpu_feature_guard.cc:183] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+ "To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "/usr/local/lib/python3.8/dist-packages/merlin/dtypes/mappings/torch.py:43: UserWarning: PyTorch dtype mappings did not load successfully due to an error: No module named 'torch'\n",
+ " warn(f\"PyTorch dtype mappings did not load successfully due to an error: {exc.msg}\")\n"
+ ]
+ }
+ ],
+ "source": [
+ "import os\n",
+ "\n",
+ "from merlin.datasets.entertainment import get_movielens\n",
+ "\n",
+ "input_path = os.environ.get(\"INPUT_DATA_DIR\", os.path.expanduser(\"~/merlin-framework/movielens/\"))\n",
+ "\n",
+ "get_movielens(variant=\"ml-1m\", path=input_path);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "5351b22e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "README ratings.dat users.dat\r\n",
+ "movies.dat train.parquet users_converted.parquet\r\n",
+ "movies_converted.parquet \u001b[0m\u001b[01;34mtransformed\u001b[0m/ valid.parquet\r\n"
+ ]
+ }
+ ],
+ "source": [
+ "ls {input_path}/ml-1m #noqa"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "c529e6e3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from merlin.core.dispatch import get_lib\n",
+ "\n",
+ "train = get_lib().read_parquet(f'{input_path}ml-1m/train.parquet')\n",
+ "valid = get_lib().read_parquet(f'{input_path}ml-1m/valid.parquet')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "9f0a2009",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " userId | \n",
+ " movieId | \n",
+ " rating | \n",
+ " timestamp | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 348678 | \n",
+ " 2043 | \n",
+ " 2976 | \n",
+ " 1 | \n",
+ " 974666164 | \n",
+ "
\n",
+ " \n",
+ " 348785 | \n",
+ " 2046 | \n",
+ " 318 | \n",
+ " 5 | \n",
+ " 974666372 | \n",
+ "
\n",
+ " \n",
+ " 447226 | \n",
+ " 2755 | \n",
+ " 2721 | \n",
+ " 5 | \n",
+ " 973192895 | \n",
+ "
\n",
+ " \n",
+ " 607075 | \n",
+ " 3685 | \n",
+ " 2804 | \n",
+ " 4 | \n",
+ " 966861784 | \n",
+ "
\n",
+ " \n",
+ " 249905 | \n",
+ " 1505 | \n",
+ " 1732 | \n",
+ " 3 | \n",
+ " 983671138 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " userId movieId rating timestamp\n",
+ "348678 2043 2976 1 974666164\n",
+ "348785 2046 318 5 974666372\n",
+ "447226 2755 2721 5 973192895\n",
+ "607075 3685 2804 4 966861784\n",
+ "249905 1505 1732 3 983671138"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "b728a155",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(,\n",
+ " )"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import nvtabular as nvt\n",
+ "from merlin.schema.tags import Tags\n",
+ "\n",
+ "train_ds = nvt.Dataset(train)\n",
+ "valid_ds = nvt.Dataset(valid)\n",
+ "\n",
+ "train_ds, valid_ds"
+ ]
+ },
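+  {
+   "cell_type": "markdown",
+   "id": "preprocessing-note",
+   "metadata": {},
+   "source": [
+    "We preprocess the data with a small NVTabular graph: `Categorify` encodes `userId` and `movieId` as contiguous integer ids, and a `LambdaOp` binarizes the rating (ratings greater than 3 become the positive class) before `AddMetadata` tags the column as a binary-classification target so that the model can pick it up from the schema."
+   ]
+  },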
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "bebe2cf6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "categorical_features = ['userId', 'movieId'] >> nvt.ops.Categorify()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "f5e97c6b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "target = ['rating'] >> nvt.ops.LambdaOp(lambda x: (x>3).astype('int')) >> nvt.ops.AddMetadata(tags=[Tags.BINARY_CLASSIFICATION, Tags.TARGET])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "a2e10d81",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "output = categorical_features+target"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "22f033c1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "workflow = nvt.Workflow(output)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "e3b8c089",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "workflow.fit_transform(train_ds).to_parquet('train')\n",
+ "workflow.transform(valid_ds).to_parquet('valid')"
+ ]
+ },
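+  {
+   "cell_type": "markdown",
+   "id": "fit-transform-note",
+   "metadata": {},
+   "source": [
+    "`fit_transform` fits the categorical encodings on the training data and writes the transformed rows to Parquet, while the validation set is only transformed with the already-fitted workflow. We then load both transformed datasets back and inspect the schema that the workflow attached to them."
+   ]
+  },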
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "f821cf69",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "train_transformed = nvt.Dataset('train', engine='parquet')\n",
+ "valid_transformed = nvt.Dataset('valid', engine='parquet')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "49ee4a25",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ " tags | \n",
+ " dtype | \n",
+ " is_list | \n",
+ " is_ragged | \n",
+ " properties.num_buckets | \n",
+ " properties.freq_threshold | \n",
+ " properties.max_size | \n",
+ " properties.cat_path | \n",
+ " properties.embedding_sizes.cardinality | \n",
+ " properties.embedding_sizes.dimension | \n",
+ " properties.domain.min | \n",
+ " properties.domain.max | \n",
+ " properties.domain.name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " userId | \n",
+ " (Tags.CATEGORICAL) | \n",
+ " DType(name='int64', element_type=<ElementType.... | \n",
+ " False | \n",
+ " False | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " .//categories/unique.userId.parquet | \n",
+ " 6043.0 | \n",
+ " 210.0 | \n",
+ " 0.0 | \n",
+ " 6042.0 | \n",
+ " userId | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " movieId | \n",
+ " (Tags.CATEGORICAL) | \n",
+ " DType(name='int64', element_type=<ElementType.... | \n",
+ " False | \n",
+ " False | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " .//categories/unique.movieId.parquet | \n",
+ " 3683.0 | \n",
+ " 159.0 | \n",
+ " 0.0 | \n",
+ " 3682.0 | \n",
+ " movieId | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " rating | \n",
+ " (Tags.BINARY_CLASSIFICATION, Tags.TARGET) | \n",
+ " DType(name='int64', element_type=<ElementType.... | \n",
+ " False | \n",
+ " False | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "[{'name': 'userId', 'tags': {}, 'properties': {'num_buckets': None, 'freq_threshold': 0.0, 'max_size': 0.0, 'cat_path': './/categories/unique.userId.parquet', 'embedding_sizes': {'cardinality': 6043.0, 'dimension': 210.0}, 'domain': {'min': 0, 'max': 6042, 'name': 'userId'}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'movieId', 'tags': {}, 'properties': {'num_buckets': None, 'freq_threshold': 0.0, 'max_size': 0.0, 'cat_path': './/categories/unique.movieId.parquet', 'embedding_sizes': {'cardinality': 3683.0, 'dimension': 159.0}, 'domain': {'min': 0, 'max': 3682, 'name': 'movieId'}}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'rating', 'tags': {, }, 'properties': {}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_transformed.schema"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "d44eb819",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " userId | \n",
+ " movieId | \n",
+ " rating | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 3143 | \n",
+ " 520 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 3171 | \n",
+ " 26 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 5936 | \n",
+ " 2190 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 988 | \n",
+ " 95 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 847 | \n",
+ " 173 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " userId movieId rating\n",
+ "0 3143 520 0\n",
+ "1 3171 26 1\n",
+ "2 5936 2190 1\n",
+ "3 988 95 1\n",
+ "4 847 173 0"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_transformed.head()"
+ ]
+ },
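+  {
+   "cell_type": "markdown",
+   "id": "dlrm-model-note",
+   "metadata": {},
+   "source": [
+    "Using the schema of the transformed data, `mm.DLRMModel` assembles the DLRM architecture: embedding tables for the categorical features, a dot-product feature-interaction block followed by the top MLP, and a binary-classification head that predicts the binarized rating. We train for a single epoch here to keep the tutorial short."
+   ]
+  },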
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "150b4f0a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.data_structures has been moved to tensorflow.python.trackable.data_structures. The old module will be deleted in version 2.11.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[INFO]: sparse_operation_kit is imported\n",
+ "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.base has been moved to tensorflow.python.trackable.base. The old module will be deleted in version 2.11.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.2.0-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n",
+ "[SOK INFO] Import /usr/local/lib/python3.8/dist-packages/merlin_sok-1.2.0-py3.8-linux-x86_64.egg/sparse_operation_kit/lib/libsok_experiment.so\n",
+ "[SOK INFO] Initialize finished, communication tool: horovod\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2023-07-05 10:26:54.777058: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n",
+ "2023-07-05 10:26:54.777165: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1638] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 16255 MB memory: -> device: 0, name: Tesla V100-SXM2-32GB-LS, pci bus id: 0000:07:00.0, compute capability: 7.0\n",
+ "2023-07-05 10:26:54.780396: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.\n",
+ "2023-07-05 10:26:54.780445: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1638] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 16255 MB memory: -> device: 1, name: Tesla V100-SXM2-32GB-LS, pci bus id: 0000:0a:00.0, compute capability: 7.0\n",
+ "2023-07-05 10:27:01.096455: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32\n",
+ "\t [[{{node Placeholder/_0}}]]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "778/782 [============================>.] - ETA: 0s - loss: 0.6216 - precision: 0.6355 - recall: 0.8855 - binary_accuracy: 0.6423 - auc: 0.6752 - regularization_loss: 0.0000e+00 - loss_batch: 0.6216"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2023-07-05 10:27:14.927580: I tensorflow/core/common_runtime/executor.cc:1209] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32\n",
+ "\t [[{{node Placeholder/_0}}]]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\r",
+ "782/782 [==============================] - 16s 9ms/step - loss: 0.6213 - precision: 0.6358 - recall: 0.8852 - binary_accuracy: 0.6427 - auc: 0.6759 - regularization_loss: 0.0000e+00 - loss_batch: 0.6211 - val_loss: 0.5438 - val_precision: 0.7231 - val_recall: 0.8400 - val_binary_accuracy: 0.7220 - val_auc: 0.7886 - val_regularization_loss: 0.0000e+00 - val_loss_batch: 0.5553\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import tensorflow\n",
+ "import merlin.models.tf as mm\n",
+ "\n",
+ "model = mm.DLRMModel(\n",
+ " train_transformed.schema,\n",
+ " embedding_dim=64,\n",
+ " bottom_block=mm.MLPBlock([128, 64]),\n",
+ " top_block=mm.MLPBlock([128, 64, 32]),\n",
+ " prediction_tasks=mm.BinaryClassificationTask('rating')\n",
+ ")\n",
+ "\n",
+ "opt = tensorflow.optimizers.legacy.Adam(learning_rate=1e-3)\n",
+ "model.compile(optimizer=opt)\n",
+ "model.fit(train_transformed, validation_data=valid_transformed, batch_size=1024, epochs=1)"
+ ]
+ },
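+  {
+   "cell_type": "markdown",
+   "id": "serving-prep-note",
+   "metadata": {},
+   "source": [
+    "To prepare for serving, we drop the `rating` target from the workflow (it is not available at request time) and chain the fitted workflow and the trained model into a Merlin Systems `Ensemble`. Exporting the ensemble writes a Triton model repository with the preprocessing model, the TensorFlow model, and an executor model that connects them."
+   ]
+  },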
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "5bba34be",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "workflow.remove_inputs(['rating'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "0b375c34",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from merlin.systems.dag.ensemble import Ensemble\n",
+ "from merlin.systems.dag.ops.workflow import TransformWorkflow\n",
+ "from merlin.systems.dag.ops.tensorflow import PredictTensorflow"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "686ca92f",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING:absl:Function `_wrapped_model` contains input name(s) movieId, userId with unsupported characters which will be renamed to movieid, userid in the SavedModel.\n",
+ "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, prepare_list_features_layer_call_fn, prepare_list_features_layer_call_and_return_conditional_losses, output_layer_layer_call_fn while saving (showing 5 of 42). These functions will not be directly callable after loading.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "INFO:tensorflow:Assets written to: /tmp/tmpf8xq84k4/assets\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:tensorflow:Assets written to: /tmp/tmpf8xq84k4/assets\n",
+ "WARNING:absl:Function `_wrapped_model` contains input name(s) movieId, userId with unsupported characters which will be renamed to movieid, userid in the SavedModel.\n",
+ "WARNING:absl:Found untraced functions such as model_context_layer_call_fn, model_context_layer_call_and_return_conditional_losses, prepare_list_features_layer_call_fn, prepare_list_features_layer_call_and_return_conditional_losses, output_layer_layer_call_fn while saving (showing 5 of 42). These functions will not be directly callable after loading.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "INFO:tensorflow:Assets written to: ensemble4/1_predicttensorflowtriton/1/model.savedmodel/assets\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:tensorflow:Assets written to: ensemble4/1_predicttensorflowtriton/1/model.savedmodel/assets\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n"
+ ]
+ }
+ ],
+ "source": [
+ "from merlin.systems.dag.ensemble import Ensemble\n",
+ "\n",
+ "serving_operators = (\n",
+ " ['userId', 'movieId'] >> \n",
+ " TransformWorkflow(workflow) >>\n",
+ " PredictTensorflow(model)\n",
+ ")\n",
+ "ensemble = Ensemble(serving_operators, train_transformed.schema.remove_by_tag(Tags.TARGET))\n",
+ "\n",
+ "export_path = os.path.join(\"ensemble4\")\n",
+ "os.makedirs(export_path)\n",
+ "\n",
+ "ens_conf, node_confs = ensemble.export(export_path)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "fb793fcc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'ensemble4'"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "export_path"
+ ]
+ },
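+  {
+   "cell_type": "markdown",
+   "id": "triton-client-note",
+   "metadata": {},
+   "source": [
+    "The next cells assume a Triton Inference Server instance is already running with its model repository pointed at the exported directory (for example, `tritonserver --model-repository=ensemble4`). We connect a client, check that all three models are available, and load the executor model."
+   ]
+  },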
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "2ed9f170",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "client created.\n"
+ ]
+ }
+ ],
+ "source": [
+ "import tritonhttpclient\n",
+ "\n",
+ "try:\n",
+ " triton_client = tritonhttpclient.InferenceServerClient(url=\"localhost:8000\", verbose=True)\n",
+ " print(\"client created.\")\n",
+ "except Exception as e:\n",
+ " print(\"channel creation failed: \" + str(e))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "d0879dfc",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "POST /v2/repository/index, headers None\n",
+ "\n",
+ "\n",
+ "bytearray(b'[{\"name\":\"0_transformworkflowtriton\",\"version\":\"1\",\"state\":\"READY\"},{\"name\":\"1_predicttensorflowtriton\",\"version\":\"1\",\"state\":\"READY\"},{\"name\":\"executor_model\",\"version\":\"1\",\"state\":\"READY\"}]')\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "[{'name': '0_transformworkflowtriton', 'version': '1', 'state': 'READY'},\n",
+ " {'name': '1_predicttensorflowtriton', 'version': '1', 'state': 'READY'},\n",
+ " {'name': 'executor_model', 'version': '1', 'state': 'READY'}]"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "triton_client.get_model_repository_index()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "87cd5998",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "POST /v2/repository/models/executor_model/load, headers None\n",
+ "{}\n",
+ "\n",
+ "Loaded model 'executor_model'\n",
+ "CPU times: user 687 ms, sys: 210 ms, total: 897 ms\n",
+ "Wall time: 32 s\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%time\n",
+ "\n",
+ "triton_client.load_model(model_name=\"executor_model\")"
+ ]
+ },
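+  {
+   "cell_type": "markdown",
+   "id": "inference-request-note",
+   "metadata": {},
+   "source": [
+    "To send a request, we take a few `userId`/`movieId` pairs from the validation set, convert them into Triton inputs that match the workflow's input schema, and request the `rating/binary_classification_task` output over gRPC. The response holds one predicted probability per row, i.e. the model's estimate that the user would rate the movie higher than 3."
+   ]
+  },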
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "57c8072a",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " userId movieId\n",
+ "118806 768 2686\n",
+ "790460 4728 3160\n",
+ "451136 2777 11\n"
+ ]
+ }
+ ],
+ "source": [
+ "from merlin.core.dispatch import get_lib\n",
+ "df_lib = get_lib()\n",
+ "\n",
+ "valid = df_lib.read_parquet(\n",
+ " os.path.join(f'{input_path}ml-1m/valid.parquet'), columns=[\"userId\", \"movieId\"]\n",
+ ")\n",
+ "batch = valid[:3]\n",
+ "print(batch)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "457f78a0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from merlin.schema import ColumnSchema, Schema\n",
+ "import merlin.dtypes as md\n",
+ "import merlin.systems.triton as merlin_triton\n",
+ "import tritonclient.grpc as grpcclient\n",
+ "\n",
+ "request_schema = Schema([\n",
+ " ColumnSchema(\"userId\", dtype=md.int64),\n",
+ " ColumnSchema(\"movieId\", dtype=md.int64),\n",
+ "])\n",
+ "\n",
+ "inputs = merlin_triton.convert_df_to_triton_input(request_schema, batch, grpcclient.InferInput)\n",
+ "\n",
+ "outputs = [\n",
+ " grpcclient.InferRequestedOutput(col)\n",
+ " for col in [\"rating/binary_classification_task\"]\n",
+ "]\n",
+ "\n",
+ "with grpcclient.InferenceServerClient(\"localhost:8001\") as client:\n",
+ " response = client.infer(\"executor_model\", inputs, request_id=\"1\", outputs=outputs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "249a6043",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[[0.7387646]\n",
+ " [0.6237726]\n",
+ " [0.7899361]] (3, 1)\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(response.as_numpy(\"rating/binary_classification_task\"), response.as_numpy(\"rating/binary_classification_task\").shape)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "a3009009",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ " tags | \n",
+ " dtype | \n",
+ " is_list | \n",
+ " is_ragged | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " userId | \n",
+ " () | \n",
+ " DType(name='int64', element_type=<ElementType.... | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " movieId | \n",
+ " () | \n",
+ " DType(name='int64', element_type=<ElementType.... | \n",
+ " False | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "[{'name': 'userId', 'tags': set(), 'properties': {}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}, {'name': 'movieId', 'tags': set(), 'properties': {}, 'dtype': DType(name='int64', element_type=, element_size=64, element_unit=None, signed=True, shape=Shape(dims=(Dimension(min=0, max=None),))), 'is_list': False, 'is_ragged': False}]"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "workflow.input_schema"
+ ]
+  }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.10"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/tutorials/tf/README.md b/examples/tutorials/tf/README.md
new file mode 100644
index 000000000..b13015ce9
--- /dev/null
+++ b/examples/tutorials/tf/README.md
@@ -0,0 +1 @@
+# TensorFlow Tutorials
\ No newline at end of file