diff --git a/.nojekyll b/.nojekyll index cadad4d3..09174f77 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -09c6edb9 \ No newline at end of file +9c9b93e0 \ No newline at end of file diff --git a/html/images/logo-sparklyr.png b/html/images/logo-sparklyr.png index bf7dd9ce..52fcbc75 100644 Binary files a/html/images/logo-sparklyr.png and b/html/images/logo-sparklyr.png differ diff --git a/html/images/logo-tensorflow.png b/html/images/logo-tensorflow.png index ddb17bcb..c0ecf986 100644 Binary files a/html/images/logo-tensorflow.png and b/html/images/logo-tensorflow.png differ diff --git a/html/keras.html b/html/keras.html index d4cfdbf1..5626d748 100644 --- a/html/keras.html +++ b/html/keras.html @@ -162,30 +162,30 @@

On this page

  • Installation
  • Training an Image Recognizer on MNIST Data
  • -
  • Working with keras models +
  • Working with Keras models
  • More layers
  • Preprocessing
  • Pre-trained models
  • Callbacks
  • @@ -235,8 +235,8 @@

    Deep Learning with Keras :: Cheatsheet

    Intro

    -

    Keras is a high-level neural networks API developed with a focus on enabling fast experimentation. It supports multiple back-ends, including TensorFlow, CNTK and Theano.

    -

    TensorFlow is a lower level mathematical library for building deep neural network architectures. The keras R package makes it easy to use Keras and TensorFlow in R.

    +

    Keras is a high-level neural networks API developed with a focus on enabling fast experimentation. It supports multiple back-ends, including TensorFlow, Jax and Torch.

    +

    Backends like TensorFlow are lower level mathematical libraries for building deep neural network architectures. The keras3 R package makes it easy to use Keras with any backend in R.

    1. Define: Model, Sequential model, Multi-GPU model
    2. Compile: Optimizer, Loss, Metrics
    3. @@ -245,17 +245,17 @@

      Intro

    4. Predict: Classes, Probability

    Read more at:
    -https://tensorflow.rstudio.com
    +https://keras.posit.co
    https://www.manning.com/books/deep-learning-with-r-second-edition

    Installation

    -

    The keras R package uses the Python keras library. You can install all the prerequisites directly from R: https://tensorflow.rstudio.com/install.

    +

    The keras3 R package uses the Python keras library. You can install all the prerequisites directly from R. See ?keras3::install_keras for details and options.

    -
    library(keras)
    -install_keras()
    +
    library(keras3)
    +reticulate::install_python()
    +install_keras()
    -

    See ?install_keras for GPU instructions.

    -

    This installs the required libraries in an Anaconda environment or virtual environment r-tensorflow.

    +

    This installs the required libraries in a virtual environment named ‘r-keras’. It will automatically detect if a GPU is available.

    Training an Image Recognizer on MNIST Data

    @@ -263,106 +263,136 @@

    # input layer: use MNIST images
     mnist <- dataset_mnist()
    -x_train <- mnist$train$x
    -y_train <- mnist$train$y 
    -x_test <- mnist$test$x
    -y_test <- mnist$test$y
    -
    -# reshape and rescale
    -x_train <- array_reshape(x_train, c(nrow(x_train), 784)) 
    -x_test <- array_reshape(x_test, c(nrow(x_test), 784)) 
    -x_train <- x_train / 255
    -x_test <- x_test / 255
    +x_train <- mnist$train$x;  y_train <- mnist$train$y
    +x_test <- mnist$test$x;  y_test <- mnist$test$y
    +
    +# reshape and rescale
    +x_train <- array_reshape(x_train, c(nrow(x_train), 784))
    +x_test <- array_reshape(x_test, c(nrow(x_test), 784))
    +x_train <- x_train / 255;  x_test <- x_test / 255
    +
    +y_train <- to_categorical(y_train, 10)
    +y_test <- to_categorical(y_test, 10)
     
    -y_train <- to_categorical(y_train, 10) 
    -y_test <- to_categorical(y_test, 10)
    -
    -# defining the model and layers
    -model <- keras_model_sequential() 
    -model %>%
    -  layer_dense(units = 256, activation = 'relu', input_shape = c(784)) %>%
    -  layer_dropout(rate = 0.4) %>% 
    -  layer_dense(units = 128, activation = 'relu') %>% 
    -  layer_dense(units = 10, activation = 'softmax')
    -  
    -# compile (define loss and optimizer)
    -model %>%
    -  compile(
    -    loss = 'categorical_crossentropy', 
    -    optimizer = optimizer_rmsprop(), 
    -    metrics = c('accuracy')
    -)
    +# defining the model and layers
    +model <-
    +  keras_model_sequential(input_shape = c(28, 28, 1))
    +model |>
    +  layer_conv_2d(filters = 32, kernel_size = c(3, 3),
    +                activation = "relu") |>
    +  layer_max_pooling_2d(pool_size = c(2, 2)) |>
    +  layer_conv_2d(filters = 64, kernel_size = c(3, 3),
    +                activation = "relu") |>
    +  layer_max_pooling_2d(pool_size = c(2, 2)) |>
    +  layer_flatten() |>
    +  layer_dropout(rate = 0.5) |>
    +  layer_dense(units = num_classes,
    +              activation = "softmax")
    +
    +# View the model summary
    +summary(model)
    +plot(model)
     
    -# train (fit)
    -model %>% fit(
    -  x_train, y_train,
    -  epochs = 30, batch_size = 128, 
    -  validation_split = 0.2
    -)
    -
    -model %>% evaluate(x_test, y_test) 
    -model %>% predict_classes(x_test)
    +# compile (define loss and optimizer) +model |> + compile( + loss = 'categorical_crossentropy', + optimizer = optimizer_rmsprop(), + metrics = c('accuracy') +) + +# train (fit) +model |> fit( + x_train, y_train, + epochs = 30, batch_size = 128, + validation_split = 0.2 +) +model |> evaluate(x_test, y_test) +model |> predict(x_test) + +# save the full model +save_model(model, "mnist-classifier.keras") + +# deploy for serving inference. +dir.create("serving-mnist-classifier") +export_savedmodel(model, "serving-mnist-classifier/1") +rsconnect::deployTFModel("serving-mnist-classifier")

    -

    Working with keras models

    +

    Working with Keras models

    Define a Model

    - +
    +

    Functional API: keras_input() and keras_model()

    +

    Define a Functional Model with inputs and outputs.

    +
    inputs <- keras_input(<input-shape>)
    +outputs <- inputs |>
    +  layer_dense() |> layer_...
    +model <- keras_model(inputs, outputs)
    +
    +
    +

    Sequential API: keras_model_sequential()

    +

    Define a Sequential Model composed of a linear stack of layers

    +
    model <-
    +  keras_model_sequential(<input-shape>) |>
    +  layer_dense() |> layer_...
    +
    +
    +

    Subclassing API: Model()

    +

    Subclass the base Model class

    +

    Compile a Model

    - +

    compile(object, optimizer, loss, metrics = NULL): Configure a Keras model for training.

    Fit a Model

    +

    fit(object, x = NULL, y = NULL, batch_size = NULL, epochs = 10, verbose = 1, callbacks = NULL, ...): Train a Keras model for a fixed number of epochs (iterations)

    +

    Customize training:

    + +
    +
    +

    Inspect a Model

    Evaluate a Model

    Predict

    -
    -

    Other Model Operations

    +
    +

    Save/Load a Model

      -
    • summary(): Print a summary of a Keras model.

    • -
    • export_savedmodel(): Export a saved model.

    • -
    • get_layer(): Retrieves a layer based on either its name (unique) or index.

    • -
    • pop_layer(): Remove the last layer in a model.

    • -
    • save_model_hdf5(); load_model_hdf5(): Save/Load models using HDF5 files.

    • -
    • serialize_model(); unserialize_model(): Serialize a model to an R object.

    • -
    • clone_model(): Clone a model instance.

    • -
    • freeze_weights(); unfreeze_weights()

    • +
    • save_model(); load_model(): Save/Load models using the “.keras” file format.

    • +
    • save_model_weights(); load_model_weights(): Save/load model weights to/from “.h5” files.

    • +
    • save_model_config(); load_model_config(): Save/load model architecture to/from a “.json” file.

    Core Layers

      -
    • layer_input(): Input layer.

    • layer_dense(): Add a densely-connected NN layer to an output.

    • +
    • layer_einsum_dense(): Add a dense layer with arbitrary dimensionality.

    • layer_activation(): Apply an activation function to an output.

    • layer_dropout(): Applies Dropout to the input.

    • layer_reshape(): Reshapes an output to a certain shape.

    • @@ -402,90 +432,150 @@

      Pooling Layers

    • layer_global_average_pooling_1d(); layer_global_average_pooling_2d(); layer_global_average_pooling_3d(): Global average pooling.

    -
    -

    Activation Layers

    +
    +
    +

    Preprocessing

    +
    +

    Image Preprocessing

    +
    +

    Load Images

      -
    • layer_activation(object, activation): Apply an activation function to an output.

    • -
    • layer_activation_leaky_relu(): Leaky version of a rectified linear unit.

    • -
    • layer_activation_parametric_relu(): Parametric rectified linear unit.

    • -
    • layer_activation_thresholded_relu(): Thresholded rectified linear unit.

    • -
    • layer_activation_elu(): Exponential linear unit.

    • +
    • image_dataset_from_directory(): Create a TF Dataset from image files in a directory.

    • +
    • image_load(), image_from_array(), image_to_array(), image_array_save(): Work with PIL Image instances

    -
    -

    Dropout Layers

    +
    +

    Transform Images

    +

    Operations that transform image tensors in deterministic ways.

    +
      +
    • op_image_crop()
    • +
    • op_image_extract_patches()
    • +
    • op_image_pad()
    • +
    • op_image_resize()
    • +
    • op_image_affine_transform()
    • +
    • op_image_map_coordinates()
    • +
    • op_image_rgb_to_grayscale()
    • +
    +

    Resize images without aspect ratio distortion.

      -
    • layer_dropout(): Applies dropout to the input.

    • -
    • layer_spatial_dropout_1d(); layer_spatial_dropout_2d(); layer_spatial_dropout_3d(): Spatial 1D to 3D version of dropout

    • +
    • image_smart_resize():
    -
    -

    Recurrent Layers

    +
    +

    Image Layers

    +

    Builtin image preprocessing layers. Note, any image operation function can also be used as a layer in a Model, or used in layer_lambda().

    +
    +
    Image Preprocessing Layers
      -
    • layer_simple_rnn(): Fully-connected RNN where the output is to be fed back to input.

    • -
    • layer_gru(): Gated recurrent unit - Cho et al.

    • -
    • layer_cudnn_gru(): Fast GRU implementation backed by CuDNN.

    • -
    • layer_lstm(): Long-Short Term Memory unit - Hochreiter 1997.

    • -
    • layer_cudnn_lstm(): Fast LSTM implementation backed by CuDNN.

    • +
    • layer_resizing()
    • +
    • layer_rescaling()
    • +
    • layer_center_crop()
    -
    -

    Locally Connected Layers

    +
    +
    Image Augmentation Layers
    +

    Preprocessing layers that randomly augment image inputs during training.

      -
    • layer_locally_connected_1d(); layer_locally_connected_2d(): Similar to convolution, but weights are not shared, i.e. different filters for each patch.
    • +
    • layer_random_crop()
    • +
    • layer_random_flip()
    • +
    • layer_random_translation()
    • +
    • layer_random_rotation()
    • +
    • layer_random_zoom()
    • +
    • layer_random_contrast()
    • +
    • layer_random_brightness()
    -
    -

    Preprocessing

    -
    -

    Sequence Preprocessing

    +
    +
    +

    Sequence Preprocessing

      -
    • pad_sequences(): Pads each sequence to the same length (length of the longest sequence).

    • -
    • skipgrams(): Generates skipgram word pairs.

    • -
    • make_sampling_table(): Generates word rank-based probabilistic sampling table.

    • +
    • timeseries_dataset_from_array(): Generate a TF Dataset of sliding windows over a timeseries provided as array.

    • +
    • audio_dataset_from_directory(): Generate a TF Dataset from audio files.

    • +
    • pad_sequences(): Pad sequences to the same length

    Text Preprocessing

      -
    • text_tokenizer(): Text tokenization utility.

    • -
    • fit_text_tokenizer(): Update tokenizer internal vocabulary.

    • -
    • save_text_tokenizer(); load_text_tokenizer(): Save a text tokenizer to an external file.

    • -
    • texts_to_sequences(); texts_to_sequences_generator(): Transforms each text in texts to sequence of integers.

    • -
    • texts_to_matrix(); sequences_to_matrix(): Convert a list of sequences into a matrix.

    • -
    • text_one_hot(): One-hot encode text to word indices.

    • -
    • text_hashing_trick(): Converts a text to a sequence of indexes in a fixed-size hashing space.

    • -
    • text_to_word_sequence(): Convert text to a sequence of words (or tokens).

    • +
    • text_dataset_from_directory(): Generate a TF Dataset from text files in a directory.

    • +
    • layer_text_vectorization(), get_vocabulary(), set_vocabulary(): Map text to integer sequences.

    • +
    +
    +
    +

    Numerical Features Preprocessing

    +
      +
    • layer_normalization(): Normalizes continuous features.

    • +
    • layer_discretization(): Buckets continuous features by ranges.

    • +
    +
    +
    +

    Categorical Features Preprocessing

    +
      +
    • layer_category_encoding(): Encode integer features.

    • +
    • layer_hashing(): Hash and bin categorical features.

    • +
    • layer_hashed_crossing(): Cross features using the “hashing trick”.

    • +
    • layer_string_lookup(): Map strings to (possibly encoded) indices.

    • +
    • layer_integer_lookup(): Map integers to (possibly encoded) indices.

    -
    -

    Image Proprocessing

    +
    +

    Tabular Data

    +

    One-stop utility for preprocessing and encoding structured data. Define a feature space from a list of table columns (features).

    +
    feature_space <- layer_feature_space(features = list(<features>))
    +

    Adapt the feature space to a dataset

    +
    adapt(feature_space, dataset)
    +

    Use the adapted feature_space preprocessing layer as a layer in a Keras Model, or in the data input pipeline with tfdatasets::dataset_map()

    +

    Available features:

      -
    • image_load(): Loads an image into PIL format.

    • -
    • flow_images_from_data(); flow_images_from_directory(): Generates batches of augmented/normalized data from images and labels, or a directory.

    • -
    • image_data_generator(): Generate minibatches of image data with real-time data augmentation.

    • -
    • fit_image_data_generator(): Fit image data generator internal statistics to some sample data.

    • -
    • generator_next(): Retrieve the next item.

    • -
    • image_to_array(); image_array_resize(); image_array_save(): 3D array representation.

    • +
    • feature_float()
    • +
    • feature_float_rescaled()
    • +
    • feature_float_normalized()
    • +
    • feature_float_discretized()
    • +
    • feature_integer_categorical()
    • +
    • feature_integer_hashed()
    • +
    • feature_string_categorical()
    • +
    • feature_string_hashed()
    • +
    • feature_cross()
    • +
    • feature_custom()

    Pre-trained models

    Keras applications are deep learning models that are made available alongside pre-trained weights. These models can be used for prediction, feature extraction, and fine-tuning.

    +

    MobileNetV3 Model, pre-trained on ImageNet

    +
      +
    • application_mobilenet_v3_large()
    • +
    • application_mobilenet_v3_small()
    • +
    +

    EfficientNetV2 Model, pre-trained on ImageNet

    +
      +
    • application_efficientnet_v2s()
    • +
    • application_efficientnet_v2m()
    • +
    • application_efficientnet_v2l()
    • +
    +

    Inception-ResNet v2 and Inception v3 models, with weights trained on ImageNet

    +
      +
    • application_inception_resnet_v2()
    • +
    • application_inception_v3()
    • +
    +

    VGG16 and VGG19 models

    +
      +
    • application_vgg16()
    • +
    • application_vgg19()
    • +
    +

    ResNet50 model

      -
    • application_xception(); xception_preprocess_input(): Xception v1 model.

    • -
    • application_inception_v3(); inception_v3_preprocess_input(): Inception v3 model, with weights pre-trained on ImageNet.

    • -
    • application_inception_resnet_v2(); inception_resnet_v2_preprocess_input(): Inception-ResNet v2 model, with weights trained on ImageNet.

    • -
    • application_vgg16(); application_vgg19(): VGG16 and VGG19 models.

    • -
    • application_resnet50(): ResNet50 model.

    • -
    • application_mobilenet(); mobilenet_preprocess_input(); mobilenet_decode_predictions(); mobilenet_load_model_hdf5(): MobileNet model architecture.

    • +
    • application_resnet50():
    -

    ImageNet is a large database of images with labels, extensively used for deep learning.

    +

    NASNet model architecture

      -
    • imagenet_preprocess_input(); imagenet_decode_predictions(): Preprocesses a tensor encoding a batch of images for ImageNet, and decodes predictions.
    • +
    • application_nasnet_large()
    • +
    • application_nasnet_mobile()
    +

    ImageNet is a large database of images with labels, extensively used for deep learning

    +

    Preprocesses a tensor encoding a batch of images for an application, and decodes predictions from an application. - application_preprocess_inputs() - application_decode_predictions()

    Callbacks

    @@ -497,12 +587,12 @@

    Callbacks


    CC BY SA Posit Software, PBC • info@posit.coposit.co

    -

    Learn more at tensorflow.rstudio.com.

    -

    Updated: 2024-05.

    +

    Learn more at keras.posit.co.

    +

    Updated: 2024-06.

    -
    packageVersion("keras")
    +
    packageVersion("keras3")
    -
    [1] '2.15.0'
    +
    [1] '1.0.0'

    diff --git a/html/sparklyr.html b/html/sparklyr.html index 0bf91213..e3b0f7a9 100644 --- a/html/sparklyr.html +++ b/html/sparklyr.html @@ -416,9 +416,9 @@

    dplyr verbs

    Supported in Databricks Connect v2

    Translates into Spark SQL statements

    -
    copy_to(sc, mtcars) %>%
    -  mutate(trm = ifelse(am == 0, "auto", "man")) %>%
    -  group_by(trm) %>%
    +
    copy_to(sc, mtcars) |>
    +  mutate(trm = ifelse(am == 0, "auto", "man")) |>
    +  group_by(trm) |>
       summarise_all(mean)
    @@ -474,10 +474,10 @@

    Visualize

    dplyr + ggplot2

    Supported in Databricks Connect v2

    -
    copy_to(sc, mtcars) %>%
    -  group_by(cyl) %>%
    -  summarise(mpg_m = mean(mpg)) %>% # Summarize in Spark
    -  collect() %>%                    # Collect results in R
    +
    copy_to(sc, mtcars) |>
    +  group_by(cyl) |>
    +  summarise(mpg_m = mean(mpg)) |> # Summarize in Spark
    +  collect() |>                    # Collect results in R
       ggplot() +
       geom_col(aes(cyl, mpg_m))        # Create plot
    @@ -607,7 +607,7 @@

    Distributed R

    )
    -
    copy_to(sc, mtcars) %>%
    +
    copy_to(sc, mtcars) |>
       spark_apply(
         nrow, # R only function
         group_by = "am", 
    @@ -617,7 +617,7 @@ 

    Distributed R


    CC BY SA Posit Software, PBC • info@posit.coposit.co

    Learn more at spark.posit.co and therinspark.com.

    -

    Updated: 2024-05.

    +

    Updated: 2024-06.

    packageVersion("sparklyr")
    diff --git a/html/sparklyr_files/sparklyr/execute-results/html.json b/html/sparklyr_files/sparklyr/execute-results/html.json deleted file mode 100644 index 4b8e395b..00000000 --- a/html/sparklyr_files/sparklyr/execute-results/html.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "hash": "319f3fddc68fd7b995e659573b518077", - "result": { - "engine": "knitr", - "markdown": "---\ntitle: \"Data Science in Spark with Sparklyr :: Cheat Sheet\"\nformat: \n html:\n toc: true\n highlight-style: a11y-dark\neditor: visual\n---\n\n\n\n\n\n## Intro\n\nsparklyr is an R interface for **Apache Spark**, it provides a complete **dplyr** backend and the option to query directly using **Spark SQL** statement. With **sparklyr**, you can orchestrate distributed machine learning using either **Spark's MLlib** or **H2O** Sparkling Water. Starting with **version 1.044**, **RStudio Desktop**, **Server and Pro include integrated support for the sparklyr package**. You can create and manage connections to Spark clusters and local Spark instances from inside the IDE.\n\n### RStudio Integrates with sparklyr\n\nTODO Screenshots\n\nExpand to read about the sparklyr features in the RStudio IDE.\n\n#### Sparklyr features in the RStudio IDE\n\n- Open connection log\n- Disconnect\n- Open the Spark UI\n- Spark & Hive Tables\n- Preview 1K rows\n\n## Cluster Deployment\n\nIn a managed cluster, the driver node (RStudio, Spark, Hive) connects to the cluster manager (Yarn, Mesos) which connects to the worker nodes (Spark).\n\nIn a stand alone cluster the driver node (RStudio, Spark) connects directly to the worker nodes (Spark).\n\n## Data Science Toolchain with Spark + sparklyr\n\n1. Import\n - Export an R DataFrame\n\n - Read a file\n\n - Read existing Hive table\n2. Tidy/Wrangle\n - dplyr verb\n\n - Direct Spark SQL (DBI)\n\n - SDF function (Scala API)\n3. Understand\n - Transform - Transformer function\n\n - Visualize - Collect data into R for plotting\n\n - Model - Spark MLlib and H2O Extension\n4. 
Communicate\n - Collect data into R\n\n - Share plots, documents, and apps\n\n## Getting Started\n\n### Local Mode (no cluster required)\n\n1. Install a local version of Spark:\n\n\n\n\n\n ::: {.cell}\n \n ```{.r .cell-code}\n spark_install(\"2.0.1\")\n ```\n :::\n\n\n\n\n\n2. Open a connection:\n\n\n\n\n\n ::: {.cell}\n \n ```{.r .cell-code}\n sc <- spark_connect(master = \"local\")\n ```\n :::\n\n\n\n\n\n### On a Mesos Managed Cluster\n\n1. Install RStudio Server or Pro on one of the existing nodes\n\n2. Locate path to the cluster's Spark directory\n\n3. Open a connection\n\n\n\n\n\n ::: {.cell}\n \n ```{.r .cell-code}\n spark_connect(master = \"[mesos URL]\",\n version = \"1.6.2\", \n spark_home = [Cluster’s Spark path])\n ```\n :::\n\n\n\n\n\n### Using Livy (Experimental)\n\n1. The Livy REST application should be running on the cluster\n\n2. Connect to the cluster\n\n\n\n\n\n ::: {.cell}\n \n ```{.r .cell-code}\n sc <- spark_connect(method = \"livy\", \n master = \"http://host:port\")\n ```\n :::\n\n\n\n\n\n### On a Yarn Managed Cluster\n\n1. Install RStudio Server or RStudio Pro on one of the existing nodes, preferably an edge node\n\n2. Locate path to the cluster's Spark Home Directory, it normally is `/usr/lib/spark`\n\n3. Open a connection\n\n\n\n\n\n ::: {.cell}\n \n ```{.r .cell-code}\n spark_connect(master=\"yarn-client\", \n version = \"1.6.2\", \n spark_home = [Cluster’s Spark path])\n ```\n :::\n\n\n\n\n\n### On a Spark Standaline Cluster\n\n1. Install RStudio Server or RStudio Pro on one of the existing nodes or a server in the same LAN\n\n2. Install a local version of Spark:\n\n\n\n\n\n ::: {.cell}\n \n ```{.r .cell-code}\n spark_install(version = \"2.0.1\")\n ```\n :::\n\n\n\n\n\n3. 
Open a connection\n\n\n\n\n\n ::: {.cell}\n \n ```{.r .cell-code}\n spark_connect(master=\"spark:// host:port\",\n version = \"2.0.1\", \n spark_home = spark_home_dir())\n ```\n :::\n\n\n\n\n\n## Tuning Spark\n\n### Example Configuration\n\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nconfig <- spark_config() \nconfig$spark.executor.cores <- 2\nconfig$spark.executor.memory <- \"4G\" \nsc <- spark_connect (master=\"yarn-client\", config = config, version = \"2.0.1\")\n```\n:::\n\n\n\n\n\n### Important Tuning Parameters (with defaults)\n\n- `spark.yarn.am.cores`\n- `spark.yarn.am.memory`: 512m\n- `spark.network.timeout`: 120s\n- `spark.executor.memory`: 1g\n- `spark.executor.cores`: 1\n- `spark.executor.instances`\n- `spark.executor.extraJavaOptions`\n- `spark.executor.heartbeatInterval`: 10s\n- `sparklyr.shell.executor-memory`\n- `sparklyr.shell.driver-memory`\n\n## Using sparklyr\n\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(sparklyr)\nlibrary(dplyr)\nlibrary(ggplot2)\nlibrary(tidyr)\nset.seed(100)\n\n#Install Spark locally\nspark_install(\"2.0.1\")\n\n# Connect to local version\nsc <- spark_connect(master = \"local\")\n\n# Copy data to Spark memory\nimport_iris <- copy_to(sc, \n iris, \n \"spark_iris\", \n overwrite = TRUE)\n\n# Partition data\npartition_iris <- sdf_partition(import_iris,\n training = 0.5, \n testing = 0.5)\n\n#Create a hive metadata for each partition\n\nsdf_register(partition_iris,\n c(\"spark_iris_training\", \"spark_iris_test\"))\n \nspark_connect(master = \"[mesos URL]\", \n version = \"1.6.2\", spark_home = [Cluster’s Spark path])\n\ntidy_iris <- tbl(sc, \"spark_iris_training\") %>% \n select(Species, Petal_Length, Petal_Width)\n\n# Spark ML Decision Tree Model\nmodel_iris <- tidy_iris %>%\n ml_decision_tree(response = \"Species\",\n features = c(\"Petal_Length\", \"Petal_Width\"))\n\n# Create reference to Spark table\ntest_iris <- tbl(sc, \"spark_iris_test\")\n\n# Bring data back into R memory for plotting\npred_iris <- 
sdf_predict(model_iris, test_iris) %>% \n collect\n\npred_iris %>% inner_join(data.frame(prediction = 0:2, lab = model_iris$model.parameters$labels)) %>%\n ggplot(aes(Petal_Length, Petal_Width, col = lab)) + geom_point()\n\n# Disconnect\nspark_disconnect(sc)\n```\n:::\n\n\n\n\n\n\n\n## Reactivity\n\n### Copy a Data Frame Into Spark\n\n- `sdf_copy_to(sc, x, name, memory, repartition, overwrite)`\n\n\n\n\n\n ::: {.cell}\n \n ```{.r .cell-code}\n sdf_copy_to(sc, iris, \"spark_iris\")\n ```\n :::\n\n\n\n\n\n### Import Into Spark From a File\n\nArguments that apply to all functions: `sc`, `name`, `path`, `options = list()`, `repartition = 0`, `memory = TRUE`, `overwrite = TRUE`\n\n- `spark_read_csv(header = TRUE, columns = NULL, infer_schema = TRUE, delimiter = \",\", quote = \"\\\"\", escape = \"\\\\\", charset = \"UTF-8\", null_value = NULL)`\n\n- `spark_read_json()`\n\n- `spark_read_parquet()`\n\n### Spark SQL Commands\n\n- `DBI::dbWriteTable(conn, value)`\n\n\n\n\n\n ::: {.cell}\n \n ```{.r .cell-code}\n DBI::dbWriteTable(sc, \"spark_iris\", iris)\n ```\n :::\n\n\n\n\n\n### From a Table in Hive\n\n- `tbl_cache(sc, name, force = TRUE)`: Loads the table into memory\n\n\n\n\n\n ::: {.cell}\n \n ```{.r .cell-code}\n my_var <- tbl_cache(sc, name= \"hive_iris\")\n ```\n :::\n\n\n\n\n\n- `dplyr::tbl(scr, ...)`: Creates a reference to the table without loading it into memory\n\n\n\n\n\n ::: {.cell}\n \n ```{.r .cell-code}\n my_var <- dplyr::tbl(sc, name= \"hive_iris\")\n ```\n :::\n\n\n\n\n\n## Wrangle\n\n### Spark SQL via dplyer Verbs\n\n- Translates into Spark SQL statements:\n\n\n\n\n\n ::: {.cell}\n \n ```{.r .cell-code}\n my_table <- my_var %>% \n filter(Species==\"setosa\") %>% \n sample_n(10)\n ```\n :::\n\n\n\n\n\n### Direct Spark SQL Commands\n\n- `DBI::dbGetQuery(conn, statement)`\n\n\n\n\n\n ::: {.cell}\n \n ```{.r .cell-code}\n my_table <- DBI::dbGetQuery(sc, \"SELECT * FROM iris LIMIT 10\")\n ```\n :::\n\n\n\n\n\n### Scala API via SDF Functions\n\n- 
`sdf_mutate(.data)`: Works like dplyr mutate function\n\n- `sdf_partition(x, ..., weights = NULL, seed = sample (.Machine$integer.max, 1))`\n\n\n\n\n\n ::: {.cell}\n \n ```{.r .cell-code}\n sdf_partition(x, training = 0.5, test = 0.5) sdf_register(x, name = NULL)\n ```\n :::\n\n\n\n\n\n- `sdf_register(x, name = NULL)`: Gives a Spark DataFrame a table name\n\n- `sdf_sample(x, fraction = 1, replacement = TRUE, seed = NULL)`\n\n- `sdf_sort(x, columns)`: Sorts by \\>=1 columns in ascending order\n\n- `sdf_with_unique_id(x, id = \"id\")`\n\n- `sdf_predict(object, newdata)`: Spark DataFrame with predicted values\n\n### ML Transformers\n\nExample:\n\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nft_binarizer(my_table,\n input.col=\"Petal_Length\", \n output.col=\"petal_large\", \n threshold=1.2)\n```\n:::\n\n\n\n\n\nArguments that apply to all functions: `x`, `input.col = NULL`, `output.col = NULL`\n\n- `ft_binarizer(threshold = 0.5)`: Assigned values based on threshold\n\n- `ft_bucketizer(splits)`: Numeric column to discretized column\n\n- `ft_discrete_cosine_transform(inverse = FALSE)`: Time domain to frequency domain\n\n- `ft_elementwise_product(scaling.col)`: Element-wise product between 2 cols\n\n- `ft_index_to_string()`: Index labels back to label as strings\n\n- `ft_one_hot_encoder()`: Continuous to binary vectors\n\n- `ft_quantile_discretizer(n.buckets=5L)`: Continuous to binned categorical values\n\n- `ft_sql_transformer(sql)`\n\n- `ft_string_indexer(params = NULL)`: Column of labels into a column of label indices\n\n- `ft_vector_assembler()`: Combine vectors into single row-vector\n\n## Visulize & Communicate\n\n### Download Data to R Memory\n\nExample:\n\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nr_table <- collect(my_table) \nplot(Petal_Width~Petal_Length, \n data=r_table)\n```\n:::\n\n\n\n\n\n- `dplyr::collect(x)`: Download a Spark DataFrame to an R DataFrame\n\n- `sdf_read_column(x, column)`: Returns contents of a single column to R\n\n### Save From Spark to 
File System\n\nArguments that apply to all functions: `x`, `path`\n\n- `spark_read_csv( header = TRUE, delimiter = \",\", quote = \"\\\"\", escape = \"\\\\\", charset = \"UTF-8\", null_value = NULL)`\n\n- `spark_read_json(mode = NULL)`\n\n- `spark_read_parquet(mode = NULL)`\n\n## Reading & Writing from Apache Spark\n\nWrite to Spark, from R with `sdf_copy_to()`, `dplyr::copy_to()`, or `DBI::sbWriteTable()`.\n\nRead from Spark, to R with `sdf_collect()`, `dplyr::collect()`, `sdf_read_column`.\n\n------------------------------------------------------------------------\n\nWrite to Spark, from Hive with `tbl_cache()` or `dplyr::tbl()`.\n\n------------------------------------------------------------------------\n\nWrite to Spark from the file system with `spark_read_()`.\n\nRead from Spark to the file system with `spark_write_()`.\n\n## Extensions\n\nCreate an R package that calls the full Spark API & provide interfaces to Park packages.\n\n### Core Types\n\n- `spark_connection()`: Connection between R and the Spark shell process\n\n- `spark_jobj()`: Instance og a remote Spark object\n\n- `spark_dataframe()`: Instance of a remote Spark DataFrame object\n\n### Call Spark From R\n\n- `invoke()`: Call a method on a Java object\n\n- `invoke_new()`: Create a new object by invoking a constructor\n\n- `invoke_static()`: Call a static method on an object\n\n### Machine Learning Extensions\n\n- `ml_create_dummy_variables()`\n\n- `ml_prepare_dataframe()`\n\n- `ml_prepare_response_features_intercept()`\n\n- `ml_options()`\n\n- `ml_model()`\n\n## Model (MLlib)\n\nExample:\n\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nml_decision_tree(my_table, \n response = \"Species\", features = c(\"Petal_Length\" , \"Petal_Width\"))\n```\n:::\n\n\n\n\n\n- `ml_als_factorization(x, user.column = \"user\", rating.column = \"rating\", item.column = \"item\", rank = 10L, regularization.parameter = 0.1, iter.max = 10L, ml.options = ml_options())`\n\n- `ml_decision_tree(x, response, features, max.bins 
= 32L, max.depth = 5L, type = c(\"auto\", \"regression\", \"classification\"), ml.options = ml_options())`: Same options for: ml_gradient_boosted_trees\n\n- `ml_generalized_linear_regression(x, response, features, intercept = TRUE, family = gaussian(link = \"identity\"), iter.max = 100L, ml.options = ml_options())`\n\n- `ml_kmeans(x, centers, iter.max = 100, features = dplyr::tbl_vars(x), compute.cost = TRUE, tolerance = 1e-04, ml.options = ml_options())`\n\n- `ml_lda(x, features = dplyr::tbl_vars(x), k = length(features), alpha = (50/k) + 1, beta = 0.1 + 1, ml.options = ml_options())`\n\n- `ml_linear_regression(x, response, features, intercept = TRUE, alpha = 0, lambda = 0, iter.max = 100L, ml.options = ml_options())`: Same options for: ml_logistic_regression\n\n- `ml_multilayer_perceptron(x, response, features, layers, iter.max = 100, seed = sample(.Machine$integer.max, 1), ml.options = ml_options())`\n\n- `ml_naive_bayes(x, response, features, lambda = 0, ml.options = ml_options())`\n\n- `ml_one_vs_rest(x, classifier, response, features, ml.options = ml_options())`\n\n- `ml_pca(x, features = dplyr::tbl_vars(x), ml.options = ml_options())`\n\n- `ml_random_forest(x, response, features, max.bins = 32L, max.depth = 5L, num.trees = 20L, type = c(\"auto\", \"regression\", \"classification\"), ml.options = ml_options())`\n\n- `ml_survival_regression(x, response, features, intercept = TRUE,censor = \"censor\", iter.max = 100L, ml.options = ml_options())`\n\n- `ml_binary_classification_eval(predicted_tbl_spark, label, score, metric = \"areaUnderROC\")`\n\n- `ml_classification_eval(predicted_tbl_spark, label, predicted_lbl, metric = \"f1\")`\n\n- `ml_tree_feature_importance(sc, model)`\n", - "supporting": [], - "filters": [ - "rmarkdown/pagebreak.lua" - ], - "includes": {}, - "engineDependencies": {}, - "preserve": {}, - "postProcess": true - } -} \ No newline at end of file diff --git a/index.html b/index.html index 6864a379..7bbe3e91 100644 --- a/index.html +++ 
b/index.html @@ -214,7 +214,7 @@

    Posit Cheatsheets

    -
    + -
    + -
    + -
    + -
    + -
    + -
    + -
    + -
    + -
    + -
    + -
    + -
    + -
    + -
    + -
    + -
    + -
    +

    Hex logo for sparklyr - Neon shooting stars of various shapes and sizes flying across a black and grey background.

    diff --git a/keras.pdf b/keras.pdf index 66f3f596..9be6280b 100644 Binary files a/keras.pdf and b/keras.pdf differ diff --git a/pngs/sparklyr.png b/pngs/sparklyr.png index 549d10f7..7dc8bcc7 100644 Binary files a/pngs/sparklyr.png and b/pngs/sparklyr.png differ diff --git a/search.json b/search.json index 3d54633a..cd0cfe5e 100644 --- a/search.json +++ b/search.json @@ -256,7 +256,7 @@ "href": "html/sparklyr.html#dplyr-verbs", "title": "Data science in Spark with sparklyr :: Cheatsheet", "section": "dplyr verbs", - "text": "dplyr verbs\nSupported in Databricks Connect v2\nTranslates into Spark SQL statements\n\ncopy_to(sc, mtcars) %>%\n mutate(trm = ifelse(am == 0, \"auto\", \"man\")) %>%\n group_by(trm) %>%\n summarise_all(mean)" + "text": "dplyr verbs\nSupported in Databricks Connect v2\nTranslates into Spark SQL statements\n\ncopy_to(sc, mtcars) |>\n mutate(trm = ifelse(am == 0, \"auto\", \"man\")) |>\n group_by(trm) |>\n summarise_all(mean)" }, { "objectID": "html/sparklyr.html#tidyr", @@ -277,7 +277,7 @@ "href": "html/sparklyr.html#dplyr-ggplot2", "title": "Data science in Spark with sparklyr :: Cheatsheet", "section": "dplyr + ggplot2", - "text": "dplyr + ggplot2\nSupported in Databricks Connect v2\n\ncopy_to(sc, mtcars) %>%\n group_by(cyl) %>%\n summarise(mpg_m = mean(mpg)) %>% # Summarize in Spark\n collect() %>% # Collect results in R\n ggplot() +\n geom_col(aes(cyl, mpg_m)) # Create plot" + "text": "dplyr + ggplot2\nSupported in Databricks Connect v2\n\ncopy_to(sc, mtcars) |>\n group_by(cyl) |>\n summarise(mpg_m = mean(mpg)) |> # Summarize in Spark\n collect() |> # Collect results in R\n ggplot() +\n geom_col(aes(cyl, mpg_m)) # Create plot" }, { "objectID": "html/sparklyr.html#regression", @@ -949,42 +949,42 @@ "href": "html/keras.html#intro", "title": "Deep Learning with Keras :: Cheatsheet", "section": "Intro", - "text": "Intro\nKeras is a high-level neural networks API developed with a focus on enabling fast experimentation. 
It supports multiple back-ends, including TensorFlow, CNTK and Theano.\nTensorFlow is a lower level mathematical library for building deep neural network architectures. The keras R package makes it easy to use Keras and TensorFlow in R.\n\nDefine: Model, Sequential model, Multi-GPU model\nCompile: Optimizer, Loss, Metrics\nFit: Batch size, Epochs, Validation split\nEvaluate: Evaluate, Plot\nPredict: Classes, Probability\n\nRead more at:\nhttps://tensorflow.rstudio.com\nhttps://www.manning.com/books/deep-learning-with-r-second-edition\n\nInstallation\nThe keras R package uses the Python keras library. You can install all the prerequisites directly from R: https://tensorflow.rstudio.com/install.\n\nlibrary(keras)\ninstall_keras()\n\nSee ?install_keras for GPU instructions.\nThis installs the required libraries in an Anaconda environment or virtual environment r-tensorflow.\n\n\nTraining an Image Recognizer on MNIST Data\nThe “Hello, World!” of deep learning\n\n# input layer: use MNIST images\nmnist <- dataset_mnist()\nx_train <- mnist$train$x\ny_train <- mnist$train$y \nx_test <- mnist$test$x\ny_test <- mnist$test$y\n\n# reshape and rescale\nx_train <- array_reshape(x_train, c(nrow(x_train), 784)) \nx_test <- array_reshape(x_test, c(nrow(x_test), 784)) \nx_train <- x_train / 255\nx_test <- x_test / 255\n\ny_train <- to_categorical(y_train, 10) \ny_test <- to_categorical(y_test, 10)\n\n# defining the model and layers\nmodel <- keras_model_sequential() \nmodel %>%\n layer_dense(units = 256, activation = 'relu', input_shape = c(784)) %>%\n layer_dropout(rate = 0.4) %>% \n layer_dense(units = 128, activation = 'relu') %>% \n layer_dense(units = 10, activation = 'softmax')\n \n# compile (define loss and optimizer)\nmodel %>%\n compile(\n loss = 'categorical_crossentropy', \n optimizer = optimizer_rmsprop(), \n metrics = c('accuracy')\n)\n\n# train (fit)\nmodel %>% fit(\n x_train, y_train,\n epochs = 30, batch_size = 128, \n validation_split = 0.2\n)\n\nmodel %>% 
evaluate(x_test, y_test) \nmodel %>% predict_classes(x_test)" + "text": "Intro\nKeras is a high-level neural networks API developed with a focus on enabling fast experimentation. It supports multiple back-ends, including TensorFlow, Jax and Torch.\nBackends like TensorFlow are lower level mathematical libraries for building deep neural network architectures. The keras3 R package makes it easy to use Keras with any backend in R.\n\nDefine: Model, Sequential model, Multi-GPU model\nCompile: Optimizer, Loss, Metrics\nFit: Batch size, Epochs, Validation split\nEvaluate: Evaluate, Plot\nPredict: Classes, Probability\n\nRead more at:\nhttps://keras.posit.co\nhttps://www.manning.com/books/deep-learning-with-r-second-edition\n\nInstallation\nThe keras3 R package uses the Python keras library. You can install all the prerequisites directly from R See ?keras3::install_keras for details and options.\n\nlibrary(keras3)\nreticulate::install_python()\ninstall_keras()\n\nThis installs the required libraries in virtual environment named ‘r-keras’. 
It will automatically detect if a GPU is available.\n\n\nTraining an Image Recognizer on MNIST Data\nThe “Hello, World!” of deep learning\n\n# input layer: use MNIST images\nmnist <- dataset_mnist()\nx_train <- mnist$train$x; y_train <- mnist$train$y\nx_test <- mnist$test$x; y_test <- mnist$test$y\n\n# reshape and rescale\nx_train <- array_reshape(x_train, c(nrow(x_train), 784))\nx_test <- array_reshape(x_test, c(nrow(x_test), 784))\nx_train <- x_train / 255; x_test <- x_test / 255\n\ny_train <- to_categorical(y_train, 10)\ny_test <- to_categorical(y_test, 10)\n\n# defining the model and layers\nmodel <-\n keras_model_sequential(input_shape = c(28, 28, 1))\nmodel |>\n layer_conv_2d(filters = 32, kernel_size = c(3, 3),\n activation = \"relu\") |>\n layer_max_pooling_2d(pool_size = c(2, 2)) |>\n layer_conv_2d(filters = 64, kernel_size = c(3, 3),\n activation = \"relu\") |>\n layer_max_pooling_2d(pool_size = c(2, 2)) |>\n layer_flatten() |>\n layer_dropout(rate = 0.5) |>\n layer_dense(units = num_classes,\n activation = \"softmax\")\n\n# View the model summary\nsummary(model)\nplot(model)\n\n# compile (define loss and optimizer)\nmodel |> \n compile(\n loss = 'categorical_crossentropy',\n optimizer = optimizer_rmsprop(),\n metrics = c('accuracy')\n)\n\n# train (fit)\nmodel |> fit(\n x_train, y_train,\n epochs = 30, batch_size = 128,\n validation_split = 0.2\n)\nmodel |> evaluate(x_test, y_test)\nmodel |> predict(x_test)\n\n# save the full model\nsave_model(model, \"mnist-classifier.keras\")\n\n# deploy for serving inference.\ndir.create(\"serving-mnist-classifier\")\nexport_savedmodel(modek, \"serving-mnist-classifier/1\")\nrsconnect::deployTFModel(\"serving-mnist-classifier\")" }, { "objectID": "html/keras.html#working-with-keras-models", "href": "html/keras.html#working-with-keras-models", "title": "Deep Learning with Keras :: Cheatsheet", - "section": "Working with keras models", - "text": "Working with keras models\n\nDefine a Model\n\nkeras_model(): Keras 
Model.\nkeras_model_sequential(): Keras Model composed of a linear stack of layers.\nmulti_gpu_model(): Replicates a model on different GPUs.\n\n\n\nCompile a Model\n\ncompile(object, optimizer, loss, metrics = NULL): Configure a Keras model for training.\n\n\n\nFit a Model\n\nfit(object, x = NULL, y = NULL, batch_size = NULL, epochs = 10, verbose = 1, callbacks = NULL, ...): Train a Keras model for a fixed number of epochs (iterations).\nfit_generator(): Fits the model on data yielded batch-by-batch by a generator.\ntrain_on_batch(); test_on_batch(): Single gradient update or model evaluation over one batch of samples.\n\n\n\nEvaluate a Model\n\nevaluate(object, x = NULL, y = NULL, batch_size = NULL): Evaluate a Keras model.\nevaluate_generator(): Evaluates the model on a data generator.\n\n\n\nPredict\n\npredict(): Generate predictions from a Keras model.\npredict_proba(); predict_classes(): Generates probability or class probability predictions for the input samples.\npredict_on_batch(): Returns predictions for a single batch of samples.\npredict_generator(): Generates predictions for the input samples from a data generator.\n\n\n\nOther Model Operations\n\nsummary(): Print a summary of a Keras model.\nexport_savedmodel(): Export a saved model.\nget_layer(): Retrieves a layer based on either its name (unique) or index.\npop_layer(): Remove the last layer in a model.\nsave_model_hdf5(); load_model_hdf5(): Save/Load models using HDF5 files.\nserialize_model(); unserialize_model(): Serialize a model to an R object.\nclone_model(): Clone a model instance.\nfreeze_weights(); unfreeze_weights()\n\n\n\nCore Layers\n\nlayer_input(): Input layer.\nlayer_dense(): Add a densely-connected NN layer to an output.\nlayer_activation(): Apply an activation function to an output.\nlayer_dropout(): Applies Dropout to the input.\nlayer_reshape(): Reshapes an output to a certain shape.\nlayer_permute(): Permute the dimensions of an input according to a given 
pattern.\nlayer_repeat_vector(): Repeats the input n times.\nlayer_lambda(object, f): Wraps arbitrary expression as a layer.\nlayer_activity_regularization(): Layer that applies an update to the cost function based input activity.\nlayer_masking(): Masks a sequence by using a mask value to skip timesteps.\nlayer_flatten(): Flattens an input." + "section": "Working with Keras models", + "text": "Working with Keras models\n\nDefine a Model\n\nFunctional API: keras_input() and keras_model()\nDefine a Functional Model with inputs and outputs.\ninputs <- keras_input(<input-shape>)\noutputs <- inputs |>\n layer_dense() |> layer_...\nmodel <- keras_model(inputs, outputs)\n\n\nSequential API: keras_model_sequential()\nDefine a Sequential Model composed of a linear stack of layers\nmodel <-\n keras_model_sequential(<input-shape>) |>\n layer_dense() |> layer_...\n\n\nSubclassing API: Model()\nSubclass the base Model class\n\n\n\nCompile a Model\ncompile(object, optimizer, loss, metrics = NULL): Configure a Keras model for training.\n\n\nFit a Model\nfit(object, x = NULL, y = NULL, batch_size = NULL, epochs = 10, verbose = 1, callbacks = NULL, ...): Train a Keras model for a fixed number of epochs (iterations)\nCustomize training:\n\nProvide callbacks to fit():\nDefine a custom Callback().\nCall train_on_batch() in a custom training loop.\nSubclass Model() and implement a custom train_step method.\nWrite a fully custom training loop. 
Update weights with model$optimizer$apply(gradients, weights)\n\n\n\nInspect a Model\n\nprint(model): Print a summary of a Keras model\nplot(model, show_shapes = FALSE, show_dtype = FALSE, show_layer_names = FALSE, ...): Plot a Keras model\n\n\n\nEvaluate a Model\n\nevaluate(object, x = NULL, y = NULL, batch_size = NULL): Evaluate a Keras model.\n\n\n\nPredict\n\npredict(): Generate predictions from a Keras model.\npredict_on_batch(): Returns predictions for a single batch of samples.\n\n\n\nSave/Load a Model\n\nsave_model(); load_model(): Save/Load models using the “.keras” file format.\nsave_model_weights(); load_model_weights(): Save/load model weights to/from “.h5” files.\nsave_model_config(); load_model_config(): Save/load model architecture to/from a “.json” file.\n\n\n\nCore Layers\n\nlayer_dense(): Add a densely-connected NN layer to an output.\nlayer_einsum_dense(): Add a dense layer with arbitrary dimensionality.\nlayer_activation(): Apply an activation function to an output.\nlayer_dropout(): Applies Dropout to the input.\nlayer_reshape(): Reshapes an output to a certain shape.\nlayer_permute(): Permute the dimensions of an input according to a given pattern.\nlayer_repeat_vector(): Repeats the input n times.\nlayer_lambda(object, f): Wraps arbitrary expression as a layer.\nlayer_activity_regularization(): Layer that applies an update to the cost function based input activity.\nlayer_masking(): Masks a sequence by using a mask value to skip timesteps.\nlayer_flatten(): Flattens an input." }, { "objectID": "html/keras.html#more-layers", "href": "html/keras.html#more-layers", "title": "Deep Learning with Keras :: Cheatsheet", "section": "More layers", - "text": "More layers\n\nConvolutional Layers\n\nlayer_conv_1d(): 1D, e.g. temporal convolution.\nlayer_conv_2d_transpose(): Transposed 2D (deconvolution).\nlayer_conv_2d() : 2D, e.g. spatial convolution over images.\nlayer_conv_3d_transpose(): Transposed 3D (deconvolution).\nlayer_conv_3d(): 3D, e.g. 
spatial convolution over volumes.\nlayer_conv_lstm_2d(): Convolutional LSTM.\nlayer_separable_conv_2d(): Depthwise separable 2D.\nlayer_upsampling_1d(); layer_upsampling_2d(); layer_upsampling_3d(): Upsampling layer.\nlayer_zero_padding_1d(); layer_zero_padding_2d(); layer_zero_padding_3d(): Zero-padding layer.\nlayer_cropping_1d(); layer_cropping_2d(); layer_cropping_3d(): Cropping layer.\n\n\n\nPooling Layers\n\nlayer_max_pooling_1d(); layer_max_pooling_2d(); layer_max_pooling_3d(): Maximum pooling for 1D to 3D.\nlayer_average_pooling_1d(); layer_average_pooling_2d(); layer_average_pooling_3d(): Average pooling for 1D to 3D.\nlayer_global_max_pooling_1d(); layer_global_max_pooling_2d(); layer_global_max_pooling_3d(): Global maximum pooling.\nlayer_global_average_pooling_1d(); layer_global_average_pooling_2d(); layer_global_average_pooling_3d(): Global average pooling.\n\n\n\nActivation Layers\n\nlayer_activation(object, activation): Apply an activation function to an output.\nlayer_activation_leaky_relu(): Leaky version of a rectified linear unit.\nlayer_activation_parametric_relu(): Parametric rectified linear unit.\nlayer_activation_thresholded_relu(): Thresholded rectified linear unit.\nlayer_activation_elu(): Exponential linear unit.\n\n\n\nDropout Layers\n\nlayer_dropout(): Applies dropout to the input.\nlayer_spatial_dropout_1d(); layer_spatial_dropout_2d(); layer_spatial_dropout_3d(): Spatial 1D to 3D version of dropout\n\n\n\nRecurrent Layers\n\nlayer_simple_rnn(): Fully-connected RNN where the output is to be fed back to input.\nlayer_gru(): Gated recurrent unit - Cho et al.\nlayer_cudnn_gru(): Fast GRU implementation backed by CuDNN.\nlayer_lstm(): Long-Short Term Memory unit - Hochreiter 1997.\nlayer_cudnn_lstm(): Fast LSTM implementation backed by CuDNN.\n\n\n\nLocally Connected Layers\n\nlayer_locally_connected_1d(); layer_locally_connected_2d(): Similar to convolution, but weights are not shared, i.e. different filters for each patch." 
+ "text": "More layers\n\nConvolutional Layers\n\nlayer_conv_1d(): 1D, e.g. temporal convolution.\nlayer_conv_2d_transpose(): Transposed 2D (deconvolution).\nlayer_conv_2d() : 2D, e.g. spatial convolution over images.\nlayer_conv_3d_transpose(): Transposed 3D (deconvolution).\nlayer_conv_3d(): 3D, e.g. spatial convolution over volumes.\nlayer_conv_lstm_2d(): Convolutional LSTM.\nlayer_separable_conv_2d(): Depthwise separable 2D.\nlayer_upsampling_1d(); layer_upsampling_2d(); layer_upsampling_3d(): Upsampling layer.\nlayer_zero_padding_1d(); layer_zero_padding_2d(); layer_zero_padding_3d(): Zero-padding layer.\nlayer_cropping_1d(); layer_cropping_2d(); layer_cropping_3d(): Cropping layer.\n\n\n\nPooling Layers\n\nlayer_max_pooling_1d(); layer_max_pooling_2d(); layer_max_pooling_3d(): Maximum pooling for 1D to 3D.\nlayer_average_pooling_1d(); layer_average_pooling_2d(); layer_average_pooling_3d(): Average pooling for 1D to 3D.\nlayer_global_max_pooling_1d(); layer_global_max_pooling_2d(); layer_global_max_pooling_3d(): Global maximum pooling.\nlayer_global_average_pooling_1d(); layer_global_average_pooling_2d(); layer_global_average_pooling_3d(): Global average pooling." 
}, { "objectID": "html/keras.html#preprocessing", "href": "html/keras.html#preprocessing", "title": "Deep Learning with Keras :: Cheatsheet", "section": "Preprocessing", - "text": "Preprocessing\n\nSequence Preprocessing\n\npad_sequences(): Pads each sequence to the same length (length of the longest sequence).\nskipgrams(): Generates skipgram word pairs.\nmake_sampling_table(): Generates word rank-based probabilistic sampling table.\n\n\n\nText Preprocessing\n\ntext_tokenizer(): Text tokenization utility.\nfit_text_tokenizer(): Update tokenizer internal vocabulary.\nsave_text_tokenizer(); load_text_tokenizer(): Save a text tokenizer to an external file.\ntexts_to_sequences(); texts_to_sequences_generator(): Transforms each text in texts to sequence of integers.\ntexts_to_matrix(); sequences_to_matrix(): Convert a list of sequences into a matrix.\ntext_one_hot(): One-hot encode text to word indices.\ntext_hashing_trick(): Converts a text to a sequence of indexes in a fixed-size hashing space.\ntext_to_word_sequence(): Convert text to a sequence of words (or tokens).\n\n\n\nImage Proprocessing\n\nimage_load(): Loads an image into PIL format.\nflow_images_from_data(); flow_images_from_directory(): Generates batches of augmented/normalized data from images and labels, or a directory.\nimage_data_generator(): Generate minibatches of image data with real-time data augmentation.\nfit_image_data_generator(): Fit image data generator internal statistics to some sample data.\ngenerator_next(): Retrieve the next item.\nimage_to_array(); image_array_resize(); image_array_save(): 3D array representation." 
+ "text": "Preprocessing\n\nImage Preprocessing\n\nLoad Images\n\nimage_dataset_from_directory() Create a TF Dataset from image files in a directory.\nimage_load(), image_from_array(), image_to_array(), image_array_save(): Work with PIL Image instances\n\n\n\nTransform Images\nOperations that transform image tensors in deterministic ways.\n\nop_image_crop()\nop_image_extract_patches()\nop_image_pad()\nop_image_resize()\nop_image_affine_transform()\nop_image_map_coordinates()\nop_image_rgb_to_grayscale()\n\nResize images without aspect ratio distortion.\n\nimage_smart_resize():\n\n\n\nImage Layers\nBuiltin image preprocessing layers. Note, any image operation function can also be used as a layer in a Model, or used in layer_lambda().\n\nImage Preprocessing Layers\n\nlayer_resizing()\nlayer_rescaling()\nlayer_center_crop()\n\n\n\nImage Augmentation Layers\nPreprocessing layers that randomly augment image inputs during training.\n\nlayer_random_crop()\nlayer_random_flip()\nlayer_random_translation()\nlayer_random_rotation()\nlayer_random_zoom()\nlayer_random_contrast()\nlayer_random_brightness()\n\n\n\n\n\nSequence Preprocesing\n\ntimeseries_dataset_from_array(): Generate a TF Dataset of sliding windows over a timeseries provided as array.\naudio_dataset_from_directory(): Generate a TF Dataset from audio files.\npad_sequences(): Pad sequences to the same length\n\n\n\nText Preprocessing\n\ntext_dataset_from_directory(): Generate a TF Dataset from text files in a directory.\nlayer_text_vectorization(), get_vocabulary(), set_vocabulary(): Map text to integer sequences.\n\n\n\nNumerical Features Preprocessing\n\nlayer_normalization(): Normalizes continuous features.\nlayer_discretization(): Buckets continuous features by ranges.\n\n\n\nCategorical Features Preprocessing\n\nlayer_category_encoding(): Encode integer features.\nlayer_hashing(): Hash and bin categorical features.\nlayer_hashed_crossing(): Cross features using the “hashing trick”.\nlayer_string_lookup(): Map 
strings to (possibly encoded) indices.\nlayer_integer_lookup(): Map integers to (possibly encoded) indices.\n\n\n\nTabular Data\nOne-stop utility for preprocessing and encoding structured data. Define a feature space from a list of table columns (features).\nfeature_space <- layer_feature_space(features = list(<features>))\nAdapt the feature space to a dataset\nadapt(feature_space, dataset)\nUse the adapted feature_space preprocessing layer as a layer in a Keras Model, or in the data input pipeline with tfdatasets::dataset_map()\nAvailable features:\n\nfeature_float()\nfeature_float_rescaled()\nfeature_float_normalized()\nfeature_float_discretized()\nfeature_integer_categorical()\nfeature_integer_hashed()\nfeature_string_categorical()\nfeature_string_hashed()\nfeature_cross()\nfeature_custom()" }, { "objectID": "html/keras.html#pre-trained-models", "href": "html/keras.html#pre-trained-models", "title": "Deep Learning with Keras :: Cheatsheet", "section": "Pre-trained models", - "text": "Pre-trained models\nKeras applications are deep learning models that are made available alongside pre-trained weights. 
These models can be used for prediction, feature extraction, and fine-tuning.\n\napplication_xception(); xception_preprocess_input(): Xception v1 model.\napplication_inception_v3(); inception_v3_preprocess_input(): Inception v3 model, with weights pre-trained on ImageNet.\napplication_inception_resnet_v2(); inception_resnet_v2_preprocess_input(): Inception-ResNet v2 model, with weights trained on ImageNet.\napplication_vgg16(); application_vgg19(): VGG16 and VGG19 models.\napplication_resnet50(): ResNet50 model.\napplication_mobilenet(); mobilenet_preprocess_input(); mobilenet_decode_predictions(); mobilenet_load_model_hdf5(): MobileNet model architecture.\n\nImageNet is a large database of images with labels, extensively used for deep learning.\n\nimagenet_preprocess_input(); imagenet_decode_predictions(): Preprocesses a tensor encoding a batch of images for ImageNet, and decodes predictions." + "text": "Pre-trained models\nKeras applications are deep learning models that are made available alongside pre-trained weights. These models can be used for prediction, feature extraction, and fine-tuning.\nMobileNetV3 Model, pre-trained on ImageNet\n\napplication_mobilenet_v3_large()\napplication_mobilenet_v3_small()\n\nEfficientNetV2 Model, pre-trained on ImageNet\n\napplication_efficientnet_v2s()\napplication_efficientnet_v2m()\napplication_efficientnet_v2l()\n\nInception-ResNet v2 and v3 model, with weights trained on ImageNet\n\napplication_inception_resnet_v2()\napplication_inception_v3()\n\nVGG16 and VGG19 models\n\napplication_vgg16()\napplication_vgg19()\n\nResNet50 model\n\napplication_resnet50():\n\nNASNet model architecture\n\napplication_nasnet_large()\napplication_nasnet_mobile()\n\nImageNet is a large database of images with labels, extensively used for deep learning\nPreprocesses a tensor encoding a batch of images for an application, and decodes predictions from an application. 
- application_preprocess_inputs() - application_decode_predictions()" }, { "objectID": "html/keras.html#callbacks", "href": "html/keras.html#callbacks", "title": "Deep Learning with Keras :: Cheatsheet", "section": "Callbacks", - "text": "Callbacks\nA callback is a set of functions to be applied at given stages of the training procedure. You can use callbacks to get a view on internal states and statistics of the model during training.\n\nallback_early_stopping(): Stop training when a monitored quantity has stopped improving.\ncallback_learning_rate_scheduler(): Learning rate scheduler.\ncallback_tensorboard(): TensorBoard basic visualizations.\n\n\nCC BY SA Posit Software, PBC • info@posit.co • posit.co\nLearn more at tensorflow.rstudio.com.\nUpdated: 2024-05.\n\npackageVersion(\"keras\")\n\n[1] '2.15.0'" + "text": "Callbacks\nA callback is a set of functions to be applied at given stages of the training procedure. You can use callbacks to get a view on internal states and statistics of the model during training.\n\nallback_early_stopping(): Stop training when a monitored quantity has stopped improving.\ncallback_learning_rate_scheduler(): Learning rate scheduler.\ncallback_tensorboard(): TensorBoard basic visualizations.\n\n\nCC BY SA Posit Software, PBC • info@posit.co • posit.co\nLearn more at keras.posit.co.\nUpdated: 2024-06.\n\npackageVersion(\"keras3\")\n\n[1] '1.0.0'" }, { "objectID": "html/package-development.html", diff --git a/sitemap.xml b/sitemap.xml index 70145ecc..c3a38670 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,86 +2,86 @@ https://rstudio.github.io/cheatsheets/translations.html - 2024-06-05T21:25:52.033Z + 2024-06-05T22:19:34.594Z https://rstudio.github.io/cheatsheets/html/tidyr.html - 2024-06-05T21:25:50.425Z + 2024-06-05T22:19:32.986Z https://rstudio.github.io/cheatsheets/html/sparklyr.html - 2024-06-05T21:25:50.425Z + 2024-06-05T22:19:32.986Z https://rstudio.github.io/cheatsheets/html/shiny-python.html - 2024-06-05T21:25:50.425Z + 
2024-06-05T22:19:32.986Z https://rstudio.github.io/cheatsheets/html/rmarkdown.html - 2024-06-05T21:25:50.425Z + 2024-06-05T22:19:32.986Z https://rstudio.github.io/cheatsheets/html/quarto.html - 2024-06-05T21:25:50.425Z + 2024-06-05T22:19:32.986Z https://rstudio.github.io/cheatsheets/html/plumber.html - 2024-06-05T21:25:50.425Z + 2024-06-05T22:19:32.986Z https://rstudio.github.io/cheatsheets/html/lubridate.html - 2024-06-05T21:25:50.421Z + 2024-06-05T22:19:32.986Z https://rstudio.github.io/cheatsheets/html/factors.html - 2024-06-05T21:25:50.397Z + 2024-06-05T22:19:32.962Z https://rstudio.github.io/cheatsheets/html/data-transformation.html - 2024-06-05T21:25:50.397Z + 2024-06-05T22:19:32.962Z https://rstudio.github.io/cheatsheets/contributed-cheatsheets.html - 2024-06-05T21:25:50.329Z + 2024-06-05T22:19:32.894Z https://rstudio.github.io/cheatsheets/html/data-import.html - 2024-06-05T21:25:50.397Z + 2024-06-05T22:19:32.962Z https://rstudio.github.io/cheatsheets/html/data-visualization.html - 2024-06-05T21:25:50.397Z + 2024-06-05T22:19:32.962Z https://rstudio.github.io/cheatsheets/html/keras.html - 2024-06-05T21:25:50.421Z + 2024-06-05T22:19:32.986Z https://rstudio.github.io/cheatsheets/html/package-development.html - 2024-06-05T21:25:50.425Z + 2024-06-05T22:19:32.986Z https://rstudio.github.io/cheatsheets/html/purrr.html - 2024-06-05T21:25:50.425Z + 2024-06-05T22:19:32.986Z https://rstudio.github.io/cheatsheets/html/reticulate.html - 2024-06-05T21:25:50.425Z + 2024-06-05T22:19:32.986Z https://rstudio.github.io/cheatsheets/html/rstudio-ide.html - 2024-06-05T21:25:50.425Z + 2024-06-05T22:19:32.986Z https://rstudio.github.io/cheatsheets/html/shiny.html - 2024-06-05T21:25:50.425Z + 2024-06-05T22:19:32.986Z https://rstudio.github.io/cheatsheets/html/strings.html - 2024-06-05T21:25:50.425Z + 2024-06-05T22:19:32.986Z https://rstudio.github.io/cheatsheets/index.html - 2024-06-05T21:25:50.425Z + 2024-06-05T22:19:32.990Z diff --git a/sparklyr.pdf b/sparklyr.pdf index 
b602a325..c30cef8e 100644 Binary files a/sparklyr.pdf and b/sparklyr.pdf differ