
Docs (#1355)
* fix broken tutorials

* Update docs for 1.0.9 release.
haifeng-jin authored Sep 27, 2020
1 parent 67991b5 commit 265e496
Showing 13 changed files with 267 additions and 92 deletions.
2 changes: 1 addition & 1 deletion docs/py/customized.py
@@ -1,6 +1,6 @@
"""shell
pip install autokeras
pip install git+https://github.com/keras-team/keras-tuner.git@1.0.2rc1
pip install git+https://github.com/keras-team/keras-tuner.git@1.0.2rc2
"""

"""
2 changes: 1 addition & 1 deletion docs/py/export.py
@@ -9,7 +9,7 @@

"""shell
pip install autokeras
pip install git+https://github.com/keras-team/keras-tuner.git@1.0.2rc1
pip install git+https://github.com/keras-team/keras-tuner.git@1.0.2rc2
"""

import tensorflow as tf
8 changes: 4 additions & 4 deletions docs/py/image_classification.py
@@ -1,6 +1,6 @@
"""shell
pip install autokeras
pip install git+https://github.com/keras-team/keras-tuner.git@1.0.2rc1
pip install git+https://github.com/keras-team/keras-tuner.git@1.0.2rc2
"""

"""
@@ -22,6 +22,8 @@
The second step is to run the ImageClassifier.
It is recommended to have more trials for more complicated datasets.
This is just a quick demo of MNIST, so we set max_trials to 1.
For the same reason, we set epochs to 10.
You can also leave the epochs unspecified for an adaptive number of epochs.
"""

# Initialize the image classifier.
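The initialization code itself is collapsed in this diff. As a rough sketch, assuming the standard AutoKeras API and the MNIST arrays loaded earlier in the tutorial (x_train and y_train are not shown here):

```
import autokeras as ak

# Search over at most one model and train it for ten epochs.
clf = ak.ImageClassifier(overwrite=True, max_trials=1)
clf.fit(x_train, y_train, epochs=10)
```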
@@ -155,9 +157,7 @@
# [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]])

"""
We also support using tf.data.Dataset format for the training data. In this case, the
images would have to be 3-dimentional. The labels have to be one-hot encoded for
multi-class classification to be wrapped into tensorflow Dataset.
We also support using tf.data.Dataset format for the training data.
"""

train_set = tf.data.Dataset.from_tensor_slices(((x_train,), (y_train,)))
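The remainder of this hunk is collapsed. A plausible continuation, assuming the test set is wrapped the same way and fed to the classifier (a sketch, not the exact collapsed code):

```
test_set = tf.data.Dataset.from_tensor_slices(((x_test,), (y_test,)))

clf = ak.ImageClassifier(overwrite=True, max_trials=1)
# A tf.data.Dataset already pairs inputs with labels, so it is passed alone.
clf.fit(train_set, epochs=10)
print(clf.evaluate(test_set))
```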
4 changes: 3 additions & 1 deletion docs/py/image_regression.py
@@ -1,6 +1,6 @@
"""shell
pip install autokeras
pip install git+https://github.com/keras-team/keras-tuner.git@1.0.2rc1
pip install git+https://github.com/keras-team/keras-tuner.git@1.0.2rc2
"""

"""
@@ -29,6 +29,8 @@
The second step is to run the ImageRegressor.
It is recommended to have more trials for more complicated datasets.
This is just a quick demo of MNIST, so we set max_trials to 1.
For the same reason, we set epochs to 2.
You can also leave the epochs unspecified for an adaptive number of epochs.
"""

# Initialize the image regressor.
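The regressor code is collapsed below this comment. A minimal sketch, assuming the standard AutoKeras API and the MNIST arrays from earlier in the tutorial:

```
import autokeras as ak

# Search over at most one model and train it for two epochs.
reg = ak.ImageRegressor(overwrite=True, max_trials=1)
reg.fit(x_train, y_train, epochs=2)
```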
169 changes: 169 additions & 0 deletions docs/py/load.py
@@ -0,0 +1,169 @@
"""shell
pip install autokeras
pip install git+https://github.com/keras-team/keras-tuner.git@1.0.2rc2
"""

"""
If the data is too large to fit in memory all at once, we can load it from disk batch by batch with tf.data.Dataset.
This [function](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image_dataset_from_directory) can help you build such a tf.data.Dataset for image data.
First, we download the data and extract the files.
"""

import tensorflow as tf
import os

# dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
# local_file_path = tf.keras.utils.get_file(origin=dataset_url,
# fname='image_data',
# extract=True)
# # The file is extracted in the same directory as the downloaded file.
# local_dir_path = os.path.dirname(local_file_path)
# # After checking manually, we know the extracted data is in 'flower_photos'.
# data_dir = os.path.join(local_dir_path, 'flower_photos')
# print(data_dir)

"""
The directory should look like this. Each folder contains the images of a single class.
```
flowers_photos/
daisy/
dandelion/
roses/
sunflowers/
tulips/
```
We can split the data into training and testing as we load them.
"""

batch_size = 32
img_height = 180
img_width = 180

# train_data = tf.keras.preprocessing.image_dataset_from_directory(
# data_dir,
# # Use 20% data as testing data.
# validation_split=0.2,
# subset="training",
# # Set seed to ensure the same split when loading testing data.
# seed=123,
# image_size=(img_height, img_width),
# batch_size=batch_size)

# test_data = tf.keras.preprocessing.image_dataset_from_directory(
# data_dir,
# validation_split=0.2,
# subset="validation",
# seed=123,
# image_size=(img_height, img_width),
# batch_size=batch_size)

"""
Then we do a quick demo of AutoKeras to make sure the dataset works.
"""

import autokeras as ak

# clf = ak.ImageClassifier(overwrite=True, max_trials=1)
# clf.fit(train_data, epochs=1)
# print(clf.evaluate(test_data))

"""
You can also load text datasets in the same way.
"""

dataset_url = "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"

local_file_path = tf.keras.utils.get_file(
    fname="text_data",
    origin=dataset_url,
    extract=True,
)
# The file is extracted in the same directory as the downloaded file.
local_dir_path = os.path.dirname(local_file_path)
# After checking manually, we know the extracted data is in 'aclImdb'.
data_dir = os.path.join(local_dir_path, 'aclImdb')
# Remove the unused data folder.
import shutil
shutil.rmtree(os.path.join(data_dir, 'train/unsup'))


"""
For this dataset, the data is already split into train and test.
We just load them separately.
"""
print(data_dir)
train_data = tf.keras.preprocessing.text_dataset_from_directory(
    os.path.join(data_dir, 'train'),
    class_names=['pos', 'neg'],
    validation_split=0.2,
    subset="training",
    seed=123,
    batch_size=batch_size)

val_data = tf.keras.preprocessing.text_dataset_from_directory(
    os.path.join(data_dir, 'train'),
    class_names=['pos', 'neg'],
    validation_split=0.2,
    subset="validation",
    seed=123,
    batch_size=batch_size)

test_data = tf.keras.preprocessing.text_dataset_from_directory(
    os.path.join(data_dir, 'test'),
    class_names=['pos', 'neg'],
    shuffle=False,
    batch_size=batch_size)

# Peek at the first batch to verify that the dataset loads correctly.
for x, y in train_data:
    print(x.numpy()[0])
    print(y.numpy()[0])
    break

clf = ak.TextClassifier(overwrite=True, max_trials=2)
clf.fit(train_data, validation_data=val_data)
print(clf.evaluate(test_data))
2 changes: 1 addition & 1 deletion docs/py/multi.py
@@ -1,6 +1,6 @@
"""shell
pip install autokeras
pip install git+https://github.com/keras-team/keras-tuner.git@1.0.2rc1
pip install git+https://github.com/keras-team/keras-tuner.git@1.0.2rc2
"""

"""
Expand Down
14 changes: 6 additions & 8 deletions docs/py/structured_data_classification.py
@@ -1,6 +1,6 @@
"""shell
pip install autokeras
pip install git+https://github.com/keras-team/keras-tuner.git@1.0.2rc1
pip install git+https://github.com/keras-team/keras-tuner.git@1.0.2rc2
"""

"""
@@ -21,6 +21,8 @@
"""
The second step is to run the
[StructuredDataClassifier](/structured_data_classifier).
As a quick demo, we set epochs to 10.
You can also leave the epochs unspecified for an adaptive number of epochs.
"""

# Initialize the structured data classifier.
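The classifier code is collapsed below this comment. A minimal sketch, assuming the standard AutoKeras API; train_file_path and the 'survived' label column are taken from the Titanic example this tutorial uses and are not shown in the diff:

```
import autokeras as ak

clf = ak.StructuredDataClassifier(overwrite=True, max_trials=3)
# AutoKeras accepts a path to a CSV file plus the name of the label column.
clf.fit(train_file_path, 'survived', epochs=10)
```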
@@ -49,7 +51,7 @@
two-dimensional with numerical or categorical values.
For the classification labels,
AutoKeras accepts both plain labels, i.e. strings or integers, and one-hot encoded
AutoKeras accepts both plain labels, i.e. strings or integers, and one-hot
encoded labels, i.e. vectors of 0s and 1s.
The labels can be numpy.ndarray, pandas.DataFrame, or pandas.Series.
@@ -70,7 +72,7 @@
print(type(y_train)) # pandas.DataFrame

# You can also use numpy.ndarray for x_train and y_train.
x_train = x_train.to_numpy().astype(np.unicode)
x_train = x_train.to_numpy()
y_train = y_train.to_numpy()
print(type(x_train)) # numpy.ndarray
print(type(y_train)) # numpy.ndarray
@@ -92,13 +94,9 @@

"""
The following code shows how to convert numpy.ndarray to tf.data.Dataset.
Notably, the labels have to be one-hot encoded for multi-class
classification to be wrapped into tensorflow Dataset.
Since the Titanic dataset is binary
classification, it should not be one-hot encoded.
"""

train_set = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_set = tf.data.Dataset.from_tensor_slices((x_train.astype(np.unicode), y_train))
test_set = tf.data.Dataset.from_tensor_slices((x_test.to_numpy().astype(np.unicode), y_test))

clf = ak.StructuredDataClassifier(
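The rest of this call is collapsed in the diff. A plausible completion, with the arguments assumed rather than taken from the collapsed lines:

```
clf = ak.StructuredDataClassifier(overwrite=True, max_trials=3)
# The Dataset pairs features with labels, so no label column is needed.
clf.fit(train_set, epochs=10)
print(clf.evaluate(test_set))
```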
4 changes: 3 additions & 1 deletion docs/py/structured_data_regression.py
@@ -1,6 +1,6 @@
"""shell
pip install autokeras
pip install git+https://github.com/keras-team/keras-tuner.git@1.0.2rc1
pip install git+https://github.com/keras-team/keras-tuner.git@1.0.2rc2
"""

"""
@@ -31,6 +31,8 @@
"""
The second step is to run the
[StructuredDataRegressor](/structured_data_regressor).
As a quick demo, we set epochs to 10.
You can also leave the epochs unspecified for an adaptive number of epochs.
"""

# Initialize the structured data regressor.
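The regressor code is collapsed below this comment. A minimal sketch, assuming the standard AutoKeras API; train_file_path and the 'Price' target column are hypothetical names standing in for the dataset this tutorial loads:

```
import autokeras as ak

reg = ak.StructuredDataRegressor(overwrite=True, max_trials=3)
reg.fit(train_file_path, 'Price', epochs=10)
```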