
feat: add support for batch size scaling (#691)
* feat: add support for batch size scaling

* chore: update changelog
LMMilliken authored Mar 16, 2023
1 parent e649785 commit 16ae4ff
Showing 4 changed files with 11 additions and 4 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- Add support for batch size scaling. ([#691](https://github.com/jina-ai/finetuner/pull/691))

- Add functions to retrieve evaluation metrics and example results. ([#687](https://github.com/jina-ai/finetuner/pull/687))

### Removed
6 changes: 5 additions & 1 deletion docs/walkthrough/run-job.md
@@ -157,4 +157,8 @@ Finetuner constructs batches so that each batch contains the same number of classes and
as many items per class as configured via the `num_items_per_class` parameter.
However, if this is not possible, e.g., because `batch_size` is not divisible by
`num_items_per_class` or the training dataset does not contain enough classes,
Finetuner tries to choose a similar value for `num_items_per_class` that works.
A larger `batch_size` results in faster training, though too large a `batch_size` can cause
out-of-memory errors. If you are unsure how high you can set this value, a `batch_size` of
64 or 128 is usually a good choice. Alternatively, you can leave `batch_size` unset, in which
case the highest possible value is calculated for you automatically.
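To make the two modes concrete, here is a minimal sketch of how a call might look after this change; the model name, dataset name, and the prior `finetuner.login()` step are assumptions for illustration, not taken from this commit:

```python
import finetuner

# finetuner.login()  # assumed prerequisite before submitting a run

# Leave `batch_size` unset: the highest feasible value is chosen automatically.
run_auto = finetuner.fit(
    model='efficientnet_b0',        # placeholder model name
    train_data='my-training-data',  # placeholder dataset name
    epochs=5,
)

# Or pin it explicitly if you already know what fits into memory.
run_fixed = finetuner.fit(
    model='efficientnet_b0',
    train_data='my-training-data',
    epochs=5,
    batch_size=128,
)
```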
5 changes: 3 additions & 2 deletions finetuner/__init__.py
@@ -121,7 +121,7 @@ def fit(
optimizer_options: Optional[Dict[str, Any]] = None,
learning_rate: Optional[float] = None,
epochs: int = 5,
-batch_size: int = 64,
+batch_size: Optional[int] = None,
callbacks: Optional[List[callback.CallbackStubType]] = None,
scheduler: Optional[str] = None,
scheduler_options: Optional[Dict[str, Any]] = None,
@@ -189,7 +189,8 @@ def fit(
<https://pytorch.org/docs/stable/optim.html>`_
:param learning_rate: learning rate for the optimizer.
:param epochs: Number of epochs for fine-tuning.
:param batch_size: Number of items to include in a batch. If not set, the
batch size will be configured automatically.
:param callbacks: List of callback stub objects. See the `finetuner.callback`
subpackage for available options, or run `finetuner.list_callbacks()`.
:param scheduler: Name of a scheduler to use for learning rate scheduling.
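The signature change above follows the common `Optional`-with-`None`-sentinel pattern. As a rough, self-contained sketch of that pattern (illustrative only; the real batch size scaling is performed by the Finetuner backend, not by a helper like this):

```python
from typing import Optional


def resolve_batch_size(batch_size: Optional[int], max_feasible: int = 256) -> int:
    """Return the user's explicit batch size, or pick one automatically when None.

    The "automatic" choice here is a stand-in heuristic; the actual value is
    computed server-side by Finetuner based on the model and available memory.
    """
    if batch_size is not None:
        return batch_size
    return max_feasible


print(resolve_batch_size(None))  # 256 -> chosen automatically
print(resolve_batch_size(64))    # 64  -> explicit value respected
```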
2 changes: 1 addition & 1 deletion finetuner/finetuner.py
@@ -158,7 +158,7 @@ def create_run(
optimizer_options: Optional[Dict[str, Any]] = None,
learning_rate: Optional[float] = None,
epochs: int = 5,
-batch_size: int = 64,
+batch_size: Optional[int] = None,
callbacks: Optional[List[Any]] = None,
scheduler: Optional[str] = None,
scheduler_options: Optional[Dict[str, Any]] = None,
