
Commit

Minor Documentation Fixes: TaskID for Example Custom Flow; Comment on Homepage; More documentation for `components` (#1243)

* fix task ID for Iris task

* update comment on homepage

* added additional documentation specific to the `components` parameter.

* add change to progress.rst

* Fix dataframe append being deprecated by replacing it with (backwards-compatible) pd.concat

* fix logging example and add new changes to progress.rst

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix comment too long

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
LennartPurucker and pre-commit-ci[bot] authored Apr 18, 2023
1 parent bb3793d commit fb9f9eb
Showing 6 changed files with 27 additions and 5 deletions.
2 changes: 1 addition & 1 deletion doc/index.rst
@@ -30,7 +30,7 @@ Example
         ('estimator', tree.DecisionTreeClassifier())
     ]
 )
-# Download the OpenML task for the german credit card dataset with 10-fold
+# Download the OpenML task for the pendigits dataset with 10-fold
 # cross-validation.
 task = openml.tasks.get_task(32)
 # Run the scikit-learn model on the task.
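For context, this hunk sits inside the full homepage example, which runs a scikit-learn pipeline on the task. A self-contained sketch of how that example plausibly reads with the fix applied (the imports and the `run_model_on_task`/`publish` calls are assumed from the shape of the surrounding file, not shown in this diff):

    import openml
    from sklearn import impute, pipeline, tree

    # Define a scikit-learn classifier or pipeline; the diff context above
    # shows its final step.
    clf = pipeline.Pipeline(
        steps=[
            ("imputer", impute.SimpleImputer()),
            ("estimator", tree.DecisionTreeClassifier()),
        ]
    )
    # Download the OpenML task for the pendigits dataset with 10-fold
    # cross-validation (task 32).
    task = openml.tasks.get_task(32)
    # Run the scikit-learn model on the task.
    run = openml.runs.run_model_on_task(clf, task)
    # Publishing is optional and requires a configured OpenML API key.
    run.publish()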
1 change: 1 addition & 0 deletions doc/progress.rst
@@ -9,6 +9,7 @@ Changelog
 0.13.1
 ~~~~~~
 
+* DOC #1241 #1229 #1231: Minor documentation fixes and resolve documentation examples not working.
 * ADD #1028: Add functions to delete runs, flows, datasets, and tasks (e.g., ``openml.datasets.delete_dataset``).
 * ADD #1144: Add locally computed results to the ``OpenMLRun`` object's representation if the run was created locally and not downloaded from the server.
 * ADD #1180: Improve the error message when the checksum of a downloaded dataset does not match the checksum provided by the API.
4 changes: 2 additions & 2 deletions examples/30_extended/configure_logging.py
@@ -37,8 +37,8 @@
 
 import logging
 
-openml.config.console_log.setLevel(logging.DEBUG)
-openml.config.file_log.setLevel(logging.WARNING)
+openml.config.set_console_log_level(logging.DEBUG)
+openml.config.set_file_log_level(logging.WARNING)
 openml.datasets.get_dataset("iris")
 
 # Now the log level that was previously written to file should also be shown in the console.
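The fix swaps direct handler access for the public setter functions. A minimal standalone sketch of the corrected usage (assuming a default openml configuration; the comments are editorial, not from the example file):

    import logging

    import openml

    # Console output at DEBUG, log file at WARNING: debug messages that were
    # previously only written to file now also appear on the console.
    openml.config.set_console_log_level(logging.DEBUG)
    openml.config.set_file_log_level(logging.WARNING)

    # Any API activity now echoes its debug-level log records to the console.
    openml.datasets.get_dataset("iris")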
6 changes: 5 additions & 1 deletion examples/30_extended/custom_flow_.py
@@ -77,6 +77,8 @@
 # you can use the Random Forest Classifier flow as a *subflow*. It allows for
 # all hyperparameters of the Random Classifier Flow to also be specified in your pipeline flow.
 #
+# Note: you can currently only specify one subflow as part of the components.
+#
 # In this example, the auto-sklearn flow is a subflow: the auto-sklearn flow is entirely executed as part of this flow.
 # This allows people to specify auto-sklearn hyperparameters used in this flow.
 # In general, using a subflow is not required.
@@ -87,6 +89,8 @@
 autosklearn_flow = openml.flows.get_flow(9313)  # auto-sklearn 0.5.1
 subflow = dict(
     components=OrderedDict(automl_tool=autosklearn_flow),
+    # If you do not want to reference a subflow, you can use the following:
+    # components=OrderedDict(),
 )
 
 ####################################################################################################
@@ -124,7 +128,7 @@
     OrderedDict([("oml:name", "time"), ("oml:value", 120), ("oml:component", flow_id)]),
 ]
 
-task_id = 1965  # Iris Task
+task_id = 1200  # Iris Task
 task = openml.tasks.get_task(task_id)
 dataset_id = task.get_dataset().dataset_id
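For readers landing on these hunks: the `subflow` dict is later unpacked into the `OpenMLFlow` constructor together with the flow's metadata and hyperparameters. A hedged sketch of how the pieces plausibly fit together — the `general` and `flow_hyperparameters` dicts and their values stand in for parts of the example file not shown in this diff:

    from collections import OrderedDict

    import openml

    autosklearn_flow = openml.flows.get_flow(9313)  # auto-sklearn 0.5.1

    # Flow metadata (illustrative values).
    general = dict(
        name="automlbenchmark_autosklearn",
        description="An auto-sklearn run within the AutoML benchmark.",
        external_version="amlb==0.9",
        language="English",
        tags=["amlb"],
        dependencies="amlb==0.9",
    )

    # Hyperparameters owned by this flow itself (illustrative values).
    flow_hyperparameters = dict(
        parameters=OrderedDict(time="240", memory="32", cores="8"),
        parameters_meta_info=OrderedDict(
            cores=OrderedDict(description="number of available cores", data_type="int"),
            memory=OrderedDict(description="memory in gigabytes", data_type="int"),
            time=OrderedDict(description="time in minutes", data_type="int"),
        ),
    )

    # Only one subflow can currently be passed via `components`.
    subflow = dict(components=OrderedDict(automl_tool=autosklearn_flow))

    custom_flow = openml.flows.OpenMLFlow(
        **general, **flow_hyperparameters, **subflow, model=None
    )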
2 changes: 1 addition & 1 deletion openml/utils.py
@@ -283,7 +283,7 @@ def _list_all(listing_call, output_format="dict", *args, **filters):
             if len(result) == 0:
                 result = new_batch
             else:
-                result = result.append(new_batch, ignore_index=True)
+                result = pd.concat([result, new_batch], ignore_index=True)
         else:
             # For output_format = 'dict' or 'object'
             result.update(new_batch)
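The one-line change in `_list_all` tracks a pandas API change: `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0, while `pd.concat` behaves the same on both old and new pandas. A minimal sketch of the replacement, outside the OpenML codebase:

    import pandas as pd

    batch_a = pd.DataFrame({"tid": [1, 2]})
    batch_b = pd.DataFrame({"tid": [3, 4]})

    # Deprecated since pandas 1.4 and removed in 2.0:
    #   result = batch_a.append(batch_b, ignore_index=True)

    # Backwards-compatible replacement; ignore_index renumbers rows 0..n-1.
    result = pd.concat([batch_a, batch_b], ignore_index=True)
    print(result)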
17 changes: 17 additions & 0 deletions tests/test_utils/test_utils.py
@@ -18,6 +18,23 @@ def mocked_perform_api_call(call, request_method):
 
     def test_list_all(self):
         openml.utils._list_all(listing_call=openml.tasks.functions._list_tasks)
+        openml.utils._list_all(
+            listing_call=openml.tasks.functions._list_tasks, output_format="dataframe"
+        )
+
+    def test_list_all_with_multiple_batches(self):
+        res = openml.utils._list_all(
+            listing_call=openml.tasks.functions._list_tasks, output_format="dict", batch_size=2000
+        )
+        # Verify that the test server state is still valid for this test to work as intended
+        # -> If the number of results is less than 2000, the test cannot test the
+        # batching operation.
+        assert len(res) > 2000
+        openml.utils._list_all(
+            listing_call=openml.tasks.functions._list_tasks,
+            output_format="dataframe",
+            batch_size=2000,
+        )
 
     @unittest.mock.patch("openml._api_calls._perform_api_call", side_effect=mocked_perform_api_call)
     def test_list_all_few_results_available(self, _perform_api_call):
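The new test exercises the same batched-concatenation path through the private helper. The user-facing equivalent is the public listing API; a minimal sketch (assuming a configured server, with `size` as an upper bound on the number of returned results):

    import openml

    # list_tasks pages through server results internally via _list_all and,
    # for output_format="dataframe", concatenates the batches with pd.concat.
    tasks = openml.tasks.list_tasks(output_format="dataframe", size=3000)
    print(len(tasks))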
