Skip to content

Commit

Permalink
wandb
Browse files Browse the repository at this point in the history
  • Loading branch information
Jian Park authored and Jian Park committed Jul 7, 2023
1 parent 7fadfd1 commit f54b643
Showing 1 changed file with 112 additions and 16 deletions.
128 changes: 112 additions & 16 deletions notebooks/project.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,17 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"import wandb\n",
"import torch\n",
"from functools import partial\n",
"from pytorch_lightning import Trainer\n",
"from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger\n",
"from pytorch_lightning.callbacks import RichProgressBar\n",
"# from wandb_osh.lightning_hooks import TriggerWandbSyncLightningCallback\n",
"\n",
"from model import Model\n",
"\n",
Expand All @@ -31,6 +35,88 @@
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
"\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m `resume` will be ignored since W&B syncing is set to `offline`. Starting a new run with run id TEST_NAME.\n"
]
},
{
"data": {
"text/html": [
"Tracking run with wandb version 0.15.5"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"W&B syncing is set to <code>`offline`<code> in this directory. <br/>Run <code>`wandb online`<code> or set <code>WANDB_MODE=online<code> to enable cloud syncing."
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"name = \"TEST_NAME\"\n",
"\n",
"logger = WandbLogger(\n",
" project=\"jian-point_cloud_net\",\n",
" group=\"test\",\n",
" offline=True,\n",
" version=name,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<wandb.sdk.wandb_metric.Metric at 0x157411d90>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wandb.define_metric(\n",
" \"max_mcc_pt0.9\",\n",
" step_metric=\"max_mcc_pt0.9\",\n",
" summary=\"max\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"tb_logger = TensorBoardLogger(\".\", version=name)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
Expand All @@ -47,7 +133,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -66,7 +152,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -75,7 +161,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -95,7 +181,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 11,
"metadata": {},
"outputs": [
{
Expand All @@ -105,19 +191,35 @@
"GPU available: False, used: False\n",
"TPU available: False, using: 0 TPU cores\n",
"IPU available: False, using: 0 IPUs\n",
"HPU available: False, using: 0 HPUs\n",
"/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:67: UserWarning: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default\n",
" warning_cache.warn(\n"
"HPU available: False, using: 0 HPUs\n"
]
},
}
],
"source": [
"trainer = Trainer(\n",
" max_epochs=1,\n",
" accelerator=device,\n",
" log_every_n_steps=1,\n",
" callbacks=[\n",
" PrintValidationMetrics()\n",
" ],\n",
" logger=logger\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "Directory /Users/fuchur/tmp/truth_cut_graphs_for_gsoc/part_1_0/ does not exist.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[7], line 7\u001b[0m\n\u001b[1;32m 1\u001b[0m trainer \u001b[39m=\u001b[39m Trainer(\n\u001b[1;32m 2\u001b[0m max_epochs\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m,\n\u001b[1;32m 3\u001b[0m accelerator\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mcpu\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 4\u001b[0m log_every_n_steps\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m,\n\u001b[1;32m 5\u001b[0m callbacks\u001b[39m=\u001b[39m[PrintValidationMetrics()],\n\u001b[1;32m 6\u001b[0m )\n\u001b[0;32m----> 7\u001b[0m trainer\u001b[39m.\u001b[39;49mfit(model\u001b[39m=\u001b[39;49mlmodel, datamodule\u001b[39m=\u001b[39;49mdm)\n",
"Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m trainer\u001b[39m.\u001b[39;49mfit(model\u001b[39m=\u001b[39;49mlmodel, datamodule\u001b[39m=\u001b[39;49mdm)\n",
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pytorch_lightning/trainer/trainer.py:531\u001b[0m, in \u001b[0;36mTrainer.fit\u001b[0;34m(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)\u001b[0m\n\u001b[1;32m 529\u001b[0m model \u001b[39m=\u001b[39m _maybe_unwrap_optimized(model)\n\u001b[1;32m 530\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstrategy\u001b[39m.\u001b[39m_lightning_module \u001b[39m=\u001b[39m model\n\u001b[0;32m--> 531\u001b[0m call\u001b[39m.\u001b[39;49m_call_and_handle_interrupt(\n\u001b[1;32m 532\u001b[0m \u001b[39mself\u001b[39;49m, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path\n\u001b[1;32m 533\u001b[0m )\n",
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pytorch_lightning/trainer/call.py:42\u001b[0m, in \u001b[0;36m_call_and_handle_interrupt\u001b[0;34m(trainer, trainer_fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[39mif\u001b[39;00m trainer\u001b[39m.\u001b[39mstrategy\u001b[39m.\u001b[39mlauncher \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 41\u001b[0m \u001b[39mreturn\u001b[39;00m trainer\u001b[39m.\u001b[39mstrategy\u001b[39m.\u001b[39mlauncher\u001b[39m.\u001b[39mlaunch(trainer_fn, \u001b[39m*\u001b[39margs, trainer\u001b[39m=\u001b[39mtrainer, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m---> 42\u001b[0m \u001b[39mreturn\u001b[39;00m trainer_fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 44\u001b[0m \u001b[39mexcept\u001b[39;00m _TunerExitException:\n\u001b[1;32m 45\u001b[0m _call_teardown_hook(trainer)\n",
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pytorch_lightning/trainer/trainer.py:570\u001b[0m, in \u001b[0;36mTrainer._fit_impl\u001b[0;34m(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)\u001b[0m\n\u001b[1;32m 560\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_data_connector\u001b[39m.\u001b[39mattach_data(\n\u001b[1;32m 561\u001b[0m model, train_dataloaders\u001b[39m=\u001b[39mtrain_dataloaders, val_dataloaders\u001b[39m=\u001b[39mval_dataloaders, datamodule\u001b[39m=\u001b[39mdatamodule\n\u001b[1;32m 562\u001b[0m )\n\u001b[1;32m 564\u001b[0m ckpt_path \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_checkpoint_connector\u001b[39m.\u001b[39m_select_ckpt_path(\n\u001b[1;32m 565\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstate\u001b[39m.\u001b[39mfn,\n\u001b[1;32m 566\u001b[0m ckpt_path,\n\u001b[1;32m 567\u001b[0m model_provided\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m,\n\u001b[1;32m 568\u001b[0m model_connected\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlightning_module \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m,\n\u001b[1;32m 569\u001b[0m )\n\u001b[0;32m--> 570\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_run(model, ckpt_path\u001b[39m=\u001b[39;49mckpt_path)\n\u001b[1;32m 572\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstate\u001b[39m.\u001b[39mstopped\n\u001b[1;32m 573\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtraining \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n",
Expand All @@ -133,12 +235,6 @@
}
],
"source": [
"trainer = Trainer(\n",
" max_epochs=1,\n",
" accelerator=device,\n",
" log_every_n_steps=1,\n",
" callbacks=[PrintValidationMetrics()],\n",
")\n",
"trainer.fit(model=lmodel, datamodule=dm)"
]
},
Expand Down

0 comments on commit f54b643

Please sign in to comment.