wandb

gnn-tracking · Jul 7, 2023 · f54b643 · f54b643
1 parent 7fadfd1
commit f54b643
Showing 1 changed file with 112 additions and 16 deletions.
diff --git a/notebooks/project.ipynb b/notebooks/project.ipynb
@@ -10,13 +10,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
+    "import wandb\n",
     "import torch\n",
     "from functools import partial\n",
     "from pytorch_lightning import Trainer\n",
+    "from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger\n",
+    "from pytorch_lightning.callbacks import RichProgressBar\n",
+    "# from wandb_osh.lightning_hooks import TriggerWandbSyncLightningCallback\n",
     "\n",
     "from model import Model\n",
     "\n",
@@ -31,6 +35,88 @@
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
+      "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m `resume` will be ignored since W&B syncing is set to `offline`. Starting a new run with run id TEST_NAME.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "Tracking run with wandb version 0.15.5"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "W&B syncing is set to <code>`offline`<code> in this directory.  <br/>Run <code>`wandb online`<code> or set <code>WANDB_MODE=online<code> to enable cloud syncing."
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "name = \"TEST_NAME\"\n",
+    "\n",
+    "logger = WandbLogger(\n",
+    "    project=\"jian-point_cloud_net\",\n",
+    "    group=\"test\",\n",
+    "    offline=True,\n",
+    "    version=name,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<wandb.sdk.wandb_metric.Metric at 0x157411d90>"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "wandb.define_metric(\n",
+    "    \"max_mcc_pt0.9\",\n",
+    "    step_metric=\"max_mcc_pt0.9\",\n",
+    "    summary=\"max\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tb_logger = TensorBoardLogger(\".\", version=name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -47,7 +133,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -66,7 +152,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -75,7 +161,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -95,7 +181,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -105,19 +191,35 @@
       "GPU available: False, used: False\n",
       "TPU available: False, using: 0 TPU cores\n",
       "IPU available: False, using: 0 IPUs\n",
-      "HPU available: False, using: 0 HPUs\n",
-      "/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:67: UserWarning: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default\n",
-      "  warning_cache.warn(\n"
+      "HPU available: False, using: 0 HPUs\n"
      ]
-    },
+    }
+   ],
+   "source": [
+    "trainer = Trainer(\n",
+    "    max_epochs=1,\n",
+    "    accelerator=device,\n",
+    "    log_every_n_steps=1,\n",
+    "    callbacks=[\n",
+    "        PrintValidationMetrics()\n",
+    "    ],\n",
+    "    logger=logger\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
     {
      "ename": "FileNotFoundError",
      "evalue": "Directory /Users/fuchur/tmp/truth_cut_graphs_for_gsoc/part_1_0/ does not exist.",
      "output_type": "error",
      "traceback": [
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
       "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[7], line 7\u001b[0m\n\u001b[1;32m      1\u001b[0m trainer \u001b[39m=\u001b[39m Trainer(\n\u001b[1;32m      2\u001b[0m     max_epochs\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m,\n\u001b[1;32m      3\u001b[0m     accelerator\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mcpu\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m      4\u001b[0m     log_every_n_steps\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m,\n\u001b[1;32m      5\u001b[0m     callbacks\u001b[39m=\u001b[39m[PrintValidationMetrics()],\n\u001b[1;32m      6\u001b[0m )\n\u001b[0;32m----> 7\u001b[0m trainer\u001b[39m.\u001b[39;49mfit(model\u001b[39m=\u001b[39;49mlmodel, datamodule\u001b[39m=\u001b[39;49mdm)\n",
+      "Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m trainer\u001b[39m.\u001b[39;49mfit(model\u001b[39m=\u001b[39;49mlmodel, datamodule\u001b[39m=\u001b[39;49mdm)\n",
       "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pytorch_lightning/trainer/trainer.py:531\u001b[0m, in \u001b[0;36mTrainer.fit\u001b[0;34m(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)\u001b[0m\n\u001b[1;32m    529\u001b[0m model \u001b[39m=\u001b[39m _maybe_unwrap_optimized(model)\n\u001b[1;32m    530\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstrategy\u001b[39m.\u001b[39m_lightning_module \u001b[39m=\u001b[39m model\n\u001b[0;32m--> 531\u001b[0m call\u001b[39m.\u001b[39;49m_call_and_handle_interrupt(\n\u001b[1;32m    532\u001b[0m     \u001b[39mself\u001b[39;49m, \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path\n\u001b[1;32m    533\u001b[0m )\n",
       "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pytorch_lightning/trainer/call.py:42\u001b[0m, in \u001b[0;36m_call_and_handle_interrupt\u001b[0;34m(trainer, trainer_fn, *args, **kwargs)\u001b[0m\n\u001b[1;32m     40\u001b[0m     \u001b[39mif\u001b[39;00m trainer\u001b[39m.\u001b[39mstrategy\u001b[39m.\u001b[39mlauncher \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m     41\u001b[0m         \u001b[39mreturn\u001b[39;00m trainer\u001b[39m.\u001b[39mstrategy\u001b[39m.\u001b[39mlauncher\u001b[39m.\u001b[39mlaunch(trainer_fn, \u001b[39m*\u001b[39margs, trainer\u001b[39m=\u001b[39mtrainer, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m---> 42\u001b[0m     \u001b[39mreturn\u001b[39;00m trainer_fn(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m     44\u001b[0m \u001b[39mexcept\u001b[39;00m _TunerExitException:\n\u001b[1;32m     45\u001b[0m     _call_teardown_hook(trainer)\n",
       "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/pytorch_lightning/trainer/trainer.py:570\u001b[0m, in \u001b[0;36mTrainer._fit_impl\u001b[0;34m(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)\u001b[0m\n\u001b[1;32m    560\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_data_connector\u001b[39m.\u001b[39mattach_data(\n\u001b[1;32m    561\u001b[0m     model, train_dataloaders\u001b[39m=\u001b[39mtrain_dataloaders, val_dataloaders\u001b[39m=\u001b[39mval_dataloaders, datamodule\u001b[39m=\u001b[39mdatamodule\n\u001b[1;32m    562\u001b[0m )\n\u001b[1;32m    564\u001b[0m ckpt_path \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_checkpoint_connector\u001b[39m.\u001b[39m_select_ckpt_path(\n\u001b[1;32m    565\u001b[0m     \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstate\u001b[39m.\u001b[39mfn,\n\u001b[1;32m    566\u001b[0m     ckpt_path,\n\u001b[1;32m    567\u001b[0m     model_provided\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m,\n\u001b[1;32m    568\u001b[0m     model_connected\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlightning_module \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m,\n\u001b[1;32m    569\u001b[0m )\n\u001b[0;32m--> 570\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_run(model, ckpt_path\u001b[39m=\u001b[39;49mckpt_path)\n\u001b[1;32m    572\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstate\u001b[39m.\u001b[39mstopped\n\u001b[1;32m    573\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtraining \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n",
@@ -133,12 +235,6 @@
     }
    ],
    "source": [
-    "trainer = Trainer(\n",
-    "    max_epochs=1,\n",
-    "    accelerator=device,\n",
-    "    log_every_n_steps=1,\n",
-    "    callbacks=[PrintValidationMetrics()],\n",
-    ")\n",
     "trainer.fit(model=lmodel, datamodule=dm)"
    ]
   },