From 3b990d3f750d7c79111cdc7cdf3d0ac60ddf1cda Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Tue, 20 Feb 2024 15:46:18 +0000
Subject: [PATCH 01/35] Updating README

---
 README.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/README.md b/README.md
index e1e06bb..c9c7949 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,9 @@
+# Overview
+The repository contains the code to train a neural network to emulate the gravity wave drag (GWD) in the WACCM simulation.
+The code trains a PyTorch feed-forward (FF) network.
+
+
+
 # newCAM-Emulation
 
 This is a DNN written with PyTorch to Emulate the gravity wave drag (GWD, both zonal and meridional) in the CAM model.

From 39139bb454fde280fe6e798c73267bc072c68c79 Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Thu, 22 Feb 2024 13:13:59 +0000
Subject: [PATCH 02/35] basic linting

---
 newCAM_emulation/Model.py |  1 +
 newCAM_emulation/train.py | 11 +++++++++++
 2 files changed, 12 insertions(+)

diff --git a/newCAM_emulation/Model.py b/newCAM_emulation/Model.py
index ccdd690..91cf5be 100644
--- a/newCAM_emulation/Model.py
+++ b/newCAM_emulation/Model.py
@@ -10,6 +10,7 @@
 from torch.utils.data import DataLoader, Dataset
 
 
+
 # Required for feeding the data into NN.
 class myDataset(Dataset):
     """
diff --git a/newCAM_emulation/train.py b/newCAM_emulation/train.py
index 198097e..f11f53c 100644
--- a/newCAM_emulation/train.py
+++ b/newCAM_emulation/train.py
@@ -19,6 +19,17 @@
 print(f"Using device: {DEVICE}")
 
 
+from torch.cuda import is_available
+from torch.backends import mps
+if is_available():
+    DEVICE = "cuda"
+elif mps.is_available():
+    DEVICE = "mps"
+else:
+    DEVICE = "cpu"
+print(f"Using device: {DEVICE}")
+
+
 class EarlyStopper:
     """Class for implementing early stopping during training."""
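
Note: the hunk above introduces a second copy of device-selection logic that train.py already performs a few lines earlier; the duplicate is cleaned up again in PATCH 05. For reference, the selection both copies implement is, as a sketch (imports as in the hunk):

    from torch.backends import mps
    from torch.cuda import is_available

    # prefer CUDA, then Apple Silicon MPS, and fall back to CPU
    if is_available():
        DEVICE = "cuda"
    elif mps.is_available():
        DEVICE = "mps"
    else:
        DEVICE = "cpu"
    print(f"Using device: {DEVICE}")
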
From 3cebc20b3386ac7e9fbcebb42850e78d3a0b9c31 Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Thu, 22 Feb 2024 13:54:20 +0000
Subject: [PATCH 03/35] basic linting

---
 newCAM_emulation/train.py | 142 +++++++++++++++++++------------------
 1 file changed, 71 insertions(+), 71 deletions(-)

diff --git a/newCAM_emulation/train.py b/newCAM_emulation/train.py
index f11f53c..3d4b4ac 100644
--- a/newCAM_emulation/train.py
+++ b/newCAM_emulation/train.py
@@ -3,6 +3,7 @@
 import Model
 import netCDF4 as nc
 import numpy as np
+
 import torch
 from loaddata import data_loader, newnorm
 from torch import nn
@@ -10,6 +11,7 @@
 from torch.cuda import is_available
 from torch.utils.data import DataLoader
+
 if is_available():
     DEVICE = "cuda"
 elif mps.is_available():
@@ -21,6 +23,7 @@
 from torch.cuda import is_available
 from torch.backends import mps
+
 if is_available():
     DEVICE = "cuda"
 elif mps.is_available():
@@ -55,8 +58,8 @@ def early_stop(self, validation_loss):
             self.min_validation_loss = validation_loss
             self.counter = 0
 
-            #save model
-            torch.save(model.state_dict(), 'conv_torch.pth')
+            # save model
+            torch.save(model.state_dict(), "conv_torch.pth")
 
         elif validation_loss > (self.min_validation_loss + self.min_delta):
             self.counter += 1
@@ -65,44 +68,42 @@ def early_stop(self, validation_loss):
     return False
 
 
-
 ## load mean and std for normalization
-fm = np.load('../Demodata/mean_demo_sub.npz')
-fs = np.load('../Demodata/std_demo_sub.npz')
-
-Um = fm['U']
-Vm = fm['V']
-Tm = fm['T']
-DSEm = fm['DSE']
-NMm = fm['NM']
-NETDTm = fm['NETDT']
-Z3m = fm['Z3']
-RHOIm = fm['RHOI']
-PSm = fm['PS']
-latm = fm['lat']
-lonm = fm['lon']
-UTGWSPECm = fm['UTGWSPEC']
-VTGWSPECm = fm['VTGWSPEC']
-
-Us = fs['U']
-Vs = fs['V']
-Ts = fs['T']
-DSEs = fs['DSE']
-NMs = fs['NM']
-NETDTs = fs['NETDT']
-Z3s = fs['Z3']
-RHOIs = fs['RHOI']
-PSs = fs['PS']
-lats = fs['lat']
-lons = fs['lon']
-UTGWSPECs = fs['UTGWSPEC']
-VTGWSPECs = fs['VTGWSPEC']
+fm = np.load("../Demodata/mean_demo_sub.npz")
+fs = np.load("../Demodata/std_demo_sub.npz")
+
+Um = fm["U"]
+Vm = fm["V"]
+Tm = fm["T"]
+DSEm = fm["DSE"]
+NMm = fm["NM"]
+NETDTm = fm["NETDT"]
+Z3m = fm["Z3"]
+RHOIm = fm["RHOI"]
+PSm = fm["PS"]
+latm = fm["lat"]
+lonm = fm["lon"]
+UTGWSPECm = fm["UTGWSPEC"]
+VTGWSPECm = fm["VTGWSPEC"]
+
+Us = fs["U"]
+Vs = fs["V"]
+Ts = fs["T"]
+DSEs = fs["DSE"]
+NMs = fs["NM"]
+NETDTs = fs["NETDT"]
+Z3s = fs["Z3"]
+RHOIs = fs["RHOI"]
+PSs = fs["PS"]
+lats = fs["lat"]
+lons = fs["lon"]
+UTGWSPECs = fs["UTGWSPEC"]
+VTGWSPECs = fs["VTGWSPEC"]
 
 ilev = 93
 
-dim_NN =int(8*ilev+4)
-dim_NNout =int(2*ilev)
+dim_NN = int(8 * ilev + 4)
+dim_NNout = int(2 * ilev)
 
 model = Model.FullyConnected()
 
@@ -111,81 +112,81 @@
 learning_rate = 1e-5
 epochs = 100
-optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # weight_decay=1e-5
+optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # weight_decay=1e-5
 
 
 s_list = list(range(1, 6))
 
 for iter in s_list:
-    if (iter > 1):
-        model.load_state_dict(torch.load('conv_torch.pth'))
-    print ('data loader iteration',iter)
-    filename = '../Demodata/newCAM_demo_sub_' + str(iter).zfill(1) + '.nc'
-    print('working on: ', filename)
+    if iter > 1:
+        model.load_state_dict(torch.load("conv_torch.pth"))
+    print("data loader iteration", iter)
+    filename = "../Demodata/newCAM_demo_sub_" + str(iter).zfill(1) + ".nc"
+    print("working on: ", filename)
 
     F = nc.Dataset(filename)
 
    PS = np.asarray(F["PS"][0, :])
    PS = newnorm(PS, PSm, PSs)

    Z3 = np.asarray(F["Z3"][0, :, :])
    Z3 = newnorm(Z3, Z3m, Z3s)

    U = np.asarray(F["U"][0, :, :])
    U = newnorm(U, Um, Us)

    V = np.asarray(F["V"][0, :, :])
    V = newnorm(V, Vm, Vs)

    T = np.asarray(F["T"][0, :, :])
    T = newnorm(T, Tm, Ts)

    lat = F["lat"]
    lat = newnorm(lat, np.mean(lat), np.std(lat))

    lon = F["lon"]
    lon = newnorm(lon, np.mean(lon), np.std(lon))

    DSE = np.asarray(F["DSE"][0, :, :])
    DSE = newnorm(DSE, DSEm, DSEs)

    RHOI = np.asarray(F["RHOI"][0, :, :])
    RHOI = newnorm(RHOI, RHOIm, RHOIs)

    NETDT = np.asarray(F["NETDT"][0, :, :])
    NETDT = newnorm(NETDT, NETDTm, NETDTs)

    NM = np.asarray(F["NMBV"][0, :, :])
    NM = newnorm(NM, NMm, NMs)

    UTGWSPEC = np.asarray(F["UTGWSPEC"][0, :, :])
    UTGWSPEC = newnorm(UTGWSPEC, UTGWSPECm, UTGWSPECs)

    VTGWSPEC = np.asarray(F["VTGWSPEC"][0, :, :])
    VTGWSPEC = newnorm(VTGWSPEC, VTGWSPECm, VTGWSPECs)

    x_train, y_train = data_loader(
        U, V, T, DSE, NM, NETDT, Z3, RHOI, PS, lat, lon, UTGWSPEC, VTGWSPEC
    )

    data = Model.myDataset(X=x_train, Y=y_train)

    batch_size = 128

    split_data = torch.utils.data.random_split(
        data, [0.75, 0.25], generator=torch.Generator().manual_seed(42)
    )
    train_dataloader = DataLoader(split_data[0], batch_size=batch_size, shuffle=True)
    val_dataloader = DataLoader(
        split_data[1], batch_size=len(split_data[1]), shuffle=True
    )

    # training
    early_stopper = EarlyStopper(patience=5, min_delta=0)  # Note the hyper parameters.
    for t in range(epochs):
        if t % 2 == 0:
            print(f"Epoch {t+1}\n-------------------------------")
            print(val_losses[-1])
            print("counter=" + str(early_stopper.counter))
        train_loss = Model.train_loop(train_dataloader, model, nn.MSELoss(), optimizer)

        train_losses.append(train_loss)

        val_loss = Model.val_loop(val_dataloader, model, nn.MSELoss())
        val_losses.append(val_loss)

        if early_stopper.early_stop(val_loss):
            print("BREAK!")
            break
-
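
For reference, the `newnorm` helper imported above standardises each field with the precomputed statistics; a condensed sketch consistent with its definition in loaddata.py (which PATCH 10 later removes):

    import numpy as np

    def newnorm(var, varm, varstd):
        # elementwise (var - mean) / std, with zero standard deviations
        # replaced by 1.0 so constant fields survive the division
        std = np.where(varstd == 0.0, 1.0, varstd)
        return (var - varm) / std
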
From cd22ce8ddb6575bf1c9c17a610f5afc652225994 Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Mon, 26 Feb 2024 15:57:17 +0000
Subject: [PATCH 04/35] Added ruff linting tool

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 93d8d9e..ae7c3d1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -62,7 +62,8 @@ extend-include = ["*.ipynb"]
 [tool.ruff.lint]
 # Enable: D: `pydocstyle`, PL: `pylint`, I: `isort`, W: `pycodestyle whitespace`
 # NPY: `numpy`,
-select = ["D", "PL", "I", "E", "W", "NPY" ]
+
+select = ["D", "PL", "I", "E", "W", "NPY"]
 
 # Enable D417 (Missing argument description) on top of the NumPy convention.
 extend-select = ["D417"]

From 2780cd460b5eb167f5df26f53aca9abff846f3b5 Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Mon, 4 Mar 2024 18:17:54 +0000
Subject: [PATCH 05/35] Updating train.py

---
 newCAM_emulation/train.py | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/newCAM_emulation/train.py b/newCAM_emulation/train.py
index 3d4b4ac..4ef75a8 100644
--- a/newCAM_emulation/train.py
+++ b/newCAM_emulation/train.py
@@ -10,17 +10,6 @@
 from torch.backends import mps
 from torch.cuda import is_available
 from torch.utils.data import DataLoader
-
-
-if is_available():
-    DEVICE = "cuda"
-elif mps.is_available():
-    DEVICE = "mps"
-else:
-    DEVICE = "cpu"
-print(f"Using device: {DEVICE}")
-
-
 from torch.cuda import is_available
 from torch.backends import mps

From 4292ab9f43cb29f8d4f0a72f282650bb60f8b2e9 Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Mon, 4 Mar 2024 18:22:06 +0000
Subject: [PATCH 06/35] Updating train.py

---
 newCAM_emulation/train.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/newCAM_emulation/train.py b/newCAM_emulation/train.py
index 4ef75a8..3f44029 100644
--- a/newCAM_emulation/train.py
+++ b/newCAM_emulation/train.py
@@ -12,7 +12,6 @@
 from torch.utils.data import DataLoader
 from torch.cuda import is_available
 from torch.backends import mps
-
 if is_available():
     DEVICE = "cuda"
 elif mps.is_available():
@@ -57,6 +56,8 @@ def early_stop(self, validation_loss):
     return False
 
 
+
+
 ## load mean and std for normalization
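
The mean_demo_sub.npz / std_demo_sub.npz files referenced in the hunk above are plain NumPy archives; they can be inspected with a snippet like the following (the printed key list is illustrative, inferred from the fields used in train.py):

    import numpy as np

    fm = np.load("../Demodata/mean_demo_sub.npz")
    print(fm.files)   # e.g. ['U', 'V', 'T', 'DSE', 'NM', 'NETDT', 'Z3', ...]
    Um = fm["U"]      # per-level mean of the zonal wind field
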
From d1cf88e3ff3aea942dc831b48331e252a105d05a Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Fri, 8 Mar 2024 10:35:58 +0000
Subject: [PATCH 07/35] Reverted the changes introduced by 9dde06f

---
 README.md | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/README.md b/README.md
index c9c7949..e1e06bb 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,3 @@
-# Overview
-The repository contains the code to train a neural network to emulate the gravity wave drag (GWD) in the WACCM simulation.
-The code trains a PyTorch feed-forward (FF) network.
-
-
-
 # newCAM-Emulation
 
 This is a DNN written with PyTorch to Emulate the gravity wave drag (GWD, both zonal and meridional) in the CAM model.

From a289544ed15033fc6b01c70d11d38a3cd6ebc0ce Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Sun, 17 Mar 2024 21:47:56 +0000
Subject: [PATCH 08/35] Adding normalisation in the model definition

---
 newCAM_emulation/Model.py | 127 +++++++++-----------------------------
 1 file changed, 30 insertions(+), 97 deletions(-)

diff --git a/newCAM_emulation/Model.py b/newCAM_emulation/Model.py
index 91cf5be..7077939 100644
--- a/newCAM_emulation/Model.py
+++ b/newCAM_emulation/Model.py
@@ -10,7 +10,6 @@
 from torch.utils.data import DataLoader, Dataset
 
 
-
 # Required for feeding the data into NN.
 class myDataset(Dataset):
     """
@@ -39,6 +38,16 @@ def __getitem__(self, idx):
 
 
 # The NN model.
+class NormalizationLayer(nn.Module):
+    def __init__(self, mean, std):
+        super(NormalizationLayer, self).__init__()
+        self.mean = mean
+        self.std = std
+
+    def forward(self, x):
+        return (x - self.mean) / self.std
+
+
 class FullyConnected(nn.Module):
     """
     Fully connected neural network model.
@@ -50,110 +59,34 @@ class FullyConnected(nn.Module):
     linear_stack (torch.nn.Sequential): Sequential container for layers.
     """
 
-    def __init__(self):
+    def __init__(self, ilev, mean, std):
         """Create an instance of FullyConnected NN model."""
         super(FullyConnected, self).__init__()
-        ilev = 93
-
-        self.linear_stack = nn.Sequential(
-            nn.Linear(8 * ilev + 4, 500, dtype=torch.float64),
-            nn.SiLU(),
-            nn.Linear(500, 500, dtype=torch.float64),
-            nn.SiLU(),
-            nn.Linear(500, 500, dtype=torch.float64),
-            nn.SiLU(),
-            nn.Linear(500, 500, dtype=torch.float64),
-            nn.SiLU(),
-            nn.Linear(500, 500, dtype=torch.float64),
-            nn.SiLU(),
-            nn.Linear(500, 500, dtype=torch.float64),
-            nn.SiLU(),
-            nn.Linear(500, 500, dtype=torch.float64),
-            nn.SiLU(),
-            nn.Linear(500, 500, dtype=torch.float64),
-            nn.SiLU(),
-            nn.Linear(500, 500, dtype=torch.float64),
-            nn.SiLU(),
-            nn.Linear(500, 500, dtype=torch.float64),
-            nn.SiLU(),
-            nn.Linear(500, 500, dtype=torch.float64),
-            nn.SiLU(),
-            nn.Linear(500, 500, dtype=torch.float64),
-            nn.SiLU(),
-            nn.Linear(500, 2 * ilev, dtype=torch.float64),
-        )
-
-    def forward(self, X):
+        self.normalization = NormalizationLayer(mean, std)
+        self.ilev = ilev
+
+        layers = []
+        layers.append(nn.Linear(8 * ilev + 4, 500))
+        layers.append(nn.SiLU())
+
+        num_layers = 10  # Example: Change this to the desired number of hidden layers
+        for _ in range(num_layers):
+            layers.append(nn.Linear(500, 500))
+            layers.append(nn.SiLU())
+
+        layers.append(nn.Linear(500, 2 * ilev))
+        self.linear_stack = nn.Sequential(*layers)
+
+    def forward(self, x):
         """
         Forward pass through the network.
 
         Args:
-            X (torch.Tensor): Input tensor.
+            x (torch.Tensor): Input tensor.
 
         Returns
         -------
         torch.Tensor: Output tensor.
         """
-        return self.linear_stack(X)
-
-
-# training loop
-def train_loop(dataloader, model, loss_fn, optimizer):
-    """
-    Training loop.
-
-    Args:
-        dataloader (DataLoader): DataLoader for training data.
-        model (nn.Module): Neural network model.
-        loss_fn (torch.nn.Module): Loss function.
-        optimizer (torch.optim.Optimizer): Optimizer.
-
-    Returns
-    -------
-    float: Average training loss.
-    """
-    size = len(dataloader.dataset)
-    avg_loss = 0
-    for batch, (X, Y) in enumerate(dataloader):
-        # Compute prediction and loss
-        pred = model(X)
-        loss = loss_fn(pred, Y)
-
-        # Backpropagation
-        optimizer.zero_grad(set_to_none=True)
-        loss.backward()
-        optimizer.step()
-
-        with torch.no_grad():
-            avg_loss += loss.item()
-
-    avg_loss /= len(dataloader)
-
-    return avg_loss
-
-
-# validating loop
-def val_loop(dataloader, model, loss_fn):
-    """
-    Validation loop.
-
-    Args:
-        dataloader (DataLoader): DataLoader for validation data.
-        model (nn.Module): Neural network model.
-        loss_fn (torch.nn.Module): Loss function.
-
-    Returns
-    -------
-    float: Average validation loss.
-    """
-    avg_loss = 0
-    with torch.no_grad():
-        for batch, (X, Y) in enumerate(dataloader):
-            # Compute prediction and loss
-            pred = model(X)
-            loss = loss_fn(pred, Y)
-            avg_loss += loss.item()
-
-    avg_loss /= len(dataloader)
-
-    return avg_loss
+        x = self.normalization(x)
+        return self.linear_stack(x)
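
With the normalisation layer introduced in PATCH 08, constructing the model now requires the level count and the statistics tensors. A minimal shape sketch (the dummy zero/one statistics are placeholders, not real climatology):

    import torch
    from Model import FullyConnected

    ilev = 93
    mean = torch.zeros(8 * ilev + 4)   # placeholder per-feature means
    std = torch.ones(8 * ilev + 4)     # placeholder per-feature stds
    model = FullyConnected(ilev, mean, std)

    x = torch.randn(16, 8 * ilev + 4)  # a batch of 16 columns
    y = model(x)                       # -> shape (16, 2 * ilev) = (16, 186)
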
From a080959c730fbe59a720f5f13521b0c3b55677cd Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Sun, 17 Mar 2024 22:59:57 +0000
Subject: [PATCH 09/35] created convection subfolder in demodata and moving
 reading of data files to loaddata from train.py

---
 newCAM_emulation/loaddata.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/newCAM_emulation/loaddata.py b/newCAM_emulation/loaddata.py
index 859bf26..aaa4d0b 100644
--- a/newCAM_emulation/loaddata.py
+++ b/newCAM_emulation/loaddata.py
@@ -77,3 +77,14 @@ def data_loader (U,V,T, DSE, NM, NETDT, Z3, RHOI, PS, lat, lon, UTGWSPEC, VTGWSP
 
     return x_train,y_train
 
+
+"""Read the data and the corresponding mean and std deviation"""
+"""Iterating through the data files"""
+s_list = list(range(1, 6))
+
+for iter in s_list:
+    filename = "Demodata/Convection/newCAM_demo_sub_" + str(iter).zfill(1) + ".nc"  # data file
+    print('working on: ', filename)
+    fm = np.load('Demodata/mean_demo_sub.npz')  # mean file
+    fs = np.load('Demodata/std_demo_sub.npz')  # std deviation file
+
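
One thing to note about the lines added here: they sit at module scope in loaddata.py, so they run at import time. A guarded variant (a suggestion, not part of the patch) would be:

    # only iterate over the demo files when loaddata.py is run directly,
    # not on `import loaddata`
    if __name__ == "__main__":
        for i in range(1, 6):
            filename = f"Demodata/Convection/newCAM_demo_sub_{i}.nc"
            print("working on: ", filename)
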
From 2c1de8bf0f2cd30bf92d273090a02ceb1d278669 Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Sun, 17 Mar 2024 23:14:31 +0000
Subject: [PATCH 10/35] changed data_loader function

---
 newCAM_emulation/loaddata.py | 114 ++++++++++------------------------
 1 file changed, 33 insertions(+), 81 deletions(-)

diff --git a/newCAM_emulation/loaddata.py b/newCAM_emulation/loaddata.py
index aaa4d0b..f2b8cae 100644
--- a/newCAM_emulation/loaddata.py
+++ b/newCAM_emulation/loaddata.py
@@ -3,88 +3,40 @@
 """Implementing data loader for training neural network."""
 
 import numpy as np
 
 ilev = 93
-dim_NN =int(8*ilev+4)
-dim_NNout =int(2*ilev)
+dim_NN = int(8 * ilev + 4)
+dim_NNout = int(2 * ilev)
 
-def newnorm(var, varm, varstd):
-    """Normalizes the input variable(s) using mean and standard deviation.
-
-    Args:
-        var (numpy.ndarray): Input variable(s) to be normalized.
-        varm (numpy.ndarray): Mean of the variable(s).
-        varstd (numpy.ndarray): Standard deviation of the variable(s).
-
-    Returns
-    -------
-    numpy.ndarray: Normalized variable(s).
-    """
-    dim=varm.size
-    if dim > 1 :
-        vara = var - varm[:, :]
-        varstdmax = varstd
-        varstdmax[varstd==0.0] = 1.0
-        tmp = vara / varstdmax[:, :]
-    else:
-        tmp = ( var - varm ) / varstd
-    return tmp
-
-
-def data_loader (U,V,T, DSE, NM, NETDT, Z3, RHOI, PS, lat, lon, UTGWSPEC, VTGWSPEC):
-    """
-    Loads and preprocesses input data for neural network training.
-
-    Args:
-        U (numpy.ndarray): Zonal wind component.
-        V (numpy.ndarray): Meridional wind component.
-        T (numpy.ndarray): Temperature.
-        DSE (numpy.ndarray): Dry static energy.
-        NM (numpy.ndarray): Northward mass flux.
-        NETDT (numpy.ndarray): Net downward total radiation flux.
-        Z3 (numpy.ndarray): Geopotential height.
-        RHOI (numpy.ndarray): Air density.
-        PS (numpy.ndarray): Surface pressure.
-        lat (numpy.ndarray): Latitude.
-        lon (numpy.ndarray): Longitude.
-        UTGWSPEC (numpy.ndarray): Target zonal wind spectral component.
-        VTGWSPEC (numpy.ndarray): Target meridional wind spectral component.
-
-    Returns
-    -------
-    tuple: A tuple containing the input data and target data arrays.
-    """
-    Ncol = U.shape[1]
-    #Nlon = U.shape[2]
-    #Ncol = Nlat*Nlon
-
-    x_train = np.zeros([dim_NN,Ncol])
-    y_train = np.zeros([dim_NNout,Ncol])
-
-
-    x_train [0:ilev, : ] = U.reshape(ilev, Ncol)
-    x_train [ilev:2*ilev, :] = V.reshape(ilev, Ncol)
-    x_train [2*ilev:3*ilev,:] = T.reshape(ilev, Ncol)
-    x_train [3*ilev:4*ilev, :] = DSE.reshape(ilev, Ncol)
-    x_train [4*ilev:5*ilev, :] = NM.reshape(ilev, Ncol)
-    x_train [5*ilev:6*ilev, :] = NETDT.reshape(ilev, Ncol)
-    x_train [6*ilev:7*ilev, :] = Z3.reshape(ilev, Ncol)
-    x_train [7*ilev:8*ilev+1, :] = RHOI.reshape(ilev+1, Ncol)
-    x_train [8*ilev+1:8*ilev+2, :] = PS.reshape(1, Ncol)
-    x_train [8*ilev+2:8*ilev+3, :] = lat.reshape(1, Ncol)
-    x_train [8*ilev+3:ilev*ilev+4, :] = lon.reshape(1, Ncol)
-
-    y_train [0:ilev, :] = UTGWSPEC.reshape(ilev, Ncol)
-    y_train [ilev:2*ilev, :] = VTGWSPEC.reshape(ilev, Ncol)
-
-    return x_train,y_train
-
-
-"""Read the data and the corresponding mean and std deviation"""
-"""Iterating through the data files"""
-s_list = list(range(1, 6))
-
-for iter in s_list:
-    filename = "Demodata/Convection/newCAM_demo_sub_" + str(iter).zfill(1) + ".nc"  # data file
-    print('working on: ', filename)
-    fm = np.load('Demodata/mean_demo_sub.npz')  # mean file
-    fs = np.load('Demodata/std_demo_sub.npz')  # std deviation file
-
+# Iterating through the data files
+s_list = list(range(1, 6))
+for iter in s_list:
+    filename = (
+        "Demodata/Convection/newCAM_demo_sub_" + str(iter).zfill(1) + ".nc"
+    )  # data file
+    print("working on: ", filename)
+    fm = np.load("Demodata/mean_demo_sub.npz")  # mean file
+    fs = np.load("Demodata/std_demo_sub.npz")  # std deviation file
+
+
+def data_loader(U, V, T, DSE, NM, NETDT, Z3, RHOI, PS, lat, lon, UTGWSPEC, VTGWSPEC):
+    """Function to iterate over the data read by the above part of code"""
+    Ncol = U.shape[1]
+    x_train = np.vstack(
+        [
+            U.reshape(-1, Ncol),
+            V.reshape(-1, Ncol),
+            T.reshape(-1, Ncol),
+            DSE.reshape(-1, Ncol),
+            NM.reshape(-1, Ncol),
+            NETDT.reshape(-1, Ncol),
+            Z3.reshape(-1, Ncol),
+            RHOI.reshape(-1, Ncol),
+            PS.reshape(1, Ncol),
+            lat.reshape(1, Ncol),
+            lon.reshape(1, Ncol),
+        ]
+    )
+
+    y_train = np.vstack([UTGWSPEC.reshape(-1, Ncol), VTGWSPEC.reshape(-1, Ncol)])
+
+    return x_train, y_train
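
The vstack-based data_loader above fixes the input/output dimensionality used throughout the repository; the arithmetic, for the demo grid (Ncol = 4419 columns per the notebook that follows):

    ilev = 93
    # U, V, T, DSE, NM, NETDT, Z3 contribute ilev rows each,
    # RHOI is on interface levels (ilev + 1), PS/lat/lon one row each:
    dim_NN = 7 * ilev + (ilev + 1) + 3   # = 8 * ilev + 4 = 748
    dim_NNout = 2 * ilev                 # UTGWSPEC + VTGWSPEC = 186
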
"import torch\n", + "from torch import nn\n", + "from torch.utils.data import Dataset\n", + "from torch.utils.data import DataLoader\n" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "# Define variable names and corresponding mean and std values\n", + "features = ['PS', 'Z3', 'U', 'V', 'T', 'lat', 'lon', 'DSE', 'RHOI', 'NETDT', 'NM', 'UTGWSPEC', 'VTGWSPEC']\n", + "\n", + "directory_path = '../Demodata/Convection'\n", + "file_path_mean = '../Demodata/Convection/mean_demo_sub.npz'\n", + "file_path_std = '../Demodata/Convection/std_demo_sub.npz'" + ] + }, + { + "cell_type": "code", + "execution_count": 267, + "metadata": {}, + "outputs": [], + "source": [ + "def load_variables(directory_path, variable_names, startfile, endfile):\n", + " # Define the variable mapping\n", + " variable_mapping = {\n", + " 'NM': 'NMBV'\n", + " }\n", + "\n", + " # Dictionary to store data for each variable\n", + " variable_data = {}\n", + "\n", + " # Pattern to match file names\n", + " pattern = re.compile(r'^newCAM_demo_sub_\\d{startfile,endfile}$')\n", + "\n", + " # Iterate over each data file in the directory\n", + " for file_name in os.listdir(directory_path):\n", + " # Check if the file starts with 'newCAM_demo_sub_'\n", + " if file_name.startswith('newCAM_demo_sub_'):\n", + " # Construct the full file path\n", + " file_path = os.path.join(directory_path, file_name)\n", + "\n", + " # Load data from the file\n", + " with nc.Dataset(file_path) as dataset:\n", + " # Iterate over each variable name\n", + " for var_name in variable_names:\n", + " # Check if the variable exists in the dataset\n", + " mapped_name = variable_mapping.get(var_name, var_name)\n", + " if mapped_name in dataset.variables:\n", + " # Read the variable data\n", + " var_data = dataset[mapped_name][:]\n", + "\n", + " # Store the variable data in the dictionary\n", + " variable_data[var_name] = var_data\n", + "\n", + " return variable_data\n", + "\n", + "\n", + "def load_mean_std(file_path_mean, file_path_std, variable_names):\n", + " \n", + " # Load mean and standard deviation files\n", + " mean_data = np.load(file_path_mean)\n", + " std_data = np.load(file_path_std)\n", + "\n", + " # Define dictionaries to store mean and std for each variable\n", + " mean_dict = {var_name: mean_data[var_name] for var_name in variable_names}\n", + " std_dict = {var_name: std_data[var_name] for var_name in variable_names}\n", + "\n", + " return mean_dict, std_dict\n", + "\n", + "\n", + "\n", + "def normalize_data(variable_data, mean_values, std_values):\n", + " \n", + " normalized_data = {}\n", + "\n", + " # Iterate over each variable in the variable data\n", + " for var_name, var_data in variable_data.items():\n", + " # Check if variable exists in the mean and std dictionaries\n", + " if var_name in mean_values and var_name in std_values:\n", + " # Extract mean and std for the variable\n", + " mean = mean_values[var_name]\n", + " std = std_values[var_name]\n", + "\n", + " # Perform normalization\n", + " normalized_var_data = (var_data - mean) / std\n", + "\n", + " # Store normalized data\n", + " normalized_data[var_name] = normalized_var_data\n", + "\n", + " return normalized_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 293, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Variable: PS Shape: (1, 4419)\n", + "Variable: Z3 Shape: (1, 93, 4419)\n", + "Variable: U Shape: (1, 93, 4419)\n", + "Variable: V Shape: (1, 93, 4419)\n", + 
"Variable: T Shape: (1, 93, 4419)\n", + "Variable: lat Shape: (1, 4419)\n", + "Variable: lon Shape: (1, 4419)\n", + "Variable: DSE Shape: (1, 93, 4419)\n", + "Variable: RHOI Shape: (1, 94, 4419)\n", + "Variable: NETDT Shape: (1, 93, 4419)\n", + "Variable: NM Shape: (1, 93, 4419)\n", + "Variable: UTGWSPEC Shape: (1, 93, 4419)\n", + "Variable: VTGWSPEC Shape: (1, 93, 4419)\n" + ] + } + ], + "source": [ + "variable_data = load_variables(directory_path, features, 1, 5)\n", + "# print(f'Data variables: {variable_data.keys()}')\n", + "mean_dict, std_dict = load_mean_std(file_path_mean, file_path_std, features)\n", + "# print(f'Mean variables: {mean_dict.keys()}')\n", + "# print(f'Std variables: {std_dict.keys()}')\n", + "normalized_data = normalize_data(variable_data, mean_dict, std_dict)\n", + "# print(f'Normalised variables: {normalized_data.keys()}')\n", + "\n", + "\n", + "for var_name, var_data in normalized_data.items():\n", + " # Get the shape of the variable data\n", + " var_shape = var_data.shape if isinstance(var_data, np.ndarray) else \"Not an array\"\n", + " print(\"Variable:\", var_name, \" Shape:\", var_shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Correct NCol = 4419\n", + "ilev = 93\n", + "\n", + "Points to be considered:\n", + "1. Some variables are different dimensions, varying over different levels (ilev=93/94 here)\n", + "2. These levels cause the input dimensions to become large (8 variables, each with 93 instances (i.e.varying across 93 vertical levels), and 4 variables not varying across the 93 levels.)\n", + "3. Both Input and Ouput variables have 93 levels" + ] + }, + { + "cell_type": "code", + "execution_count": 256, + "metadata": {}, + "outputs": [], + "source": [ + "def data_loader(variable_names, normalized_data, ilev):\n", + " # Determine the shape of the data\n", + " Ncol = normalized_data[variable_names[1]].shape[2]\n", + " # print(f'ilev= {ilev} and Ncol={Ncol}')\n", + "\n", + " # Initialize x_train and y_train arrays\n", + " # Calculate dim_NN and dim_NNout\n", + " dim_NN = int(8 * ilev + 4) # 8 variables varying over 93 levels, 4 constant variables (lat, long, PS )\n", + " dim_NNout = int(2 * ilev) #(UTGWSPEC, VTGWSPEC)\n", + "\n", + " # Initialize x_train and y_train arrays\n", + " x_train = np.zeros([dim_NN, Ncol])\n", + " y_train = np.zeros([dim_NNout, Ncol])\n", + "\n", + " # print(f'Set xtrain shape{x_train.shape}')\n", + " # print(f'Set ytrain shape{y_train.shape}')\n", + " target_var = ['UTGWSPEC','VTGWSPEC']\n", + "\n", + " # Assign variables to x_train\n", + " y_index = 0\n", + " x_index = 0\n", + " for var_name, var_data in normalized_data.items():\n", + " var_shape = var_data.shape\n", + "\n", + " if var_name in target_var:\n", + " # print(var_name, y_index\n", + " y_train[y_index * ilev:(y_index + 1) * ilev, :] = var_data.reshape(ilev, Ncol)\n", + " y_index +=1\n", + " \n", + " elif len(var_shape) == 2: # For 2D variables\n", + " # print(var_name, x_index)\n", + " x_train[x_index, :] = var_data\n", + " \n", + " elif len(var_shape) == 3:\n", + " new_ilev = var_shape[1]\n", + " # print(var_name, x_index)\n", + " x_train[x_index:x_index + new_ilev, :] = var_data ### Issue here in extracting variables level-wise because of difference in levels\n", + " x_index+=1\n", + "\n", + " return x_train, y_train\n" + ] + }, + { + "cell_type": "code", + "execution_count": 294, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "(748, 4419) (186, 4419)\n" + ] + } + ], + "source": [ + "xtrain, ytrain = data_loader(features, normalized_data, ilev=93)\n", + "print(xtrain.shape, ytrain.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 258, + "metadata": {}, + "outputs": [], + "source": [ + "# Required for feeding the data into NN.\n", + "class myDataset(Dataset):\n", + " def __init__(self, X, Y):\n", + " \"\"\"\n", + " Parameters:\n", + " X (tensor): Input data.\n", + " Y (tensor): Output data.\n", + " \"\"\"\n", + " self.features = torch.tensor(X, dtype=torch.float64)\n", + " self.labels = torch.tensor(Y, dtype=torch.float64)\n", + "\n", + " def __len__(self):\n", + " \"\"\"Function that is called when you call len(dataloader)\"\"\"\n", + " return len(self.features.T)\n", + "\n", + " def __getitem__(self, idx):\n", + " \"\"\"Function that is called when you call dataloader\"\"\"\n", + " feature = self.features[:, idx]\n", + " label = self.labels[:, idx]\n", + "\n", + " return feature, label\n" + ] + }, + { + "cell_type": "code", + "execution_count": 259, + "metadata": {}, + "outputs": [], + "source": [ + "data = myDataset(X=xtrain, Y=ytrain)\n", + "split_data = torch.utils.data.random_split(data, [0.75, 0.25],generator=torch.Generator().manual_seed(42))\n", + "\n", + "train_dataloader = DataLoader(split_data[0], batch_size=128, shuffle=True)\n", + "val_dataloader = DataLoader(split_data[1], batch_size=len(split_data[1]), shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 260, + "metadata": {}, + "outputs": [], + "source": [ + "class FullyConnected(nn.Module):\n", + " def __init__(self):\n", + " \"\"\"Create an instance of FullyConnected NN model.\"\"\"\n", + " super(FullyConnected, self).__init__()\n", + " ilev = 93\n", + " hidden_layers = 8 # Number of hidden layers\n", + " hidden_size = 500 # Number of neurons in each hidden layer\n", + "\n", + " layers = []\n", + " input_size = 8 * ilev + 4\n", + " for _ in range(hidden_layers):\n", + " layers.append(nn.Linear(input_size, hidden_size, dtype=torch.float64))\n", + " layers.append(nn.SiLU())\n", + " input_size = hidden_size\n", + "\n", + " layers.append(nn.Linear(hidden_size, 2 * ilev, dtype=torch.float64))\n", + "\n", + " self.linear_stack = nn.Sequential(*layers)\n", + "\n", + " def forward(self, X):\n", + " return self.linear_stack(X)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 261, + "metadata": {}, + "outputs": [], + "source": [ + "class EarlyStopper:\n", + " def __init__(self, patience=1, min_delta=0):\n", + " \"\"\"Create an instance of EarlyStopper class.\"\"\"\n", + " self.patience = patience\n", + " self.min_delta = min_delta\n", + " self.counter = 0\n", + " self.min_validation_loss = np.inf\n", + "\n", + " def early_stop(self, validation_loss, model=None):\n", + " \"\"\"\n", + " Check if early stopping condition is met.\n", + "\n", + " Args:\n", + " validation_loss (float): Loss value on the validation set.\n", + " model (nn.Module, optional): Model to be saved if early stopping condition is met.\n", + "\n", + " Returns\n", + " -------\n", + " bool: True if early stopping condition is met, False otherwise.\n", + " \"\"\"\n", + " if validation_loss < self.min_validation_loss:\n", + " self.min_validation_loss = validation_loss\n", + " self.counter = 0\n", + "\n", + " # Save model\n", + " if model is not None:\n", + " torch.save(model.state_dict(), 'conv_torch.pth')\n", + "\n", + " elif validation_loss > (self.min_validation_loss + self.min_delta):\n", + " 
self.counter += 1\n", + " if self.counter >= self.patience:\n", + " return True\n", + " return False\n" + ] + }, + { + "cell_type": "code", + "execution_count": 262, + "metadata": {}, + "outputs": [], + "source": [ + "def train_loop(dataloader, model, loss_fn, optimizer):\n", + " size = len(dataloader.dataset)\n", + " avg_loss = 0\n", + " for batch, (X, Y) in enumerate(dataloader):\n", + " # Compute prediction and loss\n", + " pred = model(X)\n", + " loss = loss_fn(pred, Y)\n", + "\n", + " # Backpropagation\n", + " optimizer.zero_grad(set_to_none=True)\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " avg_loss += loss.item() # Accumulate loss as a float\n", + "\n", + " avg_loss /= len(dataloader)\n", + "\n", + " return avg_loss\n", + "\n", + "\n", + "\n", + "def val_loop(dataloader, model, loss_fn):\n", + " avg_loss = sum(loss_fn(model(X), Y).item() for X, Y in dataloader) / len(dataloader)\n", + " return avg_loss" + ] + }, + { + "cell_type": "code", + "execution_count": 263, + "metadata": {}, + "outputs": [], + "source": [ + "def train_with_early_stopping(train_dataloader, val_dataloader, model, optimizer, criterion, early_stopper, epochs=100):\n", + " train_losses = []\n", + " val_losses = [0]\n", + "\n", + " for epoch in range(epochs):\n", + " if epoch % 2 == 0:\n", + " print(f\"Epoch {epoch+1}\\n-------------------------------\")\n", + " print(val_losses[-1])\n", + " print('counter=' + str(early_stopper.counter))\n", + "\n", + " train_loss = train_loop(train_dataloader, model, criterion, optimizer)\n", + " train_losses.append(train_loss)\n", + "\n", + " val_loss = val_loop(val_dataloader, model, criterion)\n", + " val_losses.append(val_loss)\n", + "\n", + " if early_stopper.early_stop(val_loss, model):\n", + " print(\"BREAK!\")\n", + " break\n", + "\n", + " return train_losses, val_losses\n" + ] + }, + { + "cell_type": "code", + "execution_count": 264, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1\n", + "-------------------------------\n", + "0\n", + "counter=0\n", + "Epoch 3\n", + "-------------------------------\n", + "0.8489934648869807\n", + "counter=0\n", + "Epoch 5\n", + "-------------------------------\n", + "0.848900956575377\n", + "counter=0\n", + "Epoch 7\n", + "-------------------------------\n", + "0.8488277427356834\n", + "counter=0\n", + "Epoch 9\n", + "-------------------------------\n", + "0.8487524616774139\n", + "counter=0\n", + "Epoch 11\n", + "-------------------------------\n", + "0.8486892791301094\n", + "counter=0\n", + "Epoch 13\n", + "-------------------------------\n", + "0.8486358109089807\n", + "counter=0\n", + "Epoch 15\n", + "-------------------------------\n", + "0.8485900223018678\n", + "counter=0\n", + "Epoch 17\n", + "-------------------------------\n", + "0.8485513591271571\n", + "counter=0\n", + "Epoch 19\n", + "-------------------------------\n", + "0.8485374125600498\n", + "counter=0\n", + "Epoch 21\n", + "-------------------------------\n", + "0.8485556882976334\n", + "counter=2\n", + "Epoch 23\n", + "-------------------------------\n", + "0.8485675317451268\n", + "counter=4\n", + "BREAK!\n" + ] + } + ], + "source": [ + "learning_rate = 1e-5\n", + "epochs = 100\n", + "\n", + "model = FullyConnected()\n", + "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n", + "criterion = nn.MSELoss()\n", + "early_stopper = EarlyStopper(patience=5, min_delta=0)\n", + "\n", + "train_losses, val_losses = train_with_early_stopping(train_dataloader, val_dataloader, model, 
  {
   "cell_type": "code",
   "execution_count": 264,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1\n",
      "-------------------------------\n",
      "0\n",
      "counter=0\n",
      "Epoch 3\n",
      "-------------------------------\n",
      "0.8489934648869807\n",
      "counter=0\n",
      "Epoch 5\n",
      "-------------------------------\n",
      "0.848900956575377\n",
      "counter=0\n",
      "Epoch 7\n",
      "-------------------------------\n",
      "0.8488277427356834\n",
      "counter=0\n",
      "Epoch 9\n",
      "-------------------------------\n",
      "0.8487524616774139\n",
      "counter=0\n",
      "Epoch 11\n",
      "-------------------------------\n",
      "0.8486892791301094\n",
      "counter=0\n",
      "Epoch 13\n",
      "-------------------------------\n",
      "0.8486358109089807\n",
      "counter=0\n",
      "Epoch 15\n",
      "-------------------------------\n",
      "0.8485900223018678\n",
      "counter=0\n",
      "Epoch 17\n",
      "-------------------------------\n",
      "0.8485513591271571\n",
      "counter=0\n",
      "Epoch 19\n",
      "-------------------------------\n",
      "0.8485374125600498\n",
      "counter=0\n",
      "Epoch 21\n",
      "-------------------------------\n",
      "0.8485556882976334\n",
      "counter=2\n",
      "Epoch 23\n",
      "-------------------------------\n",
      "0.8485675317451268\n",
      "counter=4\n",
      "BREAK!\n"
     ]
    }
   ],
   "source": [
    "learning_rate = 1e-5\n",
    "epochs = 100\n",
    "\n",
    "model = FullyConnected()\n",
    "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n",
    "criterion = nn.MSELoss()\n",
    "early_stopper = EarlyStopper(patience=5, min_delta=0)\n",
    "\n",
    "train_losses, val_losses = train_with_early_stopping(train_dataloader, val_dataloader, model, optimizer, criterion, early_stopper, epochs)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 275,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "def predict(input_data, model):\n",
    "    # Convert input data to tensors\n",
    "    input_tensors = {key: torch.tensor(value) for key, value in input_data.items()}\n",
    "\n",
    "    # Ensure model is in evaluation mode\n",
    "    model.eval()\n",
    "\n",
    "    # Forward pass to make predictions\n",
    "    with torch.no_grad():\n",
    "        predictions = model(**input_tensors)\n",
    "\n",
    "    return predictions\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 287,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_data = load_variables(directory_path, features, 5, 6)\n",
    "mean_dict, std_dict = load_mean_std(file_path_mean, file_path_std, features)\n",
    "normalized_test_data = normalize_data(test_data, mean_dict, std_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 284,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<All keys matched successfully>"
      ]
     },
     "execution_count": 284,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_path = 'conv_torch.pth'\n",
    "model = FullyConnected()\n",
    "model.load_state_dict(torch.load(model_path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 291,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(748, 4419) (186, 4419)\n"
     ]
    }
   ],
   "source": [
    "\n",
    "x_test, y_test = data_loader(features, normalized_test_data, ilev=93)\n",
    "print(x_test.shape, y_test.shape)\n",
    "\n",
    "test_data = myDataset(X=x_test, Y=y_test)\n",
    "\n",
    "test_loader = DataLoader(data, batch_size=len(data), shuffle=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
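
A minimal inference sketch consistent with the notebook above and with the main.py introduced in the next patch (the transpose makes each row fed to the network one atmospheric column):

    import torch

    model = FullyConnected()
    model.load_state_dict(torch.load("conv_torch.pth"))
    model.eval()
    with torch.no_grad():
        x = torch.tensor(x_test, dtype=torch.float64).T   # (Ncol, 748)
        preds = model(x)                                  # (Ncol, 186)
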
From dd921f09ac26ce26a133d07d99c70d9f54d1d9e0 Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Mon, 17 Jun 2024 13:01:07 +0100
Subject: [PATCH 12/35] Updated loaddata, Model, train files and removed
 NN_pred, and added a main file

---
 newCAM_emulation/Model.py    | 136 +++++++++--------
 newCAM_emulation/loaddata.py | 238 +++++++++++++++++++++++++-----
 newCAM_emulation/main.py     |  67 +++++++++
 newCAM_emulation/train.py    | 274 +++++++++++++----------------------
 4 files changed, 441 insertions(+), 274 deletions(-)
 create mode 100644 newCAM_emulation/main.py

diff --git a/newCAM_emulation/Model.py b/newCAM_emulation/Model.py
index 7077939..096d0cc 100644
--- a/newCAM_emulation/Model.py
+++ b/newCAM_emulation/Model.py
@@ -1,92 +1,100 @@
 """Neural Network model for the CAM-EM."""
 
-import netCDF4 as nc
-import numpy as np
-import scipy.stats as st
 import torch
-import xarray as xr
+import numpy as np
 from torch import nn
-from torch.nn.utils import prune
-from torch.utils.data import DataLoader, Dataset
 
 
-# Required for feeding the data into NN.
-class myDataset(Dataset):
-    """
-    Dataset class for loading features and labels.
-
-    Args:
-        X (numpy.ndarray): Input features.
-        Y (numpy.ndarray): Corresponding labels.
+class FullyConnected(nn.Module):
+    """
+    Fully connected neural network model.
+
+    Attributes
+    ----------
+    linear_stack : nn.Sequential
+        Sequential container of linear layers and activation functions.
     """
 
-    def __init__(self, X, Y):
-        """Create an instance of myDataset class."""
-        self.features = torch.tensor(X, dtype=torch.float64)
-        self.labels = torch.tensor(Y, dtype=torch.float64)
-
-    def __len__(self):
-        """Return the number of samples in the dataset."""
-        return len(self.features.T)
-
-    def __getitem__(self, idx):
-        """Return a sample from the dataset."""
-        feature = self.features[:, idx]
-        label = self.labels[:, idx]
+    def __init__(self):
+        super(FullyConnected, self).__init__()
+        ilev = 93
+        hidden_layers = 8
+        hidden_size = 500
+        layers = []
 
-        return feature, label
+        input_size = 8 * ilev + 4
+        for _ in range(hidden_layers):
+            layers.append(nn.Linear(input_size, hidden_size, dtype=torch.float64))
+            layers.append(nn.SiLU())
+            input_size = hidden_size
+        layers.append(nn.Linear(hidden_size, 2 * ilev, dtype=torch.float64))
+        self.linear_stack = nn.Sequential(*layers)
 
+    def forward(self, X):
+        """
+        Forward pass through the network.
 
-# The NN model.
-class NormalizationLayer(nn.Module):
-    def __init__(self, mean, std):
-        super(NormalizationLayer, self).__init__()
-        self.mean = mean
-        self.std = std
+        Parameters
+        ----------
+        X : torch.Tensor
+            Input tensor.
 
-    def forward(self, x):
-        return (x - self.mean) / self.std
+        Returns
+        -------
+        torch.Tensor
+            Output tensor.
+        """
+        return self.linear_stack(X)
 
 
-class FullyConnected(nn.Module):
+class EarlyStopper:
     """
-    Fully connected neural network model.
+    Early stopping utility to stop training when validation loss doesn't improve.
 
-    The model consists of multiple fully connected layers with SiLU activation function.
+    Parameters
+    ----------
+    patience : int, optional
+        Number of epochs to wait before stopping (default is 1).
+    min_delta : float, optional
+        Minimum change in the monitored quantity to qualify as an improvement (default is 0).
 
     Attributes
     ----------
-    linear_stack (torch.nn.Sequential): Sequential container for layers.
+    patience : int
+        Number of epochs to wait before stopping.
+    min_delta : float
+        Minimum change in the monitored quantity to qualify as an improvement.
+    counter : int
+        Counter for the number of epochs without improvement.
+    min_validation_loss : float
+        Minimum validation loss recorded.
     """
 
-    def __init__(self, ilev, mean, std):
-        """Create an instance of FullyConnected NN model."""
-        super(FullyConnected, self).__init__()
-        self.normalization = NormalizationLayer(mean, std)
-        self.ilev = ilev
-
-        layers = []
-        layers.append(nn.Linear(8 * ilev + 4, 500))
-        layers.append(nn.SiLU())
-
-        num_layers = 10  # Example: Change this to the desired number of hidden layers
-        for _ in range(num_layers):
-            layers.append(nn.Linear(500, 500))
-            layers.append(nn.SiLU())
-
-        layers.append(nn.Linear(500, 2 * ilev))
-        self.linear_stack = nn.Sequential(*layers)
+    def __init__(self, patience=1, min_delta=0):
+        self.patience = patience
+        self.min_delta = min_delta
+        self.counter = 0
+        self.min_validation_loss = np.inf
 
-    def forward(self, x):
+    def early_stop(self, validation_loss, model=None):
         """
-        Forward pass through the network.
+        Check if training should be stopped early.
 
-        Args:
-            x (torch.Tensor): Input tensor.
+        Parameters
+        ----------
+        validation_loss : float
+            Current validation loss.
+        model : nn.Module, optional
+            Model to save if validation loss improves (default is None).
 
         Returns
         -------
-        torch.Tensor: Output tensor.
+        bool
+            True if training should be stopped, False otherwise.
         """
-        x = self.normalization(x)
-        return self.linear_stack(x)
+        if validation_loss < self.min_validation_loss:
+            self.min_validation_loss = validation_loss
+            self.counter = 0
+            if model is not None:
+                torch.save(model.state_dict(), "conv_torch.pth")
+        elif validation_loss > (self.min_validation_loss + self.min_delta):
+            self.counter += 1
+            if self.counter >= self.patience:
+                return True
+        return False
diff --git a/newCAM_emulation/loaddata.py b/newCAM_emulation/loaddata.py
index f2b8cae..f4a6309 100644
--- a/newCAM_emulation/loaddata.py
+++ b/newCAM_emulation/loaddata.py
@@ -1,42 +1,208 @@
 """Implementing data loader for training neural network."""
+import os
+import re
+import torch
 import numpy as np
+import netCDF4 as nc
 
-ilev = 93
-dim_NN = int(8 * ilev + 4)
-dim_NNout = int(2 * ilev)
-
-
-# Iterating through the data files
-s_list = list(range(1, 6))
-for iter in s_list:
-    filename = (
-        "Demodata/Convection/newCAM_demo_sub_" + str(iter).zfill(1) + ".nc"
-    )  # data file
-    print("working on: ", filename)
-    fm = np.load("Demodata/mean_demo_sub.npz")  # mean file
-    fs = np.load("Demodata/std_demo_sub.npz")  # std deviation file
-
-
-def data_loader(U, V, T, DSE, NM, NETDT, Z3, RHOI, PS, lat, lon, UTGWSPEC, VTGWSPEC):
-    """Function to iterate over the data read by the above part of code"""
-    Ncol = U.shape[1]
-    x_train = np.vstack(
-        [
-            U.reshape(-1, Ncol),
-            V.reshape(-1, Ncol),
-            T.reshape(-1, Ncol),
-            DSE.reshape(-1, Ncol),
-            NM.reshape(-1, Ncol),
-            NETDT.reshape(-1, Ncol),
-            Z3.reshape(-1, Ncol),
-            RHOI.reshape(-1, Ncol),
-            PS.reshape(1, Ncol),
-            lat.reshape(1, Ncol),
-            lon.reshape(1, Ncol),
-        ]
-    )
-
-    y_train = np.vstack([UTGWSPEC.reshape(-1, Ncol), VTGWSPEC.reshape(-1, Ncol)])
-
-    return x_train, y_train
+features = [
+    "PS",
+    "Z3",
+    "U",
+    "V",
+    "T",
+    "lat",
+    "lon",
+    "DSE",
+    "RHOI",
+    "NETDT",
+    "NM",
+    "UTGWSPEC",
+    "VTGWSPEC",
+]
+
+
+def load_variables(directory_path, variable_names, startfile, endfile):
+    """
+    Loads specified variables from NetCDF files in the given directory.
+
+    Parameters
+    ----------
+    directory_path : str
+        Path to the directory containing NetCDF files.
+    variable_names : list of str
+        List of variable names to load.
+    startfile : int
+        Starting file number.
+    endfile : int
+        Ending file number.
+
+    Returns
+    -------
+    dict
+        Dictionary containing loaded variables data.
+    """
+    variable_mapping = {"NM": "NMBV"}
+    variable_data = {}
+    # note: pattern is currently unused; files are matched via startswith below
+    pattern = re.compile(rf"^newCAM_demo_sub_[{startfile}-{endfile}]\.nc$")
+
+    for file_name in os.listdir(directory_path):
+        if file_name.startswith("newCAM_demo_sub_"):
+            file_path = os.path.join(directory_path, file_name)
+            with nc.Dataset(file_path) as dataset:
+                for var_name in variable_names:
+                    mapped_name = variable_mapping.get(var_name, var_name)
+                    if mapped_name in dataset.variables:
+                        var_data = dataset[mapped_name][:]
+                        variable_data[var_name] = var_data
+
+    return variable_data
+
+
+def load_mean_std(file_path_mean, file_path_std, variable_names):
+    """
+    Loads mean and standard deviation values for specified variables from files.
+
+    Parameters
+    ----------
+    file_path_mean : str
+        Path to the file containing mean values.
+    file_path_std : str
+        Path to the file containing standard deviation values.
+    variable_names : list of str
+        List of variable names.
+
+    Returns
+    -------
+    tuple of dict
+        Dictionaries containing mean and standard deviation values.
+    """
+    mean_data = np.load(file_path_mean)
+    std_data = np.load(file_path_std)
+    mean_dict = {var_name: mean_data[var_name] for var_name in variable_names}
+    std_dict = {var_name: std_data[var_name] for var_name in variable_names}
+    return mean_dict, std_dict
+
+
+def normalize_data(variable_data, mean_values, std_values):
+    """
+    Normalizes the data using mean and standard deviation values.
+
+    Parameters
+    ----------
+    variable_data : dict
+        Dictionary containing the variable data.
+    mean_values : dict
+        Dictionary containing mean values.
+    std_values : dict
+        Dictionary containing standard deviation values.
+
+    Returns
+    -------
+    dict
+        Dictionary containing normalized data.
+    """
+    normalized_data = {}
+    for var_name, var_data in variable_data.items():
+        if var_name in mean_values and var_name in std_values:
+            mean = mean_values[var_name]
+            std = std_values[var_name]
+            normalized_var_data = (var_data - mean) / std
+            normalized_data[var_name] = normalized_var_data
+    return normalized_data
+
+
+def data_loader(variable_names, normalized_data, ilev):
+    """
+    Prepares the data for training by organizing it into input and output arrays.
+
+    Parameters
+    ----------
+    variable_names : list of str
+        List of variable names.
+    normalized_data : dict
+        Dictionary containing normalized data.
+    ilev : int
+        Number of vertical levels.
+
+    Returns
+    -------
+    tuple of np.ndarray
+        Input and output arrays for training.
+    """
+    Ncol = normalized_data[variable_names[1]].shape[2]
+    dim_NN = int(8 * ilev + 4)
+    dim_NNout = int(2 * ilev)
+    x_train = np.zeros([dim_NN, Ncol])
+    y_train = np.zeros([dim_NNout, Ncol])
+    target_var = ["UTGWSPEC", "VTGWSPEC"]
+    y_index = 0
+    x_index = 0
+    for var_name, var_data in normalized_data.items():
+        var_shape = var_data.shape
+        if var_name in target_var:
+            y_train[y_index * ilev : (y_index + 1) * ilev, :] = var_data.reshape(
+                ilev, Ncol
+            )
+            y_index += 1
+        elif len(var_shape) == 2:
+            x_train[x_index, :] = var_data
+        elif len(var_shape) == 3:
+            new_ilev = var_shape[1]
+            x_train[x_index : x_index + new_ilev, :] = var_data
+            x_index += 1
+
+    return x_train, y_train
+
+
+class MyDataset(torch.utils.data.Dataset):
+    """
+    Custom Dataset for loading features and labels.
+
+    Parameters
+    ----------
+    X : np.ndarray
+        Feature data.
+    Y : np.ndarray
+        Label data.
+
+    Attributes
+    ----------
+    features : torch.Tensor
+        Tensor containing the feature data.
+    labels : torch.Tensor
+        Tensor containing the label data.
+    """
+
+    def __init__(self, X, Y):
+        self.features = torch.tensor(X, dtype=torch.float64)
+        self.labels = torch.tensor(Y, dtype=torch.float64)
+
+    def __len__(self):
+        """
+        Returns the length of the dataset.
+
+        Returns
+        -------
+        int
+            Length of the dataset.
+        """
+        return len(self.features.T)
+
+    def __getitem__(self, idx):
+        """
+        Returns a single sample from the dataset.
+
+        Parameters
+        ----------
+        idx : int
+            Index of the sample.
+
+        Returns
+        -------
+        tuple of torch.Tensor
+            Feature and label tensors for the sample.
+        """
+        feature = self.features[:, idx]
+        label = self.labels[:, idx]
+        return feature, label
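+
+
+# Usage sketch for MyDataset (illustrative, assuming arrays shaped as produced
+# by data_loader; one sample is one atmospheric column):
+#     data = MyDataset(X=x_train, Y=y_train)   # x_train: (748, Ncol)
+#     feature, label = data[0]                 # shapes: (748,), (186,)
+#     loader = DataLoader(data, batch_size=128, shuffle=True)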
diff --git a/newCAM_emulation/main.py b/newCAM_emulation/main.py
new file mode 100644
index 0000000..d0cb791
--- /dev/null
+++ b/newCAM_emulation/main.py
@@ -0,0 +1,67 @@
+import os
+import numpy as np
+import torch
+from torch.utils.data import DataLoader
+import torch.nn as nn
+from loaddata import load_variables, load_mean_std, normalize_data, data_loader, MyDataset, features
+from Model import FullyConnected, EarlyStopper
+from train import train_with_early_stopping
+
+# File paths and parameters
+directory_path = '../Demodata/Convection'
+file_path_mean = '../Demodata/Convection/mean_demo_sub.npz'
+file_path_std = '../Demodata/Convection/std_demo_sub.npz'
+trained_model_path = 'conv_torch.pth'  # Path to save and load the trained model
+
+# Load and preprocess data
+variable_data = load_variables(directory_path, features, 1, 5)
+mean_dict, std_dict = load_mean_std(file_path_mean, file_path_std, features)
+normalized_data = normalize_data(variable_data, mean_dict, std_dict)
+xtrain, ytrain = data_loader(features, normalized_data, ilev=93)
+
+
+# Print the shapes of xtrain and ytrain
+print(f"xtrain shape: {xtrain.shape}")
+print(f"ytrain shape: {ytrain.shape}")
+
+
+# Prepare dataset and dataloaders
+data = MyDataset(X=xtrain, Y=ytrain)
+split_data = torch.utils.data.random_split(data, [0.75, 0.25], generator=torch.Generator().manual_seed(42))
+train_dataloader = DataLoader(split_data[0], batch_size=128, shuffle=True)
+val_dataloader = DataLoader(split_data[1], batch_size=len(split_data[1]), shuffle=True)
+
+# Model training parameters
+learning_rate = 1e-5
+epochs = 100
+model = FullyConnected()
+optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
+criterion = nn.MSELoss()
+early_stopper = EarlyStopper(patience=5, min_delta=0)
+
+# Train the model with early stopping
+train_losses, val_losses = train_with_early_stopping(train_dataloader, val_dataloader, model, optimizer, criterion, early_stopper, epochs=epochs)
+
+# Save the trained model
+torch.save(model.state_dict(), trained_model_path)
+
+# Load the trained model for prediction
+model.load_state_dict(torch.load(trained_model_path))
+model.eval()
+
+# Prepare input data for prediction
+# For prediction, we need new input data. Here, we use different files for simplicity.
+test_data = load_variables(directory_path, features, 4, 5)
+normalized_test_data = normalize_data(test_data, mean_dict, std_dict)
+x_test, y_test = data_loader(features, normalized_test_data, ilev=93)
+
+# Convert test data to tensors
+x_test_tensor = torch.tensor(x_test, dtype=torch.float64).T
+
+# Make predictions
+with torch.no_grad():
+    predictions = model(x_test_tensor).numpy()
+
+# Print predictions
+print("Predictions Shape:\n", predictions.shape)
+# print("Predictions:\n", predictions)
diff --git a/newCAM_emulation/train.py b/newCAM_emulation/train.py
index 3f44029..3f084ad 100644
--- a/newCAM_emulation/train.py
+++ b/newCAM_emulation/train.py
@@ -1,188 +1,110 @@
 """Training script for the neural network."""
 
-import Model
-import netCDF4 as nc
-import numpy as np
-
 import torch
-from loaddata import data_loader, newnorm
 from torch import nn
-from torch.backends import mps
-from torch.cuda import is_available
-from torch.utils.data import DataLoader
-from torch.cuda import is_available
-from torch.backends import mps
-if is_available():
-    DEVICE = "cuda"
-elif mps.is_available():
-    DEVICE = "mps"
-else:
-    DEVICE = "cpu"
-print(f"Using device: {DEVICE}")
-
-
-class EarlyStopper:
-    """Class for implementing early stopping during training."""
-
-    def __init__(self, patience=1, min_delta=0):
-        """Create an instance of EarlyStopper class."""
-        self.patience = patience
-        self.min_delta = min_delta
-        self.counter = 0
-        self.min_validation_loss = np.inf
-
-    def early_stop(self, validation_loss):
-        """
-        Check if early stopping condition is met.
-
-        Args:
-            validation_loss (float): Loss value on the validation set.
+
+def train_loop(dataloader, model, loss_fn, optimizer):
+    """
+    Training loop for a single epoch.
+
+    Parameters
+    ----------
+    dataloader : torch.utils.data.DataLoader
+        DataLoader for the training data.
+    model : nn.Module
+        Neural network model.
+    loss_fn : callable
+        Loss function.
+    optimizer : torch.optim.Optimizer
+        Optimizer for training.
+
+    Returns
+    -------
+    float
+        Average training loss.
+    """
+    avg_loss = 0
+    for batch, (X, Y) in enumerate(dataloader):
+        pred = model(X)
+        loss = loss_fn(pred, Y)
+        optimizer.zero_grad(set_to_none=True)
+        loss.backward()
+        optimizer.step()
+        avg_loss += loss.item()
+    avg_loss /= len(dataloader)
+    return avg_loss
+
+
+def val_loop(dataloader, model, loss_fn):
+    """
+    Validation loop for a single epoch.
+
+    Parameters
+    ----------
+    dataloader : torch.utils.data.DataLoader
+        DataLoader for the validation data.
+    model : nn.Module
+        Neural network model.
+    loss_fn : callable
+        Loss function.
 
         Returns
         -------
-        bool: True if early stopping condition is met, False otherwise.
-        """
-        if validation_loss < self.min_validation_loss:
-            self.min_validation_loss = validation_loss
-            self.counter = 0
-
-            # save model
-            torch.save(model.state_dict(), "conv_torch.pth")
-
-        elif validation_loss > (self.min_validation_loss + self.min_delta):
-            self.counter += 1
-            if self.counter >= self.patience:
-                return True
-        return False
-
-
-
-
-## load mean and std for normalization
-fm = np.load("../Demodata/mean_demo_sub.npz")
-fs = np.load("../Demodata/std_demo_sub.npz")
-
-Um = fm["U"]
-Vm = fm["V"]
-Tm = fm["T"]
-DSEm = fm["DSE"]
-NMm = fm["NM"]
-NETDTm = fm["NETDT"]
-Z3m = fm["Z3"]
-RHOIm = fm["RHOI"]
-PSm = fm["PS"]
-latm = fm["lat"]
-lonm = fm["lon"]
-UTGWSPECm = fm["UTGWSPEC"]
-VTGWSPECm = fm["VTGWSPEC"]
-
-Us = fs["U"]
-Vs = fs["V"]
-Ts = fs["T"]
-DSEs = fs["DSE"]
-NMs = fs["NM"]
-NETDTs = fs["NETDT"]
-Z3s = fs["Z3"]
-RHOIs = fs["RHOI"]
-PSs = fs["PS"]
-lats = fs["lat"]
-lons = fs["lon"]
-UTGWSPECs = fs["UTGWSPEC"]
-VTGWSPECs = fs["VTGWSPEC"]
-
-ilev = 93
-
-dim_NN = int(8 * ilev + 4)
-dim_NNout = int(2 * ilev)
-
-model = Model.FullyConnected()
-
-train_losses = []
-val_losses = [0]
-
-learning_rate = 1e-5
-epochs = 100
-optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # weight_decay=1e-5
-
-
-s_list = list(range(1, 6))
-
-for iter in s_list:
-    if iter > 1:
-        model.load_state_dict(torch.load("conv_torch.pth"))
-    print("data loader iteration", iter)
-    filename = "../Demodata/newCAM_demo_sub_" + str(iter).zfill(1) + ".nc"
-    print("working on: ", filename)
-
-    F = nc.Dataset(filename)
-
-    PS = np.asarray(F["PS"][0, :])
-    PS = newnorm(PS, PSm, PSs)
-
-    Z3 = np.asarray(F["Z3"][0, :, :])
-    Z3 = newnorm(Z3, Z3m, Z3s)
-
-    U = np.asarray(F["U"][0, :, :])
-    U = newnorm(U, Um, Us)
-
-    V = np.asarray(F["V"][0, :, :])
-    V = newnorm(V, Vm, Vs)
-
-    T = np.asarray(F["T"][0, :, :])
-    T = newnorm(T, Tm, Ts)
-
-    lat = F["lat"]
-    lat = newnorm(lat, np.mean(lat), np.std(lat))
-
-    lon = F["lon"]
-    lon = newnorm(lon, np.mean(lon), np.std(lon))
-
-    DSE = np.asarray(F["DSE"][0, :, :])
-    DSE = newnorm(DSE, DSEm, DSEs)
-
-    RHOI = np.asarray(F["RHOI"][0, :, :])
-    RHOI = newnorm(RHOI, RHOIm, RHOIs)
-
-    NETDT = np.asarray(F["NETDT"][0, :, :])
-    NETDT = newnorm(NETDT, NETDTm, NETDTs)
-
-    NM = np.asarray(F["NMBV"][0, :, :])
-    NM = newnorm(NM, NMm, NMs)
-
-    UTGWSPEC = np.asarray(F["UTGWSPEC"][0, :, :])
-    UTGWSPEC = newnorm(UTGWSPEC, UTGWSPECm, UTGWSPECs)
-
-    VTGWSPEC = np.asarray(F["VTGWSPEC"][0, :, :])
-    VTGWSPEC = newnorm(VTGWSPEC, VTGWSPECm, VTGWSPECs)
-
-    x_train, y_train = data_loader(
-        U, V, T, DSE, NM, NETDT, Z3, RHOI, PS, lat, lon, UTGWSPEC, VTGWSPEC
-    )
-
-    data = Model.myDataset(X=x_train, Y=y_train)
-
-    batch_size = 128
-
-    split_data = torch.utils.data.random_split(
-        data, [0.75, 0.25], generator=torch.Generator().manual_seed(42)
-    )
-    train_dataloader = DataLoader(split_data[0], batch_size=batch_size, shuffle=True)
-    val_dataloader = DataLoader(
-        split_data[1], batch_size=len(split_data[1]), shuffle=True
-    )
-
-    # training
-    early_stopper = EarlyStopper(patience=5, min_delta=0)  # Note the hyper parameters.
-    for t in range(epochs):
-        if t % 2 == 0:
-            print(f"Epoch {t+1}\n-------------------------------")
-            print(val_losses[-1])
-            print("counter=" + str(early_stopper.counter))
-        train_loss = Model.train_loop(train_dataloader, model, nn.MSELoss(), optimizer)
-
-        train_losses.append(train_loss)
-
-        val_loss = Model.val_loop(val_dataloader, model, nn.MSELoss())
-        val_losses.append(val_loss)
-
-        if early_stopper.early_stop(val_loss):
-            print("BREAK!")
-            break
+    float
+        Average validation loss.
+    """
+    avg_loss = sum(loss_fn(model(X), Y).item() for X, Y in dataloader) / len(dataloader)
+    return avg_loss
+
+
+def train_with_early_stopping(
+    train_dataloader,
+    val_dataloader,
+    model,
+    optimizer,
+    criterion,
+    early_stopper,
+    epochs=100,
+):
+    """
+    Train the model with early stopping.
+
+    Parameters
+    ----------
+    train_dataloader : torch.utils.data.DataLoader
+        DataLoader for the training data.
+    val_dataloader : torch.utils.data.DataLoader
+        DataLoader for the validation data.
+    model : nn.Module
+        Neural network model.
+    optimizer : torch.optim.Optimizer
+        Optimizer for training.
+    criterion : callable
+        Loss function.
+    early_stopper : EarlyStopper
+        Early stopping utility.
+    epochs : int, optional
+        Number of epochs to train (default is 100).
+
+    Returns
+    -------
+    tuple of list of float
+        Training losses and validation losses for each epoch.
+    """
+    train_losses = []
+    val_losses = [0]
+    for epoch in range(epochs):
+        # if epoch % 2 == 0:
+        #     print(f"Epoch {epoch + 1}\n-------------------------------")
+        #     print(val_losses[-1])
+        #     print('counter=' + str(early_stopper.counter))
+        train_loss = train_loop(train_dataloader, model, criterion, optimizer)
+        train_losses.append(train_loss)
+        val_loss = val_loop(val_dataloader, model, criterion)
+        val_losses.append(val_loss)
+        if early_stopper.early_stop(val_loss, model):
+            # print("BREAK!")
+            break
+    return train_losses, val_losses
""" - def __init__(self): + def __init__(self, ilev=93, hidden_layers=8, hidden_size=500): super(FullyConnected, self).__init__() - ilev = 93 - hidden_layers = 8 - hidden_size = 500 layers = [] - - input_size = 8 * ilev + 4 + input_size = 8 * ilev + 4 ### Correct this hard coded part for _ in range(hidden_layers): layers.append(nn.Linear(input_size, hidden_size, dtype=torch.float64)) layers.append(nn.SiLU()) From 9bb671cb72636eb2d3aeb2158694aae929d2b0fb Mon Sep 17 00:00:00 2001 From: Surbhi Goel Date: Mon, 24 Jun 2024 20:47:17 +0100 Subject: [PATCH 14/35] removed a side comment --- newCAM_emulation/Model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/newCAM_emulation/Model.py b/newCAM_emulation/Model.py index d39b6d0..05f7f40 100644 --- a/newCAM_emulation/Model.py +++ b/newCAM_emulation/Model.py @@ -18,7 +18,7 @@ class FullyConnected(nn.Module): def __init__(self, ilev=93, hidden_layers=8, hidden_size=500): super(FullyConnected, self).__init__() layers = [] - input_size = 8 * ilev + 4 ### Correct this hard coded part + input_size = 8 * ilev + 4 for _ in range(hidden_layers): layers.append(nn.Linear(input_size, hidden_size, dtype=torch.float64)) layers.append(nn.SiLU()) From b578031ac472f857a0c0d01dd332004c4ad4a1c1 Mon Sep 17 00:00:00 2001 From: Surbhi Goel Date: Mon, 24 Jun 2024 20:51:05 +0100 Subject: [PATCH 15/35] removed redundant NN_pred.py, replaced with main.py --- newCAM_emulation/NN_pred.py | 170 ------------------------------------ 1 file changed, 170 deletions(-) delete mode 100644 newCAM_emulation/NN_pred.py diff --git a/newCAM_emulation/NN_pred.py b/newCAM_emulation/NN_pred.py deleted file mode 100644 index 8b20304..0000000 --- a/newCAM_emulation/NN_pred.py +++ /dev/null @@ -1,170 +0,0 @@ -"""Prediction module for the neural network.""" - -import matplotlib.pyplot as plt -import Model -import netCDF4 as nc -import numpy as np -import torch -import torch.nn.functional as nnF -import torchvision -from loaddata import data_loader, newnorm -from torch import nn -from torch.utils.data import DataLoader -from torchvision import datasets, transforms -from torchvision.utils import save_image - -""" -Determine if any GPUs are available -""" -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') -print(device) - - -""" -Initialize Hyperparameters -""" -ilev = 93 -dim_NN = 8*ilev + 4 -dim_NNout = 2*ilev - -batch_size = 8 -learning_rate = 1e-4 -num_epochs = 1 - - - - - -## load mean and std for normalization -fm = np.load('Demodata/mean_demo.npz') -fs = np.load('Demodata/std_demo.npz') - -Um = fm['U'] -Vm = fm['V'] -Tm = fm['T'] -DSEm = fm['DSE'] -NMm = fm['NM'] -NETDTm = fm['NETDT'] -Z3m = fm['Z3'] -RHOIm = fm['RHOI'] -PSm = fm['PS'] -latm = fm['lat'] -lonm = fm['lon'] -UTGWSPECm = fm['UTGWSPEC'] -VTGWSPECm = fm['VTGWSPEC'] - -Us = fs['U'] -Vs = fs['V'] -Ts = fs['T'] -DSEs = fs['DSE'] -NMs = fs['NM'] -NETDTs = fs['NETDT'] -Z3s = fs['Z3'] -RHOIs = fs['RHOI'] -PSs = fs['PS'] -lats = fs['lat'] -lons = fs['lon'] -UTGWSPECs = fs['UTGWSPEC'] -VTGWSPECs = fs['VTGWSPEC'] - - - -""" -Initialize the network and the Adam optimizer -""" -GWnet = Model.FullyConnected() - -optimizer = torch.optim.Adam(GWnet.parameters(), lr=learning_rate) - - -s_list = list(range(5,6)) - -for iter in s_list: - if (iter > 0): - GWnet.load_state_dict(torch.load('./conv_torch.pth')) - GWnet.eval() - print ('data loader iteration',iter) - filename = './Demodata/Demo_timestep_' + str(iter).zfill(3) + '.nc' - - F = nc.Dataset(filename) - PS = np.asarray(F['PS'][0,:]) - PS = newnorm(PS, 
PSm, PSs) - - Z3 = np.asarray(F['Z3'][0,:,:]) - Z3 = newnorm(Z3, Z3m, Z3s) - - U = np.asarray(F['U'][0,:,:]) - U = newnorm(U, Um, Us) - - V = np.asarray(F['V'][0,:,:]) - V = newnorm(V, Vm, Vs) - - T = np.asarray(F['T'][0,:,:]) - T = newnorm(T, Tm, Ts) - - lat = F['lat'] - lat = newnorm(lat, np.mean(lat), np.std(lat)) - - lon = F['lon'] - lon = newnorm(lon, np.mean(lon), np.std(lon)) - - DSE = np.asarray(F['DSE'][0,:,:]) - DSE = newnorm(DSE, DSEm, DSEs) - - RHOI = np.asarray(F['RHOI'][0,:,:]) - RHOI = newnorm(RHOI, RHOIm, RHOIs) - - NETDT = np.asarray(F['NETDT'][0,:,:]) - NETDT = newnorm(NETDT, NETDTm, NETDTs) - - NM = np.asarray(F['NMBV'][0,:,:]) - NM = newnorm(NM, NMm, NMs) - - UTGWSPEC = np.asarray(F['BUTGWSPEC'][0,:,:]) - UTGWSPEC = newnorm(UTGWSPEC, UTGWSPECm, UTGWSPECs) - - VTGWSPEC = np.asarray(F['BVTGWSPEC'][0,:,:]) - VTGWSPEC = newnorm(VTGWSPEC, VTGWSPECm, VTGWSPECs) - - - - print('shape of PS',np.shape(PS)) - print('shape of Z3',np.shape(Z3)) - print('shape of U',np.shape(U)) - print('shape of V',np.shape(V)) - print('shape of T',np.shape(T)) - print('shape of DSE',np.shape(DSE)) - print('shape of RHOI',np.shape(RHOI)) - print('shape of NETDT',np.shape(NETDT)) - print('shape of NM',np.shape(NM)) - print('shape of UTGWSPEC',np.shape(UTGWSPEC)) - print('shape of VTGWSPEC',np.shape(VTGWSPEC)) - - x_test,y_test = data_loader (U,V,T, DSE, NM, NETDT, Z3, - RHOI, PS,lat,lon,UTGWSPEC, VTGWSPEC) - - print('shape of x_test', np.shape(x_test)) - print('shape of y_test', np.shape(y_test)) - - - data = Model.myDataset(X=x_test, Y=y_test) - test_loader = DataLoader(data, batch_size=len(data), shuffle=False) - print(test_loader) - - - for batch, (X, Y) in enumerate(test_loader): - print(np.shape(Y)) - pred = GWnet(X) - truth = Y.cpu().detach().numpy() - predict = pred.cpu().detach().numpy() - - print(np.corrcoef(truth.flatten(), predict.flatten())[0, 1]) - print('shape of truth ',np.shape(truth)) - print('shape of prediction',np.shape(predict)) - - np.save('./pred_data_' + str(iter) + '.npy', predict) - - - - - From 7167611dad674f4fa67ce7bac6b387e85343fd40 Mon Sep 17 00:00:00 2001 From: Surbhi Goel Date: Mon, 24 Jun 2024 20:52:08 +0100 Subject: [PATCH 16/35] removed --- newCAM_emulation/GW.ipynb | 581 -------------------------------------- 1 file changed, 581 deletions(-) delete mode 100644 newCAM_emulation/GW.ipynb diff --git a/newCAM_emulation/GW.ipynb b/newCAM_emulation/GW.ipynb deleted file mode 100644 index 119be08..0000000 --- a/newCAM_emulation/GW.ipynb +++ /dev/null @@ -1,581 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 252, - "metadata": {}, - "outputs": [], - "source": [ - "import netCDF4 as nc\n", - "import numpy as np\n", - "import os\n", - "import re\n", - "import torch\n", - "from torch import nn\n", - "from torch.utils.data import Dataset\n", - "from torch.utils.data import DataLoader\n" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [], - "source": [ - "# Define variable names and corresponding mean and std values\n", - "features = ['PS', 'Z3', 'U', 'V', 'T', 'lat', 'lon', 'DSE', 'RHOI', 'NETDT', 'NM', 'UTGWSPEC', 'VTGWSPEC']\n", - "\n", - "directory_path = '../Demodata/Convection'\n", - "file_path_mean = '../Demodata/Convection/mean_demo_sub.npz'\n", - "file_path_std = '../Demodata/Convection/std_demo_sub.npz'" - ] - }, - { - "cell_type": "code", - "execution_count": 267, - "metadata": {}, - "outputs": [], - "source": [ - "def load_variables(directory_path, variable_names, startfile, endfile):\n", - " # 
Define the variable mapping\n", - " variable_mapping = {\n", - " 'NM': 'NMBV'\n", - " }\n", - "\n", - " # Dictionary to store data for each variable\n", - " variable_data = {}\n", - "\n", - " # Pattern to match file names\n", - " pattern = re.compile(r'^newCAM_demo_sub_\\d{startfile,endfile}$')\n", - "\n", - " # Iterate over each data file in the directory\n", - " for file_name in os.listdir(directory_path):\n", - " # Check if the file starts with 'newCAM_demo_sub_'\n", - " if file_name.startswith('newCAM_demo_sub_'):\n", - " # Construct the full file path\n", - " file_path = os.path.join(directory_path, file_name)\n", - "\n", - " # Load data from the file\n", - " with nc.Dataset(file_path) as dataset:\n", - " # Iterate over each variable name\n", - " for var_name in variable_names:\n", - " # Check if the variable exists in the dataset\n", - " mapped_name = variable_mapping.get(var_name, var_name)\n", - " if mapped_name in dataset.variables:\n", - " # Read the variable data\n", - " var_data = dataset[mapped_name][:]\n", - "\n", - " # Store the variable data in the dictionary\n", - " variable_data[var_name] = var_data\n", - "\n", - " return variable_data\n", - "\n", - "\n", - "def load_mean_std(file_path_mean, file_path_std, variable_names):\n", - " \n", - " # Load mean and standard deviation files\n", - " mean_data = np.load(file_path_mean)\n", - " std_data = np.load(file_path_std)\n", - "\n", - " # Define dictionaries to store mean and std for each variable\n", - " mean_dict = {var_name: mean_data[var_name] for var_name in variable_names}\n", - " std_dict = {var_name: std_data[var_name] for var_name in variable_names}\n", - "\n", - " return mean_dict, std_dict\n", - "\n", - "\n", - "\n", - "def normalize_data(variable_data, mean_values, std_values):\n", - " \n", - " normalized_data = {}\n", - "\n", - " # Iterate over each variable in the variable data\n", - " for var_name, var_data in variable_data.items():\n", - " # Check if variable exists in the mean and std dictionaries\n", - " if var_name in mean_values and var_name in std_values:\n", - " # Extract mean and std for the variable\n", - " mean = mean_values[var_name]\n", - " std = std_values[var_name]\n", - "\n", - " # Perform normalization\n", - " normalized_var_data = (var_data - mean) / std\n", - "\n", - " # Store normalized data\n", - " normalized_data[var_name] = normalized_var_data\n", - "\n", - " return normalized_data\n" - ] - }, - { - "cell_type": "code", - "execution_count": 293, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Variable: PS Shape: (1, 4419)\n", - "Variable: Z3 Shape: (1, 93, 4419)\n", - "Variable: U Shape: (1, 93, 4419)\n", - "Variable: V Shape: (1, 93, 4419)\n", - "Variable: T Shape: (1, 93, 4419)\n", - "Variable: lat Shape: (1, 4419)\n", - "Variable: lon Shape: (1, 4419)\n", - "Variable: DSE Shape: (1, 93, 4419)\n", - "Variable: RHOI Shape: (1, 94, 4419)\n", - "Variable: NETDT Shape: (1, 93, 4419)\n", - "Variable: NM Shape: (1, 93, 4419)\n", - "Variable: UTGWSPEC Shape: (1, 93, 4419)\n", - "Variable: VTGWSPEC Shape: (1, 93, 4419)\n" - ] - } - ], - "source": [ - "variable_data = load_variables(directory_path, features, 1, 5)\n", - "# print(f'Data variables: {variable_data.keys()}')\n", - "mean_dict, std_dict = load_mean_std(file_path_mean, file_path_std, features)\n", - "# print(f'Mean variables: {mean_dict.keys()}')\n", - "# print(f'Std variables: {std_dict.keys()}')\n", - "normalized_data = normalize_data(variable_data, mean_dict, std_dict)\n", - "# 
print(f'Normalised variables: {normalized_data.keys()}')\n", - "\n", - "\n", - "for var_name, var_data in normalized_data.items():\n", - " # Get the shape of the variable data\n", - " var_shape = var_data.shape if isinstance(var_data, np.ndarray) else \"Not an array\"\n", - " print(\"Variable:\", var_name, \" Shape:\", var_shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Correct NCol = 4419\n", - "ilev = 93\n", - "\n", - "Points to be considered:\n", - "1. Some variables are different dimensions, varying over different levels (ilev=93/94 here)\n", - "2. These levels cause the input dimensions to become large (8 variables, each with 93 instances (i.e.varying across 93 vertical levels), and 4 variables not varying across the 93 levels.)\n", - "3. Both Input and Ouput variables have 93 levels" - ] - }, - { - "cell_type": "code", - "execution_count": 256, - "metadata": {}, - "outputs": [], - "source": [ - "def data_loader(variable_names, normalized_data, ilev):\n", - " # Determine the shape of the data\n", - " Ncol = normalized_data[variable_names[1]].shape[2]\n", - " # print(f'ilev= {ilev} and Ncol={Ncol}')\n", - "\n", - " # Initialize x_train and y_train arrays\n", - " # Calculate dim_NN and dim_NNout\n", - " dim_NN = int(8 * ilev + 4) # 8 variables varying over 93 levels, 4 constant variables (lat, long, PS )\n", - " dim_NNout = int(2 * ilev) #(UTGWSPEC, VTGWSPEC)\n", - "\n", - " # Initialize x_train and y_train arrays\n", - " x_train = np.zeros([dim_NN, Ncol])\n", - " y_train = np.zeros([dim_NNout, Ncol])\n", - "\n", - " # print(f'Set xtrain shape{x_train.shape}')\n", - " # print(f'Set ytrain shape{y_train.shape}')\n", - " target_var = ['UTGWSPEC','VTGWSPEC']\n", - "\n", - " # Assign variables to x_train\n", - " y_index = 0\n", - " x_index = 0\n", - " for var_name, var_data in normalized_data.items():\n", - " var_shape = var_data.shape\n", - "\n", - " if var_name in target_var:\n", - " # print(var_name, y_index\n", - " y_train[y_index * ilev:(y_index + 1) * ilev, :] = var_data.reshape(ilev, Ncol)\n", - " y_index +=1\n", - " \n", - " elif len(var_shape) == 2: # For 2D variables\n", - " # print(var_name, x_index)\n", - " x_train[x_index, :] = var_data\n", - " \n", - " elif len(var_shape) == 3:\n", - " new_ilev = var_shape[1]\n", - " # print(var_name, x_index)\n", - " x_train[x_index:x_index + new_ilev, :] = var_data ### Issue here in extracting variables level-wise because of difference in levels\n", - " x_index+=1\n", - "\n", - " return x_train, y_train\n" - ] - }, - { - "cell_type": "code", - "execution_count": 294, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(748, 4419) (186, 4419)\n" - ] - } - ], - "source": [ - "xtrain, ytrain = data_loader(features, normalized_data, ilev=93)\n", - "print(xtrain.shape, ytrain.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 258, - "metadata": {}, - "outputs": [], - "source": [ - "# Required for feeding the data into NN.\n", - "class myDataset(Dataset):\n", - " def __init__(self, X, Y):\n", - " \"\"\"\n", - " Parameters:\n", - " X (tensor): Input data.\n", - " Y (tensor): Output data.\n", - " \"\"\"\n", - " self.features = torch.tensor(X, dtype=torch.float64)\n", - " self.labels = torch.tensor(Y, dtype=torch.float64)\n", - "\n", - " def __len__(self):\n", - " \"\"\"Function that is called when you call len(dataloader)\"\"\"\n", - " return 
len(self.features.T)\n", - "\n", - " def __getitem__(self, idx):\n", - " \"\"\"Function that is called when you call dataloader\"\"\"\n", - " feature = self.features[:, idx]\n", - " label = self.labels[:, idx]\n", - "\n", - " return feature, label\n" - ] - }, - { - "cell_type": "code", - "execution_count": 259, - "metadata": {}, - "outputs": [], - "source": [ - "data = myDataset(X=xtrain, Y=ytrain)\n", - "split_data = torch.utils.data.random_split(data, [0.75, 0.25],generator=torch.Generator().manual_seed(42))\n", - "\n", - "train_dataloader = DataLoader(split_data[0], batch_size=128, shuffle=True)\n", - "val_dataloader = DataLoader(split_data[1], batch_size=len(split_data[1]), shuffle=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 260, - "metadata": {}, - "outputs": [], - "source": [ - "class FullyConnected(nn.Module):\n", - " def __init__(self):\n", - " \"\"\"Create an instance of FullyConnected NN model.\"\"\"\n", - " super(FullyConnected, self).__init__()\n", - " ilev = 93\n", - " hidden_layers = 8 # Number of hidden layers\n", - " hidden_size = 500 # Number of neurons in each hidden layer\n", - "\n", - " layers = []\n", - " input_size = 8 * ilev + 4\n", - " for _ in range(hidden_layers):\n", - " layers.append(nn.Linear(input_size, hidden_size, dtype=torch.float64))\n", - " layers.append(nn.SiLU())\n", - " input_size = hidden_size\n", - "\n", - " layers.append(nn.Linear(hidden_size, 2 * ilev, dtype=torch.float64))\n", - "\n", - " self.linear_stack = nn.Sequential(*layers)\n", - "\n", - " def forward(self, X):\n", - " return self.linear_stack(X)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 261, - "metadata": {}, - "outputs": [], - "source": [ - "class EarlyStopper:\n", - " def __init__(self, patience=1, min_delta=0):\n", - " \"\"\"Create an instance of EarlyStopper class.\"\"\"\n", - " self.patience = patience\n", - " self.min_delta = min_delta\n", - " self.counter = 0\n", - " self.min_validation_loss = np.inf\n", - "\n", - " def early_stop(self, validation_loss, model=None):\n", - " \"\"\"\n", - " Check if early stopping condition is met.\n", - "\n", - " Args:\n", - " validation_loss (float): Loss value on the validation set.\n", - " model (nn.Module, optional): Model to be saved if early stopping condition is met.\n", - "\n", - " Returns\n", - " -------\n", - " bool: True if early stopping condition is met, False otherwise.\n", - " \"\"\"\n", - " if validation_loss < self.min_validation_loss:\n", - " self.min_validation_loss = validation_loss\n", - " self.counter = 0\n", - "\n", - " # Save model\n", - " if model is not None:\n", - " torch.save(model.state_dict(), 'conv_torch.pth')\n", - "\n", - " elif validation_loss > (self.min_validation_loss + self.min_delta):\n", - " self.counter += 1\n", - " if self.counter >= self.patience:\n", - " return True\n", - " return False\n" - ] - }, - { - "cell_type": "code", - "execution_count": 262, - "metadata": {}, - "outputs": [], - "source": [ - "def train_loop(dataloader, model, loss_fn, optimizer):\n", - " size = len(dataloader.dataset)\n", - " avg_loss = 0\n", - " for batch, (X, Y) in enumerate(dataloader):\n", - " # Compute prediction and loss\n", - " pred = model(X)\n", - " loss = loss_fn(pred, Y)\n", - "\n", - " # Backpropagation\n", - " optimizer.zero_grad(set_to_none=True)\n", - " loss.backward()\n", - " optimizer.step()\n", - "\n", - " avg_loss += loss.item() # Accumulate loss as a float\n", - "\n", - " avg_loss /= len(dataloader)\n", - "\n", - " return avg_loss\n", - "\n", - "\n", - "\n", - "def 
val_loop(dataloader, model, loss_fn):\n", - " avg_loss = sum(loss_fn(model(X), Y).item() for X, Y in dataloader) / len(dataloader)\n", - " return avg_loss" - ] - }, - { - "cell_type": "code", - "execution_count": 263, - "metadata": {}, - "outputs": [], - "source": [ - "def train_with_early_stopping(train_dataloader, val_dataloader, model, optimizer, criterion, early_stopper, epochs=100):\n", - " train_losses = []\n", - " val_losses = [0]\n", - "\n", - " for epoch in range(epochs):\n", - " if epoch % 2 == 0:\n", - " print(f\"Epoch {epoch+1}\\n-------------------------------\")\n", - " print(val_losses[-1])\n", - " print('counter=' + str(early_stopper.counter))\n", - "\n", - " train_loss = train_loop(train_dataloader, model, criterion, optimizer)\n", - " train_losses.append(train_loss)\n", - "\n", - " val_loss = val_loop(val_dataloader, model, criterion)\n", - " val_losses.append(val_loss)\n", - "\n", - " if early_stopper.early_stop(val_loss, model):\n", - " print(\"BREAK!\")\n", - " break\n", - "\n", - " return train_losses, val_losses\n" - ] - }, - { - "cell_type": "code", - "execution_count": 264, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1\n", - "-------------------------------\n", - "0\n", - "counter=0\n", - "Epoch 3\n", - "-------------------------------\n", - "0.8489934648869807\n", - "counter=0\n", - "Epoch 5\n", - "-------------------------------\n", - "0.848900956575377\n", - "counter=0\n", - "Epoch 7\n", - "-------------------------------\n", - "0.8488277427356834\n", - "counter=0\n", - "Epoch 9\n", - "-------------------------------\n", - "0.8487524616774139\n", - "counter=0\n", - "Epoch 11\n", - "-------------------------------\n", - "0.8486892791301094\n", - "counter=0\n", - "Epoch 13\n", - "-------------------------------\n", - "0.8486358109089807\n", - "counter=0\n", - "Epoch 15\n", - "-------------------------------\n", - "0.8485900223018678\n", - "counter=0\n", - "Epoch 17\n", - "-------------------------------\n", - "0.8485513591271571\n", - "counter=0\n", - "Epoch 19\n", - "-------------------------------\n", - "0.8485374125600498\n", - "counter=0\n", - "Epoch 21\n", - "-------------------------------\n", - "0.8485556882976334\n", - "counter=2\n", - "Epoch 23\n", - "-------------------------------\n", - "0.8485675317451268\n", - "counter=4\n", - "BREAK!\n" - ] - } - ], - "source": [ - "learning_rate = 1e-5\n", - "epochs = 100\n", - "\n", - "model = FullyConnected()\n", - "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n", - "criterion = nn.MSELoss()\n", - "early_stopper = EarlyStopper(patience=5, min_delta=0)\n", - "\n", - "train_losses, val_losses = train_with_early_stopping(train_dataloader, val_dataloader, model, optimizer, criterion, early_stopper, epochs)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 275, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "def predict(input_data, model):\n", - " # Convert input data to tensors\n", - " input_tensors = {key: torch.tensor(value) for key, value in input_data.items()}\n", - "\n", - " # Ensure model is in evaluation mode\n", - " model.eval()\n", - "\n", - " # Forward pass to make predictions\n", - " with torch.no_grad():\n", - " predictions = model(**input_tensors)\n", - "\n", - " return predictions\n" - ] - }, - { - "cell_type": "code", - "execution_count": 287, - "metadata": {}, - "outputs": [], - "source": [ - "test_data = load_variables(directory_path, features, 5,6)\n", - "mean_dict, std_dict = 
load_mean_std(file_path_mean, file_path_std, features)\n", - "normalized_test_data = normalize_data(test_data, mean_dict, std_dict)" - ] - }, - { - "cell_type": "code", - "execution_count": 284, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 284, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_path = 'conv_torch.pth'\n", - "model = FullyConnected()\n", - "model.load_state_dict(torch.load(model_path))" - ] - }, - { - "cell_type": "code", - "execution_count": 291, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(748, 4419) (186, 4419)\n" - ] - } - ], - "source": [ - "\n", - "x_test, y_test = data_loader(features, normalized_test_data, ilev=93)\n", - "print(x_test.shape, y_test.shape)\n", - "\n", - "test_data = myDataset(X=x_test, Y=y_test)\n", - "\n", - "test_loader = DataLoader(data, batch_size=len(data), shuffle=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From dcd75858982324fa31a317cec9f75cb92e13ad52 Mon Sep 17 00:00:00 2001 From: Surbhi Goel Date: Mon, 24 Jun 2024 20:54:29 +0100 Subject: [PATCH 17/35] Updated name of the file Model.py to model.py --- newCAM_emulation/modelrun.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 newCAM_emulation/modelrun.py diff --git a/newCAM_emulation/modelrun.py b/newCAM_emulation/modelrun.py new file mode 100644 index 0000000..3833b5c --- /dev/null +++ b/newCAM_emulation/modelrun.py @@ -0,0 +1,34 @@ +import torch +from torch import nn +import numpy as np + +import netCDF4 as nc + +# Path to the .nc file +file_path = 'Demodata/Convection/newCAM_demo_sub_5.nc' + + +# Open the netCDF file +with nc.Dataset(file_path, "r") as f: + # List all variables in the file along with their shapes + print("Variables in the file:") + for var_name, var in f.variables.items(): + print(f"Variable '{var_name}' shape: {var.shape}") + + + +# # import numpy as np + +# def get_variable_names(npz_file_path): +# # Load the .npz file +# npz_data = np.load(npz_file_path) + +# # Get the list of variable names +# variable_names = list(npz_data.keys()) + +# return variable_names + +# # # Example usage: +# # npz_file_path = 'Demodata/Convection/std_demo_sub.npz' # Replace 'your_file_path.npz' with the path to your .npz file +# # variable_names = get_variable_names(npz_file_path) +# # print("Variable names:", variable_names) From 1a8c86f7a60e99d1ab57b0606d2ceefd9f5661be Mon Sep 17 00:00:00 2001 From: Surbhi Goel Date: Mon, 24 Jun 2024 20:55:45 +0100 Subject: [PATCH 18/35] removed modelrun file, unused here --- newCAM_emulation/modelrun.py | 34 ---------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 newCAM_emulation/modelrun.py diff --git a/newCAM_emulation/modelrun.py b/newCAM_emulation/modelrun.py deleted file mode 100644 index 3833b5c..0000000 --- a/newCAM_emulation/modelrun.py +++ /dev/null @@ -1,34 +0,0 @@ -import torch -from torch import nn -import numpy as np - -import netCDF4 as nc - -# Path to the .nc file -file_path = 'Demodata/Convection/newCAM_demo_sub_5.nc' - - -# Open the netCDF 
file -with nc.Dataset(file_path, "r") as f: - # List all variables in the file along with their shapes - print("Variables in the file:") - for var_name, var in f.variables.items(): - print(f"Variable '{var_name}' shape: {var.shape}") - - - -# # import numpy as np - -# def get_variable_names(npz_file_path): -# # Load the .npz file -# npz_data = np.load(npz_file_path) - -# # Get the list of variable names -# variable_names = list(npz_data.keys()) - -# return variable_names - -# # # Example usage: -# # npz_file_path = 'Demodata/Convection/std_demo_sub.npz' # Replace 'your_file_path.npz' with the path to your .npz file -# # variable_names = get_variable_names(npz_file_path) -# # print("Variable names:", variable_names) From 8b2a8a2659327d43a52fddcd01e46024a9401888 Mon Sep 17 00:00:00 2001 From: Surbhi Goel Date: Tue, 25 Jun 2024 21:35:31 +0100 Subject: [PATCH 19/35] Changed hard coded input output values to variables --- newCAM_emulation/Model.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/newCAM_emulation/Model.py b/newCAM_emulation/Model.py index 05f7f40..fe73724 100644 --- a/newCAM_emulation/Model.py +++ b/newCAM_emulation/Model.py @@ -15,15 +15,24 @@ class FullyConnected(nn.Module): Sequential container of linear layers and activation functions. """ - def __init__(self, ilev=93, hidden_layers=8, hidden_size=500): + def __init__( + self, ilev=93, in_ver=8, in_nover=4, out_ver=2, hidden_layers=8, hidden_size=500 + ): super(FullyConnected, self).__init__() + self.ilev = ilev + self.in_ver = in_ver + self.in_nover = in_nover + self.out_ver = out_ver + self.hidden_layers = hidden_layers + self.hidden_size = hidden_size + layers = [] - input_size = 8 * ilev + 4 + input_size = in_ver * ilev + in_nover for _ in range(hidden_layers): layers.append(nn.Linear(input_size, hidden_size, dtype=torch.float64)) layers.append(nn.SiLU()) input_size = hidden_size - layers.append(nn.Linear(hidden_size, 2 * ilev, dtype=torch.float64)) + layers.append(nn.Linear(hidden_size, out_ver * ilev, dtype=torch.float64)) self.linear_stack = nn.Sequential(*layers) def forward(self, X): From 9a63f3fa10d4d0a06240eb7c1ad494e5d4f0a062 Mon Sep 17 00:00:00 2001 From: Surbhi Goel Date: Tue, 25 Jun 2024 21:41:29 +0100 Subject: [PATCH 20/35] Removed hard-coded values in data_loader --- newCAM_emulation/loaddata.py | 23 +++-------------------- newCAM_emulation/main.py | 20 +++++++++++++++----- 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/newCAM_emulation/loaddata.py b/newCAM_emulation/loaddata.py index f4a6309..f790993 100644 --- a/newCAM_emulation/loaddata.py +++ b/newCAM_emulation/loaddata.py @@ -7,23 +7,6 @@ import netCDF4 as nc -features = [ - "PS", - "Z3", - "U", - "V", - "T", - "lat", - "lon", - "DSE", - "RHOI", - "NETDT", - "NM", - "UTGWSPEC", - "VTGWSPEC", -] - - def load_variables(directory_path, variable_names, startfile, endfile): """ Loads specified variables from NetCDF files in the given directory. @@ -114,7 +97,7 @@ def normalize_data(variable_data, mean_values, std_values): return normalized_data -def data_loader(variable_names, normalized_data, ilev): +def data_loader(variable_names, normalized_data, ilev, in_ver, in_nover, out_ver): """ Prepares the data for training by organizing it into input and output arrays. @@ -133,8 +116,8 @@ def data_loader(variable_names, normalized_data, ilev): Input and output arrays for training. 
""" Ncol = normalized_data[variable_names[1]].shape[2] - dim_NN = int(8 * ilev + 4) - dim_NNout = int(2 * ilev) + dim_NN = int(in_ver * ilev + in_nover) + dim_NNout = int(out_ver * ilev) x_train = np.zeros([dim_NN, Ncol]) y_train = np.zeros([dim_NNout, Ncol]) target_var = ["UTGWSPEC", "VTGWSPEC"] diff --git a/newCAM_emulation/main.py b/newCAM_emulation/main.py index d0cb791..60771d9 100644 --- a/newCAM_emulation/main.py +++ b/newCAM_emulation/main.py @@ -3,8 +3,8 @@ import torch from torch.utils.data import DataLoader import torch.nn as nn -from loaddata import load_variables, load_mean_std, normalize_data, data_loader, MyDataset, features -from Model import FullyConnected, EarlyStopper +from loaddata import load_variables, load_mean_std, normalize_data, data_loader, MyDataset +from model import FullyConnected, EarlyStopper from train import train_with_early_stopping # File paths and parameters @@ -13,11 +13,18 @@ file_path_std = '../Demodata/Convection/std_demo_sub.npz' trained_model_path = 'conv_torch.pth' # Path to save and load the trained model +#variable information +features = ['PS', 'Z3', 'U', 'V', 'T', 'lat', 'lon', 'DSE', 'RHOI', 'NETDT', 'NM', 'UTGWSPEC', 'VTGWSPEC'] +ilev = 93 +in_ver = 8 +in_nover = 4 +out_ver = 2 + # Load and preprocess data variable_data = load_variables(directory_path, features, 1, 5) mean_dict, std_dict = load_mean_std(file_path_mean, file_path_std, features) normalized_data = normalize_data(variable_data, mean_dict, std_dict) -xtrain, ytrain = data_loader(features, normalized_data, ilev=93) +xtrain, ytrain = data_loader(features, normalized_data, ilev=ilev, in_ver=in_ver, in_nover=in_nover, out_ver=out_ver) # Print the shapes of xtrain and ytrain @@ -34,7 +41,10 @@ # Model training parameters learning_rate = 1e-5 epochs = 100 -model = FullyConnected() +hidden_layers = 8 +hidden_size = 500 + +model = FullyConnected(ilev, in_ver,in_nover,out_ver, hidden_layers, hidden_size) optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) criterion = nn.MSELoss() early_stopper = EarlyStopper(patience=5, min_delta=0) @@ -53,7 +63,7 @@ # For prediction, we need new input data. Here, we use different files for simplicity. 
test_data = load_variables(directory_path, features, 4, 5) normalized_test_data = normalize_data(test_data, mean_dict, std_dict) -x_test, y_test = data_loader(features, normalized_test_data, ilev=93) +x_test, y_test = data_loader(features, normalized_test_data, ilev=ilev, in_ver=in_ver, in_nover=in_nover, out_ver=out_ver) # Convert test data to tensors x_test_tensor = torch.tensor(x_test, dtype=torch.float64).T From e041e3badf6d8df6c50d4270295e2c7e1170635d Mon Sep 17 00:00:00 2001 From: Surbhi Goel Date: Mon, 29 Jul 2024 15:56:23 +0100 Subject: [PATCH 21/35] Updated model saving to trained_models folder --- newCAM_emulation/main.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/newCAM_emulation/main.py b/newCAM_emulation/main.py index 60771d9..05800f8 100644 --- a/newCAM_emulation/main.py +++ b/newCAM_emulation/main.py @@ -8,10 +8,10 @@ from train import train_with_early_stopping # File paths and parameters -directory_path = '../Demodata/Convection' -file_path_mean = '../Demodata/Convection/mean_demo_sub.npz' -file_path_std = '../Demodata/Convection/std_demo_sub.npz' -trained_model_path = 'conv_torch.pth' # Path to save and load the trained model +directory_path = 'Demodata/Convection' +file_path_mean = 'Demodata/Convection/mean_demo_sub.npz' +file_path_std = 'Demodata/Convection/std_demo_sub.npz' +trained_model_path = 'trained_models/weights_conv' # Path to save and load the trained model #variable information features = ['PS', 'Z3', 'U', 'V', 'T', 'lat', 'lon', 'DSE', 'RHOI', 'NETDT', 'NM', 'UTGWSPEC', 'VTGWSPEC'] @@ -21,12 +21,11 @@ out_ver = 2 # Load and preprocess data -variable_data = load_variables(directory_path, features, 1, 5) +variable_data = load_variables(directory_path, features, 1, 5) mean_dict, std_dict = load_mean_std(file_path_mean, file_path_std, features) normalized_data = normalize_data(variable_data, mean_dict, std_dict) xtrain, ytrain = data_loader(features, normalized_data, ilev=ilev, in_ver=in_ver, in_nover=in_nover, out_ver=out_ver) - # Print the shapes of xtrain and ytrain print(f"xtrain shape: {xtrain.shape}") print(f"ytrain shape: {ytrain.shape}") @@ -51,6 +50,8 @@ # Train the model with early stopping train_losses, val_losses = train_with_early_stopping(train_dataloader, val_dataloader, model, optimizer, criterion, early_stopper, epochs=epochs) +print(f'Train Loss: {train_losses}') +print(f'Valid Loss: {val_losses}') # Save the trained model torch.save(model.state_dict(), trained_model_path) @@ -58,6 +59,7 @@ # Load the trained model for prediction model.load_state_dict(torch.load(trained_model_path)) model.eval() +print() # Prepare input data for prediction # For prediction, we need new input data. Here, we use different files for simplicity. 
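The patch above moves the checkpoint from `conv_torch.pth` into the `trained_models` folder. A minimal sketch of the save/load round trip `main.py` performs with such a checkpoint — the random tensor stands in for real input columns, and everything below is illustrative rather than code from any commit:

```python
import torch
from Model import FullyConnected  # the module defined in this series

trained_model_path = "trained_models/weights_conv"

# Save: state_dict() holds only the learned parameters, not the class itself.
model = FullyConnected()  # defaults: ilev=93, in_ver=8, in_nover=4, out_ver=2
torch.save(model.state_dict(), trained_model_path)

# Load: rebuild the same architecture, then restore the parameters into it.
model = FullyConnected()
model.load_state_dict(torch.load(trained_model_path))
model.eval()

with torch.no_grad():
    # 16 columns of inputs, 8 * 93 + 4 = 748 features each (in_ver * ilev + in_nover)
    x = torch.randn(16, 8 * 93 + 4, dtype=torch.float64)
    pred = model(x)  # shape (16, 186), i.e. out_ver * ilev outputs per column
```

Since only the `state_dict` is saved, the loading side must first construct `FullyConnected` with the same hyperparameters before calling `load_state_dict`.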
From 19576f2bf7ed628e934c10daafc4065e10b4aa2b Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Tue, 30 Jul 2024 11:45:14 +0100
Subject: [PATCH 22/35] Updated reshaping in loaddata dataloader, included
 print messages in train and deleted commented out code in model

---
 newCAM_emulation/Model.py    | 5 +++--
 newCAM_emulation/loaddata.py | 6 +++---
 newCAM_emulation/train.py    | 8 ++++----
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/newCAM_emulation/Model.py b/newCAM_emulation/Model.py
index fe73724..da6049d 100644
--- a/newCAM_emulation/Model.py
+++ b/newCAM_emulation/Model.py
@@ -100,8 +100,9 @@ def early_stop(self, validation_loss, model=None):
         if validation_loss < self.min_validation_loss:
             self.min_validation_loss = validation_loss
             self.counter = 0
-            if model is not None:
-                torch.save(model.state_dict(), "conv_torch.pth")
+            # if model is not None:
+            #     # torch.save(model.state_dict(), 'conv_torch.pth')
+            #     torch.save(model.state_dict(), 'trained_models/weights_conv')
         elif validation_loss > (self.min_validation_loss + self.min_delta):
             self.counter += 1
             if self.counter >= self.patience:
diff --git a/newCAM_emulation/loaddata.py b/newCAM_emulation/loaddata.py
index f790993..2860b31 100644
--- a/newCAM_emulation/loaddata.py
+++ b/newCAM_emulation/loaddata.py
@@ -126,9 +126,9 @@ def data_loader(variable_names, normalized_data, ilev, in_ver, in_nover, out_ver
     for var_name, var_data in normalized_data.items():
         var_shape = var_data.shape
         if var_name in target_var:
-            y_train[y_index * ilev : (y_index + 1) * ilev, :] = var_data.reshape(
-                ilev, Ncol
-            )
+            # y_train[y_index * ilev:(y_index + 1) * ilev, :] = var_data.reshape(ilev, Ncol)
+            y_train[y_index * ilev : (y_index + 1) * ilev, :] = var_data
+
             y_index += 1
         elif len(var_shape) == 2:
             x_train[x_index, :] = var_data
diff --git a/newCAM_emulation/train.py b/newCAM_emulation/train.py
index 3f084ad..cb38e9c 100644
--- a/newCAM_emulation/train.py
+++ b/newCAM_emulation/train.py
@@ -96,10 +96,10 @@ def train_with_early_stopping(
     train_losses = []
     val_losses = [0]
     for epoch in range(epochs):
-        # if epoch % 2 == 0:
-        #     print(f"Epoch {epoch + 1}\n-------------------------------")
-        #     print(val_losses[-1])
-        #     print('counter=' + str(early_stopper.counter))
+        if epoch % 2 == 0:
+            print(f"Epoch {epoch + 1}\n-------------------------------")
+            print(val_losses[-1])
+            print("counter=" + str(early_stopper.counter))
         train_loss = train_loop(train_dataloader, model, criterion, optimizer)
         train_losses.append(train_loss)
         val_loss = val_loop(val_dataloader, model, criterion)

From 61e6fbaf04e498205cb1909345a1a96cb8298cfd Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Mon, 5 Aug 2024 12:26:17 +0100
Subject: [PATCH 23/35] Added a comment to clarify the looping through
 identical layers of the net

---
 newCAM_emulation/Model.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/newCAM_emulation/Model.py b/newCAM_emulation/Model.py
index da6049d..9947f2d 100644
--- a/newCAM_emulation/Model.py
+++ b/newCAM_emulation/Model.py
@@ -27,7 +27,10 @@ def __init__(
         layers = []
+
         input_size = in_ver * ilev + in_nover
+
+        # The following for loop provides the sequential layer by layer flow of data in the model as the layers used in our model are identical.
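Unrolled for the defaults (`ilev=93`, `in_ver=8`, `in_nover=4`, `out_ver=2`, `hidden_layers=8`, `hidden_size=500`), the loop documented by the comment above assembles the following stack — a sketch for illustration, not code from any commit:

```python
import torch
from torch import nn

layers = []
input_size = 8 * 93 + 4  # in_ver * ilev + in_nover = 748
for _ in range(8):  # hidden_layers identical blocks
    layers.append(nn.Linear(input_size, 500, dtype=torch.float64))
    layers.append(nn.SiLU())
    input_size = 500  # after the first block every Linear maps 500 -> 500
layers.append(nn.Linear(500, 2 * 93, dtype=torch.float64))  # output head, 186 wide

net = nn.Sequential(*layers)
print(net)  # Linear(748->500)+SiLU, 7 x (Linear(500->500)+SiLU), Linear(500->186)
```

Only the first `Linear` sees the full 748-feature input; reassigning `input_size = hidden_size` inside the loop is what makes the remaining blocks identical.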
    for _ in range(hidden_layers):
         layers.append(nn.Linear(input_size, hidden_size, dtype=torch.float64))
         layers.append(nn.SiLU())

From c22b64f184413d1877b183c15586cb4301bb86ed Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Mon, 5 Aug 2024 15:30:29 +0100
Subject: [PATCH 24/35] Updated the documentation to add latest code changes

---
 README.md | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index e1e06bb..cab0759 100644
--- a/README.md
+++ b/README.md
@@ -48,13 +48,23 @@ The model is trained using the script `train.py` using the demo data. The optimi
 The `Demodata` folder contains the demo data used to train and test the model
 
 The `newCAM_emulation` folder contains the code that is required to load data, train the model and make predictions, which is structured as follows:
-> `train.py` - train the model
 
-> `NN-pred.py` - predict the GWD using the trained model
-
-> `loaddata.py` - load the data and reshape it to the NN input
+> `loaddata.py` - loads the data from the source .nc files and normalises it before feeding it to the neural network.
 
-> `model.py` - define the NN model
+> `model.py` - defines the NN class and the early stopping mechanism.
+
+> `train.py` - trains the model for a given number of epochs using the training and validation loops.
+
+> `main.py` - uses the above three modules to sequentially (a condensed sketch follows this list):
+1. Read the features list (this varies with the GW source; currently it is convection).
+2. Take information about the data, such as ilev and the number of variables varying across vertical levels.
+3. Use `loaddata.py` to load the variables in the feature list, normalise them, build the `xtrain` and `ytrain` arrays, and create a custom dataset for easy iteration over them.
+4. Take model hyperparameters such as the learning rate, epochs and hidden layers, and pass them to `model.py`.
+5. Take the loss function, optimiser and early-stopping parameters and pass them to `train.py` along with the defined model and the custom dataset.
+6. Train the model and save the weights in the
+`trained_models` folder.
+7. The saved model can be loaded and tested on any dataset here.
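Condensed into code, the sequence described in the list above looks roughly like this (a sketch using the demo paths and values that appear elsewhere in this series; `main.py` in the repository remains the authoritative version):

```python
from loaddata import (MyDataset, data_loader, load_mean_std,
                      load_variables, normalize_data)
from Model import FullyConnected

features = ['PS', 'Z3', 'U', 'V', 'T', 'lat', 'lon', 'DSE',
            'RHOI', 'NETDT', 'NM', 'UTGWSPEC', 'VTGWSPEC']           # 1. feature list

raw = load_variables('Demodata', features, 1, 5)                     # 3. load ...
mean_d, std_d = load_mean_std('Demodata/mean_demo_sub.npz',
                              'Demodata/std_demo_sub.npz', features)
norm = normalize_data(raw, mean_d, std_d)                            # ... and normalise
xtrain, ytrain = data_loader(features, norm, ilev=93,                # 2. data information
                             in_ver=8, in_nover=4, out_ver=2)
dataset = MyDataset(X=xtrain, Y=ytrain)                              # ... custom dataset

model = FullyConnected(93, 8, 4, 2, 8, 500)                          # 4. hyperparameters
# 5./6. hand model, dataset, loss, optimiser and early stopper to
# train_with_early_stopping, then torch.save the weights into trained_models/.
```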
+
 ## Usage Instructions
 To use the repository, the following steps are required:
 
From 6a8d1eaa41a0920fe8aedda71bc5cfb05d318df8 Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Tue, 13 Aug 2024 10:56:52 +0100
Subject: [PATCH 25/35] Changed the directory path to load data from Demodata

---
 newCAM_emulation/main.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/newCAM_emulation/main.py b/newCAM_emulation/main.py
index 05800f8..bc15714 100644
--- a/newCAM_emulation/main.py
+++ b/newCAM_emulation/main.py
@@ -4,13 +4,13 @@
 from torch.utils.data import DataLoader
 import torch.nn as nn
 from loaddata import load_variables, load_mean_std, normalize_data, data_loader, MyDataset
-from model import FullyConnected, EarlyStopper
+from Model import FullyConnected, EarlyStopper
 from train import train_with_early_stopping
 
 # File paths and parameters
-directory_path = 'Demodata/Convection'
-file_path_mean = 'Demodata/Convection/mean_demo_sub.npz'
-file_path_std = 'Demodata/Convection/std_demo_sub.npz'
+directory_path = 'Demodata'
+file_path_mean = 'Demodata/mean_demo_sub.npz'
+file_path_std = 'Demodata/std_demo_sub.npz'
 trained_model_path = 'trained_models/weights_conv' # Path to save and load the trained model
 
 #variable information

From 53e9069a88eb7a0d3acf412f4da559d3fd1158cf Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Mon, 19 Aug 2024 10:29:43 +0100
Subject: [PATCH 26/35] Rebased and added 2 ruff warning suppressors PLR0913 and PLR2004

---
 newCAM_emulation/Model.py    | 11 ++++++----
 newCAM_emulation/__init__.py |  1 +
 newCAM_emulation/loaddata.py | 29 ++++++++++++++++---------
 newCAM_emulation/main.py     | 42 +++++++++++++++++++++++++++---------
 newCAM_emulation/train.py    | 28 ++++++++++++------------
 5 files changed, 73 insertions(+), 38 deletions(-)

diff --git a/newCAM_emulation/Model.py b/newCAM_emulation/Model.py
index 9947f2d..be01c30 100644
--- a/newCAM_emulation/Model.py
+++ b/newCAM_emulation/Model.py
@@ -1,9 +1,10 @@
 """Neural Network model for the CAM-EM."""
 
-import torch
 import numpy as np
+import torch
 from torch import nn
 
+# ruff: noqa: PLR0913
 
 class FullyConnected(nn.Module):
     """
@@ -16,7 +17,8 @@ class FullyConnected(nn.Module):
     """
 
     def __init__(
-        self, ilev=93, in_ver=8, in_nover=4, out_ver=2, hidden_layers=8, hidden_size=500
+        self, ilev=93, in_ver=8, in_nover=4, out_ver=2,
+        hidden_layers=8, hidden_size=500
     ):
         super(FullyConnected, self).__init__()
         self.ilev = ilev
@@ -30,7 +32,8 @@ def __init__(
 
         input_size = in_ver * ilev + in_nover
 
-        # The following for loop provides the sequential layer by layer flow of data in the model as the layers used in our model are identical.
+        # The following for loop provides the sequential layer by layer flow
+        # of data in the model as the layers used in our model are identical.
         for _ in range(hidden_layers):
             layers.append(nn.Linear(input_size, hidden_size, dtype=torch.float64))
             layers.append(nn.SiLU())
@@ -64,7 +67,7 @@ class EarlyStopper:
     patience : int, optional
         Number of epochs to wait before stopping (default is 1).
     min_delta : float, optional
-        Minimum change in the monitored quantity to qualify as an improvement (default is 0).
+        Minimum change in the loss to qualify as an improvement (default is 0).
    Attributes
     ----------
     patience : int
         Number of epochs to wait before stopping.
     min_delta : float
         Minimum change in the monitored quantity to qualify as an improvement.
     counter : int
         Counter for the number of epochs without improvement.
     min_validation_loss : float
         Minimum validation loss recorded.
diff --git a/newCAM_emulation/__init__.py b/newCAM_emulation/__init__.py
index e69de29..2cf3fc8 100644
--- a/newCAM_emulation/__init__.py
+++ b/newCAM_emulation/__init__.py
@@ -0,0 +1 @@
+"""Instantiate the Emulation."""
diff --git a/newCAM_emulation/loaddata.py b/newCAM_emulation/loaddata.py
index 2860b31..cb0b4f5 100644
--- a/newCAM_emulation/loaddata.py
+++ b/newCAM_emulation/loaddata.py
@@ -2,14 +2,17 @@
 
 import os
 import re
-import torch
-import numpy as np
+
 import netCDF4 as nc
+import numpy as np
+import torch
 
+# ruff: noqa: PLR0913
+# ruff: noqa: PLR2004
 
 def load_variables(directory_path, variable_names, startfile, endfile):
     """
-    Loads specified variables from NetCDF files in the given directory.
+    Load specified variables from NetCDF files in the given directory.
 
     Parameters
     ----------
@@ -46,7 +49,7 @@ def load_variables(directory_path, variable_names, startfile, endfile):
 
 def load_mean_std(file_path_mean, file_path_std, variable_names):
     """
-    Loads mean and standard deviation values for specified variables from files.
+    Load mean and standard deviation values for specified variables from files.
 
     Parameters
     ----------
@@ -71,7 +74,7 @@ def load_mean_std(file_path_mean, file_path_std, variable_names):
 
 def normalize_data(variable_data, mean_values, std_values):
     """
-    Normalizes the data using mean and standard deviation values.
+    Normalize the data using mean and standard deviation values.
 
     Parameters
     ----------
@@ -99,7 +102,7 @@ def normalize_data(variable_data, mean_values, std_values):
 
 def data_loader(variable_names, normalized_data, ilev, in_ver, in_nover, out_ver):
     """
-    Prepares the data for training by organizing it into input and output arrays.
+    Prepare the data for training by organizing it into input and output arrays.
 
     Parameters
     ----------
@@ -109,6 +112,13 @@ def data_loader(variable_names, normalized_data, ilev, in_ver, in_nover, out_ver
         Dictionary containing normalized data.
     ilev : int
         Number of vertical levels.
+    in_ver : int
+        Number of input variables that vary across vertical levels.
+    in_nover : int
+        Number of input variables that do not vary across vertical levels.
+    out_ver : int
+        Number of output variables that vary across vertical levels.
+
 
     Returns
     -------
@@ -126,9 +136,7 @@ def data_loader(variable_names, normalized_data, ilev, in_ver, in_nover, out_ver
     for var_name, var_data in normalized_data.items():
         var_shape = var_data.shape
         if var_name in target_var:
-            # y_train[y_index * ilev:(y_index + 1) * ilev, :] = var_data.reshape(ilev, Ncol)
             y_train[y_index * ilev : (y_index + 1) * ilev, :] = var_data
-
             y_index += 1
         elif len(var_shape) == 2:
             x_train[x_index, :] = var_data
@@ -164,7 +172,8 @@ def __init__(self, X, Y):
 
     def __len__(self):
         """
-        Returns the length of the dataset.
+        Return the length of the dataset.
+
         Returns
         -------
         int
@@ -174,7 +183,7 @@ def __len__(self):
 
     def __getitem__(self, idx):
         """
-        Returns a single sample from the dataset.
+        Return a single sample from the dataset.
Parameters ---------- diff --git a/newCAM_emulation/main.py b/newCAM_emulation/main.py index bc15714..a3c6a7d 100644 --- a/newCAM_emulation/main.py +++ b/newCAM_emulation/main.py @@ -1,30 +1,42 @@ +"""Script to load data and train the neural network.""" + import os + import numpy as np import torch +from loaddata import ( + MyDataset, + data_loader, + load_mean_std, + load_variables, + normalize_data, +) +from Model import EarlyStopper, FullyConnected +from torch import nn from torch.utils.data import DataLoader -import torch.nn as nn -from loaddata import load_variables, load_mean_std, normalize_data, data_loader, MyDataset -from Model import FullyConnected, EarlyStopper from train import train_with_early_stopping # File paths and parameters directory_path = 'Demodata' file_path_mean = 'Demodata/mean_demo_sub.npz' file_path_std = 'Demodata/std_demo_sub.npz' -trained_model_path = 'trained_models/weights_conv' # Path to save and load the trained model +trained_model_path = 'trained_models/weights_conv' #variable information -features = ['PS', 'Z3', 'U', 'V', 'T', 'lat', 'lon', 'DSE', 'RHOI', 'NETDT', 'NM', 'UTGWSPEC', 'VTGWSPEC'] +features = ['PS', 'Z3', 'U', 'V', 'T', 'lat', 'lon', 'DSE', + 'RHOI', 'NETDT', 'NM', 'UTGWSPEC', 'VTGWSPEC'] ilev = 93 in_ver = 8 in_nover = 4 out_ver = 2 # Load and preprocess data -variable_data = load_variables(directory_path, features, 1, 5) +variable_data = load_variables(directory_path, features, 1, 5) mean_dict, std_dict = load_mean_std(file_path_mean, file_path_std, features) normalized_data = normalize_data(variable_data, mean_dict, std_dict) -xtrain, ytrain = data_loader(features, normalized_data, ilev=ilev, in_ver=in_ver, in_nover=in_nover, out_ver=out_ver) +xtrain, ytrain = data_loader( + features, normalized_data, ilev=ilev, in_ver=in_ver, + in_nover=in_nover, out_ver=out_ver) # Print the shapes of xtrain and ytrain print(f"xtrain shape: {xtrain.shape}") @@ -33,7 +45,8 @@ # Prepare dataset and dataloaders data = MyDataset(X=xtrain, Y=ytrain) -split_data = torch.utils.data.random_split(data, [0.75, 0.25], generator=torch.Generator().manual_seed(42)) +split_data = torch.utils.data.random_split( + data, [0.75, 0.25], generator=torch.Generator().manual_seed(42)) train_dataloader = DataLoader(split_data[0], batch_size=128, shuffle=True) val_dataloader = DataLoader(split_data[1], batch_size=len(split_data[1]), shuffle=True) @@ -49,7 +62,14 @@ early_stopper = EarlyStopper(patience=5, min_delta=0) # Train the model with early stopping -train_losses, val_losses = train_with_early_stopping(train_dataloader, val_dataloader, model, optimizer, criterion, early_stopper, epochs=epochs) +train_losses, val_losses = train_with_early_stopping( + train_dataloader, + val_dataloader, + model, + optimizer, + criterion, + early_stopper, + epochs=epochs) print(f'Train Loss: {train_losses}') print(f'Valid Loss: {val_losses}') @@ -65,7 +85,9 @@ # For prediction, we need new input data. Here, we use different files for simplicity. 
test_data = load_variables(directory_path, features, 4, 5)
 normalized_test_data = normalize_data(test_data, mean_dict, std_dict)
-x_test, y_test = data_loader(features, normalized_test_data, ilev=ilev, in_ver=in_ver, in_nover=in_nover, out_ver=out_ver)
+x_test, y_test = data_loader(
+    features, normalized_test_data, ilev=ilev, in_ver=in_ver,
+    in_nover=in_nover, out_ver=out_ver)
 
 # Convert test data to tensors
 x_test_tensor = torch.tensor(x_test, dtype=torch.float64).T
diff --git a/newCAM_emulation/train.py b/newCAM_emulation/train.py
index cb38e9c..733d90b 100644
--- a/newCAM_emulation/train.py
+++ b/newCAM_emulation/train.py
@@ -3,6 +3,7 @@
 import torch
 from torch import nn
 
+# ruff: noqa: PLR0913
 
 def train_loop(dataloader, model, loss_fn, optimizer):
     """
@@ -38,22 +39,21 @@ def train_loop(dataloader, model, loss_fn, optimizer):
 
 def val_loop(dataloader, model, loss_fn):
     """
-    Validation loop for a single epoch.
-    >>>>>>> 70b2c64 (Updated loaddata, Model, train files and removed NN_pred, and added a main file)
+    Run the validation loop for a single epoch.
 
-    Parameters
-    ----------
-    dataloader : torch.utils.data.DataLoader
-        DataLoader for the validation data.
-    model : nn.Module
-        Neural network model.
-    loss_fn : callable
-        Loss function.
+    Parameters
+    ----------
+    dataloader : torch.utils.data.DataLoader
+        DataLoader for the validation data.
+    model : nn.Module
+        Neural network model.
+    loss_fn : callable
+        Loss function.
 
-    Returns
-    -------
-    float
-        Average validation loss.
+    Returns
+    -------
+    float
+        Average validation loss.
     """
     avg_loss = sum(loss_fn(model(X), Y).item() for X, Y in dataloader) / len(dataloader)
     return avg_loss

From 4775e45482f732172f107f45fee104b2d3e680ea Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Mon, 19 Aug 2024 10:55:45 +0100
Subject: [PATCH 27/35] Removed ruff warning suppressor ruff: noqa: PLR0913

---
 newCAM_emulation/train.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/newCAM_emulation/train.py b/newCAM_emulation/train.py
index 733d90b..aeb3631 100644
--- a/newCAM_emulation/train.py
+++ b/newCAM_emulation/train.py
@@ -3,7 +3,6 @@
 import torch
 from torch import nn
 
-# ruff: noqa: PLR0913
 
 def train_loop(dataloader, model, loss_fn, optimizer):
     """

From e76ab22b5dd9b4ce7656a3cf862a7040d19f10d7 Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Mon, 19 Aug 2024 11:14:13 +0100
Subject: [PATCH 28/35] Added ruff warning suppressor ruff: noqa: PLR0913

---
 newCAM_emulation/train.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/newCAM_emulation/train.py b/newCAM_emulation/train.py
index aeb3631..733d90b 100644
--- a/newCAM_emulation/train.py
+++ b/newCAM_emulation/train.py
@@ -3,6 +3,7 @@
 import torch
 from torch import nn
 
+# ruff: noqa: PLR0913
 
 def train_loop(dataloader, model, loss_fn, optimizer):
     """

From 122253c06f4adc60ed0cdbf91c7867bf7b9d6e61 Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Mon, 19 Aug 2024 11:21:07 +0100
Subject: [PATCH 29/35] Reformatted using Ruff

---
 newCAM_emulation/Model.py    |  4 +--
 newCAM_emulation/loaddata.py |  1 +
 newCAM_emulation/main.py     | 59 +++++++++++++++++++++++++-----------
 newCAM_emulation/train.py    |  1 +
 4 files changed, 46 insertions(+), 19 deletions(-)

diff --git a/newCAM_emulation/Model.py b/newCAM_emulation/Model.py
index be01c30..1b16cba 100644
--- a/newCAM_emulation/Model.py
+++ b/newCAM_emulation/Model.py
@@ -6,6 +6,7 @@
 
 # ruff: noqa: PLR0913
 
+
 class FullyConnected(nn.Module):
     """
     Fully connected neural network model.
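For reference, the two Ruff rules that PATCH 26–28 suppress police argument counts and bare numeric literals. A small sketch of what each would flag without the suppressions (the second function is hypothetical; the first mirrors `data_loader`'s signature):

```python
# Module-wide suppressions, placed at the top of the file as in loaddata.py:
# ruff: noqa: PLR0913
# ruff: noqa: PLR2004


# PLR0913 ("too many arguments") would otherwise flag this six-argument signature.
def data_loader(variable_names, normalized_data, ilev, in_ver, in_nover, out_ver):
    ...


# PLR2004 ("magic value used in comparison") would otherwise flag the bare 2,
# the shape-length check loaddata.py uses for variables without a level axis.
def varies_with_level(var_shape):
    return len(var_shape) != 2
```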
@@ -17,8 +18,7 @@ class FullyConnected(nn.Module): """ def __init__( - self, ilev=93, in_ver=8, in_nover=4, out_ver=2, - hidden_layers=8, hidden_size=500 + self, ilev=93, in_ver=8, in_nover=4, out_ver=2, hidden_layers=8, hidden_size=500 ): super(FullyConnected, self).__init__() self.ilev = ilev diff --git a/newCAM_emulation/loaddata.py b/newCAM_emulation/loaddata.py index cb0b4f5..2764cc5 100644 --- a/newCAM_emulation/loaddata.py +++ b/newCAM_emulation/loaddata.py @@ -10,6 +10,7 @@ # ruff: noqa: PLR0913 # ruff: noqa: PLR2004 + def load_variables(directory_path, variable_names, startfile, endfile): """ Load specified variables from NetCDF files in the given directory. diff --git a/newCAM_emulation/main.py b/newCAM_emulation/main.py index a3c6a7d..9cce3bd 100644 --- a/newCAM_emulation/main.py +++ b/newCAM_emulation/main.py @@ -17,14 +17,27 @@ from train import train_with_early_stopping # File paths and parameters -directory_path = 'Demodata' -file_path_mean = 'Demodata/mean_demo_sub.npz' -file_path_std = 'Demodata/std_demo_sub.npz' -trained_model_path = 'trained_models/weights_conv' - -#variable information -features = ['PS', 'Z3', 'U', 'V', 'T', 'lat', 'lon', 'DSE', - 'RHOI', 'NETDT', 'NM', 'UTGWSPEC', 'VTGWSPEC'] +directory_path = "Demodata" +file_path_mean = "Demodata/mean_demo_sub.npz" +file_path_std = "Demodata/std_demo_sub.npz" +trained_model_path = "trained_models/weights_conv" + +# variable information +features = [ + "PS", + "Z3", + "U", + "V", + "T", + "lat", + "lon", + "DSE", + "RHOI", + "NETDT", + "NM", + "UTGWSPEC", + "VTGWSPEC", +] ilev = 93 in_ver = 8 in_nover = 4 @@ -35,8 +48,13 @@ mean_dict, std_dict = load_mean_std(file_path_mean, file_path_std, features) normalized_data = normalize_data(variable_data, mean_dict, std_dict) xtrain, ytrain = data_loader( - features, normalized_data, ilev=ilev, in_ver=in_ver, - in_nover=in_nover, out_ver=out_ver) + features, + normalized_data, + ilev=ilev, + in_ver=in_ver, + in_nover=in_nover, + out_ver=out_ver, +) # Print the shapes of xtrain and ytrain print(f"xtrain shape: {xtrain.shape}") @@ -46,7 +64,8 @@ # Prepare dataset and dataloaders data = MyDataset(X=xtrain, Y=ytrain) split_data = torch.utils.data.random_split( - data, [0.75, 0.25], generator=torch.Generator().manual_seed(42)) + data, [0.75, 0.25], generator=torch.Generator().manual_seed(42) +) train_dataloader = DataLoader(split_data[0], batch_size=128, shuffle=True) val_dataloader = DataLoader(split_data[1], batch_size=len(split_data[1]), shuffle=True) @@ -56,7 +75,7 @@ hidden_layers = 8 hidden_size = 500 -model = FullyConnected(ilev, in_ver,in_nover,out_ver, hidden_layers, hidden_size) +model = FullyConnected(ilev, in_ver, in_nover, out_ver, hidden_layers, hidden_size) optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) criterion = nn.MSELoss() early_stopper = EarlyStopper(patience=5, min_delta=0) @@ -69,9 +88,10 @@ optimizer, criterion, early_stopper, - epochs=epochs) -print(f'Train Loss: {train_losses}') -print(f'Valid Loss: {val_losses}') + epochs=epochs, +) +print(f"Train Loss: {train_losses}") +print(f"Valid Loss: {val_losses}") # Save the trained model torch.save(model.state_dict(), trained_model_path) @@ -86,8 +106,13 @@ test_data = load_variables(directory_path, features, 4, 5) normalized_test_data = normalize_data(test_data, mean_dict, std_dict) x_test, y_test = data_loader( - features, normalized_test_data, ilev=ilev, in_ver=in_ver, - in_nover=in_nover, out_ver=out_ver) + features, + normalized_test_data, + ilev=ilev, + in_ver=in_ver, + 
+    in_nover=in_nover,
+    out_ver=out_ver,
+)
 
 # Convert test data to tensors
 x_test_tensor = torch.tensor(x_test, dtype=torch.float64).T
diff --git a/newCAM_emulation/train.py b/newCAM_emulation/train.py
index 733d90b..690ed27 100644
--- a/newCAM_emulation/train.py
+++ b/newCAM_emulation/train.py
@@ -5,6 +5,7 @@
 
 # ruff: noqa: PLR0913
 
+
 def train_loop(dataloader, model, loss_fn, optimizer):
     """
     Training loop for a single epoch.

From 3bb2324d88d00c7c867bd36b7f3174220edc2c1a Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Mon, 19 Aug 2024 12:08:23 +0100
Subject: [PATCH 30/35] Rearranged sequence of variables in the list

---
 newCAM_emulation/main.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/newCAM_emulation/main.py b/newCAM_emulation/main.py
index 9cce3bd..dbc02b9 100644
--- a/newCAM_emulation/main.py
+++ b/newCAM_emulation/main.py
@@ -24,17 +24,17 @@
 
 # variable information
 features = [
-    "PS",
-    "Z3",
     "U",
     "V",
     "T",
-    "lat",
-    "lon",
     "DSE",
-    "RHOI",
-    "NETDT",
     "NM",
+    "NETDT",
+    "Z3",
+    "RHOI",
+    "PS",
+    "lat",
+    "lon",
     "UTGWSPEC",
     "VTGWSPEC",
 ]

From dac7400553c1840bd88b4f30d2eafb7b4d73f6ee Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Mon, 16 Sep 2024 13:05:42 +0100
Subject: [PATCH 31/35] Removed hard-coded target variables in data loader

---
 newCAM_emulation/loaddata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/newCAM_emulation/loaddata.py b/newCAM_emulation/loaddata.py
index 2764cc5..17b203a 100644
--- a/newCAM_emulation/loaddata.py
+++ b/newCAM_emulation/loaddata.py
@@ -131,7 +131,7 @@ def data_loader(variable_names, normalized_data, ilev, in_ver, in_nover, out_ver
     dim_NNout = int(out_ver * ilev)
     x_train = np.zeros([dim_NN, Ncol])
     y_train = np.zeros([dim_NNout, Ncol])
-    target_var = ["UTGWSPEC", "VTGWSPEC"]
+    target_var = variable_names[-2:]
     y_index = 0
     x_index = 0
     for var_name, var_data in normalized_data.items():

From ff53367b9047cbd30c512ab4274f283596eae7ca Mon Sep 17 00:00:00 2001
From: Surbhi Goel
Date: Mon, 16 Sep 2024 13:08:20 +0100
Subject: [PATCH 32/35] Moved Early Stopper class from Model

---
 newCAM_emulation/Model.py | 56 ---------------------------------------
 1 file changed, 56 deletions(-)

diff --git a/newCAM_emulation/Model.py b/newCAM_emulation/Model.py
index 1b16cba..2f1da62 100644
--- a/newCAM_emulation/Model.py
+++ b/newCAM_emulation/Model.py
@@ -58,59 +58,3 @@ def forward(self, X):
         return self.linear_stack(X)
 
 
-class EarlyStopper:
-    """
-    Early stopping utility to stop training when validation loss doesn't improve.
-
-    Parameters
-    ----------
-    patience : int, optional
-        Number of epochs to wait before stopping (default is 1).
-    min_delta : float, optional
-        Minimum change in the loss to qualify as an improvement (default is 0).
-
-    Attributes
-    ----------
-    patience : int
-        Number of epochs to wait before stopping.
-    min_delta : float
-        Minimum change in the monitored quantity to qualify as an improvement.
-    counter : int
-        Counter for the number of epochs without improvement.
-    min_validation_loss : float
-        Minimum validation loss recorded.
-    """
-
-    def __init__(self, patience=1, min_delta=0):
-        self.patience = patience
-        self.min_delta = min_delta
-        self.counter = 0
-        self.min_validation_loss = np.inf
-
-    def early_stop(self, validation_loss, model=None):
-        """
-        Check if training should be stopped early.
-
-        Parameters
-        ----------
-        validation_loss : float
-            Current validation loss.
-        model : nn.Module, optional
-            Model to save if validation loss improves (default is None).
- - Returns - ------- - bool - True if training should be stopped, False otherwise. - """ - if validation_loss < self.min_validation_loss: - self.min_validation_loss = validation_loss - self.counter = 0 - # if model is not None: - # # torch.save(model.state_dict(), 'conv_torch.pth') - # torch.save(model.state_dict(), 'trained_models/weights_conv') - elif validation_loss > (self.min_validation_loss + self.min_delta): - self.counter += 1 - if self.counter >= self.patience: - return True - return False From 773b32e148830356cc1fabb4edbbd6ff49beecd7 Mon Sep 17 00:00:00 2001 From: Surbhi Goel Date: Mon, 16 Sep 2024 13:12:49 +0100 Subject: [PATCH 33/35] Moved Early Stopper class from Model --- newCAM_emulation/Model.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/newCAM_emulation/Model.py b/newCAM_emulation/Model.py index 2f1da62..9d2c44d 100644 --- a/newCAM_emulation/Model.py +++ b/newCAM_emulation/Model.py @@ -56,5 +56,3 @@ def forward(self, X): Output tensor. """ return self.linear_stack(X) - - From 1a07f0915c9778d0b61ee5400b38f69b8095e976 Mon Sep 17 00:00:00 2001 From: Surbhi Goel Date: Mon, 16 Sep 2024 16:30:58 +0100 Subject: [PATCH 34/35] Added pre-commit-config file --- .pre-commit-config.yaml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..a3f1f16 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,6 @@ +repos: + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.1.0 + hooks: + - id: ruff + args: [--fix] # This will auto-fix issues if possible \ No newline at end of file From aaa1d2c978379e979b0bd6151fdc9185f2255636 Mon Sep 17 00:00:00 2001 From: Surbhi Goel Date: Mon, 16 Sep 2024 16:33:59 +0100 Subject: [PATCH 35/35] Added pre-commit-config file --- .pre-commit-config.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a3f1f16..086021b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,4 +3,5 @@ repos: rev: v0.1.0 hooks: - id: ruff - args: [--fix] # This will auto-fix issues if possible \ No newline at end of file + args: [--fix] # This will auto-fix issues if possible + \ No newline at end of file
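
Two of the later patches are connected in a way that is easy to miss: PATCH 30 reorders the feature list so that the two output variables come last, and PATCH 31 then lets the data loader slice the targets off the end of that list instead of hard-coding their names. A minimal Python sketch of the resulting convention; the list contents are taken from the main.py diff above and the slice mirrors data_loader in loaddata.py:

    # Feature ordering after PATCH 30: inputs first, the two gravity wave
    # drag targets (UTGWSPEC, VTGWSPEC) last.
    features = [
        "U", "V", "T", "DSE", "NM", "NETDT", "Z3", "RHOI",
        "PS", "lat", "lon",
        "UTGWSPEC", "VTGWSPEC",
    ]

    # PATCH 31's replacement for the hard-coded target list: the last two
    # names are always treated as targets, so renaming or swapping the
    # outputs only requires editing the feature list in one place.
    target_var = features[-2:]  # == ["UTGWSPEC", "VTGWSPEC"]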
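PATCH 32 moves EarlyStopper out of Model.py; main.py already instantiates it and passes it to train_with_early_stopping together with the optimizer and loss. For reference, the pieces compose roughly as below. This is a hedged sketch built only from the signatures visible in the diffs above (train_loop, val_loop, EarlyStopper.early_stop) and the names defined in main.py; the actual body of train_with_early_stopping may differ:

    # Sketch of one possible epoch loop behind train_with_early_stopping.
    # train_loop/val_loop come from train.py; model, criterion, optimizer,
    # early_stopper and the dataloaders are as set up in main.py.
    for epoch in range(epochs):
        train_loop(train_dataloader, model, criterion, optimizer)
        val_loss = val_loop(val_dataloader, model, criterion)
        if early_stopper.early_stop(val_loss, model=model):
            print(f"Early stopping after epoch {epoch + 1}")
            break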
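Finally, PATCH 34 and PATCH 35 wire Ruff into pre-commit via .pre-commit-config.yaml. Once that file is in the repository, contributors enable the hook with the standard pre-commit workflow: install the tool (pip install pre-commit), register the git hook once per clone (pre-commit install), and optionally lint the whole tree with pre-commit run --all-files. The hook then enforces the same rule set referenced by the inline suppressions earlier in the series: PLR0913 is the pylint-derived "too many arguments" check, and PLR2004 flags comparisons against magic values.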