Add chgnet mlearn.

usnistgov · Jan 29, 2024 · 0da0ac1 · 0da0ac1
1 parent 6c3f0b6
commit 0da0ac1
Show file tree

Hide file tree

Showing 6 changed files with 1,494 additions and 0 deletions.
diff --git a/jarvis_leaderboard/contributions/chgnet_mlearn/AI-MLFF-energy-mlearn_Si-test-mae.csv.zip b/jarvis_leaderboard/contributions/chgnet_mlearn/AI-MLFF-energy-mlearn_Si-test-mae.csv.zip
diff --git a/jarvis_leaderboard/contributions/chgnet_mlearn/AI-MLFF-forces-mlearn_Si-test-multimae.csv.zip b/jarvis_leaderboard/contributions/chgnet_mlearn/AI-MLFF-forces-mlearn_Si-test-multimae.csv.zip
diff --git a/jarvis_leaderboard/contributions/chgnet_mlearn/Train_CHGNet.ipynb b/jarvis_leaderboard/contributions/chgnet_mlearn/Train_CHGNet.ipynb
diff --git a/jarvis_leaderboard/contributions/chgnet_mlearn/metadata.json b/jarvis_leaderboard/contributions/chgnet_mlearn/metadata.json
@@ -0,0 +1,29 @@
+{
+    "model_name": "CHGNET",
+    "project_url": "https://github.com/CederGroupHub/chgnet",
+    "date_submitted": "01-29-2024",
+    "author_email": "[email protected]",
+    "database_version": "12-12-2022",
+    "team_name": "CHGNET",
+    "time_taken_seconds": {
+        "AI-MLFF-energy-mlearn_Cu-test-mae.csv.zip": "",
+        "AI-MLFF-energy-mlearn_Ge-test-mae.csv.zip": "",
+        "AI-MLFF-energy-mlearn_Li-test-mae.csv.zip": "",
+        "AI-MLFF-energy-mlearn_Mo-test-mae.csv.zip": "",
+        "AI-MLFF-energy-mlearn_Ni-test-mae.csv.zip": "",
+        "AI-MLFF-energy-mlearn_Si-test-mae.csv.zip": "",
+        "AI-MLFF-forces-mlearn_Cu-test-multimae.csv.zip": "",
+        "AI-MLFF-forces-mlearn_Ge-test-multimae.csv.zip": "",
+        "AI-MLFF-forces-mlearn_Li-test-multimae.csv.zip": "",
+        "AI-MLFF-forces-mlearn_Mo-test-multimae.csv.zip": "",
+        "AI-MLFF-forces-mlearn_Ni-test-multimae.csv.zip": "",
+        "AI-MLFF-forces-mlearn_Si-test-multimae.csv.zip": ""
+    },
+    "language": "python",
+    "os": "linux",
+    "software_used": "jarvis-tools,numpy,scipy,torch,chgnet",
+    "hardware_used": "nisaba-cluster at NIST, V100 Tesla GPU",
+    "git_url": [
+        "https://github.com/CederGroupHub/chgnet"
+    ]
+}
diff --git a/jarvis_leaderboard/contributions/chgnet_mlearn/run.py b/jarvis_leaderboard/contributions/chgnet_mlearn/run.py
@@ -0,0 +1,278 @@
+# -*- coding: utf-8 -*-
+"""Train_CHGNet.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/github/knc6/jarvis-tools-notebooks/blob/master/jarvis-tools-notebooks/Train_CHGNet.ipynb
+"""
+
+pip install -q chgnet
+
+import os
+if not os.path.exists('jarvis_leaderboard'):
+  !git clone https://github.com/usnistgov/jarvis_leaderboard.git
+os.chdir('jarvis_leaderboard')
+!pip install -e .
+
+!pip install --upgrade git+https://gitlab.com/ase/ase.git@optimizer-tests
+
+!pip install numpy==1.23.5
+# then restart session
+
+!wget https://figshare.com/ndownloader/files/40357663 -O mlearn.json.zip
+
+import json,zipfile
+mlearn = json.loads(
+        zipfile.ZipFile("mlearn.json.zip").read(
+            "mlearn.json"
+        )
+    )
+
+import os
+from jarvis.core.atoms import Atoms
+import json
+import shutil
+import warnings
+os.chdir('/content')
+elements = ['Si']
+max_epochs=100
+models={}
+for element in elements:
+
+    benchmark_energies = (
+        "jarvis_leaderboard/jarvis_leaderboard/benchmarks/AI/MLFF/mlearn_"
+        + element
+        + "_energy.json.zip"
+    )
+    temp_energies = benchmark_energies.split("/")[-1].split(".zip")[0]
+    energies = json.loads(
+        zipfile.ZipFile(benchmark_energies).read(temp_energies)
+    )
+    train_ids = list(energies["train"].keys())
+    test_ids = list(energies["test"].keys())
+
+    train_energies=[]
+    train_forces=[]
+    train_stresses=[]
+    train_structures=[]
+    for i in mlearn:
+        if i["jid"] in train_ids:
+            # print(i)
+
+            train_forces.append(i["forces"])
+            train_stresses.append(i['stresses'])
+            atoms = Atoms.from_dict(i["atoms"])
+            train_energies.append(i["energy"]/atoms.num_atoms)
+            train_structures.append(atoms.pymatgen_converter())
+
+    test_energies=[]
+    test_forces=[]
+    test_stresses=[]
+    test_structures=[]
+    tids = []
+    for i in mlearn:
+        if i["jid"] in test_ids:
+            # print(i)
+
+            test_forces.append(i["forces"])
+            test_stresses.append(i['stresses'])
+            atoms = Atoms.from_dict(i["atoms"])
+            test_energies.append(i["energy"]/atoms.num_atoms)
+            test_structures.append(atoms.pymatgen_converter())
+            tids.append(i['jid'])
+
+len(test_structures)
+
+# Commented out IPython magic to ensure Python compatibility.
+# %%time
+# from chgnet.data.dataset import StructureData, get_train_val_test_loader
+# from chgnet.trainer import Trainer
+# from torch.utils.data import DataLoader, Dataset
+# from chgnet.model.model import CHGNet
+# from chgnet.data.dataset import collate_graphs
+# chgnet = CHGNet()
+# 
+# batch_size = 2
+# train_dataset = StructureData(
+#     structures=train_structures,
+#     energies=train_energies,
+#     forces=train_forces,
+#     #stresses=train_stresses,
+#     # magmoms=None,
+# )
+# test_dataset = StructureData(
+#     structures=test_structures,
+#     energies=test_energies,
+#     forces=test_forces,
+#     #stresses=test_stresses,
+#     # magmoms=None,
+# )
+# train_loader = DataLoader(
+#     train_dataset,
+#     batch_size=batch_size,
+#     collate_fn=collate_graphs,
+#     num_workers=0,
+#     pin_memory=False,
+# )
+# val_loader = DataLoader(
+#     test_dataset,
+#     batch_size=batch_size,
+#     collate_fn=collate_graphs,
+#     num_workers=0,
+#     pin_memory=False,
+# )
+# test_loader = DataLoader(
+#     test_dataset,
+#     batch_size=batch_size,
+#     collate_fn=collate_graphs,
+#     num_workers=0,
+#     pin_memory=False,
+# )
+# trainer = Trainer(
+#     model=chgnet,
+#     targets="ef",
+#     optimizer="Adam",
+#     criterion="MSE",
+#     learning_rate=1e-2,
+#     epochs=100,
+#     use_device="cuda",
+# )
+# 
+# trainer.train(train_loader, val_loader, test_loader)
+
+# Take the model exposed by the trainer's `best_model` attribute.
+best_model = trainer.best_model
+
+# Sanity check: run one prediction on the first test structure.
+out = best_model.predict_structure(test_structures[0])
+
+# Notebook inspection of the prediction's keys; as a bare expression this has
+# no visible effect when the file is run as a script.
+out.keys()
+
+import pandas as pd
+import glob
+import numpy as np
+def get_chgnet_pred(atoms=None,model=None):
+    pmg = atoms.pymatgen_converter()
+    out = model.predict_structure(pmg)
+    return out['e']*atoms.num_atoms,out['f'],out['s']
+
+best_model = trainer.best_model
+df = pd.DataFrame(mlearn)
+for i in glob.glob("jarvis_leaderboard/jarvis_leaderboard/benchmarks/AI/MLFF/*energy*.zip"):
+
+    if "mlearn" in i and element in i:
+        fname_e = (
+            "AI-MLFF-energy-"
+            + i.split("/")[-1].split("_energy.json.zip")[0]
+            + "-test-mae.csv"
+        )
+        fname_f = (
+            "AI-MLFF-forces-"
+            + i.split("/")[-1].split("_energy.json.zip")[0]
+            + "-test-multimae.csv"
+        )
+        fname_s = (
+            "AI-MLFF-stresses-"
+            + i.split("/")[-1].split("_energy.json.zip")[0]
+            + "-test-multimae.csv"
+        )
+        f_e = open(fname_e, "w")
+        f_f = open(fname_f, "w")
+        # f_s = open(fname_s, "w")
+
+        f_e.write("id,target,prediction\n")
+        f_f.write("id,target,prediction\n")
+        # f_s.write("id,prediction\n")
+        #
+        print(i)
+        dat = json.loads(
+            zipfile.ZipFile(i).read(
+                i.split("/")[-1].split(".zip")[0]
+            )
+        )
+        print(dat["test"])
+        for key, val in dat["test"].items():
+            entry = df[df["jid"] == key]
+            atoms = Atoms.from_dict(entry.atoms.values[0])
+            # print(key,val,df[df['jid']==key],atoms)
+            # energy,forces=get_alignn_forces(atoms)
+            energy, forces, stress = get_chgnet_pred(
+                model=best_model, atoms=atoms
+            )
+            print(key, val, energy, atoms.num_atoms)
+            line = (
+                key
+                + ","
+                + str(entry.energy.values[0])
+                + ","
+                + str(energy)
+                + "\n"
+            )
+            f_e.write(line)
+            line = (
+                key
+                + ","
+                + str(
+                    ";".join(
+                        map(
+                            str,
+                            np.array(
+                                entry.forces.values[0]
+                            ).flatten(),
+                        )
+                    )
+                )
+                + ","
+                + str(
+                    ";".join(map(str, np.array(forces).flatten()))
+                )
+                + "\n"
+            )
+            f_f.write(line)
+            # line = (
+            #     key
+            #     + ","
+            #     + str(";".join(map(str, np.array(stress).flatten())))
+            #     + "\n"
+            # )
+            # f_s.write(line)
+        f_e.close()
+        f_f.close()
+        # f_s.close()
+        zname = fname_e + ".zip"
+        with zipfile.ZipFile(zname, "w") as myzip:
+            myzip.write(fname_e)
+
+        zname = fname_f + ".zip"
+        with zipfile.ZipFile(zname, "w") as myzip:
+            myzip.write(fname_f)
+
+        # zname = fname_s + ".zip"
+        # with zipfile.ZipFile(zname, "w") as myzip:
+        #     myzip.write(fname_s)
+
+# Commented out IPython magic to ensure Python compatibility.
+en_df = pd.read_csv('AI-MLFF-energy-mlearn_Si-test-mae.csv.zip')
+from sklearn.metrics import mean_absolute_error
+print(mean_absolute_error(en_df['target'],en_df['prediction']))
+# %matplotlib inline
+import matplotlib.pyplot as plt
+plt.plot(en_df['target'],en_df['prediction'],'.')
+plt.xlabel('DFT energy(eV)')
+plt.ylabel('FF energy(eV)')
+
+f_df = pd.read_csv('AI-MLFF-forces-mlearn_Si-test-multimae.csv.zip')
+target = np.concatenate([np.array(i.split(';'),dtype='float') for i in f_df['target'].values])
+pred= np.concatenate([np.array(i.split(';'),dtype='float') for i in f_df['prediction'].values])
+print(mean_absolute_error(target,pred))
+plt.plot(target,pred,'.')
+plt.xlabel('DFT forces(eV/A)')
+plt.ylabel('FF forces(eV/A)')
+
+
+
+
+
+
+
+
+
diff --git a/jarvis_leaderboard/contributions/chgnet_mlearn/run.sh b/jarvis_leaderboard/contributions/chgnet_mlearn/run.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Run the CHGNet mlearn script that ships in this contribution directory.
+python run.py