Skip to content

Commit

Permalink
Much improved matpes explorer.
Browse files Browse the repository at this point in the history
  • Loading branch information
shyuep committed Nov 20, 2024
1 parent 861e1d9 commit cee872e
Show file tree
Hide file tree
Showing 7 changed files with 178 additions and 60 deletions.
76 changes: 38 additions & 38 deletions notebooks/Insert Calculations.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,27 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "1671fa59-c236-4401-8548-afaee3e85fcd",
"execution_count": null,
"id": "0",
"metadata": {},
"outputs": [],
"source": [
"from __future__ import annotations\n",
"\n",
"import os\n",
"from tqdm import tqdm\n",
"import warnings\n",
"\n",
"from monty.serialization import loadfn\n",
"from pymongo import MongoClient\n",
"from tqdm import tqdm\n",
"\n",
"warnings.simplefilter('ignore')"
"warnings.simplefilter(\"ignore\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "870765b8-719c-4405-b489-ba3f2733b10f",
"execution_count": null,
"id": "1",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -31,8 +32,8 @@
},
{
"cell_type": "code",
"execution_count": 4,
"id": "5e60c4e6-c868-4dc1-ac1e-ed25ceca96ba",
"execution_count": null,
"id": "2",
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -45,13 +46,13 @@
" Args:\n",
" functional (str): The name of the functional. This is used to locate\n",
" the appropriate file for loading the data and also defines the MongoDB collection name.\n",
" \n",
"\n",
" Raises:\n",
" FileNotFoundError: If the designated file does not exist or cannot be accessed.\n",
"\n",
" Example:\n",
" To create the database and indexes for a given functional 'pbe', run:\n",
" \n",
"\n",
" ```python\n",
" make_db(\"pbe\")\n",
" ```\n",
Expand All @@ -60,7 +61,7 @@
" -------------\n",
" 1. Load Data:\n",
" - Reads data from a gzipped JSON file for the specified `functional`.\n",
" \n",
"\n",
" 2. Extract & Process Fields:\n",
" - Each dataset entry is extracted and processed to include information such as:\n",
" - `matpesid`: A unique identifier for the material.\n",
Expand All @@ -72,12 +73,12 @@
" - `composition`: Dictionary depicting the element counts in the structure.\n",
" - `formation_energy_per_atom`: Energy per atom (derived from `formation_energy`).\n",
" - `structure`: The structure in dictionary format.\n",
" \n",
"\n",
" 3. Store Data in MongoDB:\n",
" - Deletes any existing records in the collection corresponding to `functional`.\n",
" - Inserts the processed records.\n",
" \n",
" 4. Create Indexes: \n",
"\n",
" 4. Create Indexes:\n",
" - Indexes are created on the following fields to optimize searching:\n",
" - `natoms`\n",
" - `elements`\n",
Expand All @@ -99,7 +100,7 @@
" - nelements: int\n",
" Number of distinct chemical elements.\n",
" - chemsys:\n",
" String representation of the elements in the chemical system, \n",
" String representation of the elements in the chemical system,\n",
" sorted alphabetically (e.g., 'H-O').\n",
" - formula: str\n",
" The reduced chemical formula of the material (e.g., 'H2O').\n",
Expand All @@ -109,11 +110,11 @@
" Formation energy per atom for the material (extracted from `formation_energy`).\n",
" - structure: dict\n",
" The detailed structure of the material in dictionary format.\n",
" \n",
"\n",
" Indexes:\n",
" --------\n",
" The created MongoDB indexes optimize the following fields:\n",
" \n",
"\n",
" - `natoms`: Number of atoms per structure.\n",
" - `elements`: Chemical elements present in the structure.\n",
" - `nelements`: Number of distinct elements in the structure.\n",
Expand All @@ -127,15 +128,14 @@
" - The JSON file path is specific to the user's system configuration.\n",
"\n",
" \"\"\"\n",
" \n",
" raw = loadfn(os.path.expanduser(f\"~/Desktop/2024_11_18_MatPES-20240214-{functional}-training-data.json.gz\"))\n",
" data = []\n",
" \n",
"\n",
" for k, v in tqdm(raw.items()):\n",
" # Combine IDs and structure information\n",
" d = {\"matpesid\": k} | v\n",
" comp = d[\"structure\"].composition\n",
" \n",
"\n",
" # Populate additional fields based on composition\n",
" d[\"natoms\"] = len(d[\"structure\"])\n",
" d[\"elements\"] = list(comp.chemical_system_set)\n",
Expand All @@ -144,37 +144,37 @@
" d[\"formula\"] = comp.reduced_formula\n",
" d[\"composition\"] = {el.symbol: amt for el, amt in comp.items()}\n",
" d[\"structure\"] = d[\"structure\"].as_dict()\n",
" \n",
"\n",
" # Restructure formation energy data\n",
" d[\"formation_energy_per_atom\"] = d[\"formation_energy\"]\n",
" del d[\"formation_energy\"]\n",
" \n",
"\n",
" # Add processed entry to list\n",
" data.append(d)\n",
" \n",
"\n",
" # Get collection from DB and clear old data\n",
" collection = db[functional]\n",
" collection.delete_many({})\n",
" \n",
"\n",
" # new data\n",
" collection.insert_many(data)\n",
" \n",
"\n",
" # Create indexes for optimized query performance\n",
" for k in [\"natoms\", \"elements\", \"nelements\", \"chemsys\", \"formula\", \"matpesid\"]:\n",
" collection.create_index(k)\n"
" collection.create_index(k)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e7eb2367-d29b-4de2-91bb-489906689761",
"id": "3",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 62%|█████████████████████████████████████████████████▎ | 271161/434712 [01:07<01:16, 2149.16it/s]"
"100%|███████████████████████████████████████████████████████████████████████████████| 434712/434712 [03:45<00:00, 1930.57it/s]\n"
]
}
],
Expand All @@ -185,20 +185,20 @@
{
"cell_type": "code",
"execution_count": null,
"id": "d167bd6c-197a-4e95-9267-c7b10b0ad6ef",
"id": "4",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|███████████████████████████████████████████████████████████████████████████████| 387897/387897 [02:50<00:00, 2278.72it/s]\n"
]
}
],
"source": [
"make_db(\"r2SCAN\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "89b44d7f-b5e1-4f0f-bff8-f6926a6240c4",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
11 changes: 9 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,16 @@ dependencies = [
"pymatgen",
"dash",
"dash_bootstrap_components",
"pymatviz","pymongo"
"pymongo"
]
version = "2024.11.13"
version = "0.0.1"

[tool.setuptools.packages.find]
where = ["src"]
include = ["matpes", "matpes.*"]

[project.scripts]
matpes_explorer = "matpes.ui:main"

[tool.versioningit.vcs]
method = "git"
Expand Down
7 changes: 0 additions & 7 deletions requirements-ci.txt

This file was deleted.

1 change: 0 additions & 1 deletion requirements.txt

This file was deleted.

Empty file added src/matpes/__init__.py
Empty file.
30 changes: 18 additions & 12 deletions app.py → src/matpes/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px
import pymatviz as pmv
import plotly.figure_factory as ff
from dash import Dash, Input, Output, State, callback, dcc, html
from pymatgen.core import Element
from pymongo import MongoClient

from matpes.utils import get_pt_heatmap

FUNCTIONALS = ("PBE", "r2SCAN")

# Set up MongoDB client and database
Expand Down Expand Up @@ -56,7 +58,7 @@ def get_data(functional, el, chemsys):

# Initialize the Dash app with a Bootstrap theme
external_stylesheets = [dbc.themes.CERULEAN]
app = Dash(__name__, external_stylesheets=external_stylesheets)
app = Dash("MatPES Explorer", external_stylesheets=external_stylesheets)

# Define the app layout
app.layout = dbc.Container(
Expand All @@ -67,7 +69,7 @@ def get_data(functional, el, chemsys):
dbc.Col(
[
html.Label("Functional"),
dcc.RadioItems(
dcc.Dropdown(
options=[{"label": f, "value": f} for f in FUNCTIONALS], value="PBE", id="functional"
),
],
Expand Down Expand Up @@ -119,6 +121,13 @@ def get_data(functional, el, chemsys):
)


def get_dist_plot(data, label, nbins=100):
    """
    Build a distribution plot (histogram plus density curve) for one data series.

    Args:
        data: Sequence of numeric values to plot (e.g., a pandas Series).
        label (str): Name of the series; also used as the x-axis title.
        nbins (int): Approximate number of histogram bins spanning the data range.

    Returns:
        The plotly figure produced by ``ff.create_distplot``, with the rug plot
        and the legend hidden.
    """
    values = list(data)
    span = max(values) - min(values) if values else 0
    # create_distplot takes a bin *width*, not a bin count, so convert here.
    # Fall back to plotly's default width (1.0) when the range is zero/NaN or
    # nbins is not positive, so we never divide by zero or pass a bad width.
    bin_size = span / nbins if span > 0 and nbins > 0 else 1.0
    fig = ff.create_distplot([data], [label], bin_size=bin_size, show_rug=False)

    fig.update_layout(xaxis={"title": label}, showlegend=False)
    return fig


# Define callback to update the heatmap based on selected functional
@callback(
[
Expand All @@ -135,16 +144,13 @@ def update_graph(functional, el_filter, chemsys_filter):
df = get_data(functional, el_filter, chemsys_filter)
el_count = {el.symbol: 0 for el in Element}
el_count.update(collections.Counter(itertools.chain(*df["elements"])))
heatmap_figure = pmv.ptable_heatmap_plotly(el_count)
heatmap_figure = get_pt_heatmap(el_count, label="Count", log=True)
return (
heatmap_figure,
px.histogram(
df, x="cohesive_energy_per_atom", labels={"cohesive_energy_per_atom": "Cohesive Energy per Atom (eV/atom)"}
),
px.histogram(
df,
x="formation_energy_per_atom",
labels={"formation_energy_per_atom": "Formation Energy per Atom (eV/atom)"},
get_dist_plot(df["cohesive_energy_per_atom"], "Cohesive Energy per Atom (eV/atom)"),
get_dist_plot(
df["formation_energy_per_atom"].dropna(),
"Formation Energy per Atom (eV/atom)",
),
px.histogram(df, x="natoms"),
px.histogram(df, x="nelements"),
Expand Down Expand Up @@ -174,5 +180,5 @@ def download(n_clicks, functional, el_filter, chemsys_filter):


# Run the app
if __name__ == "__main__":
def main(debug=True):
    """
    Run the MatPES Explorer Dash app.

    Args:
        debug (bool): Passed through to ``app.run``. Defaults to True, which
            matches the previous hard-coded behavior.
    """
    app.run(debug=debug)
Loading

0 comments on commit cee872e

Please sign in to comment.