diff --git a/examples/advanced/ex02_shuffle_data.py b/examples/advanced/ex02_shuffle_data.py index 7f67f428..4631a934 100644 --- a/examples/advanced/ex02_shuffle_data.py +++ b/examples/advanced/ex02_shuffle_data.py @@ -53,6 +53,14 @@ "Be_snapshot1.out.npy", data_path, ) +# On-the-fly snapshots can be added as well. +# data_shuffler.add_snapshot( +# "Be_snapshot2.info.json", +# data_path, +# "Be_snapshot2.out.npy", +# data_path, +# ) + # Shuffle the snapshots using the "shuffle_to_temporary" option. data_shuffler.shuffle_snapshots( diff --git a/examples/basic/ex01_train_network.py b/examples/basic/ex01_train_network.py index a2542302..600840b5 100644 --- a/examples/basic/ex01_train_network.py +++ b/examples/basic/ex01_train_network.py @@ -65,15 +65,25 @@ data_handler.add_snapshot( "Be_snapshot0.in.npy", data_path, "Be_snapshot0.out.npy", data_path, "tr" ) -# Add snapshots with "raw" (=MALA formatted) JSON, computation of descriptors -# will be performed "on-the-fly". data_handler.add_snapshot( - "Be_snapshot1.info.json", - data_path, - "Be_snapshot1.out.npy", - data_path, - "va", + "Be_snapshot1.in.npy", data_path, "Be_snapshot1.out.npy", data_path, "va" ) +# Add snapshots with "raw" (=MALA formatted) JSON, computation of descriptors +# will be performed "on-the-fly". +# data_handler.add_snapshot( +# "Be_snapshot0.info.json", +# data_path, +# "Be_snapshot0.out.npy", +# data_path, +# "tr", +# ) +# data_handler.add_snapshot( +# "Be_snapshot1.info.json", +# data_path, +# "Be_snapshot1.out.npy", +# data_path, +# "va", +# ) data_handler.prepare_data() diff --git a/examples/basic/ex02_test_network.py b/examples/basic/ex02_test_network.py index 8675a6dc..d02a5874 100644 --- a/examples/basic/ex02_test_network.py +++ b/examples/basic/ex02_test_network.py @@ -50,17 +50,33 @@ "te", calculation_output_file=os.path.join(data_path, "Be_snapshot2.info.json"), ) - -# Add snapshots with "raw" (=MALA formatted) JSON, computation of descriptors -# will be performed "on-the-fly". 
data_handler.add_snapshot( - "Be_snapshot3.info.json", + "Be_snapshot3.in.npy", data_path, "Be_snapshot3.out.npy", data_path, "te", calculation_output_file=os.path.join(data_path, "Be_snapshot3.info.json"), ) + +# Add snapshots with "raw" (=MALA formatted) JSON, computation of descriptors +# will be performed "on-the-fly". +# data_handler.add_snapshot( +# "Be_snapshot2.info.json", +# data_path, +# "Be_snapshot2.out.npy", +# data_path, +# "te", +# calculation_output_file=os.path.join(data_path, "Be_snapshot2.info.json"), +# ) +# data_handler.add_snapshot( +# "Be_snapshot3.info.json", +# data_path, +# "Be_snapshot3.out.npy", +# data_path, +# "te", +# calculation_output_file=os.path.join(data_path, "Be_snapshot3.info.json"), +# ) data_handler.prepare_data(reparametrize_scaler=False) diff --git a/examples/basic/ex03_preprocess_data.py b/examples/basic/ex03_preprocess_data.py index 8476ce49..4f8f014b 100644 --- a/examples/basic/ex03_preprocess_data.py +++ b/examples/basic/ex03_preprocess_data.py @@ -51,7 +51,9 @@ # more convenient *.json files that can be used in their stead. This saves # on disk space and makes the process more reproducible. # To only process parts of the data, omit/add descriptor_input*, target_input_* -# and simulation_output_* at your leisure. +# and simulation_output_* at your leisure. This is especially useful if you, +# e.g., do not need to convert the descriptor data, since it will be +# calculated on-the-fly during training. # Make sure to set the correct units - for QE, this should always be # 1/(Ry*Bohr^3). #################### @@ -60,6 +62,7 @@ outfile = os.path.join(data_path, "Be_snapshot0.out") ldosfile = os.path.join(data_path, "cubes/tmp.pp*Be_ldos.cube") +# Converting a snapshot for training on precomputed descriptor data. 
data_converter.add_snapshot( descriptor_input_type="espresso-out", descriptor_input_path=outfile, @@ -70,6 +73,16 @@ target_units="1/(Ry*Bohr^3)", ) +# Converting a snapshot for training with on-the-fly descriptor calculation. +# data_converter.add_snapshot( +# target_input_type=".cube", +# target_input_path=ldosfile, +# simulation_output_type="espresso-out", +# simulation_output_path=outfile, +# target_units="1/(Ry*Bohr^3)", +# ) + + #################### # 3. Converting the data # To convert the data we now simply have to call the convert_snapshot function. @@ -82,9 +95,11 @@ #################### data_converter.convert_snapshots( - descriptor_save_path="./", target_save_path="./", simulation_output_save_path="./", + # The next line should be omitted if the descriptor data is to be + # calculated on-the-fly during training. + descriptor_save_path="./", naming_scheme="Be_snapshot*.npy", descriptor_calculation_kwargs={"working_directory": data_path}, )