diff --git a/newton-4.00/applications/media/marineRocks1.fbx b/newton-4.00/applications/media/marineRocks1.fbx
index 77c5f2f51..c877c5e7f 100644
Binary files a/newton-4.00/applications/media/marineRocks1.fbx and b/newton-4.00/applications/media/marineRocks1.fbx differ
diff --git a/newton-4.00/applications/media/marineRocks2.fbx b/newton-4.00/applications/media/marineRocks2.fbx
index 9e9b521b1..f52a8b2b2 100644
Binary files a/newton-4.00/applications/media/marineRocks2.fbx and b/newton-4.00/applications/media/marineRocks2.fbx differ
diff --git a/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp b/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp
index 630d4641d..c765e343c 100644
--- a/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp
+++ b/newton-4.00/applications/ndSandbox/demos/ndAdvancedIndustrialRobot.cpp
@@ -204,7 +204,7 @@ namespace ndAdvancedRobot
 }
 ndController(const ndController& src)
- :ndBrainAgentContinuePolicyGradient(src.m_actor)
+ :ndBrainAgentContinuePolicyGradient(src.m_policy)
 ,m_robot(nullptr)
 {
 }
@@ -1078,7 +1078,7 @@ namespace ndAdvancedRobot
 hyperParameters.m_numberOfObservations = ND_AGENT_INPUT_SIZE;
 m_master = ndSharedPtr(new ndBrainAgentContinuePolicyGradient_TrainerMaster(hyperParameters));
- m_bestActor = ndSharedPtr(new ndBrain(*m_master->GetActor()));
+ m_bestActor = ndSharedPtr(new ndBrain(*m_master->GetPolicyNetwork()));
 snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME);
 m_master->SetName(name);
@@ -1088,12 +1088,12 @@ namespace ndAdvancedRobot
 snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME);
 ndGetWorkingFileName(name, fileName);
 ndSharedPtr critic(ndBrainLoad::Load(fileName));
- m_master->GetCritic()->CopyFrom(**critic);
+ m_master->GetValueNetwork()->CopyFrom(**critic);
 snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME);
 ndGetWorkingFileName(name, fileName);
 ndSharedPtr actor(ndBrainLoad::Load(fileName));
- m_master->GetActor()->CopyFrom(**actor);
+ m_master->GetPolicyNetwork()->CopyFrom(**actor);
 #endif
 auto SpawnModel = [this, scene, &visualMesh, floor](const ndMatrix& matrix)
@@ -1215,7 +1215,7 @@ namespace ndAdvancedRobot
 if (m_lastEpisode != m_master->GetEposideCount())
 {
 m_maxScore = rewardTrajectory;
- m_bestActor->CopyFrom(*m_master->GetActor());
+ m_bestActor->CopyFrom(*m_master->GetPolicyNetwork());
 ndExpandTraceMessage("best actor episode: %u\treward %f\ttrajectoryFrames: %f\n", m_master->GetEposideCount(), 100.0f * m_master->GetAverageScore() / m_horizon, m_master->GetAverageFrames());
 m_lastEpisode = m_master->GetEposideCount();
 }
@@ -1237,13 +1237,13 @@ namespace ndAdvancedRobot
 m_saveScore = ndFloor(rewardTrajectory) + 2.0f;
 // save partial controller in case of crash
- ndBrain* const actor = m_master->GetActor();
+ ndBrain* const actor = m_master->GetPolicyNetwork();
 char name[256];
 snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME);
 ndGetWorkingFileName(name, fileName);
 actor->SaveToFile(fileName);
- ndBrain* const critic = m_master->GetCritic();
+ ndBrain* const critic = m_master->GetValueNetwork();
 snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME);
 ndGetWorkingFileName(name, fileName);
 critic->SaveToFile(fileName);
@@ -1254,9 +1254,9 @@ namespace ndAdvancedRobot
 {
 char fileName[1024];
 m_modelIsTrained = true;
- m_master->GetActor()->CopyFrom(*(*m_bestActor));
+ m_master->GetPolicyNetwork()->CopyFrom(*(*m_bestActor));
 ndGetWorkingFileName(m_master->GetName().GetStr(), fileName);
- m_master->GetActor()->SaveToFile(fileName);
+ m_master->GetPolicyNetwork()->SaveToFile(fileName);
 ndExpandTraceMessage("saving to file: %s\n", fileName);
 ndExpandTraceMessage("training complete\n");
 ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer;
diff --git a/newton-4.00/applications/ndSandbox/demos/ndCartpoleContinue.cpp b/newton-4.00/applications/ndSandbox/demos/ndCartpoleContinue.cpp
index 624a00321..4fbc47329 100644
--- a/newton-4.00/applications/ndSandbox/demos/ndCartpoleContinue.cpp
+++ b/newton-4.00/applications/ndSandbox/demos/ndCartpoleContinue.cpp
@@ -77,7 +77,7 @@ namespace ndCarpole_1
 }
 ndController(const ndController& src)
- :ndBrainAgentContinuePolicyGradient(src.m_actor)
+ :ndBrainAgentContinuePolicyGradient(src.m_policy)
 ,m_robot(nullptr)
 {
 }
@@ -330,7 +330,7 @@ namespace ndCarpole_1
 hyperParameters.m_discountFactor = ndReal(m_discountFactor);
 m_master = ndSharedPtr(new ndBrainAgentContinuePolicyGradient_TrainerMaster(hyperParameters));
- m_bestActor = ndSharedPtr< ndBrain>(new ndBrain(*m_master->GetActor()));
+ m_bestActor = ndSharedPtr< ndBrain>(new ndBrain(*m_master->GetPolicyNetwork()));
 snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME);
 m_master->SetName(name);
@@ -340,12 +340,12 @@ namespace ndCarpole_1
 snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME);
 ndGetWorkingFileName(name, fileName);
 ndSharedPtr critic(ndBrainLoad::Load(fileName));
- m_master->GetCritic()->CopyFrom(**critic);
+ m_master->GetValueNetwork()->CopyFrom(**critic);
 snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME);
 ndGetWorkingFileName(name, fileName);
 ndSharedPtr actor(ndBrainLoad::Load(fileName));
- m_master->GetActor()->CopyFrom(**actor);
+ m_master->GetPolicyNetwork()->CopyFrom(**actor);
 #endif
 ndWorld* const world = scene->GetWorld();
@@ -471,7 +471,7 @@ namespace ndCarpole_1
 if (m_lastEpisode != m_master->GetEposideCount())
 {
 m_maxScore = rewardTrajectory;
- m_bestActor->CopyFrom(*m_master->GetActor());
+ m_bestActor->CopyFrom(*m_master->GetPolicyNetwork());
 ndExpandTraceMessage("best actor episode: %d\treward %f\ttrajectoryFrames: %f\n", m_master->GetEposideCount(), 100.0f * m_master->GetAverageScore() / m_horizon, m_master->GetAverageFrames());
 m_lastEpisode = m_master->GetEposideCount();
 }
@@ -483,13 +483,13 @@ namespace ndCarpole_1
 m_saveScore = ndFloor(rewardTrajectory) + 2.0f;
 // save partial controller in case of crash
- ndBrain* const actor = m_master->GetActor();
+ ndBrain* const actor = m_master->GetPolicyNetwork();
 char name[256];
 snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME);
 ndGetWorkingFileName(name, fileName);
 actor->SaveToFile(fileName);
- ndBrain* const critic = m_master->GetCritic();
+ ndBrain* const critic = m_master->GetValueNetwork();
 snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME);
 ndGetWorkingFileName(name, fileName);
 critic->SaveToFile(fileName);
@@ -510,9 +510,9 @@ namespace ndCarpole_1
 {
 char fileName[1024];
 m_modelIsTrained = true;
- m_master->GetActor()->CopyFrom(*(*m_bestActor));
+ m_master->GetPolicyNetwork()->CopyFrom(*(*m_bestActor));
 ndGetWorkingFileName(m_master->GetName().GetStr(), fileName);
- m_master->GetActor()->SaveToFile(fileName);
+ m_master->GetPolicyNetwork()->SaveToFile(fileName);
 ndExpandTraceMessage("saving to file: %s\n", fileName);
 ndExpandTraceMessage("training complete\n");
 ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer;
diff --git a/newton-4.00/applications/ndSandbox/demos/ndCartpoleDiscrete.cpp b/newton-4.00/applications/ndSandbox/demos/ndCartpoleDiscrete.cpp
index e8b022fc3..b545ccef6 100644
--- a/newton-4.00/applications/ndSandbox/demos/ndCartpoleDiscrete.cpp
+++ b/newton-4.00/applications/ndSandbox/demos/ndCartpoleDiscrete.cpp
@@ -123,7 +123,7 @@ namespace ndCarpole_0
 }
 ndController(const ndController& src)
- :ndBrainAgentDiscretePolicyGradient(src.m_actor)
+ :ndBrainAgentDiscretePolicyGradient(src.m_policy)
 ,m_robot(nullptr)
 {
 }
@@ -369,7 +369,7 @@ namespace ndCarpole_0
 hyperParameters.m_discountFactor = ndReal(m_discountFactor);
 m_master = ndSharedPtr(new ndBrainAgentDiscretePolicyGradient_TrainerMaster(hyperParameters));
- m_bestActor = ndSharedPtr< ndBrain>(new ndBrain(*m_master->GetActor()));
+ m_bestActor = ndSharedPtr< ndBrain>(new ndBrain(*m_master->GetPolicyNetwork()));
 snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME);
 m_master->SetName(name);
@@ -379,12 +379,12 @@ namespace ndCarpole_0
 snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME);
 ndGetWorkingFileName(name, fileName);
 ndSharedPtr critic(ndBrainLoad::Load(fileName));
- m_master->GetCritic()->CopyFrom(**critic);
+ m_master->GetValueNetwork()->CopyFrom(**critic);
 snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME);
 ndGetWorkingFileName(name, fileName);
 ndSharedPtr actor(ndBrainLoad::Load(fileName));
- m_master->GetActor()->CopyFrom(**actor);
+ m_master->GetPolicyNetwork()->CopyFrom(**actor);
 #endif
 ndWorld* const world = scene->GetWorld();
@@ -510,7 +510,7 @@ namespace ndCarpole_0
 if (m_lastEpisode != m_master->GetEposideCount())
 {
 m_maxScore = rewardTrajectory;
- m_bestActor->CopyFrom(*m_master->GetActor());
+ m_bestActor->CopyFrom(*m_master->GetPolicyNetwork());
 ndExpandTraceMessage("best actor episode: %d\treward %f\ttrajectoryFrames: %f\n", m_master->GetEposideCount(), 100.0f * m_master->GetAverageScore() / m_horizon, m_master->GetAverageFrames());
 m_lastEpisode = m_master->GetEposideCount();
 }
@@ -522,13 +522,13 @@ namespace ndCarpole_0
 m_saveScore = ndFloor(rewardTrajectory) + 2.0f;
 // save partial controller in case of crash
- ndBrain* const actor = m_master->GetActor();
+ ndBrain* const actor = m_master->GetPolicyNetwork();
 char name[256];
 snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME);
 ndGetWorkingFileName(name, fileName);
 actor->SaveToFile(fileName);
- ndBrain* const critic = m_master->GetCritic();
+ ndBrain* const critic = m_master->GetValueNetwork();
 snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME);
 ndGetWorkingFileName(name, fileName);
 critic->SaveToFile(fileName);
@@ -549,9 +549,9 @@ namespace ndCarpole_0
 {
 char fileName[1024];
 m_modelIsTrained = true;
- m_master->GetActor()->CopyFrom(*(*m_bestActor));
+ m_master->GetPolicyNetwork()->CopyFrom(*(*m_bestActor));
 ndGetWorkingFileName(m_master->GetName().GetStr(), fileName);
- m_master->GetActor()->SaveToFile(fileName);
+ m_master->GetPolicyNetwork()->SaveToFile(fileName);
 ndExpandTraceMessage("saving to file: %s\n", fileName);
 ndExpandTraceMessage("training complete\n");
 ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer;
diff --git a/newton-4.00/applications/ndSandbox/demos/ndQuadrupedTest_1.cpp b/newton-4.00/applications/ndSandbox/demos/ndQuadrupedTest_1.cpp
index 833fd25e0..cbfbd7624 100644
--- a/newton-4.00/applications/ndSandbox/demos/ndQuadrupedTest_1.cpp
+++ b/newton-4.00/applications/ndSandbox/demos/ndQuadrupedTest_1.cpp
@@ -229,7 +229,7 @@ namespace ndQuadruped_1
 }
 ndController(const ndController& src)
- :ndBrainAgentContinuePolicyGradient(src.m_actor)
+ :ndBrainAgentContinuePolicyGradient(src.m_policy)
 , m_robot(nullptr)
 {
 }
@@ -1275,7 +1275,7 @@ namespace ndQuadruped_1
 hyperParameters.m_numberOfObservations = ND_AGENT_INPUT_SIZE;
 m_master = ndSharedPtr(new ndBrainAgentContinuePolicyGradient_TrainerMaster(hyperParameters));
- m_bestActor = ndSharedPtr(new ndBrain(*m_master->GetActor()));
+ m_bestActor = ndSharedPtr(new ndBrain(*m_master->GetPolicyNetwork()));
 m_master->SetName(CONTROLLER_NAME);
 auto SpawnModel = [this, scene](const ndMatrix& matrix, bool debug)
@@ -1412,7 +1412,7 @@ namespace ndQuadruped_1
 if (m_lastEpisode != m_master->GetEposideCount())
 {
 m_maxScore = rewardTrajectory;
- m_bestActor->CopyFrom(*m_master->GetActor());
+ m_bestActor->CopyFrom(*m_master->GetPolicyNetwork());
 ndExpandTraceMessage("best actor episode: %d\treward %f\ttrajectoryFrames: %f\n", m_master->GetEposideCount(), 100.0f * m_master->GetAverageScore() / m_horizon, m_master->GetAverageFrames());
 m_lastEpisode = m_master->GetEposideCount();
 }
@@ -1433,9 +1433,9 @@ namespace ndQuadruped_1
 {
 char fileName[1024];
 m_modelIsTrained = true;
- m_master->GetActor()->CopyFrom(*(*m_bestActor));
+ m_master->GetPolicyNetwork()->CopyFrom(*(*m_bestActor));
 ndGetWorkingFileName(m_master->GetName().GetStr(), fileName);
- m_master->GetActor()->SaveToFile(fileName);
+ m_master->GetPolicyNetwork()->SaveToFile(fileName);
 ndExpandTraceMessage("saving to file: %s\n", fileName);
 ndExpandTraceMessage("training complete\n");
 ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer;
diff --git a/newton-4.00/applications/ndSandbox/demos/ndQuadrupedTest_2.cpp b/newton-4.00/applications/ndSandbox/demos/ndQuadrupedTest_2.cpp
index f3e0511a7..ceaec71e5 100644
--- a/newton-4.00/applications/ndSandbox/demos/ndQuadrupedTest_2.cpp
+++ b/newton-4.00/applications/ndSandbox/demos/ndQuadrupedTest_2.cpp
@@ -243,7 +243,7 @@ namespace ndQuadruped_2
 }
 ndController(const ndController& src)
- :ndBrainAgentContinuePolicyGradient(src.m_actor)
+ :ndBrainAgentContinuePolicyGradient(src.m_policy)
 ,m_robot(nullptr)
 {
 }
@@ -1418,7 +1418,7 @@ namespace ndQuadruped_2
 hyperParameters.m_numberOfObservations = ND_AGENT_INPUT_SIZE;
 m_master = ndSharedPtr(new ndBrainAgentContinuePolicyGradient_TrainerMaster(hyperParameters));
- m_bestActor = ndSharedPtr(new ndBrain(*m_master->GetActor()));
+ m_bestActor = ndSharedPtr(new ndBrain(*m_master->GetPolicyNetwork()));
 m_master->SetName(CONTROLLER_NAME);
 ndModelArticulation* const visualModel = CreateModel(scene, matrix);
@@ -1557,7 +1557,7 @@ namespace ndQuadruped_2
 if (m_lastEpisode != m_master->GetEposideCount())
 {
 m_maxScore = rewardTrajectory;
- m_bestActor->CopyFrom(*m_master->GetActor());
+ m_bestActor->CopyFrom(*m_master->GetPolicyNetwork());
 ndExpandTraceMessage("best actor episode: %d\treward %f\ttrajectoryFrames: %f\n", m_master->GetEposideCount(), 100.0f * m_master->GetAverageScore() / m_horizon, m_master->GetAverageFrames());
 m_lastEpisode = m_master->GetEposideCount();
 }
@@ -1578,9 +1578,9 @@ namespace ndQuadruped_2
 {
 char fileName[1024];
 m_modelIsTrained = true;
- m_master->GetActor()->CopyFrom(*(*m_bestActor));
+ m_master->GetPolicyNetwork()->CopyFrom(*(*m_bestActor));
 ndGetWorkingFileName(m_master->GetName().GetStr(), fileName);
- m_master->GetActor()->SaveToFile(fileName);
+ m_master->GetPolicyNetwork()->SaveToFile(fileName);
 ndExpandTraceMessage("saving to file: %s\n", fileName);
 ndExpandTraceMessage("training complete\n");
 ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer;
diff --git a/newton-4.00/applications/ndSandbox/demos/ndUnicycle.cpp b/newton-4.00/applications/ndSandbox/demos/ndUnicycle.cpp
index 83e8ae44d..86192e503 100644
--- a/newton-4.00/applications/ndSandbox/demos/ndUnicycle.cpp
+++ b/newton-4.00/applications/ndSandbox/demos/ndUnicycle.cpp
@@ -512,7 +512,7 @@ namespace ndUnicycle
 hyperParameters.m_discountFactor = ndReal(m_discountFactor);
 m_master = ndSharedPtr(new ndBrainAgentContinuePolicyGradient_TrainerMaster(hyperParameters));
- m_bestActor = ndSharedPtr< ndBrain>(new ndBrain(*m_master->GetActor()));
+ m_bestActor = ndSharedPtr< ndBrain>(new ndBrain(*m_master->GetPolicyNetwork()));
 m_master->SetName(CONTROLLER_NAME);
@@ -650,7 +650,7 @@ namespace ndUnicycle
 if (m_lastEpisode != m_master->GetEposideCount())
 {
 m_maxScore = rewardTrajectory;
- m_bestActor->CopyFrom(*m_master->GetActor());
+ m_bestActor->CopyFrom(*m_master->GetPolicyNetwork());
 ndExpandTraceMessage("best actor episode: %d\treward %f\ttrajectoryFrames: %f\n", m_master->GetEposideCount(), 100.0f * m_master->GetAverageScore() / m_horizon, m_master->GetAverageFrames());
 m_lastEpisode = m_master->GetEposideCount();
 }
@@ -671,9 +671,9 @@ namespace ndUnicycle
 {
 char fileName[1024];
 m_modelIsTrained = true;
- m_master->GetActor()->CopyFrom(*(*m_bestActor));
+ m_master->GetPolicyNetwork()->CopyFrom(*(*m_bestActor));
 ndGetWorkingFileName(m_master->GetName().GetStr(), fileName);
- m_master->GetActor()->SaveToFile(fileName);
+ m_master->GetPolicyNetwork()->SaveToFile(fileName);
 ndExpandTraceMessage("saving to file: %s\n", fileName);
 ndExpandTraceMessage("training complete\n");
 ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer;
diff --git a/newton-4.00/applications/ndSandbox/main.cpp b/newton-4.00/applications/ndSandbox/main.cpp
index abcf0b423..9f84176e9 100644
--- a/newton-4.00/applications/ndSandbox/main.cpp
+++ b/newton-4.00/applications/ndSandbox/main.cpp
@@ -27,6 +27,24 @@ int main(int, char**)
 // ndTrace(("%g\n", x));
 //}
+ //ndArray xxxx;
+ //for (int y = 0; y < 256; y++)
+ //{
+ // for (int x = 0; x < 256; x++)
+ // {
+ // ndVector p(ndFloat32(x), ndFloat32(y), 0.0f, 0.0f);
+ // xxxx.PushBack(p);
+ // xxxx.PushBack(p);
+ // xxxx.PushBack(p);
+ // xxxx.PushBack(p);
+ // xxxx.PushBack(p);
+ // xxxx.PushBack(p);
+ // }
+ //}
+ //ndArray index;
+ //index.SetCount(xxxx.GetCount());
+ //ndInt32 vertexCount = ndVertexListToIndexList(&xxxx[0].m_x, sizeof(ndVector), 3, ndInt32(xxxx.GetCount()), &index[0], ndFloat32(1.0e-6f));
+
 ndDemoEntityManager demos;
 demos.Run();
 return 0;
diff --git a/newton-4.00/applications/ndSandbox/ndDemoEntityManager.cpp b/newton-4.00/applications/ndSandbox/ndDemoEntityManager.cpp
index ba5e6128d..073959763 100644
--- a/newton-4.00/applications/ndSandbox/ndDemoEntityManager.cpp
+++ b/newton-4.00/applications/ndSandbox/ndDemoEntityManager.cpp
@@ -46,7 +46,7 @@
 //#define DEFAULT_SCENE 6 // basic Trigger
 //#define DEFAULT_SCENE 7 // object Placement
 //#define DEFAULT_SCENE 8 // particle fluid
-//#define DEFAULT_SCENE 9 // static mesh collision
+#define DEFAULT_SCENE 9 // static mesh collision
 //#define DEFAULT_SCENE 10 // static user mesh collision
 //#define DEFAULT_SCENE 11 // basic joints
 //#define DEFAULT_SCENE 12 // basic vehicle
@@ -58,7 +58,7 @@
 //#define DEFAULT_SCENE 18 // cart pole continue controller
 //#define DEFAULT_SCENE 19 // unit cycle controller
 //#define DEFAULT_SCENE 20 // simple industrial robot
-#define DEFAULT_SCENE 21 // advanced industrial robot
+//#define DEFAULT_SCENE 21 // advanced industrial robot
 //#define DEFAULT_SCENE 22 // quadruped test 1
 //#define DEFAULT_SCENE 23 // quadruped test 2
 //#define DEFAULT_SCENE 24 // quadruped test 3
diff --git a/newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient.cpp b/newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient.cpp
index fa564bad4..95eafe369 100644
--- a/newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient.cpp
+++ b/newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient.cpp
@@ -26,13 +26,13 @@ ndBrainAgentContinuePolicyGradient::ndBrainAgentContinuePolicyGradient(const ndSharedPtr& actor)
 :ndBrainAgent()
- ,m_actor(actor)
+ ,m_policy(actor)
 {
 }
 ndBrainAgentContinuePolicyGradient::ndBrainAgentContinuePolicyGradient(const ndBrainAgentContinuePolicyGradient& src)
 :ndBrainAgent(src)
- ,m_actor(src.m_actor)
+ ,m_policy(src.m_policy)
 {
 }
@@ -85,16 +85,16 @@ void ndBrainAgentContinuePolicyGradient::OptimizeStep()
 void ndBrainAgentContinuePolicyGradient::Step()
 {
- ndInt32 bufferSize = m_actor->CalculateWorkingBufferSize();
+ ndInt32 bufferSize = m_policy->CalculateWorkingBufferSize();
 ndBrainFloat* const bufferMem = ndAlloca(ndBrainFloat, bufferSize);
- ndBrainFloat* const actionBuffer = ndAlloca(ndBrainFloat, m_actor->GetOutputSize());
- ndBrainFloat* const observationBuffer = ndAlloca(ndBrainFloat, m_actor->GetInputSize());
+ ndBrainFloat* const actionBuffer = ndAlloca(ndBrainFloat, m_policy->GetOutputSize());
+ ndBrainFloat* const observationBuffer = ndAlloca(ndBrainFloat, m_policy->GetInputSize());
 ndBrainMemVector workingBuffer(bufferMem, bufferSize);
- ndBrainMemVector actions(actionBuffer, m_actor->GetOutputSize());
- ndBrainMemVector observations(observationBuffer, m_actor->GetInputSize());
+ ndBrainMemVector actions(actionBuffer, m_policy->GetOutputSize());
+ ndBrainMemVector observations(observationBuffer, m_policy->GetInputSize());
 GetObservation(observationBuffer);
- m_actor->MakePrediction(observations, actions, workingBuffer);
+ m_policy->MakePrediction(observations, actions, workingBuffer);
 ApplyActions(&actions[0]);
 }
diff --git a/newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient.h b/newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient.h
index da06963a5..0842e8e1e 100644
--- a/newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient.h
+++ b/newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient.h
@@ -45,7 +45,7 @@ class ndBrainAgentContinuePolicyGradient: public ndBrainAgent
 void Save(ndBrainSave* const loadSave);
 void InitWeights();
- ndSharedPtr m_actor;
+ ndSharedPtr m_policy;
 };
 #endif
diff --git a/newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient_Trainer.cpp b/newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient_Trainer.cpp
index 90e2685ac..87db08450 100644
--- a/newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient_Trainer.cpp
+++ b/newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient_Trainer.cpp
@@ -258,7 +258,7 @@ ndBrainAgentContinuePolicyGradient_Trainer::~ndBrainAgentContinuePolicyGradient_
 ndBrain* ndBrainAgentContinuePolicyGradient_Trainer::GetActor()
 {
- return m_master->GetActor();
+ return m_master->GetPolicyNetwork();
 }
 bool ndBrainAgentContinuePolicyGradient_Trainer::IsTerminal() const
@@ -289,7 +289,7 @@ void ndBrainAgentContinuePolicyGradient_Trainer::Step()
 ndBrainMemVector observation(m_trajectory.GetObservations(entryIndex), m_master->m_numberOfObservations);
 GetObservation(&observation[0]);
- m_master->m_actor.MakePrediction(observation, actions, m_workingBuffer);
+ m_master->m_policy.MakePrediction(observation, actions, m_workingBuffer);
 SelectAction(actions);
 ApplyActions(&actions[0]);
@@ -337,8 +337,8 @@ void ndBrainAgentContinuePolicyGradient_Trainer::SaveTrajectory()
 // ***************************************************************************************
 ndBrainAgentContinuePolicyGradient_TrainerMaster::ndBrainAgentContinuePolicyGradient_TrainerMaster(const HyperParameters& hyperParameters)
 :ndBrainThreadPool()
- ,m_actor()
- ,m_baseLineValue()
+ ,m_policy()
+ ,m_value()
 ,m_optimizer(nullptr)
 ,m_trainers()
 ,m_weightedTrainer()
@@ -408,20 +408,20 @@ ndBrainAgentContinuePolicyGradient_TrainerMaster::ndBrainAgentContinuePolicyGrad
 for (ndInt32 i = 0; i < layers.GetCount(); ++i)
 {
- m_actor.AddLayer(layers[i]);
+ m_policy.AddLayer(layers[i]);
 }
- m_actor.InitWeights();
- ndAssert(!strcmp((m_actor[m_actor.GetCount() - 1])->GetLabelId(), "ndBrainLayerActivationTanh"));
+ m_policy.InitWeights();
+ ndAssert(!strcmp((m_policy[m_policy.GetCount() - 1])->GetLabelId(), "ndBrainLayerActivationTanh"));
 m_trainers.SetCount(0);
 m_auxiliaryTrainers.SetCount(0);
 for (ndInt32 i = 0; i < m_bashBufferSize; ++i)
 {
- ndBrainTrainer* const trainer = new ndBrainTrainer(&m_actor);
+ ndBrainTrainer* const trainer = new ndBrainTrainer(&m_policy);
 m_trainers.PushBack(trainer);
- ndBrainTrainer* const auxiliaryTrainer = new ndBrainTrainer(&m_actor);
+ ndBrainTrainer* const auxiliaryTrainer = new ndBrainTrainer(&m_policy);
 m_auxiliaryTrainers.PushBack(auxiliaryTrainer);
 }
@@ -442,28 +442,28 @@ ndBrainAgentContinuePolicyGradient_TrainerMaster::ndBrainAgentContinuePolicyGrad
 layers.PushBack(new ndBrainLayerLinear(layers[layers.GetCount() - 1]->GetOutputSize(), 1));
 for (ndInt32 i = 0; i < layers.GetCount(); ++i)
 {
- m_baseLineValue.AddLayer(layers[i]);
+ m_value.AddLayer(layers[i]);
 }
- m_baseLineValue.InitWeights();
+ m_value.InitWeights();
- ndAssert(m_baseLineValue.GetOutputSize() == 1);
- ndAssert(m_baseLineValue.GetInputSize() == m_actor.GetInputSize());
- ndAssert(!strcmp((m_baseLineValue[m_baseLineValue.GetCount() - 1])->GetLabelId(), "ndBrainLayerLinear"));
+ ndAssert(m_value.GetOutputSize() == 1);
+ ndAssert(m_value.GetInputSize() == m_policy.GetInputSize());
+ ndAssert(!strcmp((m_value[m_value.GetCount() - 1])->GetLabelId(), "ndBrainLayerLinear"));
 m_baseLineValueTrainers.SetCount(0);
 for (ndInt32 i = 0; i < m_bashBufferSize; ++i)
 {
- ndBrainTrainer* const trainer = new ndBrainTrainer(&m_baseLineValue);
+ ndBrainTrainer* const trainer = new ndBrainTrainer(&m_value);
 m_baseLineValueTrainers.PushBack(trainer);
 }
 m_baseLineValueOptimizer = new ndBrainOptimizerAdam();
 m_baseLineValueOptimizer->SetRegularizer(ndBrainFloat(1.0e-4f));
- m_baseValueWorkingBufferSize = m_baseLineValue.CalculateWorkingBufferSize();
+ m_baseValueWorkingBufferSize = m_value.CalculateWorkingBufferSize();
 m_workingBuffer.SetCount(m_baseValueWorkingBufferSize * hyperParameters.m_threadsCount);
- //m_actor.SaveToFile("xxxx1.xxx");
+ //m_policy.SaveToFile("xxxx1.xxx");
 }
 ndBrainAgentContinuePolicyGradient_TrainerMaster::~ndBrainAgentContinuePolicyGradient_TrainerMaster()
@@ -484,14 +484,14 @@ ndBrainAgentContinuePolicyGradient_TrainerMaster::~ndBrainAgentContinuePolicyGra
 delete[] m_randomGenerator;
 }
-ndBrain* ndBrainAgentContinuePolicyGradient_TrainerMaster::GetActor()
+ndBrain* ndBrainAgentContinuePolicyGradient_TrainerMaster::GetPolicyNetwork()
 {
- return &m_actor;
+ return &m_policy;
 }
-ndBrain* ndBrainAgentContinuePolicyGradient_TrainerMaster::GetCritic()
+ndBrain* ndBrainAgentContinuePolicyGradient_TrainerMaster::GetValueNetwork()
 {
- return &m_baseLineValue;
+ return &m_value;
 }
 const ndString& ndBrainAgentContinuePolicyGradient_TrainerMaster::GetName() const
@@ -668,7 +668,7 @@ void ndBrainAgentContinuePolicyGradient_TrainerMaster::OptimizeCritic()
 for (ndInt32 i = iterator++; i < count; i = iterator++)
 {
 const ndBrainMemVector observation(m_trajectoryAccumulator.GetObservations(i), m_numberOfObservations);
- m_baseLineValue.MakePrediction(observation, actions, workingBuffer);
+ m_value.MakePrediction(observation, actions, workingBuffer);
 ndBrainFloat baseLine = actions[0];
 ndBrainFloat reward = m_trajectoryAccumulator.GetReward(i);
 ndBrainFloat advantage = reward - baseLine;
diff --git a/newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient_Trainer.h b/newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient_Trainer.h
index 40e2ae784..a6a5e7142 100644
--- a/newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient_Trainer.h
+++ b/newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient_Trainer.h
@@ -136,8 +136,8 @@ class ndBrainAgentContinuePolicyGradient_TrainerMaster : public ndBrainThreadPoo
 ndBrainAgentContinuePolicyGradient_TrainerMaster(const HyperParameters& hyperParameters);
 virtual ~ndBrainAgentContinuePolicyGradient_TrainerMaster();
- ndBrain* GetActor();
- ndBrain* GetCritic();
+ ndBrain* GetValueNetwork();
+ ndBrain* GetPolicyNetwork();
 const ndString& GetName() const;
 void SetName(const ndString& name);
@@ -158,8 +158,8 @@ class ndBrainAgentContinuePolicyGradient_TrainerMaster : public ndBrainThreadPoo
 void UpdateBaseLineValue();
 ndBrainAgentContinuePolicyGradient_Trainer::ndRandomGenerator* GetRandomGenerator();
- ndBrain m_actor;
- ndBrain m_baseLineValue;
+ ndBrain m_policy;
+ ndBrain m_value;
 ndBrainOptimizerAdam* m_optimizer;
 ndArray m_trainers;
 ndArray m_weightedTrainer;
diff --git a/newton-4.00/sdk/dBrain/ndBrainAgentDDPG.h b/newton-4.00/sdk/dBrain/ndBrainAgentDDPG.h
index c2510adc3..25261ef75 100644
--- a/newton-4.00/sdk/dBrain/ndBrainAgentDDPG.h
+++ b/newton-4.00/sdk/dBrain/ndBrainAgentDDPG.h
@@ -45,13 +45,13 @@ class ndBrainAgentDDPG: public ndBrainAgent
 void Save(ndBrainSave* const loadSave);
 void InitWeights();
- ndSharedPtr m_actor;
+ ndSharedPtr m_policy;
 };
 template
 ndBrainAgentDDPG::ndBrainAgentDDPG(const ndSharedPtr& actor)
 :ndBrainAgent()
- ,m_actor(actor)
+ ,m_policy(actor)
 {
 }
@@ -112,7 +112,7 @@ void ndBrainAgentDDPG::Step()
 ndBrainFixSizeVector observations;
 GetObservation(&observations[0]);
- m_actor->MakePrediction(observations, actions);
+ m_policy->MakePrediction(observations, actions);
 ApplyActions(&actions[0]);
 }
diff --git a/newton-4.00/sdk/dBrain/ndBrainAgentDDPG_Trainer.h b/newton-4.00/sdk/dBrain/ndBrainAgentDDPG_Trainer.h
index 579f055e4..cafe3c351 100644
--- a/newton-4.00/sdk/dBrain/ndBrainAgentDDPG_Trainer.h
+++ b/newton-4.00/sdk/dBrain/ndBrainAgentDDPG_Trainer.h
@@ -112,7 +112,7 @@ class ndBrainAgentDDPG_Trainer: public ndBrainAgent, public ndBrainThreadPool
 void CalculateQvalue(const ndBrainVector& state, const ndBrainVector& actions);
- ndBrain m_actor;
+ ndBrain m_policy;
 ndBrain m_critic;
 ndBrain m_targetActor;
 ndBrain m_targetCritic;
@@ -143,7 +143,7 @@ class ndBrainAgentDDPG_Trainer: public ndBrainAgent, public ndBrainThreadPool
 template
 ndBrainAgentDDPG_Trainer::ndBrainAgentDDPG_Trainer(const HyperParameters& hyperParameters)
 :ndBrainAgent()
- ,m_actor()
+ ,m_policy()
 ,m_critic()
 ,m_targetActor()
 ,m_targetCritic()
@@ -181,7 +181,7 @@ ndBrainAgentDDPG_Trainer::ndBrainAgentDDPG_Trainer(const H
 layers.PushBack(new ndBrainLayerActivationTanh(actionDim));
 for (ndInt32 i = 0; i < layers.GetCount(); ++i)
 {
- m_actor.AddLayer(layers[i]);
+ m_policy.AddLayer(layers[i]);
 m_targetActor.AddLayer(layers[i]->Clone());
 }
@@ -203,8 +203,8 @@ ndBrainAgentDDPG_Trainer::ndBrainAgentDDPG_Trainer(const H
 }
 ndAssert(m_critic.GetOutputSize() == 1);
- ndAssert(m_critic.GetInputSize() == (m_actor.GetInputSize() + m_actor.GetOutputSize()));
- ndAssert(!strcmp((m_actor[m_actor.GetCount() - 1])->GetLabelId(), "ndBrainLayerActivationTanh"));
+ ndAssert(m_critic.GetInputSize() == (m_policy.GetInputSize() + m_policy.GetOutputSize()));
+ ndAssert(!strcmp((m_policy[m_policy.GetCount() - 1])->GetLabelId(), "ndBrainLayerActivationTanh"));
 m_actorTrainers.SetCount(0);
 m_criticTrainers.SetCount(0);
@@ -212,7 +212,7 @@ ndBrainAgentDDPG_Trainer::ndBrainAgentDDPG_Trainer(const H
 for (ndInt32 i = 0; i < m_bashBufferSize; ++i)
 {
- m_actorTrainers.PushBack(new ndBrainTrainer(&m_actor));
+ m_actorTrainers.PushBack(new ndBrainTrainer(&m_policy));
 m_criticTrainers.PushBack(new ndBrainTrainer(&m_critic));
 }
@@ -247,10 +247,10 @@ bool ndBrainAgentDDPG_Trainer::IsTrainer() const
 template
 void ndBrainAgentDDPG_Trainer::InitWeights()
 {
- m_actor.InitWeights();
+ m_policy.InitWeights();
 m_critic.InitWeights();
- m_targetActor.CopyFrom(m_actor);
+ m_targetActor.CopyFrom(m_policy);
 m_targetCritic.CopyFrom(m_critic);
 }
@@ -436,7 +436,7 @@ void ndBrainAgentDDPG_Trainer::BackPropagateActor(const nd
 ParallelExecute(PropagateBash);
 m_actorOptimizer->Update(this, m_actorTrainers, -m_actorLearnRate);
- m_targetActor.SoftCopy(m_actor, m_softTargetFactor);
+ m_targetActor.SoftCopy(m_policy, m_softTargetFactor);
 }
 template
@@ -455,7 +455,7 @@ void ndBrainAgentDDPG_Trainer::BackPropagate()
 template
 void ndBrainAgentDDPG_Trainer::Save(ndBrainSave* const loadSave)
 {
- loadSave->Save(&m_actor);
+ loadSave->Save(&m_policy);
 }
 template
@@ -510,7 +510,7 @@ void ndBrainAgentDDPG_Trainer::Step()
 {
 GetObservation(&m_currentTransition.m_observation[0]);
- m_actor.MakePrediction(m_currentTransition.m_observation, m_currentTransition.m_action);
+ m_policy.MakePrediction(m_currentTransition.m_observation, m_currentTransition.m_action);
 // explore environment
 SelectAction(&m_currentTransition.m_action[0]);
diff --git a/newton-4.00/sdk/dBrain/ndBrainAgentDQN.h b/newton-4.00/sdk/dBrain/ndBrainAgentDQN.h
index 17ed3f399..8432f7877 100644
--- a/newton-4.00/sdk/dBrain/ndBrainAgentDQN.h
+++ b/newton-4.00/sdk/dBrain/ndBrainAgentDQN.h
@@ -49,13 +49,13 @@ class ndBrainAgentDQN: public ndBrainAgent
 void InitWeights();
 ndInt32 SelectBestAction(const ndBrainVector& actions) const;
- ndSharedPtr m_actor;
+ ndSharedPtr m_policy;
 };
 template
 ndBrainAgentDQN::ndBrainAgentDQN(const ndSharedPtr& actor)
 :ndBrainAgent()
- ,m_actor(actor)
+ ,m_policy(actor)
 {
 }
@@ -116,7 +116,7 @@ void ndBrainAgentDQN::Step()
 ndBrainFixSizeVector observations;
 GetObservation(&observations[0]);
- m_actor->MakePrediction(observations, actions);
+ m_policy->MakePrediction(observations, actions);
 ndBrainFloat bestAction = ndBrainFloat(actions.ArgMax());
 ApplyActions(&bestAction);
diff --git a/newton-4.00/sdk/dBrain/ndBrainAgentDQN_Trainer.h b/newton-4.00/sdk/dBrain/ndBrainAgentDQN_Trainer.h
index 781ee24e6..ffb6bc684 100644
--- a/newton-4.00/sdk/dBrain/ndBrainAgentDQN_Trainer.h
+++ b/newton-4.00/sdk/dBrain/ndBrainAgentDQN_Trainer.h
@@ -102,7 +102,7 @@ class ndBrainAgentDQN_Trainer: public ndBrainAgent, public ndBrainThreadPool
 void AddExploration(ndBrainFloat* const actions);
 protected:
- ndBrain m_actor;
+ ndBrain m_policy;
 ndBrain m_target;
 ndBrainOptimizerAdam* m_optimizer;
 ndArray m_trainers;
@@ -129,7 +129,7 @@ template
 ndBrainAgentDQN_Trainer::ndBrainAgentDQN_Trainer(const HyperParameters& hyperParameters)
 :ndBrainAgent()
 ,ndBrainThreadPool()
- ,m_actor()
+ ,m_policy()
 ,m_target()
 ,m_optimizer(nullptr)
 ,m_replayBuffer()
@@ -161,7 +161,7 @@ ndBrainAgentDQN_Trainer::ndBrainAgentDQN_Trainer(const Hyp
 for (ndInt32 i = 0; i < layers.GetCount(); ++i)
 {
- m_actor.AddLayer(layers[i]);
+ m_policy.AddLayer(layers[i]);
 m_target.AddLayer(layers[i]->Clone());
 }
@@ -169,7 +169,7 @@ ndBrainAgentDQN_Trainer::ndBrainAgentDQN_Trainer(const Hyp
 SetThreadCount(hyperParameters.m_threadsCount);
 for (ndInt32 i = 0; i < m_bashBufferSize; ++i)
 {
- ndBrainTrainer* const trainer = new ndBrainTrainer(&m_actor);
+ ndBrainTrainer* const trainer = new ndBrainTrainer(&m_policy);
 m_trainers.PushBack(trainer);
 }
@@ -200,8 +200,8 @@ bool ndBrainAgentDQN_Trainer::IsTrainer() const
 template
 void ndBrainAgentDQN_Trainer::InitWeights()
 {
- m_actor.InitWeights();
- m_target.CopyFrom(m_actor);
+ m_policy.InitWeights();
+ m_target.CopyFrom(m_policy);
 }
 template
@@ -305,14 +305,14 @@ void ndBrainAgentDQN_Trainer::BackPropagate()
 if ((m_frameCount % m_targetUpdatePeriod) == (m_targetUpdatePeriod - 1))
 {
 // update on line network
- m_target.CopyFrom(m_actor);
+ m_target.CopyFrom(m_policy);
 }
 }
 template
 void ndBrainAgentDQN_Trainer::Save(ndBrainSave* const loadSave)
 {
- loadSave->Save(&m_actor);
+ loadSave->Save(&m_policy);
 }
 template
@@ -411,7 +411,7 @@ void ndBrainAgentDQN_Trainer::Step()
 ndBrainFixSizeVector actions;
 GetObservation(&m_currentTransition.m_observation[0]);
- m_actor.MakePrediction(m_currentTransition.m_observation, actions);
+ m_policy.MakePrediction(m_currentTransition.m_observation, actions);
 AddExploration(&actions[0]);
 ndBrainFloat bestAction = ndBrainFloat(m_currentTransition.m_action[0]);
diff --git a/newton-4.00/sdk/dBrain/ndBrainAgentDiscretePolicyGradient.cpp b/newton-4.00/sdk/dBrain/ndBrainAgentDiscretePolicyGradient.cpp
index 63926b483..742512fb1 100644
--- a/newton-4.00/sdk/dBrain/ndBrainAgentDiscretePolicyGradient.cpp
+++ b/newton-4.00/sdk/dBrain/ndBrainAgentDiscretePolicyGradient.cpp
@@ -25,7 +25,7 @@ ndBrainAgentDiscretePolicyGradient::ndBrainAgentDiscretePolicyGradient(const ndSharedPtr& actor)
 :ndBrainAgent()
- ,m_actor(actor)
+ ,m_policy(actor)
 {
 }
@@ -79,16 +79,16 @@ void ndBrainAgentDiscretePolicyGradient::OptimizeStep()
 void ndBrainAgentDiscretePolicyGradient::Step()
 {
- ndInt32 bufferSize = m_actor->CalculateWorkingBufferSize();
+ ndInt32 bufferSize = m_policy->CalculateWorkingBufferSize();
 ndBrainFloat* const bufferMem = ndAlloca(ndBrainFloat, bufferSize);
- ndBrainFloat* const actionBuffer = ndAlloca(ndBrainFloat, m_actor->GetOutputSize());
- ndBrainFloat* const observationBuffer = ndAlloca(ndBrainFloat, m_actor->GetInputSize());
+ ndBrainFloat* const actionBuffer = ndAlloca(ndBrainFloat, m_policy->GetOutputSize());
+ ndBrainFloat* const observationBuffer = ndAlloca(ndBrainFloat, m_policy->GetInputSize());
 ndBrainMemVector workingBuffer(bufferMem, bufferSize);
- ndBrainMemVector actions(actionBuffer, m_actor->GetOutputSize());
- ndBrainMemVector observations(observationBuffer, m_actor->GetInputSize());
+ ndBrainMemVector actions(actionBuffer, m_policy->GetOutputSize());
+ ndBrainMemVector observations(observationBuffer, m_policy->GetInputSize());
 GetObservation(observationBuffer);
- m_actor->MakePrediction(observations, actions, workingBuffer);
+ m_policy->MakePrediction(observations, actions, workingBuffer);
 ndBrainFloat bestAction = ndBrainFloat(actions.ArgMax());
 ApplyActions(&bestAction);
diff --git a/newton-4.00/sdk/dBrain/ndBrainAgentDiscretePolicyGradient.h b/newton-4.00/sdk/dBrain/ndBrainAgentDiscretePolicyGradient.h
index 5da57a10b..d3db3460f 100644
--- a/newton-4.00/sdk/dBrain/ndBrainAgentDiscretePolicyGradient.h
+++ b/newton-4.00/sdk/dBrain/ndBrainAgentDiscretePolicyGradient.h
@@ -46,7 +46,7 @@ class ndBrainAgentDiscretePolicyGradient: public ndBrainAgent
 void InitWeights();
 ndInt32 GetEpisodeFrames() const;
- ndSharedPtr m_actor;
+ ndSharedPtr m_policy;
 };
 #endif
diff --git a/newton-4.00/sdk/dBrain/ndBrainAgentDiscretePolicyGradient_Trainer.cpp b/newton-4.00/sdk/dBrain/ndBrainAgentDiscretePolicyGradient_Trainer.cpp
index f7f588cea..42e1a7f7c 100644
--- a/newton-4.00/sdk/dBrain/ndBrainAgentDiscretePolicyGradient_Trainer.cpp
+++ b/newton-4.00/sdk/dBrain/ndBrainAgentDiscretePolicyGradient_Trainer.cpp
@@ -186,7 +186,7 @@ ndBrainAgentDiscretePolicyGradient_Trainer::~ndBrainAgentDiscretePolicyGradient_
 ndBrain* ndBrainAgentDiscretePolicyGradient_Trainer::GetActor()
 {
- return m_master->GetActor();
+ return m_master->GetPolicyNetwork();
 }
 bool ndBrainAgentDiscretePolicyGradient_Trainer::IsTerminal() const
@@ -236,7 +236,7 @@ void ndBrainAgentDiscretePolicyGradient_Trainer::Step()
 ndBrainMemVector observation(m_trajectory.GetObservations(entryIndex), m_master->m_numberOfObservations);
 GetObservation(&observation[0]);
- m_master->m_actor.MakePrediction(observation, probability, m_workingBuffer);
+ m_master->m_policy.MakePrediction(observation, probability, m_workingBuffer);
 ndBrainFloat action = ndBrainFloat(SelectAction(probability));
 m_trajectory.SetAction(entryIndex, action);
@@ -287,8 +287,8 @@ void ndBrainAgentDiscretePolicyGradient_Trainer::SaveTrajectory()
 // ***************************************************************************************
 ndBrainAgentDiscretePolicyGradient_TrainerMaster::ndBrainAgentDiscretePolicyGradient_TrainerMaster(const HyperParameters& hyperParameters)
 :ndBrainThreadPool()
- ,m_actor()
- ,m_baseLineValue()
+ ,m_policy()
+ ,m_value()
 ,m_optimizer(nullptr)
 ,m_trainers()
 ,m_weightedTrainer()
@@ -353,20 +353,20 @@ ndBrainAgentDiscretePolicyGradient_TrainerMaster::ndBrainAgentDiscretePolicyGrad
 layers.PushBack(new ndBrainLayerActivationSoftmax(m_numberOfActions));
 for (ndInt32 i = 0; i < layers.GetCount(); ++i)
 {
- m_actor.AddLayer(layers[i]);
+ m_policy.AddLayer(layers[i]);
 }
- m_actor.InitWeights();
- ndAssert(!strcmp((m_actor[m_actor.GetCount() - 1])->GetLabelId(), "ndBrainLayerActivationSoftmax"));
+ m_policy.InitWeights();
+ ndAssert(!strcmp((m_policy[m_policy.GetCount() - 1])->GetLabelId(), "ndBrainLayerActivationSoftmax"));
 m_trainers.SetCount(0);
 m_auxiliaryTrainers.SetCount(0);
 for (ndInt32 i = 0; i < m_bashBufferSize; ++i)
 {
- ndBrainTrainer* const trainer = new ndBrainTrainer(&m_actor);
+ ndBrainTrainer* const trainer = new ndBrainTrainer(&m_policy);
 m_trainers.PushBack(trainer);
- ndBrainTrainer* const auxiliaryTrainer = new ndBrainTrainer(&m_actor);
+ ndBrainTrainer* const auxiliaryTrainer = new ndBrainTrainer(&m_policy);
 m_auxiliaryTrainers.PushBack(auxiliaryTrainer);
 }
@@ -387,25 +387,25 @@ ndBrainAgentDiscretePolicyGradient_TrainerMaster::ndBrainAgentDiscretePolicyGrad
 layers.PushBack(new ndBrainLayerLinear(layers[layers.GetCount() - 1]->GetOutputSize(), 1));
 for (ndInt32 i = 0; i < layers.GetCount(); ++i)
 {
- m_baseLineValue.AddLayer(layers[i]);
+ m_value.AddLayer(layers[i]);
 }
- m_baseLineValue.InitWeights();
+ m_value.InitWeights();
- ndAssert(m_baseLineValue.GetOutputSize() == 1);
- ndAssert(m_baseLineValue.GetInputSize() == m_actor.GetInputSize());
- ndAssert(!strcmp((m_baseLineValue[m_baseLineValue.GetCount() - 1])->GetLabelId(), "ndBrainLayerLinear"));
+ ndAssert(m_value.GetOutputSize() == 1);
+ ndAssert(m_value.GetInputSize() == m_policy.GetInputSize());
+ ndAssert(!strcmp((m_value[m_value.GetCount() - 1])->GetLabelId(), "ndBrainLayerLinear"));
 m_baseLineValueTrainers.SetCount(0);
 for (ndInt32 i = 0; i < m_bashBufferSize; ++i)
 {
- ndBrainTrainer* const trainer = new ndBrainTrainer(&m_baseLineValue);
+ ndBrainTrainer* const trainer = new ndBrainTrainer(&m_value);
 m_baseLineValueTrainers.PushBack(trainer);
 }
 m_baseLineValueOptimizer = new ndBrainOptimizerAdam();
 m_baseLineValueOptimizer->SetRegularizer(ndBrainFloat(1.0e-4f));
- m_baseValueWorkingBufferSize = m_baseLineValue.CalculateWorkingBufferSize();
+ m_baseValueWorkingBufferSize = m_value.CalculateWorkingBufferSize();
 m_workingBuffer.SetCount(m_baseValueWorkingBufferSize * hyperParameters.m_threadsCount);
 }
@@ -427,14 +427,14 @@ ndBrainAgentDiscretePolicyGradient_TrainerMaster::~ndBrainAgentDiscretePolicyGra
 delete[] m_randomGenerator;
 }
-ndBrain* ndBrainAgentDiscretePolicyGradient_TrainerMaster::GetActor()
+ndBrain* ndBrainAgentDiscretePolicyGradient_TrainerMaster::GetPolicyNetwork()
 {
- return &m_actor;
+ return &m_policy;
 }
-ndBrain* ndBrainAgentDiscretePolicyGradient_TrainerMaster::GetCritic()
+ndBrain* ndBrainAgentDiscretePolicyGradient_TrainerMaster::GetValueNetwork()
 {
- return &m_baseLineValue;
+ return &m_value;
 }
 const ndString& ndBrainAgentDiscretePolicyGradient_TrainerMaster::GetName() const
@@ -586,7 +586,7 @@ void ndBrainAgentDiscretePolicyGradient_TrainerMaster::OptimizeCritic()
 for (ndInt32 i = iterator++; i < count; i = iterator++)
 {
 const ndBrainMemVector observation(m_trajectoryAccumulator.GetObservations(i), m_numberOfObservations);
- m_baseLineValue.MakePrediction(observation, actions, workingBuffer);
+ m_value.MakePrediction(observation, actions, workingBuffer);
 ndBrainFloat baseLine = actions[0];
 ndBrainFloat reward = m_trajectoryAccumulator.GetReward(i);
 ndBrainFloat advantage = reward - baseLine;
diff --git a/newton-4.00/sdk/dBrain/ndBrainAgentDiscretePolicyGradient_Trainer.h b/newton-4.00/sdk/dBrain/ndBrainAgentDiscretePolicyGradient_Trainer.h
index 7f1ecd6f4..5e9bfbffd 100644
--- a/newton-4.00/sdk/dBrain/ndBrainAgentDiscretePolicyGradient_Trainer.h
+++ b/newton-4.00/sdk/dBrain/ndBrainAgentDiscretePolicyGradient_Trainer.h
@@ -135,8 +135,8 @@ class ndBrainAgentDiscretePolicyGradient_TrainerMaster : public ndBrainThreadPoo
 ndBrainAgentDiscretePolicyGradient_TrainerMaster(const HyperParameters& hyperParameters);
 virtual ~ndBrainAgentDiscretePolicyGradient_TrainerMaster();
- ndBrain* GetActor();
- ndBrain* GetCritic();
+ ndBrain* GetValueNetwork();
+ ndBrain* GetPolicyNetwork();
 const ndString& GetName() const;
 void SetName(const ndString& name);
@@ -157,8 +157,8 @@ class ndBrainAgentDiscretePolicyGradient_TrainerMaster : public ndBrainThreadPoo
 void UpdateBaseLineValue();
 ndBrainAgentDiscretePolicyGradient_Trainer::ndRandomGenerator* GetRandomGenerator();
- ndBrain m_actor;
- ndBrain m_baseLineValue;
+ ndBrain m_policy;
+ ndBrain m_value;
 ndBrainOptimizerAdam* m_optimizer;
 ndArray m_trainers;
 ndArray m_weightedTrainer;