Commit acc5c8d

renamed neural net agents to policy and value networks.

JulioJerez committed Sep 27, 2024
1 parent 9c88017 commit acc5c8d
Showing 22 changed files with 155 additions and 137 deletions.
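
In summary, the rename applied across the demos and the dBrain SDK is: the agent member m_actor becomes m_policy, and the trainer-master accessors GetActor() and GetCritic() become GetPolicyNetwork() and GetValueNetwork(). Below is a minimal sketch of the save pattern as it reads after the rename, assembled from the demo training loops shown further down (CONTROLLER_NAME, m_master and ndGetWorkingFileName are as used in those demos; the local names policy and value are illustrative, not from the commit):

// Persist the current policy and value networks, using the renamed accessors.
char name[256];
char fileName[1024];

ndBrain* const policy = m_master->GetPolicyNetwork();
snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
policy->SaveToFile(fileName);

ndBrain* const value = m_master->GetValueNetwork();
snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
value->SaveToFile(fileName);
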
Binary file modified newton-4.00/applications/media/marineRocks1.fbx
Binary file modified newton-4.00/applications/media/marineRocks2.fbx
@@ -204,7 +204,7 @@ namespace ndAdvancedRobot
}

ndController(const ndController& src)
:ndBrainAgentContinuePolicyGradient(src.m_actor)
:ndBrainAgentContinuePolicyGradient(src.m_policy)
,m_robot(nullptr)
{
}
@@ -1078,7 +1078,7 @@ namespace ndAdvancedRobot
hyperParameters.m_numberOfObservations = ND_AGENT_INPUT_SIZE;

m_master = ndSharedPtr<ndBrainAgentContinuePolicyGradient_TrainerMaster>(new ndBrainAgentContinuePolicyGradient_TrainerMaster(hyperParameters));
m_bestActor = ndSharedPtr<ndBrain>(new ndBrain(*m_master->GetActor()));
m_bestActor = ndSharedPtr<ndBrain>(new ndBrain(*m_master->GetPolicyNetwork()));

snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME);
m_master->SetName(name);
@@ -1088,12 +1088,12 @@ namespace ndAdvancedRobot
snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
ndSharedPtr<ndBrain> critic(ndBrainLoad::Load(fileName));
m_master->GetCritic()->CopyFrom(**critic);
m_master->GetValueNetwork()->CopyFrom(**critic);

snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
ndSharedPtr<ndBrain> actor(ndBrainLoad::Load(fileName));
m_master->GetActor()->CopyFrom(**actor);
m_master->GetPolicyNetwork()->CopyFrom(**actor);
#endif

auto SpawnModel = [this, scene, &visualMesh, floor](const ndMatrix& matrix)
@@ -1215,7 +1215,7 @@ namespace ndAdvancedRobot
if (m_lastEpisode != m_master->GetEposideCount())
{
m_maxScore = rewardTrajectory;
m_bestActor->CopyFrom(*m_master->GetActor());
m_bestActor->CopyFrom(*m_master->GetPolicyNetwork());
ndExpandTraceMessage("best actor episode: %u\treward %f\ttrajectoryFrames: %f\n", m_master->GetEposideCount(), 100.0f * m_master->GetAverageScore() / m_horizon, m_master->GetAverageFrames());
m_lastEpisode = m_master->GetEposideCount();
}
@@ -1237,13 +1237,13 @@ namespace ndAdvancedRobot
m_saveScore = ndFloor(rewardTrajectory) + 2.0f;

// save partial controller in case of crash
ndBrain* const actor = m_master->GetActor();
ndBrain* const actor = m_master->GetPolicyNetwork();
char name[256];
snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
actor->SaveToFile(fileName);

ndBrain* const critic = m_master->GetCritic();
ndBrain* const critic = m_master->GetValueNetwork();
snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
critic->SaveToFile(fileName);
@@ -1254,9 +1254,9 @@ namespace ndAdvancedRobot
{
char fileName[1024];
m_modelIsTrained = true;
m_master->GetActor()->CopyFrom(*(*m_bestActor));
m_master->GetPolicyNetwork()->CopyFrom(*(*m_bestActor));
ndGetWorkingFileName(m_master->GetName().GetStr(), fileName);
m_master->GetActor()->SaveToFile(fileName);
m_master->GetPolicyNetwork()->SaveToFile(fileName);
ndExpandTraceMessage("saving to file: %s\n", fileName);
ndExpandTraceMessage("training complete\n");
ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer;
18 changes: 9 additions & 9 deletions newton-4.00/applications/ndSandbox/demos/ndCartpoleContinue.cpp
@@ -77,7 +77,7 @@ namespace ndCarpole_1
}

ndController(const ndController& src)
:ndBrainAgentContinuePolicyGradient(src.m_actor)
:ndBrainAgentContinuePolicyGradient(src.m_policy)
,m_robot(nullptr)
{
}
@@ -330,7 +330,7 @@ namespace ndCarpole_1
hyperParameters.m_discountFactor = ndReal(m_discountFactor);

m_master = ndSharedPtr<ndBrainAgentContinuePolicyGradient_TrainerMaster>(new ndBrainAgentContinuePolicyGradient_TrainerMaster(hyperParameters));
m_bestActor = ndSharedPtr< ndBrain>(new ndBrain(*m_master->GetActor()));
m_bestActor = ndSharedPtr< ndBrain>(new ndBrain(*m_master->GetPolicyNetwork()));

snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME);
m_master->SetName(name);
@@ -340,12 +340,12 @@ namespace ndCarpole_1
snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
ndSharedPtr<ndBrain> critic(ndBrainLoad::Load(fileName));
m_master->GetCritic()->CopyFrom(**critic);
m_master->GetValueNetwork()->CopyFrom(**critic);

snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
ndSharedPtr<ndBrain> actor(ndBrainLoad::Load(fileName));
m_master->GetActor()->CopyFrom(**actor);
m_master->GetPolicyNetwork()->CopyFrom(**actor);
#endif

ndWorld* const world = scene->GetWorld();
@@ -471,7 +471,7 @@ namespace ndCarpole_1
if (m_lastEpisode != m_master->GetEposideCount())
{
m_maxScore = rewardTrajectory;
m_bestActor->CopyFrom(*m_master->GetActor());
m_bestActor->CopyFrom(*m_master->GetPolicyNetwork());
ndExpandTraceMessage("best actor episode: %d\treward %f\ttrajectoryFrames: %f\n", m_master->GetEposideCount(), 100.0f * m_master->GetAverageScore() / m_horizon, m_master->GetAverageFrames());
m_lastEpisode = m_master->GetEposideCount();
}
@@ -483,13 +483,13 @@ namespace ndCarpole_1
m_saveScore = ndFloor(rewardTrajectory) + 2.0f;

// save partial controller in case of crash
ndBrain* const actor = m_master->GetActor();
ndBrain* const actor = m_master->GetPolicyNetwork();
char name[256];
snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
actor->SaveToFile(fileName);

ndBrain* const critic = m_master->GetCritic();
ndBrain* const critic = m_master->GetValueNetwork();
snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
critic->SaveToFile(fileName);
@@ -510,9 +510,9 @@ namespace ndCarpole_1
{
char fileName[1024];
m_modelIsTrained = true;
m_master->GetActor()->CopyFrom(*(*m_bestActor));
m_master->GetPolicyNetwork()->CopyFrom(*(*m_bestActor));
ndGetWorkingFileName(m_master->GetName().GetStr(), fileName);
m_master->GetActor()->SaveToFile(fileName);
m_master->GetPolicyNetwork()->SaveToFile(fileName);
ndExpandTraceMessage("saving to file: %s\n", fileName);
ndExpandTraceMessage("training complete\n");
ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer;
18 changes: 9 additions & 9 deletions newton-4.00/applications/ndSandbox/demos/ndCartpoleDiscrete.cpp
@@ -123,7 +123,7 @@ namespace ndCarpole_0
}

ndController(const ndController& src)
:ndBrainAgentDiscretePolicyGradient(src.m_actor)
:ndBrainAgentDiscretePolicyGradient(src.m_policy)
,m_robot(nullptr)
{
}
@@ -369,7 +369,7 @@ namespace ndCarpole_0
hyperParameters.m_discountFactor = ndReal(m_discountFactor);

m_master = ndSharedPtr<ndBrainAgentDiscretePolicyGradient_TrainerMaster>(new ndBrainAgentDiscretePolicyGradient_TrainerMaster(hyperParameters));
m_bestActor = ndSharedPtr< ndBrain>(new ndBrain(*m_master->GetActor()));
m_bestActor = ndSharedPtr< ndBrain>(new ndBrain(*m_master->GetPolicyNetwork()));

snprintf(name, sizeof(name), "%s.dnn", CONTROLLER_NAME);
m_master->SetName(name);
@@ -379,12 +379,12 @@ namespace ndCarpole_0
snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
ndSharedPtr<ndBrain> critic(ndBrainLoad::Load(fileName));
m_master->GetCritic()->CopyFrom(**critic);
m_master->GetValueNetwork()->CopyFrom(**critic);

snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
ndSharedPtr<ndBrain> actor(ndBrainLoad::Load(fileName));
m_master->GetActor()->CopyFrom(**actor);
m_master->GetPolicyNetwork()->CopyFrom(**actor);
#endif

ndWorld* const world = scene->GetWorld();
@@ -510,7 +510,7 @@ namespace ndCarpole_0
if (m_lastEpisode != m_master->GetEposideCount())
{
m_maxScore = rewardTrajectory;
m_bestActor->CopyFrom(*m_master->GetActor());
m_bestActor->CopyFrom(*m_master->GetPolicyNetwork());
ndExpandTraceMessage("best actor episode: %d\treward %f\ttrajectoryFrames: %f\n", m_master->GetEposideCount(), 100.0f * m_master->GetAverageScore() / m_horizon, m_master->GetAverageFrames());
m_lastEpisode = m_master->GetEposideCount();
}
@@ -522,13 +522,13 @@ namespace ndCarpole_0
m_saveScore = ndFloor(rewardTrajectory) + 2.0f;

// save partial controller in case of crash
ndBrain* const actor = m_master->GetActor();
ndBrain* const actor = m_master->GetPolicyNetwork();
char name[256];
snprintf(name, sizeof(name), "%s_actor.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
actor->SaveToFile(fileName);

ndBrain* const critic = m_master->GetCritic();
ndBrain* const critic = m_master->GetValueNetwork();
snprintf(name, sizeof(name), "%s_critic.dnn", CONTROLLER_NAME);
ndGetWorkingFileName(name, fileName);
critic->SaveToFile(fileName);
@@ -549,9 +549,9 @@ namespace ndCarpole_0
{
char fileName[1024];
m_modelIsTrained = true;
m_master->GetActor()->CopyFrom(*(*m_bestActor));
m_master->GetPolicyNetwork()->CopyFrom(*(*m_bestActor));
ndGetWorkingFileName(m_master->GetName().GetStr(), fileName);
m_master->GetActor()->SaveToFile(fileName);
m_master->GetPolicyNetwork()->SaveToFile(fileName);
ndExpandTraceMessage("saving to file: %s\n", fileName);
ndExpandTraceMessage("training complete\n");
ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer;
10 changes: 5 additions & 5 deletions newton-4.00/applications/ndSandbox/demos/ndQuadrupedTest_1.cpp
@@ -229,7 +229,7 @@ namespace ndQuadruped_1
}

ndController(const ndController& src)
:ndBrainAgentContinuePolicyGradient(src.m_actor)
:ndBrainAgentContinuePolicyGradient(src.m_policy)
, m_robot(nullptr)
{
}
@@ -1275,7 +1275,7 @@ namespace ndQuadruped_1
hyperParameters.m_numberOfObservations = ND_AGENT_INPUT_SIZE;

m_master = ndSharedPtr<ndBrainAgentContinuePolicyGradient_TrainerMaster>(new ndBrainAgentContinuePolicyGradient_TrainerMaster(hyperParameters));
m_bestActor = ndSharedPtr<ndBrain>(new ndBrain(*m_master->GetActor()));
m_bestActor = ndSharedPtr<ndBrain>(new ndBrain(*m_master->GetPolicyNetwork()));
m_master->SetName(CONTROLLER_NAME);

auto SpawnModel = [this, scene](const ndMatrix& matrix, bool debug)
@@ -1412,7 +1412,7 @@ namespace ndQuadruped_1
if (m_lastEpisode != m_master->GetEposideCount())
{
m_maxScore = rewardTrajectory;
m_bestActor->CopyFrom(*m_master->GetActor());
m_bestActor->CopyFrom(*m_master->GetPolicyNetwork());
ndExpandTraceMessage("best actor episode: %d\treward %f\ttrajectoryFrames: %f\n", m_master->GetEposideCount(), 100.0f * m_master->GetAverageScore() / m_horizon, m_master->GetAverageFrames());
m_lastEpisode = m_master->GetEposideCount();
}
@@ -1433,9 +1433,9 @@ namespace ndQuadruped_1
{
char fileName[1024];
m_modelIsTrained = true;
m_master->GetActor()->CopyFrom(*(*m_bestActor));
m_master->GetPolicyNetwork()->CopyFrom(*(*m_bestActor));
ndGetWorkingFileName(m_master->GetName().GetStr(), fileName);
m_master->GetActor()->SaveToFile(fileName);
m_master->GetPolicyNetwork()->SaveToFile(fileName);
ndExpandTraceMessage("saving to file: %s\n", fileName);
ndExpandTraceMessage("training complete\n");
ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer;
10 changes: 5 additions & 5 deletions newton-4.00/applications/ndSandbox/demos/ndQuadrupedTest_2.cpp
@@ -243,7 +243,7 @@ namespace ndQuadruped_2
}

ndController(const ndController& src)
:ndBrainAgentContinuePolicyGradient(src.m_actor)
:ndBrainAgentContinuePolicyGradient(src.m_policy)
,m_robot(nullptr)
{
}
@@ -1418,7 +1418,7 @@ namespace ndQuadruped_2
hyperParameters.m_numberOfObservations = ND_AGENT_INPUT_SIZE;

m_master = ndSharedPtr<ndBrainAgentContinuePolicyGradient_TrainerMaster>(new ndBrainAgentContinuePolicyGradient_TrainerMaster(hyperParameters));
m_bestActor = ndSharedPtr<ndBrain>(new ndBrain(*m_master->GetActor()));
m_bestActor = ndSharedPtr<ndBrain>(new ndBrain(*m_master->GetPolicyNetwork()));
m_master->SetName(CONTROLLER_NAME);

ndModelArticulation* const visualModel = CreateModel(scene, matrix);
@@ -1557,7 +1557,7 @@ namespace ndQuadruped_2
if (m_lastEpisode != m_master->GetEposideCount())
{
m_maxScore = rewardTrajectory;
m_bestActor->CopyFrom(*m_master->GetActor());
m_bestActor->CopyFrom(*m_master->GetPolicyNetwork());
ndExpandTraceMessage("best actor episode: %d\treward %f\ttrajectoryFrames: %f\n", m_master->GetEposideCount(), 100.0f * m_master->GetAverageScore() / m_horizon, m_master->GetAverageFrames());
m_lastEpisode = m_master->GetEposideCount();
}
@@ -1578,9 +1578,9 @@ namespace ndQuadruped_2
{
char fileName[1024];
m_modelIsTrained = true;
m_master->GetActor()->CopyFrom(*(*m_bestActor));
m_master->GetPolicyNetwork()->CopyFrom(*(*m_bestActor));
ndGetWorkingFileName(m_master->GetName().GetStr(), fileName);
m_master->GetActor()->SaveToFile(fileName);
m_master->GetPolicyNetwork()->SaveToFile(fileName);
ndExpandTraceMessage("saving to file: %s\n", fileName);
ndExpandTraceMessage("training complete\n");
ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer;
8 changes: 4 additions & 4 deletions newton-4.00/applications/ndSandbox/demos/ndUnicycle.cpp
@@ -512,7 +512,7 @@ namespace ndUnicycle
hyperParameters.m_discountFactor = ndReal(m_discountFactor);

m_master = ndSharedPtr<ndBrainAgentContinuePolicyGradient_TrainerMaster>(new ndBrainAgentContinuePolicyGradient_TrainerMaster(hyperParameters));
m_bestActor = ndSharedPtr< ndBrain>(new ndBrain(*m_master->GetActor()));
m_bestActor = ndSharedPtr< ndBrain>(new ndBrain(*m_master->GetPolicyNetwork()));

m_master->SetName(CONTROLLER_NAME);

@@ -650,7 +650,7 @@ namespace ndUnicycle
if (m_lastEpisode != m_master->GetEposideCount())
{
m_maxScore = rewardTrajectory;
m_bestActor->CopyFrom(*m_master->GetActor());
m_bestActor->CopyFrom(*m_master->GetPolicyNetwork());
ndExpandTraceMessage("best actor episode: %d\treward %f\ttrajectoryFrames: %f\n", m_master->GetEposideCount(), 100.0f * m_master->GetAverageScore() / m_horizon, m_master->GetAverageFrames());
m_lastEpisode = m_master->GetEposideCount();
}
@@ -671,9 +671,9 @@ namespace ndUnicycle
{
char fileName[1024];
m_modelIsTrained = true;
m_master->GetActor()->CopyFrom(*(*m_bestActor));
m_master->GetPolicyNetwork()->CopyFrom(*(*m_bestActor));
ndGetWorkingFileName(m_master->GetName().GetStr(), fileName);
m_master->GetActor()->SaveToFile(fileName);
m_master->GetPolicyNetwork()->SaveToFile(fileName);
ndExpandTraceMessage("saving to file: %s\n", fileName);
ndExpandTraceMessage("training complete\n");
ndUnsigned64 timer = ndGetTimeInMicroseconds() - m_timer;
18 changes: 18 additions & 0 deletions newton-4.00/applications/ndSandbox/main.cpp
@@ -27,6 +27,24 @@ int main(int, char**)
// ndTrace(("%g\n", x));
//}

//ndArray<ndVector> xxxx;
//for (int y = 0; y < 256; y++)
//{
// for (int x = 0; x < 256; x++)
// {
// ndVector p(ndFloat32(x), ndFloat32(y), 0.0f, 0.0f);
// xxxx.PushBack(p);
// xxxx.PushBack(p);
// xxxx.PushBack(p);
// xxxx.PushBack(p);
// xxxx.PushBack(p);
// xxxx.PushBack(p);
// }
//}
//ndArray<ndInt32> index;
//index.SetCount(xxxx.GetCount());
//ndInt32 vertexCount = ndVertexListToIndexList(&xxxx[0].m_x, sizeof(ndVector), 3, ndInt32(xxxx.GetCount()), &index[0], ndFloat32(1.0e-6f));

ndDemoEntityManager demos;
demos.Run();
return 0;
4 changes: 2 additions & 2 deletions newton-4.00/applications/ndSandbox/ndDemoEntityManager.cpp
@@ -46,7 +46,7 @@
//#define DEFAULT_SCENE 6 // basic Trigger
//#define DEFAULT_SCENE 7 // object Placement
//#define DEFAULT_SCENE 8 // particle fluid
//#define DEFAULT_SCENE 9 // static mesh collision
#define DEFAULT_SCENE 9 // static mesh collision
//#define DEFAULT_SCENE 10 // static user mesh collision
//#define DEFAULT_SCENE 11 // basic joints
//#define DEFAULT_SCENE 12 // basic vehicle
@@ -58,7 +58,7 @@
//#define DEFAULT_SCENE 18 // cart pole continue controller
//#define DEFAULT_SCENE 19 // unit cycle controller
//#define DEFAULT_SCENE 20 // simple industrial robot
#define DEFAULT_SCENE 21 // advanced industrial robot
//#define DEFAULT_SCENE 21 // advanced industrial robot
//#define DEFAULT_SCENE 22 // quadruped test 1
//#define DEFAULT_SCENE 23 // quadruped test 2
//#define DEFAULT_SCENE 24 // quadruped test 3
16 changes: 8 additions & 8 deletions newton-4.00/sdk/dBrain/ndBrainAgentContinuePolicyGradient.cpp
@@ -26,13 +26,13 @@

ndBrainAgentContinuePolicyGradient::ndBrainAgentContinuePolicyGradient(const ndSharedPtr<ndBrain>& actor)
:ndBrainAgent()
,m_actor(actor)
,m_policy(actor)
{
}

ndBrainAgentContinuePolicyGradient::ndBrainAgentContinuePolicyGradient(const ndBrainAgentContinuePolicyGradient& src)
:ndBrainAgent(src)
,m_actor(src.m_actor)
,m_policy(src.m_policy)
{
}

@@ -85,16 +85,16 @@ void ndBrainAgentContinuePolicyGradient::OptimizeStep()

void ndBrainAgentContinuePolicyGradient::Step()
{
ndInt32 bufferSize = m_actor->CalculateWorkingBufferSize();
ndInt32 bufferSize = m_policy->CalculateWorkingBufferSize();
ndBrainFloat* const bufferMem = ndAlloca(ndBrainFloat, bufferSize);
ndBrainFloat* const actionBuffer = ndAlloca(ndBrainFloat, m_actor->GetOutputSize());
ndBrainFloat* const observationBuffer = ndAlloca(ndBrainFloat, m_actor->GetInputSize());
ndBrainFloat* const actionBuffer = ndAlloca(ndBrainFloat, m_policy->GetOutputSize());
ndBrainFloat* const observationBuffer = ndAlloca(ndBrainFloat, m_policy->GetInputSize());

ndBrainMemVector workingBuffer(bufferMem, bufferSize);
ndBrainMemVector actions(actionBuffer, m_actor->GetOutputSize());
ndBrainMemVector observations(observationBuffer, m_actor->GetInputSize());
ndBrainMemVector actions(actionBuffer, m_policy->GetOutputSize());
ndBrainMemVector observations(observationBuffer, m_policy->GetInputSize());

GetObservation(observationBuffer);
m_actor->MakePrediction(observations, actions, workingBuffer);
m_policy->MakePrediction(observations, actions, workingBuffer);
ApplyActions(&actions[0]);
}
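
As a usage note, the same prediction path can be exercised outside an agent with a network loaded from disk. This is a minimal sketch only, not part of the commit: the file name is hypothetical, and every call used here appears elsewhere in this diff.

// Minimal sketch: one forward pass through a policy network loaded from disk.
// "myController.dnn" is a hypothetical file name.
char fileName[1024];
ndGetWorkingFileName("myController.dnn", fileName);
ndSharedPtr<ndBrain> policy(ndBrainLoad::Load(fileName));

ndInt32 bufferSize = policy->CalculateWorkingBufferSize();
ndBrainFloat* const bufferMem = ndAlloca(ndBrainFloat, bufferSize);
ndBrainFloat* const actionBuffer = ndAlloca(ndBrainFloat, policy->GetOutputSize());
ndBrainFloat* const observationBuffer = ndAlloca(ndBrainFloat, policy->GetInputSize());

ndBrainMemVector workingBuffer(bufferMem, bufferSize);
ndBrainMemVector actions(actionBuffer, policy->GetOutputSize());
ndBrainMemVector observations(observationBuffer, policy->GetInputSize());

for (ndInt32 i = 0; i < policy->GetInputSize(); ++i)
{
	observations[i] = ndBrainFloat(0.0f); // replace with real sensor readings
}
policy->MakePrediction(observations, actions, workingBuffer);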