NeuroMod.DAModGain = 0 by default, esp in vspatch; vspatch uses std init wts and learns on CaSpkD instead of GeIntNorm -- gets nice differentiated reps and works well. vspatch test case tests probabilistic reward learning; works well.
rcoreilly committed Apr 1, 2024
1 parent e7171f5 commit a5a674a
Showing 11 changed files with 48 additions and 31 deletions.
22 changes: 10 additions & 12 deletions PVLV.md
@@ -38,16 +38,16 @@ Note that we use anatomical labels for computationally-specified functions consi

In contrast to the minus-plus phase-based timing of cortical learning, the RL-based learning in PVLV is generally organized on trial-wise boundaries, with some factors computed online within the trial. Here is a schematic, for an intermediate amount of positive CS learning and VSPatch prediction of a positive US outcome, with an "Eat" action that drives the US:

| Trial Step: | 0 | 1 | 2 | 3 |
| ------------ | -------- | ---- | ---- | ----------- |
| Event / Act | CS | | Eat | US |
| SC -> ACh | ++ | | | |
| BLA | ++ | | Rp | R |
| BLA dw | tr=S*ACh | | | R(R-Rp)tr |
| OFC | BLA-> | PT | PT | reset PT |
| VSPatch = VP | | | ++ Rp | |
| VP dw | | | | Sp Rp DA |
| DA | ++ (BLA) | | | ++ (US-VPp) |
| Trial Step: | 0 | 1 | 2 | 3 |
| ------------ | --------- | ---- | ----- | ----------- |
| Event / Act | CS | | Eat | +++ US |
| SC -> ACh | +++ | | | |
| BLA | ++ | | Rp | R |
| BLA dw | tr=SACh | | | R(R-Rp)tr |
| OFC | BLA-> | PT | PT | reset PT |
| VSPatch = VP | | | ++ Rp | |
| VP dw | | | | Sp Rp ⋅ DA |
| DA | ++ (BLA) | | | + (US-VPp) |

* Rp = receiving activity on previous trial
* DA at US is computed at start of trial in PVLV.NewState, based on VS D1 - D2 on prev trial.
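The DA rows in the schematic above can be sketched as a simple prediction error: at the US, phasic DA is the US value minus the VSPatch prediction carried over from the prior trial. This is a minimal sketch; `daAtUS` is a hypothetical helper, not part of the axon API.

```go
package main

import "fmt"

// daAtUS sketches the phasic DA signal at the US in PVLV: the actual US
// value minus the VSPatch prediction from the previous trial (VPp).
// Name and signature are illustrative only.
func daAtUS(us, vsPatchPrev float32) float32 {
	return us - vsPatchPrev
}

func main() {
	// Early in learning VSPatch predicts little, so DA at the US is strongly positive.
	fmt.Println(daAtUS(1.0, 0.1))
	// With a fully learned prediction, DA at the US shrinks toward zero.
	fmt.Println(daAtUS(1.0, 1.0))
}
```

As the table indicates, this is why the US-time DA burst goes from `++` early in learning to a smaller `+` as the VSPatch prediction grows.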
@@ -190,8 +190,6 @@ The learning rule here is a standard "3 factor" dopamine-modulated learning, ver

where `DAlr` is the dopamine-signed learning rate factor for D1 vs. D2, which is a function of US for the current trial (applied at start of a trial) minus VSPatch _from the prior time step_. Thus the prediction error in VSPatch relative to US reward drives learning, such that it will always adjust to reduce error, consistent with standard Rescorla-Wagner / TD learning rules.
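The `VP dw` row of the schematic (Sp Rp ⋅ DA) together with this description suggests the following minimal sketch of the 3-factor rule, assuming `DAlr` multiplies sender and receiver activity from the prior time step. Function names are illustrative, not the actual axon code.

```go
package main

import "fmt"

// vsPatchDWt sketches the 3-factor VSPatch weight change: dwt = DAlr * Sp * Rp,
// where Sp and Rp are sender and receiver activity from the prior time step,
// and DAlr is the dopamine-signed learning rate factor (sign-reversed for D2
// relative to D1). Hypothetical helper, not the axon API.
func vsPatchDWt(daLRate, sp, rp float32) float32 {
	return daLRate * sp * rp
}

func main() {
	us, vpPrev := float32(1.0), float32(0.25)
	da := us - vpPrev // positive prediction error at the US
	fmt.Println(vsPatchDWt(da, 0.5, 0.5))  // D1: weights increase
	fmt.Println(vsPatchDWt(-da, 0.5, 0.5)) // D2: sign reversed
}
```

Because `DAlr` is the US-minus-prediction error, the rule drives weights in whichever direction reduces the prediction error, as the Rescorla-Wagner / TD comparison in the text notes.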

Also, the learning factor for the `Rp` receiving activity on the prior time step is the `GeIntNorm` Max-normalized value, not raw activity, because VSPatch neurons can be relatively inactive at the start (this is done by setting `SpkPrv` to `GeIntNorm` for this layer type only).

# Negative USs and Costs

There are two qualitatively-different types of negative outcome values, which require distinct pathways within the model:
2 changes: 1 addition & 1 deletion axon/gtigen.go

Some generated files are not rendered by default.

4 changes: 0 additions & 4 deletions axon/layerparams.go
@@ -949,10 +949,6 @@ func (ly *LayerParams) NewStateNeuron(ctx *Context, ni, di uint32, vals *LayerVa
SetNrnV(ctx, ni, di, SpkMax, 0)
SetNrnV(ctx, ni, di, SpkMaxCa, 0)

if ly.LayType == VSPatchLayer {
SetNrnV(ctx, ni, di, SpkPrv, NrnV(ctx, ni, di, GeIntNorm))
}

ly.Acts.DecayState(ctx, ni, di, ly.Acts.Decay.Act, ly.Acts.Decay.Glong, ly.Acts.Decay.AHP)
// Note: synapse-level Ca decay happens in DWt
ly.Acts.KNaNewState(ctx, ni, di)
4 changes: 2 additions & 2 deletions axon/neuromod.go
@@ -60,7 +60,7 @@ type NeuroModParams struct {
// valence coding of this layer -- may affect specific layer types but does not directly affect neuromodulators currently
Valence ValenceTypes

// multiplicative factor on overall DA modulation specified by DAMod -- resulting overall gain factor is: 1 + DAModGain * DA, where DA is appropriate DA-driven factor
// dopamine modulation of excitatory and inhibitory conductances (i.e., "performance dopamine" effect -- this does NOT affect learning dopamine modulation in terms of RLrate): g *= 1 + (DAModGain * DA)
DAModGain float32

// modulate the sign of the learning rate factor according to the DA sign, taking into account the DAMod sign reversal for D2Mod, also using BurstGain and DipGain to modulate DA value -- otherwise, only the magnitude of the learning rate is modulated as a function of raw DA magnitude according to DALRateMod (without additional gain factors)
@@ -86,7 +86,7 @@ type NeuroModParams struct {

func (nm *NeuroModParams) Defaults() {
// nm.DAMod is typically set by BuildConfig -- don't reset here
nm.DAModGain = 0.5
nm.DAModGain = 0
nm.DALRateMod = 0
nm.AChLRateMod = 0
nm.BurstGain = 1
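The updated `DAModGain` comment describes a pure "performance dopamine" effect on conductances: g *= 1 + (DAModGain * DA). A minimal sketch of that scaling, showing why the new default of 0 makes it a no-op (the helper name is hypothetical, not the axon API):

```go
package main

import "fmt"

// daModConductance sketches the performance-dopamine effect from the
// DAModGain comment: conductances are scaled by 1 + DAModGain*DA.
// With the new default DAModGain = 0, conductances are unchanged;
// learning-rate modulation via RLrate is unaffected either way.
func daModConductance(g, daModGain, da float32) float32 {
	return g * (1 + daModGain*da)
}

func main() {
	fmt.Println(daModConductance(1.0, 0, 0.5))   // new default: no effect
	fmt.Println(daModConductance(1.0, 0.5, 0.5)) // old default: 25% boost
}
```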
4 changes: 2 additions & 2 deletions axon/pvlv_prjns.go
@@ -56,8 +56,8 @@ func (pj *PrjnParams) VSPatchDefaults() {
pj.SWts.Adapt.On.SetBool(false)
pj.SWts.Adapt.SigGain = 1
pj.SWts.Init.SPct = 0
pj.SWts.Init.Mean = 0.1
pj.SWts.Init.Var = 0.05
pj.SWts.Init.Mean = 0.5
pj.SWts.Init.Var = 0.25
pj.SWts.Init.Sym.SetBool(false)
pj.Learn.Trace.Tau = 1
pj.Learn.Trace.LearnThr = 0 // 0.3
Binary file modified axon/shaders/gpu_newstate_neuron.spv
9 changes: 5 additions & 4 deletions examples/pcore_ds/params.go
@@ -24,10 +24,11 @@ var ParamSets = netparams.Sets{
}},
{Sel: ".MatrixLayer", Desc: "all mtx",
Params: params.Params{
"Layer.Inhib.Pool.Gi": "0.5", // 0.5 > others
"Layer.Learn.NeuroMod.BurstGain": "0.1", // 0.1 == 0.2 > 0.05 > 0.5 -- key lrate modulator
"Layer.Learn.RLRate.On": "true", // note: applied for tr update trials
"Layer.Learn.TrgAvgAct.On": "true", // true > false
"Layer.Inhib.Pool.Gi": "0.5", // 0.5 > others
"Layer.Learn.NeuroMod.BurstGain": "0.1", // 0.1 == 0.2 > 0.05 > 0.5 -- key lrate modulator
"Layer.Learn.NeuroMod.DAModGain": "0.2", // was 0.5
"Layer.Learn.RLRate.On": "true", // note: applied for tr update trials
"Layer.Learn.TrgAvgAct.RescaleOn": "true", // true > false
},
Hypers: params.Hypers{
"Layer.Learn.NeuroMod.BurstGain": {"Tweak": "-"},
6 changes: 3 additions & 3 deletions examples/pvlv/params.go
@@ -118,9 +118,9 @@ var ParamSets = netparams.Sets{
}},
{Sel: ".VSPatchPrjn", Desc: "",
Params: params.Params{
"Prjn.PrjnScale.Abs": "6",
"Prjn.Learn.Trace.LearnThr": "0.1",
"Prjn.Learn.LRate.Base": "0.2", // 0.05 def -- todo: needs faster
"Prjn.PrjnScale.Abs": "3",
"Prjn.Learn.Trace.LearnThr": "0",
"Prjn.Learn.LRate.Base": "0.05", // 0.05 def -- todo: needs faster
}},
{Sel: "#OFCposUSPTToOFCposUSPT", Desc: "",
Params: params.Params{
2 changes: 1 addition & 1 deletion examples/vspatch/config.go
@@ -68,7 +68,7 @@ type RunConfig struct {
NEpochs int `default:"30"`

// total number of trials per epoch. Should be an even multiple of NData.
NTrials int `default:"32"`
NTrials int `default:"128"`
}

// LogConfig has config parameters related to logging data
11 changes: 10 additions & 1 deletion examples/vspatch/params.go
@@ -17,18 +17,27 @@ var ParamSets = netparams.Sets{
Params: params.Params{
"Layer.Acts.Clamp.Ge": "1.0", // 1.5 is def, was 0.6 (too low)
}},
{Sel: "#State", Desc: "",
Params: params.Params{
"Layer.Inhib.ActAvg.Nominal": "0.2",
}},
{Sel: ".VSPatchLayer", Desc: "",
Params: params.Params{
"Layer.Inhib.Pool.On": "false",
"Layer.Inhib.Pool.Gi": "0.2",
"Layer.Learn.NeuroMod.DipGain": "1", // boa requires balanced..
"Layer.Learn.TrgAvgAct.GiBaseInit": "0", // 0.5 default; 0 better
"Layer.Learn.RLRate.SigmoidMin": "0.05", // 0.05 def
"Layer.Learn.NeuroMod.AChLRateMod": "0",
"Layer.Learn.NeuroMod.DAModGain": "0", // this is actual perf mod
}},
{Sel: ".VSPatchPrjn", Desc: "",
Params: params.Params{
"Prjn.PrjnScale.Abs": "6",
"Prjn.PrjnScale.Abs": "2",
"Prjn.Learn.Trace.LearnThr": "0",
"Prjn.Learn.LRate.Base": "0.05", // 0.05 def
"Prjn.SWts.Init.Mean": "0.5",
"Prjn.SWts.Init.Var": "0.25",
}},
},
}
15 changes: 14 additions & 1 deletion examples/vspatch/vspatch_env.go
@@ -30,6 +30,9 @@ type VSPatchEnv struct {
// trial counter is for the step within condition
Trial env.Ctr `view:"inline"`

// if true, reward value is a probability of getting a 1 reward
Probs bool

// number of conditions, each of which can have a different reward value
NConds int

@@ -82,6 +85,7 @@ func (ev *VSPatchEnv) Desc() string {
}

func (ev *VSPatchEnv) Defaults() {
ev.Probs = true
ev.NConds = 4
ev.NTrials = 3
ev.NUnitsY = 5
@@ -176,7 +180,16 @@ func (ev *VSPatchEnv) Step() bool {
ev.RenderState(ev.Sequence.Cur, ev.Trial.Cur)
ev.Rew = 0
if ev.Trial.Cur == ev.NTrials-1 {
ev.Rew = ev.CondVals[ev.Sequence.Cur]
rv := ev.CondVals[ev.Sequence.Cur]
if ev.Probs {
if erand.BoolP32(rv, -1, &ev.Rand) {
ev.Rew = 1
} else {
ev.Rew = 0.001
}
} else {
ev.Rew = rv
}
}
ev.Sequence.Same()
if ev.Trial.Incr() {
