From 03538d749ab7a2a0bc448d0db92da52e275c458f Mon Sep 17 00:00:00 2001 From: Benjamin Redelings Date: Sat, 7 Oct 2023 13:26:10 -0400 Subject: [PATCH] Draft of peeling to a node with a sequence. --- src/substitution/substitution.cc | 149 ++++++++++++++++++++++++++++++- 1 file changed, 148 insertions(+), 1 deletion(-) diff --git a/src/substitution/substitution.cc b/src/substitution/substitution.cc index 7f8c5630b..606940b88 100644 --- a/src/substitution/substitution.cc +++ b/src/substitution/substitution.cc @@ -1233,7 +1233,154 @@ namespace substitution { } object_ptr - peel_branch2(const EVector& LCB, + peel_branch2(const EVector& sequence, + const alphabet& a, + const EVector& smap, + const EVector& LCB, + const EVector& A, + const EVector& transition_P, + const Matrix& F) + { + total_peel_internal_branches++; + + const int n_models = transition_P.size(); + const int n_states = transition_P[0].as_>().size1(); + const int matrix_size = n_models * n_states; + + // get the relationships with the sub-alignments for the (two) branches behind b0 + + // Do this before accessing matrices or other_subst + int n_branches_in = LCB.size(); + int L = sequence.size(); + auto LCB_OUT = object_ptr(new Likelihood_Cache_Branch(L, n_models, n_states)); +#ifndef NDEBUG + assert(A.size() == n_branches_in); + for(int i=0; i>().length2() == L); + assert(A[i].as_>().length1() == LCB[i].as_().n_columns()); + } +#endif + + // scratch matrix + double* S = LCB_OUT->scratch(0); + + Matrix ones(n_models, n_states); + element_assign(ones, 1); + + log_prod total; + int total_scale = 0; + vector AL(n_branches_in); + for(int j=0; j < n_branches_in;j++) + AL[j] = A[j].as_>().size(); + + vector s(n_branches_in, 0); + int s_out = 0; + vector i(n_branches_in, 0); + for(;;) + { + for(int j =0;j < n_branches_in; j++) + { + auto& a = A[j].as_>(); + auto& lcb = LCB[j].as_(); + auto& ij = i[j]; + auto& sj = s[j]; + while (ij < AL[j] and not a.has_character2(ij)) + { + assert(a.has_character1(ij)); + double p_col = element_prod_sum(F.begin(), lcb[sj], matrix_size ); + assert(std::isnan(p_col) or (0 <= p_col and p_col <= 1.00000000001)); + total *= p_col; + total_scale += lcb.scale(sj); + ij++; + sj++; + } + } + if (i.back() >= AL.back()) + { + for(int j=0;j>().has_character2(i[j])); + } + + int letter = sequence[s_out].as_int(); + + int scale = 0; + for(int k=0; k>().has_character1(i[j])) + { + auto& lcb = LCB[j].as_(); + element_prod_assign(S, lcb[s[j]], matrix_size); + scale += lcb.scale(s[j]); + s[j]++; + } + } + + // We need to zero out the inconsistent characters. + // Observing the complete state doesn't decouple subtrees unless there is only 1 mixture component. + if (letter >= 0) + { + auto& ok = a.letter_fmask(letter); + for(int m=0;m>(); + + // compute the distribution at the target (parent) node - multiple letters + for(int s1=0;s1scale(s_out++) = scale; + } + + LCB_OUT->other_subst.log() += total_scale*log_scale_min; + LCB_OUT->other_subst = total; + for(int j=0;jother_subst *= LCB[j].as_().other_subst; + return LCB_OUT; + } + + object_ptr + peel_branch3(const EVector& LCB, const EVector& A, const EVector& transition_P, const Matrix& F)