From 461ee1bc5ecbc0d381e52a69fa13f2a8ddfb0891 Mon Sep 17 00:00:00 2001
From: Tian Xie <xie@semcog.org>
Date: Thu, 27 Jun 2024 12:06:03 -0400
Subject: [PATCH] #56 Added baseyear hh/p seeds before hh_transition to handle
 missing hh samples

---
 models.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/models.py b/models.py
index 160d4d3..449f175 100644
--- a/models.py
+++ b/models.py
@@ -636,9 +636,24 @@ def households_transition(
     region_ct[max_cols] = region_ct[max_cols].replace(-1, np.inf)
     region_ct[max_cols] += 1
     region_hh = households.to_frame(households.local_columns + ["large_area_id"])
+    region_hh.index = region_hh.index.astype(int)
 
     region_p = persons.to_frame(persons.local_columns)
     region_p.index = region_p.index.astype(int)
+    # issue #56
+    # append hh_seeds and p_seeds after the existing rows, re-indexed with new sequential ids above the current max
+    hh_seeds = orca.get_table('hh_seeds').to_frame().reset_index()[region_hh.columns]
+    p_seeds = orca.get_table('p_seeds').to_frame().reset_index()#[region_p.columns]
+    max_hh_idx,max_p_idx = max(region_hh.index), max(region_p.index)
+    hh_seeds.index = list(range(max_hh_idx+1, max_hh_idx+len(hh_seeds)+1))
+    hh_seeds.index.name = 'household_id'
+    p_seeds.index = list(range(max_p_idx+1, max_p_idx+len(p_seeds)+1))
+    p_seeds.index.name = 'person_id'
+    # assign each seed person the new household_id of its seed household (joined on seed_id)
+    p_seeds['household_id'] = p_seeds['seed_id'].map(hh_seeds.reset_index().set_index('seed_id')['household_id'])
+    # append the re-indexed seeds to the regional household and person tables
+    region_hh = pd.concat((region_hh, hh_seeds), axis=0)
+    region_p = pd.concat((region_p, p_seeds), axis=0)
 
     if "changed_hhs" in orca.list_tables():
         ## add changed hhs and persons from previous year back (ensure transition sample availability )