Merge pull request #74 from worldbank/v1.1

V1.1
worldbank · May 23, 2019 · 4aada18 · 4aada18
2 parents 8f539e3 + aca09b4
commit 4aada18
Show file tree

Hide file tree

Showing 13 changed files with 521 additions and 92 deletions.
diff --git a/src/ado_files/iecodebook.ado b/src/ado_files/iecodebook.ado
@@ -1,4 +1,4 @@
-*! version 1.0 31JAN2018  DIME Analytics [email protected]
+*! version 1.1 20MAY2019  DIME Analytics [email protected]
 
 // Main syntax *********************************************************************************
 

diff --git a/src/ado_files/iecompdup.ado b/src/ado_files/iecompdup.ado
@@ -1,4 +1,4 @@
-*! version 1.0 31JAN2019 DIME Analytics [email protected]
+*! version 1.1 20MAY2019  DIME Analytics [email protected]
 
 	capture program drop iecompdup
 	program iecompdup , rclass

diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado
diff --git a/src/ado_files/iefieldkit.ado b/src/ado_files/iefieldkit.ado
@@ -1,11 +1,11 @@
-*! version 1.0 31JAN2018  DIME Analytics [email protected]
+*! version 1.1 20MAY2019  DIME Analytics [email protected]
 
 capture program drop iefieldkit
 program iefieldkit, rclass
 
 	* UPDATE THESE LOCALS FOR EACH NEW VERSION PUBLISHED
-	local version "1.0"
-	local versionDate "31JAN2019"
+	local version "1.1"
+	local versionDate "20MAY2019"
 
 	syntax [anything]
 

diff --git a/src/ado_files/ietestform.ado b/src/ado_files/ietestform.ado
@@ -1,4 +1,4 @@
-*! version 1.0 31JAN2018  DIME Analytics [email protected]
+*! version 1.1 20MAY2019  DIME Analytics [email protected]
 
 capture program drop ietestform
 		program ietestform , rclass
@@ -490,7 +490,7 @@ qui {
 
 	*Variables that must be included every time
 	local name_vars 		"name"
-	local cmd_vars  		"type required" // Include as needed eventually. "readonly appearance"
+	local cmd_vars  		"type required appearance" // Include as needed eventually. "readonly"
 	local msg_vars  		"`labelvars'"
 	local code_vars 		"" // Include as needed eventually. " constraint  relevance  calculation repeat_count choice_filter"
 
@@ -619,8 +619,8 @@ qui {
 		inlist(type, "start", "end", "deviceid", "subscriberid", "simserial", "phonenumber", "username", "caseid") | /// Default meta types doen not need to be required
 		missing(type)) /// Rows that are not fields shold be skipped
 
-	*List and output non-note fields that are not required
-	gen nonnote_nonrequired = (req_relevant == 1 & type != "note" & lower(required) != "yes")
+	*List and output non-note, non-label fields that are not required
+	gen nonnote_nonrequired = (req_relevant == 1 & type != "note" & appearance != "label" & lower(required) != "yes")
 	count if nonnote_nonrequired == 1
 	if `r(N)' > 0 {
 
@@ -639,6 +639,16 @@ qui {
 		noi report_file add , report_tempfile("`report_tempfile'") wikifragment("Required_Column") message("`error_msg'")  table("list row type name if note_required == 1") testname("REQUIRED NOTE TYPE FIELD")
 	}
 
+	*List and output required label fields in field-list groups
+	gen label_required 		= (req_relevant == 1 & appearance == "label" & lower(required) == "yes")
+	count if label_required == 1
+	if `r(N)' > 0 {
+
+		*Prepare message and write it
+		local error_msg "Fields with appearance [label] (inside a field-list group) must not be required. Label fields are currently required in the following rows:"
+		noi report_file add , report_tempfile("`report_tempfile'") wikifragment("Required_Column") message("`error_msg'")  table("list row type name if label_required == 1") testname("REQUIRED LABEL FIELD")
+	}
+
 }
 end
 
@@ -740,7 +750,7 @@ qui {
 				local beginrow = subinstr("`beginrow'","#","", 1)	//Remove the parse char "#"
 
 				*If the name are not the same it is most likely a different group or repeat group that is incorrectly being closed
-				if "`endname'" != "`beginname'"  {
+				if "`endname'" != "`beginname'" & !missing("`endname'") {
 
 					local error_msg "begin_`begintype' [`beginname'] on row `beginrow' and end_`endtype' [`endname'] on row `endrow'"
 
@@ -960,7 +970,7 @@ qui {
 
 		local error_msg "These variable names are longer then 32 characters. That is allowed in the data formats used in SurveyCTO - and is therefore allowed in their test - but will cause an error when the data is imported to Stata. The following names should be shortened:"
 
-		noi report_file add , report_tempfile("`report_tempfile'") testname("TOO LONG FIELD NAMES")  message("`error_msg'") wikifragment("Field_Name_Length") able("list row type name if longname == 1")
+		noi report_file add , report_tempfile("`report_tempfile'") testname("TOO LONG FIELD NAMES")  message("`error_msg'") wikifragment("Field_Name_Length") table("list row type name if longname == 1")
 
 	}
 

diff --git a/src/help_files/iecodebook.sthlp b/src/help_files/iecodebook.sthlp
@@ -1,5 +1,5 @@
 {smcl}
-{* 31 Jan 2019}{...}
+{* 20 May 2019}{...}
 {hline}
 help for {hi:iecodebook}
 {hline}

diff --git a/src/help_files/iecompdup.sthlp b/src/help_files/iecompdup.sthlp
@@ -1,5 +1,5 @@
 {smcl}
-{* 31 Jan 2019}{...}
+{* 20 May 2019}{...}
 {hline}
 help for {hi:iecompdup}
 {hline}

diff --git a/src/help_files/ieduplicates.sthlp b/src/help_files/ieduplicates.sthlp
@@ -1,5 +1,5 @@
 {smcl}
-{* 31 Jan 2019}{...}
+{* 20 May 2019}{...}
 {hline}
 help for {hi:ieduplicates}
 {hline}
@@ -20,6 +20,12 @@ command please see the {browse "https://dimewiki.worldbank.org/wiki/Ieduplicates
 , {cmdab:fol:der(}{it:string}{cmd:)} {cmdab:unique:vars(}{it:varlist}{cmd:)}
 [{cmdab:keep:vars(}{it:varlist}{cmd:)} {cmdab:tostringok} {cmdab:droprest}
 {cmdab:nodaily} {cmdab:suf:fix(}{it:string}{cmd:)}
+{cmdab:duplistid(}{it:string}{cmd:)} {cmdab:datelisted(}{it:string}{cmd:)}
+{cmdab:datefixed(}{it:string}{cmd:)} {cmdab:correct(}{it:string}{cmd:)}
+{cmdab:drop(}{it:string}{cmd:)} {cmdab:newid(}{it:string}{cmd:)}
+{cmdab:initials(}{it:string}{cmd:)} {cmdab:notes(}{it:string}{cmd:)}]{p_end}
+
+
 
 {phang2}where {it:ID_varname} is the variable that will be controlled for duplicates
 
@@ -32,8 +38,23 @@ command please see the {browse "https://dimewiki.worldbank.org/wiki/Ieduplicates
 {synopt :{cmdab:keep:vars(}{it:varlist}{cmd:)}}variables used to be included in the Excel report in addition to {it:ID_varname} and {cmdab:unique:vars()} {p_end}
 {synopt :{cmdab:tostringok}}allows {it:ID_varname} to be recasted to string if required{p_end}
 {synopt :{cmdab:droprest}}disables the requirement that duplicates must be explicitly deleted{p_end}
-{synopt :{cmdab:suf:fix(}{it:string}{cmd:)}}allows the user to add a suffix to the filename of the Excel report{p_end}
 {synopt :{cmdab:nodaily}}disables daily back-up copies of the Excel report{p_end}
+{synopt :{cmdab:suf:fix(}{it:string}{cmd:)}}allows the user to add a suffix to the filename of the Excel report{p_end}
+
+{pstd}{it:    {ul:{hi:Excel variable name options:}}}{p_end}
+
+{pstd}These options allow users to customize the column names in the report Excel spreadsheet. They are intended for situations when the dataset already has variable(s) with the same name as one of the default Excel spreadsheet column names. {p_end}
+
+{synopt :{cmdab:duplistid(}{it:string}{cmd:)}}customizes variable {it:duplistid}{p_end}
+{synopt :{cmdab:datelisted(}{it:string}{cmd:)}}customizes variable {it:datelisted}{p_end}
+{synopt :{cmdab:datefixed(}{it:string}{cmd:)}}customizes variable {it:datefixed}{p_end}
+{synopt :{cmdab:correct(}{it:string}{cmd:)}}customizes variable {it:correct}{p_end}
+{synopt :{cmdab:drop(}{it:string}{cmd:)}}customizes variable {it:drop}{p_end}
+{synopt :{cmdab:newid(}{it:string}{cmd:)}}customizes variable {it:newid}{p_end}
+{synopt :{cmdab:initials(}{it:string}{cmd:)}}customizes variable {it:initials}{p_end}
+{synopt :{cmdab:notes(}{it:string}{cmd:)}}customizes variable {it:notes}{p_end}
+
+
 {synoptline}
 
 {title:Description}
@@ -55,11 +76,11 @@ needs to be specified in order to have a unique reference for each row in the Ex
 observations in the Excel report, either on its own or together with {it:ID_varname}. {cmd:ieduplicates}
 then returns the data set without these duplicates.
 
-{pstd}The Excel report includes three columns called {it:correct}, {it:drop} and {it:newID}.
+{pstd}The Excel report includes three columns called {it:correct}, {it:drop} and {it:newid}.
 Each of them represents one way to correct the duplicates. If {it:correct} is indicated with
 a "Yes" then that observation is kept unchanged, if {it:drop} is indicated with a "Yes" then
-that observation is deleted and if {it:newID} is indicated then that observation is assigned
-a new ID using the value in column {it:newID}. After corrections are entered, the report should
+that observation is deleted and if {it:newid} is indicated then that observation is assigned
+a new ID using the value in column {it:newid}. After corrections are entered, the report should
 be saved in the same location {cmdab:fol:der(}{it:string}{cmd:)} without any changes to its name.
 
 {pstd}Before outputting a new report {cmd:ieduplicates} always checks if there already is an
@@ -99,15 +120,15 @@ drop and assign a new ID to. For data integrity reasons, be careful not to expor
 Excel files including both identifying variables and names together with {it:ID_varname}.
 
 {phang}{cmdab:tostringok} allows {it:ID_varname} to be turned into a string variable in case
-{it:ID_varname} is numeric but a value listed in {it:newID} is non-numeric. Otherwise an error is generated.
+{it:ID_varname} is numeric but a value listed in {it:newid} is non-numeric. Otherwise an error is generated.
 
 {phang}{cmdab:droprest} disables the requirement that duplicates must be explicitly deleted.
 The default is that if one of the duplicates in a group of duplicates has a
 correction, then that correction is only valid if all other duplicates in that
 group have a correction as well. For example, if there are four observations with
 the same value for {it:ID_varname} and one is correct, one needs a new ID and
 two are incorrect and should be deleted. Then the first one is indicated to be
-kept in the {it:correct} column, the second one is given a new ID in {it:newID}
+kept in the {it:correct} column, the second one is given a new ID in {it:newid}
 and the other two observations must be indicated for deletion in {it:drop}
 unless {cmdab:droprest}. The first two corrections are not considered valid and
 will cause an error in case if {cmdab:droprest} is not specified and the other
@@ -132,6 +153,16 @@ report in a sub-folder called Daily in the folder specified in {cmdab:folder()}.
 the folder /Daily/ does not exist, then it is created unless the
 option {cmdab:nodaily} is used.
 
+{title:Excel variable name options:}
+
+{phang}{cmdab:duplistid(}{it:string}{cmd:)} {cmdab:datelisted(}{it:string}{cmd:)}
+{cmdab:datefixed(}{it:string}{cmd:)} {cmdab:correct(}{it:string}{cmd:)}
+{cmdab:drop(}{it:string}{cmd:)} {cmdab:newid(}{it:string}{cmd:)}
+{cmdab:initials(}{it:string}{cmd:)} {cmdab:notes(}{it:string}{cmd:)}
+allow the user to set a unique name for each of the default variable names (e.g. {it:duplistid}, {it:datelisted}, etc.) in the Excel report spreadsheet.
+This is meant to be used when the variable name already exists in the dataset. To avoid error, the command offers a way to modify the variable name in the Excel Report spreadsheet. {p_end}
+
+
 {title:The Excel Report}
 
 {pstd}A report of duplicates will be created in {cmdab:fol:der(}{it:string}{cmd:)}
@@ -142,40 +173,40 @@ that day, then that report will be overwritten.
 
 {pstd}All duplicates in a group of duplicates must have a correction indicated. If
 one or more duplicates are indicated as correct in {it:correct} or assigned a new
-ID in {it:newID}, then all other duplicates with the same value in {it:ID_varname} must
+ID in {it:newid}, then all other duplicates with the same value in {it:ID_varname} must
 be explicitly indicated for deletion. This requirement may (but probably
 shouldn't) be disabled by option {cmdab:droprest}.
 
 {dlgtab:Columns in Excel Report filled in automatically:}
 
-{phang}{it:dupListID} stores an auto incremented duplicate list ID that is used
+{phang}{it:duplistid} stores an auto incremented duplicate list ID that is used
 to maintain the sort order in the Excel Report regardless of how the data in memory
 is sorted at the time {cmd:ieduplicates} is executed.
 
-{phang}{it:dateListed} stores the date the duplicate was first identified.
+{phang}{it:datelisted} stores the date the duplicate was first identified.
 
-{phang}{it:dateFixed} stores the date a valid correction was imported the first
+{phang}{it:datefixed} stores the date a valid correction was imported the first
 time for that duplicate.
 
 {dlgtab:Columns in Excel Report to be filled in manually by a user:}
 
 {phang}{it:correct} is used to indicate that the duplicate should be kept. Valid values are
 restricted to "yes" and "y" to reduce the risk of unintended entries. The values
 are not sensitive to case. All valid values are changed to "yes" lower case when
-imported. If {it:correct} is indicated then both {it:drop} and {it:newID} must be
+imported. If {it:correct} is indicated then both {it:drop} and {it:newid} must be
 left empty.
 
 {phang}{it:drop} is used to indicate that the duplicate should be deleted. Valid values are
 restricted to "yes" and "y" to reduce the risk of unintended entries. The values
 are not sensitive to case. All valid values are changed to "yes" lower case when
-imported. If {it:drop} is indicated then both {it:correct} and {it:newID} must be
+imported. If {it:drop} is indicated then both {it:correct} and {it:newid} must be
 left empty.
 
-{phang}{it:newID} is used to assign a new ID values to a duplicate. If {it:ID_varname}
-is a string then all values are valid for {it:newID}. If {it:ID_varname} is numeric then
+{phang}{it:newid} is used to assign a new ID values to a duplicate. If {it:ID_varname}
+is a string then all values are valid for {it:newid}. If {it:ID_varname} is numeric then
 only digits are valid, unless the option {cmdab:tostringok} is specified.
-If {cmdab:tostringok} is specified and {it:newID} is non-numeric, then {it:ID_varname}
-is recasted to a string variable. If {it:newID} is indicated then both {it:correct} and {it:drop} must be
+If {cmdab:tostringok} is specified and {it:newid} is non-numeric, then {it:ID_varname}
+is recasted to a string variable. If {it:newid} is indicated then both {it:correct} and {it:drop} must be
 left empty.
 
 {phang}{it:initials} allows the team working with this data to keep track on who
@@ -245,7 +276,7 @@ unresolved duplicates were found
 {hi:Example 4.} Using the Excel file. The table below could be the report generated in Example 2 above. Make the viewer window wider and reload the page if the table below does not display properly!
 
 {col 3}{c TLC}{hline 116}{c TRC}
-{col 3}{c |}{col 4}HHID{col 10}dupListID{col 21}dateListed{col 33}dateFixed{col 44}correct{col 53}drop{col 59}newID{col 65}initials{col 75}note{col 94}KEY{col 107}enumerator{col 120}{c |}
+{col 3}{c |}{col 4}HHID{col 10}duplistid{col 21}datelisted{col 33}datefixed{col 44}correct{col 53}drop{col 59}newid{col 65}initials{col 75}notes{col 94}KEY{col 107}enumerator{col 120}{c |}
 {col 3}{c LT}{hline 116}{c RT}
 {col 3}{c |}{col 4}4321{col 10}1{col 21}27Dec2015{col 33}02Jan2016{col 44}yes{col 53}   {col 59}    {col 65}KB{col 75}double submission{col 94}{it:uniquevalue}{col 107}{it:keepvarvalue}{col 120}{c |}
 {col 3}{c |}{col 4}4321{col 10}2{col 21}27Dec2015{col 33}02Jan2016{col 44}   {col 53}yes{col 59}    {col 65}KB{col 75}double submission{col 94}{it:uniquevalue}{col 107}{it:keepvarvalue}{col 120}{c |}
@@ -260,19 +291,41 @@ unresolved duplicates were found
 {pmore}The table above shows an example of an Excel report with 4 duplicates groups with
 two duplicates in each groups. The duplicates in 4321 and in 1145 have both been corrected
 but 7365 and 9834 are still unresolved. Before any observation was corrected, all observations had
-{it:dateFixed}, {it:correct}, {it:drop}, {it:newID}, {it:initials} and {it:note} empty just like the observations for ID 7365 and 9834. {it:dateFixed}
+{it:datefixed}, {it:correct}, {it:drop}, {it:newid}, {it:initials} and {it:notes} empty just like the observations for ID 7365 and 9834. {it:datefixed}
 is not updated by the user, the command adds this date the first time the correction is made.
 
-{pmore}Observation with dupListID == 5 was found to have been
-assigned the incorrect ID while the data was collected. This observation is assigned the correct ID in {it:newID}
-and observation dupListID == 6 is indicated to be correct. Someone with initials IB made this
+{pmore}Observation with duplistid == 5 was found to have been
+assigned the incorrect ID while the data was collected. This observation is assigned the correct ID in {it:newid}
+and observation duplistid == 6 is indicated to be correct. Someone with initials IB made this
 correction and made a note. This note can and should be more descriptive but is kept short in this example.
 
-{pmore}Observations with dupListID == 1 and dupListID == 2 were identified as a duplicate submissions of the same
+{pmore}Observations with duplistid == 1 and duplistid == 2 were identified as a duplicate submissions of the same
 observation. One is kept and one is dropped, usually it does not matter which you keep and which you drop, but that should be confirmed.
 
 {pmore}Both corrections described in the example would have been easily identified using this command's sister command {help iecompdup}.
 
+
+
+{phang}
+{hi:Example 5.} {inp:ieduplicates HHID, folder(C:\myImpactEvaluation\baseline\data) uniquevars(KEY) drop(out) notes(notes_enumerators)}
+
+{col 3}{c TLC}{hline 103}{c TRC}
+{col 3}{c |}{col 4}HHID{col 10}duplistid{col 21}datelisted{col 33}datefixed{col 44}correct{col 53}out{col 59}newid{col 65}initials{col 75}notes_enumerators{col 94}KEY{col 107}{c |}
+{col 3}{c LT}{hline 103}{c RT}
+{col 3}{c |}{col 4}4321{col 10}1{col 21}27Dec2015{col 33}02Jan2016{col 44}yes{col 53}   {col 59}    {col 65}KB{col 75}double submission{col 94}{it:uniquevalue}{col 107}{c |}
+{col 3}{c |}{col 4}4321{col 10}2{col 21}27Dec2015{col 33}02Jan2016{col 44}   {col 53}yes{col 59}    {col 65}KB{col 75}double submission{col 94}{it:uniquevalue}{col 107}{c |}
+{col 3}{c |}{col 4}7365{col 10}3{col 21}03Jan2016{col 33}         {col 44}   {col 53}   {col 59}    {col 65}  {col 75}                 {col 94}{it:uniquevalue}{col 107}{c |}
+{col 3}{c |}{col 4}7365{col 10}4{col 21}03Jan2016{col 33}         {col 44}   {col 53}   {col 59}    {col 65}  {col 75}                 {col 94}{it:uniquevalue}{col 107}{c |}
+{col 3}{c |}{col 4}1145{col 10}5{col 21}03Jan2016{col 33}11Jan2016{col 44}   {col 53}   {col 59}1245{col 65}IB{col 75}incorrect id     {col 94}{it:uniquevalue}{col 107}{c |}
+{col 3}{c |}{col 4}1145{col 10}6{col 21}03Jan2016{col 33}11Jan2016{col 44}yes{col 53}   {col 59}    {col 65}IB{col 75}correct id       {col 94}{it:uniquevalue}{col 107}{c |}
+{col 3}{c |}{col 4}9834{col 10}7{col 21}11Jan2016{col 33}         {col 44}   {col 53}   {col 59}    {col 65}  {col 75}                 {col 94}{it:uniquevalue}{col 107}{c |}
+{col 3}{c |}{col 4}9834{col 10}8{col 21}11Jan2016{col 33}         {col 44}   {col 53}   {col 59}    {col 65}  {col 75}                 {col 94}{it:uniquevalue}{col 107}{c |}
+{col 3}{c BLC}{hline 103}{c BRC}
+
+{pmore} The variable names in the Excel Report are now changed to the names the user specified. If the user changed any of the variable names in the Excel Report, when importing the Excel file back to apply the decisions, run exactly the same code:{p_end}
+{pmore}{inp:ieduplicates HHID, folder(C:\myImpactEvaluation\baseline\data) uniquevars(KEY) drop(out) notes(notes_enumerators)}{p_end}
+
+
 {title:Acknowledgements}
 
 {phang}We would like to acknowledge the help in testing and proofreading we received in relation to this command and help file from (in alphabetic order):{p_end}

diff --git a/src/help_files/iefieldkit.sthlp b/src/help_files/iefieldkit.sthlp
@@ -1,5 +1,5 @@
 {smcl}
-{* 31 Jan 2019}{...}
+{* 20 May 2019}{...}
 {hline}
 help for {hi:iefieldkit}
 {hline}

diff --git a/src/help_files/ietestform.sthlp b/src/help_files/ietestform.sthlp
@@ -1,5 +1,5 @@
 {smcl}
-{* 31 Jan 2019}{...}
+{* 20 May 2019}{...}
 {hline}
 help for {hi:ietestform}
 {hline}