From 92d15d2a9500c0ab9332550670c582fde3e89207 Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 5 Feb 2019 17:32:52 -0500 Subject: [PATCH 01/39] added code to set excel col name to default if no option is specified --- src/ado_files/ieduplicates.ado | 78 ++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index f5e89c46..aff8838e 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -6,7 +6,8 @@ qui { - syntax varname , FOLder(string) UNIQUEvars(varlist) [KEEPvars(varlist) tostringok droprest nodaily SUFfix(string)] + syntax varname , FOLder(string) UNIQUEvars(varlist) [KEEPvars(varlist) tostringok droprest nodaily SUFfix(string) /// + dupListID(string) dateListed(string) dateFixed(string) correct(string) drop(string) newID(string) initials(string) notes(string)] version 11.0 @@ -34,7 +35,7 @@ ************************************************************************ ***********************************************************************/ - *Tempfiles to be uses + *Tempfiles to be used tempfile originalData preppedReport datawithreportmerged dataToReturn ** Save a version of original data that can be brought @@ -52,9 +53,17 @@ local argumentVars `idvar' `uniquevars' `keepvars' * Create a list of the variables created by this command to put in the report - local excelVars dupListID dateListed dateFixed correct drop newID initials notes + *For optioin to change var names. 
Setting a default name of columns (in case user did not specify the variable name) + local deafultvars dupListID dateListed dateFixed correct drop newID initials notes + + foreach deafultvar of local deafultvars { + if "``deafultvar''" == "" local "`deafultvar'" = "`deafultvar'" + } + * Test that nu variable with the name needed for the excel report already exist in the data set + local excelVars `dupListID' `dateListed' `dateFixed' `correct' `drop' `newID' `initials' `notes' + foreach excelvar of local excelVars { cap confirm variable `excelvar' if _rc == 0 { @@ -151,7 +160,10 @@ ** All excelVars but dupListID and newID should be string. dupListID * should be numeric and the type of newID should be based on the user input foreach excelvar of local excelVars { - if !inlist("`excelvar'", "dupListID", "newID") { + + *SI_TODO: confirm variable that they exist in excel var name + + if !inlist("`excelvar'", "`dupListID'", "`newID'") { * Make original ID var string tostring `excelvar' , replace @@ -209,13 +221,13 @@ ******************/ * Make string input lower case and change "y" to "yes" - replace correct = lower(correct) - replace drop = lower(drop) - replace correct = "yes" if correct == "y" - replace drop = "yes" if drop == "y" + replace `correct' = lower(`correct') + replace `drop' = lower(`drop') + replace `correct' = "yes" if `correct' == "y" + replace `drop' = "yes" if `drop' == "y" *Check that variables are either empty or "yes" - gen `inputNotYes' = !((correct == "yes" | correct == "") & (drop == "yes" | drop == "")) + gen `inputNotYes' = !((`correct' == "yes" | `correct' == "") & (`drop' == "yes" | `drop' == "")) *Set local to 1 if error should be outputted cap assert `inputNotYes' == 0 @@ -228,7 +240,7 @@ * Count the number of corrections (correct drop newID) per * observation. Only one correction per observation is allowed. 
- egen `multiInp' = rownonmiss(correct drop newID), strok + egen `multiInp' = rownonmiss(`correct' `drop' `newID'), strok *Check that all rows have at most one correction cap assert `multiInp' == 0 | `multiInp' == 1 @@ -243,7 +255,7 @@ ******************/ *Generate dummy if correct column is set to yes - gen `yesCorrect' = (correct == "yes") + gen `yesCorrect' = (`correct' == "yes") *Count number of duplicates within duplicates where that dummy is 1 bys `idvar' : egen `groupNumCorrect' = total(`yesCorrect') @@ -266,13 +278,13 @@ ******************/ *Generate dummy if there is any correction for this observation - gen `anyCorrection' = !missing(correct) | !missing(newID) + gen `anyCorrection' = !missing(`correct') | !missing(`newID') *Count number of observations with any correction in suplicates group bys `idvar' : egen `groupAnyCorrection' = total(`anyCorrection') *Create dummy that indicates each place this error happens - gen `notDrop' = (missing(drop) & `groupAnyCorrection' > 0 & `anyCorrection' == 0) + gen `notDrop' = (missing(`drop') & `groupAnyCorrection' > 0 & `anyCorrection' == 0) * Check if option droprest is specified if "`droprest'" == "" { @@ -292,7 +304,7 @@ * for any observations without drop or any other correction * explicitly specified if the observation is in a duplicate * group with at least one observation has a correction - replace drop = "yes" if `notDrop' == 1 + replace `drop' = "yes" if `notDrop' == 1 } @@ -312,28 +324,28 @@ *Error multiple input if `local_multiInp' == 1 { display as error "{phang}The following observations have more than one correction. 
Only one correction (correct, drop or newID) per row is allowed{p_end}" - list `idvar' dupListID correct drop newID `uniquevars' if `multiInp' > 1 + list `idvar' `dupListID' `correct' `drop' `newID' `uniquevars' if `multiInp' > 1 di "" } *Error multiple correct if `local_multiCorr' == 1 { display as error "{phang}The following observations are in a duplicate group where more than one observation is listed as correct. Only one observation per duplicate group can be correct{p_end}" - list `idvar' dupListID correct drop newID `uniquevars' if `groupNumCorrect' > 1 + list `idvar' `dupListID' `correct' `drop' `newID' `uniquevars' if `groupNumCorrect' > 1 di "" } *Error in incorrect string if `local_inputNotYes' == 1 { display as error "{phang}The following observations have an answer in either correct or drop that is neither yes nor y{p_end}" - list `idvar' dupListID correct drop `uniquevars' if `inputNotYes' == 1 + list `idvar' `dupListID' `correct' `drop' `uniquevars' if `inputNotYes' == 1 di "" } *Error is not specfied as drop if `local_notDrop' == 1 { display as error "{phang}The following observations are not explicitly indicated as drop while other duplicates in the same duplicate group are corrected. Either manually indicate as drop or see option droprest{p_end}" - list `idvar' dupListID correct drop newID `uniquevars' if `notDrop' == 1 + list `idvar' `dupListID' `correct' `drop' `newID' `uniquevars' if `notDrop' == 1 di "" } @@ -398,7 +410,7 @@ *Explicitly drop temporary variable. Temporary variables might *be exported to excel so delete explicitly before that. Only *using tempvar here to create a name with no conflicts - drop `iedup_merge' + `drop' `iedup_merge' } @@ -453,7 +465,7 @@ foreach excelvar of local excelVars { *Create all variables apart from dupListID as string vars - if inlist("`excelvar'", "dupListID", "newID") { + if inlist("`excelvar'", "`dupListID'", "`newID'") { gen `excelvar' = . 
} else { @@ -469,7 +481,7 @@ * Generate a local that is 1 if there are new duplicates local unaddressedNewExcel 0 - count if missing(dateFixed) + count if missing(`dateFixed') if `r(N)' > 0 local unaddressedNewExcel 1 /****************** @@ -477,11 +489,11 @@ ******************/ * Add date first time duplicate was identified - replace dateListed = "`date'" if missing(dateListed) + replace ``dateListed'' = "`date'" if missing(`dateListed') ** Add today's date to variable dateFixed if dateFixed * is empty and at least one correction is added - replace dateFixed = "`date'" if missing(dateFixed) & (!missing(correct) | !missing(drop) | !missing(newID)) + replace `dateFixed' = "`date'" if missing(`dateFixed') & (!missing(`correct') | !missing(`drop') | !missing(`newID')) /****************** Section 5.3.2 Duplicate report list ID @@ -489,18 +501,18 @@ ** Sort after dupListID and after ID var for * duplicates currently without dupListID - sort dupListID `idvar' + sort `dupListID' `idvar' ** Assign dupListID 1 to the top row if no duplicate * list IDs have been generated so far. - replace dupListID = 1 if _n == 1 & missing(dupListID) + replace `dupListID' = 1 if _n == 1 & missing(`dupListID') ** Generate new IDs based on the row above instead of directly * from the row number. That prevents duplicates in the list in * case an observation is deleted. 
The first observation with * missing value will have an ID that is one digit higher than * the highest ID already in the list - replace dupListID = dupListID[_n - 1] + 1 if missing(dupListID) + replace `dupListID' = `dupListID'[_n - 1] + 1 if missing(`dupListID') /****************** Section 5.4 @@ -571,7 +583,7 @@ Drop duplicates listed for drop ******************/ - drop if drop == "yes" + drop if `drop' == "yes" /****************** Section 6.2 @@ -589,17 +601,17 @@ ******************/ *Test if there are any corrections by new ID - cap assert missing(newID) + cap assert missing(`newID') if _rc { local idtype : type `idvar' - local idtypeNew : type newID + local idtypeNew : type `newID' *If ID var is string but newID is not, then just make it string if substr("`idtype'",1,3) == "str" & substr("`idtypeNew'",1,3) != "str" { - tostring newID , replace - replace newID = "" if newID == "." + tostring `newID' , replace + replace `newID' = "" if `newID' == "." } *If ID var is numeric but the newID is loaded as string @@ -618,7 +630,7 @@ else { * Create a local with all non-numeric values - levelsof newID if missing(real(newID)), local(NaN_values) clean + levelsof `newID' if missing(real(`newID')), local(NaN_values) clean * Output error message di as error "{phang}The ID variable `idvar' is numeric but newID has these non-numeric values: `NaN_values'. 
Update newID to only contain numeric values or see option tostringok.{p_end}" @@ -628,7 +640,7 @@ } *After making sure that type is ok, update the IDs - replace `idvar' = newID if !missing(newID) + replace `idvar' = `newID' if !missing(`newID') From b07dc5921e8919dd7639758fabe42b808c166352 Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 12 Feb 2019 13:51:00 -0500 Subject: [PATCH 02/39] fix to the code on option to change col names --- src/ado_files/ieduplicates.ado | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index aff8838e..454ea950 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -58,7 +58,7 @@ local deafultvars dupListID dateListed dateFixed correct drop newID initials notes foreach deafultvar of local deafultvars { - if "``deafultvar''" == "" local "`deafultvar'" = "`deafultvar'" + if "``deafultvar''" == "" local `deafultvar' = "`deafultvar'" } * Test that nu variable with the name needed for the excel report already exist in the data set From ab71f63a2e0ce9afdca73df2283fde8a72ceaa7e Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 12 Feb 2019 16:57:06 -0500 Subject: [PATCH 03/39] added code to check spreadsheet variable name to be the same when importing: line 163 --- src/ado_files/ieduplicates.ado | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index 454ea950..0eda553f 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -61,7 +61,7 @@ if "``deafultvar''" == "" local `deafultvar' = "`deafultvar'" } - * Test that nu variable with the name needed for the excel report already exist in the data set + * Test that no variable with the name needed for the excel report already exist in the data set local excelVars `dupListID' `dateListed' `dateFixed' `correct' `drop' `newID' `initials' `notes' foreach excelvar of 
local excelVars { @@ -121,7 +121,7 @@ Section 3 - Test input from Excel file - If Excel repirt exist, import it and test for invalid corrections + If Excel report exists, import it and test for invalid corrections made in the excel report. ************************************************************************ @@ -142,6 +142,7 @@ local fileExists 0 } + /****************** Section 3.2 If report exists, load file and check input, otherwise skip to section 4 @@ -160,9 +161,14 @@ ** All excelVars but dupListID and newID should be string. dupListID * should be numeric and the type of newID should be based on the user input foreach excelvar of local excelVars { - - *SI_TODO: confirm variable that they exist in excel var name - + cap confirm variable `excelvar' + if _rc !=0 { + *Variable does notexist, output error + noi display as error "{phang}The original spreadsheet variable name "{inp:`excelvar'}" no longer exist. Please change the spreadsheet variable name back to the origional name.{p_end}" + error 198 + exit + } + if !inlist("`excelvar'", "`dupListID'", "`newID'") { * Make original ID var string @@ -410,7 +416,7 @@ *Explicitly drop temporary variable. Temporary variables might *be exported to excel so delete explicitly before that. Only *using tempvar here to create a name with no conflicts - `drop' `iedup_merge' + drop `iedup_merge' } From 293c24437ed72c557106a2c49dce04b30f0aa91f Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 12 Feb 2019 17:11:42 -0500 Subject: [PATCH 04/39] added comment on the added code --- src/ado_files/ieduplicates.ado | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index 0eda553f..b3e27394 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -158,8 +158,7 @@ egen `count_nonmissing_values' = rownonmiss(_all), strok drop if `count_nonmissing_values' == 0 - ** All excelVars but dupListID and newID should be string. 
dupListID - * should be numeric and the type of newID should be based on the user input + * Check if the variable name in the excel spreadsheet remain unchanged from the original report outputted. foreach excelvar of local excelVars { cap confirm variable `excelvar' if _rc !=0 { @@ -169,6 +168,8 @@ exit } + ** All excelVars but dupListID and newID should be string. dupListID + * should be numeric and the type of newID should be based on the user input if !inlist("`excelvar'", "`dupListID'", "`newID'") { * Make original ID var string From 681f157da4fb7bdb90a891a1873372b3d9744528 Mon Sep 17 00:00:00 2001 From: Unknown Date: Thu, 21 Feb 2019 15:06:22 -0500 Subject: [PATCH 05/39] Adding section about new options to change variable name to the helpfile --- src/ado_files/ieduplicates.ado | 2 +- src/help_files/ieduplicates.sthlp | 53 +++++++++++++++++++++++++++++-- 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index b3e27394..bbd3f956 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -163,7 +163,7 @@ cap confirm variable `excelvar' if _rc !=0 { *Variable does notexist, output error - noi display as error "{phang}The original spreadsheet variable name "{inp:`excelvar'}" no longer exist. Please change the spreadsheet variable name back to the origional name.{p_end}" + noi display as error "{phang}The original spreadsheet variable name {inp:`excelvar'} no longer exist. 
Please change the spreadsheet variable name back to the origional name.{p_end}" error 198 exit } diff --git a/src/help_files/ieduplicates.sthlp b/src/help_files/ieduplicates.sthlp index d02fbad9..2b0d8691 100644 --- a/src/help_files/ieduplicates.sthlp +++ b/src/help_files/ieduplicates.sthlp @@ -19,7 +19,13 @@ command please see the {browse "https://dimewiki.worldbank.org/wiki/Ieduplicates {it:ID_varname} , {cmdab:fol:der(}{it:string}{cmd:)} {cmdab:unique:vars(}{it:varlist}{cmd:)} [{cmdab:keep:vars(}{it:varlist}{cmd:)} {cmdab:tostringok} {cmdab:droprest} -{cmdab:nodaily} {cmdab:suf:fix(}{it:string}{cmd:)} +{cmdab:nodaily} {cmdab:suf:fix(}{it:string}{cmd:)} +{cmdab:dupListID(}{it:string}{cmd:)} {cmdab:dateListed(}{it:string}{cmd:)} +{cmdab:dateFixed(}{it:string}{cmd:)} {cmdab:correct(}{it:string}{cmd:)} +{cmdab:drop(}{it:string}{cmd:)} {cmdab:newID(}{it:string}{cmd:)} +{cmdab:initials(}{it:string}{cmd:)} {cmdab:notes(}{it:string}{cmd:)}]{p_end} + + {phang2}where {it:ID_varname} is the variable that will be controlled for duplicates @@ -32,8 +38,19 @@ command please see the {browse "https://dimewiki.worldbank.org/wiki/Ieduplicates {synopt :{cmdab:keep:vars(}{it:varlist}{cmd:)}}variables used to be included in the Excel report in addition to {it:ID_varname} and {cmdab:unique:vars()} {p_end} {synopt :{cmdab:tostringok}}allows {it:ID_varname} to be recasted to string if required{p_end} {synopt :{cmdab:droprest}}disables the requirement that duplicates must be explicitly deleted{p_end} -{synopt :{cmdab:suf:fix(}{it:string}{cmd:)}}allows the user to add a suffix to the filename of the Excel report{p_end} {synopt :{cmdab:nodaily}}disables daily back-up copies of the Excel report{p_end} +{synopt :{cmdab:suf:fix(}{it:string}{cmd:)}}allows the user to add a suffix to the filename of the Excel report{p_end} + +{synopt :{cmdab:dupListID(}{it:string}{cmd:)}}allows the user to customize the variable name "dupListID" in the the Excel report{p_end} +{synopt 
:{cmdab:dateListed(}{it:string}{cmd:)}}allows the user to customize the variable name "dateListed" in the the Excel report{p_end} +{synopt :{cmdab:dateFixed(}{it:string}{cmd:)}}allows the user to customize the variable name "dateFixed" in the the Excel report{p_end} +{synopt :{cmdab:correct(}{it:string}{cmd:)}}allows the user to customize the variable name "correct" in the the Excel report{p_end} +{synopt :{cmdab:drop(}{it:string}{cmd:)}}allows the user to customize the variable name "drop" in the the Excel report{p_end} +{synopt :{cmdab:newID(}{it:string}{cmd:)}}allows the user to customize the variable name "newID" in the the Excel report{p_end} +{synopt :{cmdab:initials(}{it:string}{cmd:)}}allows the user to customize the variable name "initials" in the the Excel report{p_end} +{synopt :{cmdab:notes(}{it:string}{cmd:)}}allows the user to customize the variable name "notes" in the the Excel report{p_end} + + {synoptline} {title:Description} @@ -132,6 +149,14 @@ report in a sub-folder called Daily in the folder specified in {cmdab:folder()}. the folder /Daily/ does not exist, then it is created unless the option {cmdab:nodaily} is used. +{phang}{cmdab:dupListID(}{it:string}{cmd:)} {cmdab:dateListed(}{it:string}{cmd:)} +{cmdab:dateFixed(}{it:string}{cmd:)} {cmdab:correct(}{it:string}{cmd:)} +{cmdab:drop(}{it:string}{cmd:)} {cmdab:newID(}{it:string}{cmd:)} +{cmdab:initials(}{it:string}{cmd:)} {cmdab:notes(}{it:string}{cmd:)} +allow the user to set a unique name for each default variable names (e.g. "dupListID", "dateListed", etc.) in the Excel report. +This is meant to be used when the variable name already exists in the dataset. To avoid error, the command offers a way to modify the variable name in the Excel Report. {p_end} + + {title:The Excel Report} {pstd}A report of duplicates will be created in {cmdab:fol:der(}{it:string}{cmd:)} @@ -245,7 +270,7 @@ unresolved duplicates were found {hi:Example 4.} Using the Excel file. 
The table below could be the report generated in Example 2 above. Make the viewer window wider and reload the page if the table below does not display properly! {col 3}{c TLC}{hline 116}{c TRC} -{col 3}{c |}{col 4}HHID{col 10}dupListID{col 21}dateListed{col 33}dateFixed{col 44}correct{col 53}drop{col 59}newID{col 65}initials{col 75}note{col 94}KEY{col 107}enumerator{col 120}{c |} +{col 3}{c |}{col 4}HHID{col 10}dupListID{col 21}dateListed{col 33}dateFixed{col 44}correct{col 53}drop{col 59}newID{col 65}initials{col 75}notes{col 94}KEY{col 107}enumerator{col 120}{c |} {col 3}{c LT}{hline 116}{c RT} {col 3}{c |}{col 4}4321{col 10}1{col 21}27Dec2015{col 33}02Jan2016{col 44}yes{col 53} {col 59} {col 65}KB{col 75}double submission{col 94}{it:uniquevalue}{col 107}{it:keepvarvalue}{col 120}{c |} {col 3}{c |}{col 4}4321{col 10}2{col 21}27Dec2015{col 33}02Jan2016{col 44} {col 53}yes{col 59} {col 65}KB{col 75}double submission{col 94}{it:uniquevalue}{col 107}{it:keepvarvalue}{col 120}{c |} @@ -273,6 +298,28 @@ observation. One is kept and one is dropped, usually it does not matter which yo {pmore}Both corrections described in the example would have been easily identified using this command's sister command {help iecompdup}. 
+ + +{phang} +{hi:Example 5.} {inp:ieduplicates HHID, folder(C:\myImpactEvaluation\baseline\data) uniquevars(KEY) drop(out) notes(notes_enumerators)} + +{col 3}{c TLC}{hline 103}{c TRC} +{col 3}{c |}{col 4}HHID{col 10}dupListID{col 21}dateListed{col 33}dateFixed{col 44}correct{col 53}out{col 59}newID{col 65}initials{col 75}notes_enumerators{col 94}KEY{col 107}{c |} +{col 3}{c LT}{hline 103}{c RT} +{col 3}{c |}{col 4}4321{col 10}1{col 21}27Dec2015{col 33}02Jan2016{col 44}yes{col 53} {col 59} {col 65}KB{col 75}double submission{col 94}{it:uniquevalue}{col 107}{c |} +{col 3}{c |}{col 4}4321{col 10}2{col 21}27Dec2015{col 33}02Jan2016{col 44} {col 53}yes{col 59} {col 65}KB{col 75}double submission{col 94}{it:uniquevalue}{col 107}{c |} +{col 3}{c |}{col 4}7365{col 10}3{col 21}03Jan2016{col 33} {col 44} {col 53} {col 59} {col 65} {col 75} {col 94}{it:uniquevalue}{col 107}{c |} +{col 3}{c |}{col 4}7365{col 10}4{col 21}03Jan2016{col 33} {col 44} {col 53} {col 59} {col 65} {col 75} {col 94}{it:uniquevalue}{col 107}{c |} +{col 3}{c |}{col 4}1145{col 10}5{col 21}03Jan2016{col 33}11Jan2016{col 44} {col 53} {col 59}1245{col 65}IB{col 75}incorrect id {col 94}{it:uniquevalue}{col 107}{c |} +{col 3}{c |}{col 4}1145{col 10}6{col 21}03Jan2016{col 33}11Jan2016{col 44}yes{col 53} {col 59} {col 65}IB{col 75}correct id {col 94}{it:uniquevalue}{col 107}{c |} +{col 3}{c |}{col 4}9834{col 10}7{col 21}11Jan2016{col 33} {col 44} {col 53} {col 59} {col 65} {col 75} {col 94}{it:uniquevalue}{col 107}{c |} +{col 3}{c |}{col 4}9834{col 10}8{col 21}11Jan2016{col 33} {col 44} {col 53} {col 59} {col 65} {col 75} {col 94}{it:uniquevalue}{col 107}{c |} +{col 3}{c BLC}{hline 103}{c BRC} + +{pmore} The variable names in the Excel report are now changed to the names the user specified. 
If the user changed any of the variable names in the Excel Report, when importing the Excel file back to apply the decisions, run exactly the same code:{p_end} +{pmore}{inp:ieduplicates HHID, folder(C:\myImpactEvaluation\baseline\data) uniquevars(KEY) drop(out) notes(notes_enumerators)}{p_end} + + {title:Acknowledgements} {phang}We would like to acknowledge the help in testing and proofreading we received in relation to this command and help file from (in alphabetic order):{p_end} From 41c123e11ce2706891d41bdbf2d434d955846985 Mon Sep 17 00:00:00 2001 From: Unknown Date: Thu, 21 Feb 2019 17:31:16 -0500 Subject: [PATCH 06/39] fixing upper case option names to lower case --- src/help_files/ieduplicates.sthlp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/help_files/ieduplicates.sthlp b/src/help_files/ieduplicates.sthlp index 2b0d8691..f48c9e9a 100644 --- a/src/help_files/ieduplicates.sthlp +++ b/src/help_files/ieduplicates.sthlp @@ -20,9 +20,9 @@ command please see the {browse "https://dimewiki.worldbank.org/wiki/Ieduplicates , {cmdab:fol:der(}{it:string}{cmd:)} {cmdab:unique:vars(}{it:varlist}{cmd:)} [{cmdab:keep:vars(}{it:varlist}{cmd:)} {cmdab:tostringok} {cmdab:droprest} {cmdab:nodaily} {cmdab:suf:fix(}{it:string}{cmd:)} -{cmdab:dupListID(}{it:string}{cmd:)} {cmdab:dateListed(}{it:string}{cmd:)} -{cmdab:dateFixed(}{it:string}{cmd:)} {cmdab:correct(}{it:string}{cmd:)} -{cmdab:drop(}{it:string}{cmd:)} {cmdab:newID(}{it:string}{cmd:)} +{cmdab:duplistid(}{it:string}{cmd:)} {cmdab:datelisted(}{it:string}{cmd:)} +{cmdab:datefixed(}{it:string}{cmd:)} {cmdab:correct(}{it:string}{cmd:)} +{cmdab:drop(}{it:string}{cmd:)} {cmdab:newid(}{it:string}{cmd:)} {cmdab:initials(}{it:string}{cmd:)} {cmdab:notes(}{it:string}{cmd:)}]{p_end} @@ -41,12 +41,12 @@ command please see the {browse "https://dimewiki.worldbank.org/wiki/Ieduplicates {synopt :{cmdab:nodaily}}disables daily back-up copies of the Excel report{p_end} {synopt 
:{cmdab:suf:fix(}{it:string}{cmd:)}}allows the user to add a suffix to the filename of the Excel report{p_end} -{synopt :{cmdab:dupListID(}{it:string}{cmd:)}}allows the user to customize the variable name "dupListID" in the the Excel report{p_end} -{synopt :{cmdab:dateListed(}{it:string}{cmd:)}}allows the user to customize the variable name "dateListed" in the the Excel report{p_end} -{synopt :{cmdab:dateFixed(}{it:string}{cmd:)}}allows the user to customize the variable name "dateFixed" in the the Excel report{p_end} +{synopt :{cmdab:duplistid(}{it:string}{cmd:)}}allows the user to customize the variable name "dupListID" in the the Excel report{p_end} +{synopt :{cmdab:datelisted(}{it:string}{cmd:)}}allows the user to customize the variable name "dateListed" in the the Excel report{p_end} +{synopt :{cmdab:datefixed(}{it:string}{cmd:)}}allows the user to customize the variable name "dateFixed" in the the Excel report{p_end} {synopt :{cmdab:correct(}{it:string}{cmd:)}}allows the user to customize the variable name "correct" in the the Excel report{p_end} {synopt :{cmdab:drop(}{it:string}{cmd:)}}allows the user to customize the variable name "drop" in the the Excel report{p_end} -{synopt :{cmdab:newID(}{it:string}{cmd:)}}allows the user to customize the variable name "newID" in the the Excel report{p_end} +{synopt :{cmdab:newid(}{it:string}{cmd:)}}allows the user to customize the variable name "newID" in the the Excel report{p_end} {synopt :{cmdab:initials(}{it:string}{cmd:)}}allows the user to customize the variable name "initials" in the the Excel report{p_end} {synopt :{cmdab:notes(}{it:string}{cmd:)}}allows the user to customize the variable name "notes" in the the Excel report{p_end} @@ -149,9 +149,9 @@ report in a sub-folder called Daily in the folder specified in {cmdab:folder()}. the folder /Daily/ does not exist, then it is created unless the option {cmdab:nodaily} is used. 
-{phang}{cmdab:dupListID(}{it:string}{cmd:)} {cmdab:dateListed(}{it:string}{cmd:)} -{cmdab:dateFixed(}{it:string}{cmd:)} {cmdab:correct(}{it:string}{cmd:)} -{cmdab:drop(}{it:string}{cmd:)} {cmdab:newID(}{it:string}{cmd:)} +{phang}{cmdab:duplistid(}{it:string}{cmd:)} {cmdab:datelisted(}{it:string}{cmd:)} +{cmdab:datefixed(}{it:string}{cmd:)} {cmdab:correct(}{it:string}{cmd:)} +{cmdab:drop(}{it:string}{cmd:)} {cmdab:newid(}{it:string}{cmd:)} {cmdab:initials(}{it:string}{cmd:)} {cmdab:notes(}{it:string}{cmd:)} allow the user to set a unique name for each default variable names (e.g. "dupListID", "dateListed", etc.) in the Excel report. This is meant to be used when the variable name already exists in the dataset. To avoid error, the command offers a way to modify the variable name in the Excel Report. {p_end} From 17b97b5ee48c0fab6ba8f9212f3c8e5ad03e5922 Mon Sep 17 00:00:00 2001 From: Unknown Date: Thu, 21 Feb 2019 18:11:07 -0500 Subject: [PATCH 07/39] fixing upper case option names to lowercase_.ado file --- src/ado_files/ieduplicates.ado | 76 +++++++++++++++++----------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index bbd3f956..7714e54b 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -7,7 +7,7 @@ qui { syntax varname , FOLder(string) UNIQUEvars(varlist) [KEEPvars(varlist) tostringok droprest nodaily SUFfix(string) /// - dupListID(string) dateListed(string) dateFixed(string) correct(string) drop(string) newID(string) initials(string) notes(string)] + duplistid(string) datelisted(string) datefixed(string) correct(string) drop(string) newid(string) initials(string) notes(string)] version 11.0 @@ -55,14 +55,14 @@ * Create a list of the variables created by this command to put in the report *For optioin to change var names. 
Setting a default name of columns (in case user did not specify the variable name) - local deafultvars dupListID dateListed dateFixed correct drop newID initials notes + local deafultvars duplistid datelisted datefixed correct drop newid initials notes foreach deafultvar of local deafultvars { if "``deafultvar''" == "" local `deafultvar' = "`deafultvar'" } * Test that no variable with the name needed for the excel report already exist in the data set - local excelVars `dupListID' `dateListed' `dateFixed' `correct' `drop' `newID' `initials' `notes' + local excelVars `duplistid' `datelisted' `datefixed' `correct' `drop' `newid' `initials' `notes' foreach excelvar of local excelVars { cap confirm variable `excelvar' @@ -168,9 +168,9 @@ exit } - ** All excelVars but dupListID and newID should be string. dupListID - * should be numeric and the type of newID should be based on the user input - if !inlist("`excelvar'", "`dupListID'", "`newID'") { + ** All excelVars but duplistid and newid should be string. duplistid + * should be numeric and the type of newid should be based on the user input + if !inlist("`excelvar'", "`duplistid'", "`newid'") { * Make original ID var string tostring `excelvar' , replace @@ -245,9 +245,9 @@ Make sure there are not too many corrections for a single observation ******************/ - * Count the number of corrections (correct drop newID) per + * Count the number of corrections (correct drop newid) per * observation. Only one correction per observation is allowed. - egen `multiInp' = rownonmiss(`correct' `drop' `newID'), strok + egen `multiInp' = rownonmiss(`correct' `drop' `newid'), strok *Check that all rows have at most one correction cap assert `multiInp' == 0 | `multiInp' == 1 @@ -278,14 +278,14 @@ Section 3.3.4 Make sure that either option droprest is specified, or that drop was correctly indicated for all observations. 
i.e.; if - correct or newID was indicated for at least one duplicate in + correct or newid was indicated for at least one duplicate in a duplicate group, then all other observations should be indicated as drop (unless droprest is specified) ******************/ *Generate dummy if there is any correction for this observation - gen `anyCorrection' = !missing(`correct') | !missing(`newID') + gen `anyCorrection' = !missing(`correct') | !missing(`newid') *Count number of observations with any correction in suplicates group bys `idvar' : egen `groupAnyCorrection' = total(`anyCorrection') @@ -330,29 +330,29 @@ *Error multiple input if `local_multiInp' == 1 { - display as error "{phang}The following observations have more than one correction. Only one correction (correct, drop or newID) per row is allowed{p_end}" - list `idvar' `dupListID' `correct' `drop' `newID' `uniquevars' if `multiInp' > 1 + display as error "{phang}The following observations have more than one correction. Only one correction (correct, drop or newid) per row is allowed{p_end}" + list `idvar' `duplistid' `correct' `drop' `newid' `uniquevars' if `multiInp' > 1 di "" } *Error multiple correct if `local_multiCorr' == 1 { display as error "{phang}The following observations are in a duplicate group where more than one observation is listed as correct. 
Only one observation per duplicate group can be correct{p_end}" - list `idvar' `dupListID' `correct' `drop' `newID' `uniquevars' if `groupNumCorrect' > 1 + list `idvar' `duplistid' `correct' `drop' `newid' `uniquevars' if `groupNumCorrect' > 1 di "" } *Error in incorrect string if `local_inputNotYes' == 1 { display as error "{phang}The following observations have an answer in either correct or drop that is neither yes nor y{p_end}" - list `idvar' `dupListID' `correct' `drop' `uniquevars' if `inputNotYes' == 1 + list `idvar' `duplistid' `correct' `drop' `uniquevars' if `inputNotYes' == 1 di "" } *Error is not specfied as drop if `local_notDrop' == 1 { display as error "{phang}The following observations are not explicitly indicated as drop while other duplicates in the same duplicate group are corrected. Either manually indicate as drop or see option droprest{p_end}" - list `idvar' `dupListID' `correct' `drop' `newID' `uniquevars' if `notDrop' == 1 + list `idvar' `duplistid' `correct' `drop' `newid' `uniquevars' if `notDrop' == 1 di "" } @@ -471,8 +471,8 @@ *Generate the excel variables used for indicating correction foreach excelvar of local excelVars { - *Create all variables apart from dupListID as string vars - if inlist("`excelvar'", "`dupListID'", "`newID'") { + *Create all variables apart from duplistid as string vars + if inlist("`excelvar'", "`duplistid'", "`newid'") { gen `excelvar' = . 
} else { @@ -488,7 +488,7 @@ * Generate a local that is 1 if there are new duplicates local unaddressedNewExcel 0 - count if missing(`dateFixed') + count if missing(`datefixed') if `r(N)' > 0 local unaddressedNewExcel 1 /****************** @@ -496,30 +496,30 @@ ******************/ * Add date first time duplicate was identified - replace ``dateListed'' = "`date'" if missing(`dateListed') + replace `datelisted' = "`date'" if missing(`datelisted') - ** Add today's date to variable dateFixed if dateFixed + ** Add today's date to variable datefixed if datefixed * is empty and at least one correction is added - replace `dateFixed' = "`date'" if missing(`dateFixed') & (!missing(`correct') | !missing(`drop') | !missing(`newID')) + replace `datefixed' = "`date'" if missing(`datefixed') & (!missing(`correct') | !missing(`drop') | !missing(`newid')) /****************** Section 5.3.2 Duplicate report list ID ******************/ - ** Sort after dupListID and after ID var for - * duplicates currently without dupListID - sort `dupListID' `idvar' + ** Sort after duplistid and after ID var for + * duplicates currently without duplistid + sort `duplistid' `idvar' - ** Assign dupListID 1 to the top row if no duplicate + ** Assign duplistid 1 to the top row if no duplicate * list IDs have been generated so far. - replace `dupListID' = 1 if _n == 1 & missing(`dupListID') + replace `duplistid' = 1 if _n == 1 & missing(`duplistid') ** Generate new IDs based on the row above instead of directly * from the row number. That prevents duplicates in the list in * case an observation is deleted. The first observation with * missing value will have an ID that is one digit higher than * the highest ID already in the list - replace `dupListID' = `dupListID'[_n - 1] + 1 if missing(`dupListID') + replace `duplistid' = `duplistid'[_n - 1] + 1 if missing(`duplistid') /****************** Section 5.4 @@ -603,25 +603,25 @@ /****************** Section 6.2.1 ID var in original file is string. 
Either - newID was imported as string or the variable + newid was imported as string or the variable is made string. Easy. ******************/ *Test if there are any corrections by new ID - cap assert missing(`newID') + cap assert missing(`newid') if _rc { local idtype : type `idvar' - local idtypeNew : type `newID' + local idtypeNew : type `newid' - *If ID var is string but newID is not, then just make it string + *If ID var is string but newid is not, then just make it string if substr("`idtype'",1,3) == "str" & substr("`idtypeNew'",1,3) != "str" { - tostring `newID' , replace - replace `newID' = "" if `newID' == "." + tostring `newid' , replace + replace `newid' = "" if `newid' == "." } - *If ID var is numeric but the newID is loaded as string + *If ID var is numeric but the newid is loaded as string else if substr("`idtype'",1,3) != "str" & substr("`idtypeNew'",1,3) == "str" { * Check if [tostringok] is specificed: @@ -637,23 +637,23 @@ else { * Create a local with all non-numeric values - levelsof `newID' if missing(real(`newID')), local(NaN_values) clean + levelsof `newid' if missing(real(`newid')), local(NaN_values) clean * Output error message - di as error "{phang}The ID variable `idvar' is numeric but newID has these non-numeric values: `NaN_values'. Update newID to only contain numeric values or see option tostringok.{p_end}" + di as error "{phang}The ID variable `idvar' is numeric but newid has these non-numeric values: `NaN_values'. 
Update newid to only contain numeric values or see option tostringok.{p_end}" error 109 exit } } *After making sure that type is ok, update the IDs - replace `idvar' = `newID' if !missing(`newID') + replace `idvar' = `newid' if !missing(`newid') /****************** Section 6.3 - Test that values in newID + Test that values in newid were neither used twice nor already existed ******************/ From 0229c2344cf206a18b599c5f52a73b44c4c6ba1f Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 26 Feb 2019 18:24:42 -0500 Subject: [PATCH 08/39] .ado file fix: Check for spaces in user specified variable name --- src/ado_files/ieduplicates.ado | 37 ++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index 7714e54b..ea94e888 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -54,16 +54,49 @@ * Create a list of the variables created by this command to put in the report + ******************** + * Test that each manually entered Excel varaible name is valid, or assigned the default name + *For optioin to change var names. Setting a default name of columns (in case user did not specify the variable name) local deafultvars duplistid datelisted datefixed correct drop newid initials notes - foreach deafultvar of local deafultvars { + + *trim user input. If no input string is empty, which returns an empty string + local `deafultvar' = trim("``deafultvar''") //trim() is older syntax, compare to strtrim() in Stata 15 and newer + + *If no user input for this var, assign default name if "``deafultvar''" == "" local `deafultvar' = "`deafultvar'" + + *Check for space in varname (only possible when user assign names manually) + if strpos("`deafultvar'", " ") != 0 { + + noi di as error "{phang}The Excel report variable name [`deafultvar'] should not contain any space. 
Please change the variable name.{p_end}" + noi di "" + error 198 + exit + } } - + + + ******************** + * Excel variables values are ok on their own, test in relation to each other and varaiblaes already in the data + * Test that no variable with the name needed for the excel report already exist in the data set local excelVars `duplistid' `datelisted' `datefixed' `correct' `drop' `newid' `initials' `notes' + * Check for duplicate variable names in the excelVars + local duplicated_names : list dups excelVars + if "`duplicated_names'" != "" { + + local duplicates : list uniq duplicatenames + noi display as error "{phang}The excel report variable name [`duplicates'] already exist within either the default variable name or modified name. Variable names in Excel report must be distinct. Please change the variable name.{p_end}" + noi di "" + error 198 + exit + } + + + * Check for duplicate variable names in the existing dataset foreach excelvar of local excelVars { cap confirm variable `excelvar' if _rc == 0 { From 755e7147074fad5f1a2b92fabe0b443fc7b20c67 Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 5 Mar 2019 11:47:52 -0500 Subject: [PATCH 09/39] .ado file fixed `' for line 72 and 68 --- src/ado_files/ieduplicates.ado | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index ea94e888..0ad2842b 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -68,9 +68,9 @@ if "``deafultvar''" == "" local `deafultvar' = "`deafultvar'" *Check for space in varname (only possible when user assign names manually) - if strpos("`deafultvar'", " ") != 0 { + if strpos("``deafultvar''", " ") != 0 { - noi di as error "{phang}The Excel report variable name [`deafultvar'] should not contain any space. Please change the variable name.{p_end}" + noi di as error "{phang}The Excel report variable name [``deafultvar''] should not contain any space. 
Please change the variable name.{p_end}" noi di "" error 198 exit From b27248edec861cffa21e2423cbe642702da6a56e Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 5 Mar 2019 16:37:04 -0500 Subject: [PATCH 10/39] adding test_script folder and ieduplicate_test_script.do --- .../ieduplicates/ieduplicate_test_script.do | 137 ++++++++++++++++++ src/test_scripts/mastertestfile.do | 10 ++ src/test_scripts/testfile.do | 8 + 3 files changed, 155 insertions(+) create mode 100644 src/test_scripts/ieduplicates/ieduplicate_test_script.do create mode 100644 src/test_scripts/mastertestfile.do create mode 100644 src/test_scripts/testfile.do diff --git a/src/test_scripts/ieduplicates/ieduplicate_test_script.do b/src/test_scripts/ieduplicates/ieduplicate_test_script.do new file mode 100644 index 00000000..20f246f4 --- /dev/null +++ b/src/test_scripts/ieduplicates/ieduplicate_test_script.do @@ -0,0 +1,137 @@ + +*Test for user specified variable names in Excel report +******************************************************** +cscript ieduplicates_test adofile ieduplicates +which ieduplicates + +clear all + +global base "C:\Users\Saori\Desktop\Semester 4\z Other\DIME\iedup test" +cd "$base" +qui do "C:\Users\Saori\Documents\Github\iefieldkit\src\ado_files\ieduplicates.ado" + + + + + +*Should return no error +*********************************** +*1) With no options + use "$base\ieduplicates_test", clear + ieduplicates iid , folder("$base") uniquevars(unique_id) + + use "$base\ieduplicates_test", clear + ieduplicates iid , folder("$base") uniquevars(unique_id) +*No error + rm "$base\iedupreport.xlsx" + + +*2) With options +*Keep vars + use "$base\ieduplicates_test", clear + ieduplicates iid , folder("$base") uniquevars(unique_id) /// + keepvars(rep rate age wgt) + + use "$base\ieduplicates_test", clear + ieduplicates iid , folder("$base") uniquevars(unique_id) /// + keepvars(rep rate age wgt) +*No error + rm "$base\iedupreport.xlsx" + + +*Tostringok + use "$base\ieduplicates_test", clear + 
ieduplicates iid , folder("$base") uniquevars(unique_id) + + use "$base\ieduplicates_test", clear + ieduplicates iid , folder("$base") uniquevars(unique_id) /// + tostringok +*No error + rm "$base\iedupreport.xlsx" + + + +*Droprest + use "$base\ieduplicates_test", clear + local new= _N+1 + set obs `new' + replace iid=80865 if _n==_N + replace unique_id=_N if _n==_N + save "$base\ieduplicates_test2", replace + ieduplicates iid , folder("$base") uniquevars(unique_id) + + use "$base\ieduplicates_test2", clear + ieduplicates iid , folder("$base") uniquevars(unique_id) /// + droprest +*No error + rm "$base\iedupreport.xlsx" + + + +*Nodaily + use "$base\ieduplicates_test", clear + ieduplicates iid , folder("$base") uniquevars(unique_id) /// + nodaily + + use "$base\ieduplicates_test", clear + ieduplicates iid , folder("$base") uniquevars(unique_id) /// + nodaily +*No error + rm "$base\iedupreport.xlsx" + + + +*Suffix + use "$base\ieduplicates_test", clear + ieduplicates iid , folder("$base") uniquevars(unique_id) /// + suffix(_test) + + use "$base\ieduplicates_test", clear + ieduplicates iid , folder("$base") uniquevars(unique_id) +*No error + rm "$base\iedupreport_test.xlsx" + rm "$base\iedupreport.xlsx" + + +*Excel var name specification + use "$base\ieduplicates_test", clear + ieduplicates iid , folder("$base") uniquevars(unique_id) /// + duplistid(" DuplicateID") datelisted("Date_Listed ") datefixed("DateFixed") correct(Keep_this) drop("This_is_mistake ") newid(ID) initials(Signature) notes(Remarks) + + use "$base\ieduplicates_test", clear + ieduplicates iid , folder("$base") uniquevars(unique_id) /// + duplistid(" DuplicateID") datelisted("Date_Listed ") datefixed("DateFixed") correct(Keep_this) drop("This_is_mistake ") newid(ID) initials(Signature) notes(Remarks) +*No error + rm "$base\iedupreport.xlsx" + + + +*3) With multiiple options +*All of the options + use "$base\ieduplicates_test2", clear + ieduplicates iid , folder("$base") uniquevars(unique_id) /// + 
keepvars(rep rate age wgt) tostringok droprest nodaily suffix(_test) /// + duplistid(" DuplicateID") datelisted("Date_Listed ") datefixed("DateFixed") correct(Keep_this) drop("This_is_mistake ") newid(ID) initials(Signature) notes(Remarks) + + use "$base\ieduplicates_test2", clear + ieduplicates iid , folder("$base") uniquevars(unique_id) /// + keepvars(rep rate age wgt) tostringok droprest nodaily suffix(_test) /// + duplistid(" DuplicateID") datelisted("Date_Listed ") datefixed("DateFixed") correct(Keep_this) drop("This_is_mistake ") newid(ID) initials(Signature) notes(Remarks) +*No error + rm "$base\iedupreport_test.xlsx" + + +* Mix of options at random + use "$base\ieduplicates_test2", clear + ieduplicates iid , folder("$base") uniquevars(unique_id) /// + tostringok droprest nodaily /// + duplistid(" DuplicateID") + + use "$base\ieduplicates_test2", clear + ieduplicates iid , folder("$base") uniquevars(unique_id) /// + tostringok droprest nodaily /// + duplistid(" DuplicateID") +*No error + rm "$base\iedupreport.xlsx" + + diff --git a/src/test_scripts/mastertestfile.do b/src/test_scripts/mastertestfile.do new file mode 100644 index 00000000..2cbd8fe6 --- /dev/null +++ b/src/test_scripts/mastertestfile.do @@ -0,0 +1,10 @@ +gl base "C:\Users\Saori\Documents\Github\iefieldkit\src\test" + +clear +discard +set more off +cd "$base" +quietly log using test, replace +do test +quietly log close + diff --git a/src/test_scripts/testfile.do b/src/test_scripts/testfile.do new file mode 100644 index 00000000..5982e62d --- /dev/null +++ b/src/test_scripts/testfile.do @@ -0,0 +1,8 @@ +cd "C:\Users\Saori\Documents\Github\iefieldkit\src\test" + +about +query compilenumber +do ieduplicate_test /* assert */ + +do merge /* merge/append */ +do nchi2 /* nchi2() function */ From 3c1419ea4f002a7c2a5c24e9b9cc3d7ad3e31f68 Mon Sep 17 00:00:00 2001 From: Unknown Date: Wed, 6 Mar 2019 09:34:25 -0500 Subject: [PATCH 11/39] adding file paths for each user --- 
.../ieduplicates/ieduplicate_test_script.do | 17 ++++++++++++----- src/test_scripts/testfile.do | 13 ++++++++++++- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/test_scripts/ieduplicates/ieduplicate_test_script.do b/src/test_scripts/ieduplicates/ieduplicate_test_script.do index 20f246f4..468c4bc8 100644 --- a/src/test_scripts/ieduplicates/ieduplicate_test_script.do +++ b/src/test_scripts/ieduplicates/ieduplicate_test_script.do @@ -6,11 +6,18 @@ which ieduplicates clear all -global base "C:\Users\Saori\Desktop\Semester 4\z Other\DIME\iedup test" -cd "$base" -qui do "C:\Users\Saori\Documents\Github\iefieldkit\src\ado_files\ieduplicates.ado" - - +di "The user for this case is: " c(username) + +if "`c(username)'" == "Saori" { + global base "C:\Users\Saori\Desktop\Semester 4\z Other\DIME\iedup test" + cd "$base" + qui do "C:\Users\Saori\Documents\Github\iefieldkit\src\ado_files\ieduplicates.ado" + +} + else { // *.... add other people's global here + di as err "Add path for your machine here" + e + } diff --git a/src/test_scripts/testfile.do b/src/test_scripts/testfile.do index 5982e62d..f3baab11 100644 --- a/src/test_scripts/testfile.do +++ b/src/test_scripts/testfile.do @@ -1,4 +1,15 @@ -cd "C:\Users\Saori\Documents\Github\iefieldkit\src\test" + +di "The user for this case is: " c(username) + +if "`c(username)'" == "Saori" { + gl base "C:\Users\Saori\Documents\Github\iefieldkit\src\test" +} + else { // *.... 
add other people's global here + di as err "Add path for your machine here" + e + } + +cd "$base" about query compilenumber From 0f0389f84e768891953908aa2c62d304ccc369ba Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 12 Mar 2019 11:32:15 -0400 Subject: [PATCH 12/39] adding file paths for mastertestfile too --- src/test_scripts/mastertestfile.do | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/test_scripts/mastertestfile.do b/src/test_scripts/mastertestfile.do index 2cbd8fe6..412c47f6 100644 --- a/src/test_scripts/mastertestfile.do +++ b/src/test_scripts/mastertestfile.do @@ -1,4 +1,14 @@ -gl base "C:\Users\Saori\Documents\Github\iefieldkit\src\test" + +di "The user for this case is: " c(username) + +if "`c(username)'" == "Saori" { + gl base "C:\Users\Saori\Documents\Github\iefieldkit\src\test" +} + else { // *.... add other people's global here + di as err "Add path for your machine here" + e + } + clear discard From 20f7408bdfda651785ac296024034e2ee0d135d2 Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 12 Mar 2019 17:53:22 -0400 Subject: [PATCH 13/39] planning structure for enhancement #60 --- src/ado_files/ieduplicates.ado | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index 0ad2842b..be583ef3 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -82,7 +82,7 @@ * Excel variables values are ok on their own, test in relation to each other and varaiblaes already in the data * Test that no variable with the name needed for the excel report already exist in the data set - local excelVars `duplistid' `datelisted' `datefixed' `correct' `drop' `newid' `initials' `notes' + local excelVars `duplistid' `datelisted' `datefixed' `correct' `drop' `newid' `initials' `notes' `diff' * Check for duplicate variable names in the excelVars local duplicated_names : list dups excelVars @@ -267,7 +267,7 @@ replace `drop' = 
"yes" if `drop' == "y" *Check that variables are either empty or "yes" - gen `inputNotYes' = !((`correct' == "yes" | `correct' == "") & (`drop' == "yes" | `drop' == "")) + gen `inputNotYes' = !((`correct' == "yes" | `correct' == "") & (`drop' == "yes" | `drop' == "")) *Set local to 1 if error should be outputted cap assert `inputNotYes' == 0 @@ -487,6 +487,13 @@ Keep only duplicates for the report ******************/ + *add diffvars here + qui iecompdup iid, id(1) didi keepdiff more2ok + + `diff' + + + *Keep if observation is part of duplicate group keep if `dup' != 0 From face845252bc66e43ac3fe497bb827fe382fea72 Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 26 Mar 2019 13:37:56 -0400 Subject: [PATCH 14/39] added enhancement code for adding variables on ieduplicate --- src/ado_files/ieduplicates.ado | 53 +++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index be583ef3..6b23e5dd 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -7,7 +7,7 @@ qui { syntax varname , FOLder(string) UNIQUEvars(varlist) [KEEPvars(varlist) tostringok droprest nodaily SUFfix(string) /// - duplistid(string) datelisted(string) datefixed(string) correct(string) drop(string) newid(string) initials(string) notes(string)] + duplistid(string) datelisted(string) datefixed(string) correct(string) drop(string) newid(string) initials(string) notes(string) listofdiffs(string)] version 11.0 @@ -58,7 +58,7 @@ * Test that each manually entered Excel varaible name is valid, or assigned the default name *For optioin to change var names. Setting a default name of columns (in case user did not specify the variable name) - local deafultvars duplistid datelisted datefixed correct drop newid initials notes + local deafultvars duplistid datelisted datefixed correct drop newid initials notes listofdiffs foreach deafultvar of local deafultvars { *trim user input. 
If no input string is empty, which returns an empty string @@ -82,7 +82,7 @@ * Excel variables values are ok on their own, test in relation to each other and varaiblaes already in the data * Test that no variable with the name needed for the excel report already exist in the data set - local excelVars `duplistid' `datelisted' `datefixed' `correct' `drop' `newid' `initials' `notes' `diff' + local excelVars `duplistid' `datelisted' `datefixed' `correct' `drop' `newid' `initials' `notes' `listofdiffs' * Check for duplicate variable names in the excelVars local duplicated_names : list dups excelVars @@ -478,6 +478,33 @@ tempvar dup duplicates tag `idvar', gen(`dup') + + *SI_NOTE: Add list of variables that are different between the two duplicated id value in excel report in 'listofdiffs' variable + levelsof `idvar' if `dup' > 0, local(list_dup_ids) + + foreach id of local list_dup_ids { + + count if `idvar' == `id' + + *Check if duplicated id has more than 2 duplicates + if `r(N)' > 2 { + + local difflist_`id' "Cannot list variables for IDs with more than 2 duplicates" + //replace `listofdiffs' = "Cannot list variables for IDs with more than 2 duplicates" if `idvar' == `id' + + } + else { + + *Get the list of variables that are different between the two duplicated id value + qui iecompdup `idvar', id(`id') + + *limit lenght if very long + local difflist_`id' "`r(diffvars)'" + //replace `listofdiffs' = "`r(diffvars)'" if `idvar' == `id' + } + + } + *Test if there are any duplicates cap assert `dup'==0 if _rc { @@ -487,13 +514,6 @@ Keep only duplicates for the report ******************/ - *add diffvars here - qui iecompdup iid, id(1) didi keepdiff more2ok - - `diff' - - - *Keep if observation is part of duplicate group keep if `dup' != 0 @@ -501,7 +521,7 @@ * If Excel file exists keep excel vars and * variables passed as arguments in the * command - keep `argumentVars' `excelVars' + keep `argumentVars' `excelVars' } else { * Keep only variables passed as arguments in @@ 
-519,8 +539,19 @@ gen `excelvar' = "" } } + + } + + + + //Assign the listdiff values + foreach id of local list_dup_ids { + replace `listofdiffs' = "`difflist_`id''" if `idvar' == `id' + } + + /****************** Section 5.3 Update the excel vars that are not updated manually From f30bd317ef7320b37bfe795d5d07116614e711a1 Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 26 Mar 2019 16:20:42 -0400 Subject: [PATCH 15/39] adding limit to string length on "listofdiffs" variable in excel report --- src/ado_files/ieduplicates.ado | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index 6b23e5dd..2332a5fa 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -479,7 +479,7 @@ duplicates tag `idvar', gen(`dup') - *SI_NOTE: Add list of variables that are different between the two duplicated id value in excel report in 'listofdiffs' variable + *Add list of variables that are different between the two duplicated id value in excel report in 'listofdiffs' variable levelsof `idvar' if `dup' > 0, local(list_dup_ids) foreach id of local list_dup_ids { @@ -498,9 +498,21 @@ *Get the list of variables that are different between the two duplicated id value qui iecompdup `idvar', id(`id') - *limit lenght if very long - local difflist_`id' "`r(diffvars)'" - //replace `listofdiffs' = "`r(diffvars)'" if `idvar' == `id' + *SI_NOTE: limit lenght if very long + + *255-29 (characters for "see iecompdup for full list")= 226 + if strlen("`r(diffvars)'") > 256 { + + local difflist_`id' = substr("`r(diffvars)'" ,1 ,226) + " see iecompdup for full list" + + } + else { + + local difflist_`id' "`r(diffvars)'" + //replace `listofdiffs' = "`r(diffvars)'" if `idvar' == `id' + + } + } } @@ -543,8 +555,7 @@ } - - + //Assign the listdiff values foreach id of local list_dup_ids { @@ -630,6 +641,9 @@ local daily_output " and a daily copy have been saved to the Daily folder" 
} + *Making listofdiffs come last + order `listofdiffs', last + *Export main report export excel using "`folder'/iedupreport`suffix'.xlsx" , firstrow(variables) replace nolabel From 923f0fac04d1c0401d3c16b2f790a6157191a366 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Tue, 2 Apr 2019 17:53:16 -0400 Subject: [PATCH 16/39] ietestform : output any required list labels #64 --- src/ado_files/ietestform.ado | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/ado_files/ietestform.ado b/src/ado_files/ietestform.ado index 81b108a5..fc3e492f 100644 --- a/src/ado_files/ietestform.ado +++ b/src/ado_files/ietestform.ado @@ -490,7 +490,7 @@ qui { *Variables that must be included every time local name_vars "name" - local cmd_vars "type required" // Include as needed eventually. "readonly appearance" + local cmd_vars "type required appearance" // Include as needed eventually. "readonly" local msg_vars "`labelvars'" local code_vars "" // Include as needed eventually. " constraint relevance calculation repeat_count choice_filter" @@ -638,6 +638,16 @@ qui { local error_msg "Fields of type note creates an impassable view that are impossible for the enumerator to sweep pass. Make sure that is the inentional behavior for the following fields:" noi report_file add , report_tempfile("`report_tempfile'") wikifragment("Required_Column") message("`error_msg'") table("list row type name if note_required == 1") testname("REQUIRED NOTE TYPE FIELD") } + + *List and output required label fields in field-list groups + gen label_required = (req_relevant == 1 & appearance == "label" & lower(required) == "yes") + count if label_required == 1 + if `r(N)' > 0 { + + *Prepare message and write it + local error_msg "Fields with appearance [label] (inside a field-list group) must not be required. 
Label fields are currently required in the following rows:" + noi report_file add , report_tempfile("`report_tempfile'") wikifragment("Required_Column") message("`error_msg'") table("list row type name if label_required == 1") testname("REQUIRED LABEL FIELD") + } } end From 391d0a1b79c7e2a41358ced65cc530ee1216e209 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Tue, 2 Apr 2019 18:11:04 -0400 Subject: [PATCH 17/39] ietestform : also exclude label fields from rewquired test --- src/ado_files/ietestform.ado | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ado_files/ietestform.ado b/src/ado_files/ietestform.ado index fc3e492f..74cb3eb8 100644 --- a/src/ado_files/ietestform.ado +++ b/src/ado_files/ietestform.ado @@ -619,8 +619,8 @@ qui { inlist(type, "start", "end", "deviceid", "subscriberid", "simserial", "phonenumber", "username", "caseid") | /// Default meta types doen not need to be required missing(type)) /// Rows that are not fields shold be skipped - *List and output non-note fields that are not required - gen nonnote_nonrequired = (req_relevant == 1 & type != "note" & lower(required) != "yes") + *List and output non-note, non-label fields that are not required + gen nonnote_nonrequired = (req_relevant == 1 & type != "note" & appearance != "label" & lower(required) != "yes") count if nonnote_nonrequired == 1 if `r(N)' > 0 { From 00ccb521525f4b56a5ce0a94eb2b2f46fcd368be Mon Sep 17 00:00:00 2001 From: Unknown Date: Wed, 10 Apr 2019 17:23:09 -0400 Subject: [PATCH 18/39] Added test script for "listofdiff" extention, minor phrasing fix ": see iedupcomp for..." 
on .ado file --- src/ado_files/ieduplicates.ado | 4 +- .../ieduplicates/ieduplicate_test_script.do | 66 +++++++++++++++++++ 2 files changed, 68 insertions(+), 2 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index 2332a5fa..a281b16e 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -500,10 +500,10 @@ *SI_NOTE: limit lenght if very long - *255-29 (characters for "see iecompdup for full list")= 226 + *255-29 (characters for " :see iecompdup for full list")= 226 if strlen("`r(diffvars)'") > 256 { - local difflist_`id' = substr("`r(diffvars)'" ,1 ,226) + " see iecompdup for full list" + local difflist_`id' = substr("`r(diffvars)'" ,1 ,226) + " :see iecompdup for full list" } else { diff --git a/src/test_scripts/ieduplicates/ieduplicate_test_script.do b/src/test_scripts/ieduplicates/ieduplicate_test_script.do index 468c4bc8..c92e87d2 100644 --- a/src/test_scripts/ieduplicates/ieduplicate_test_script.do +++ b/src/test_scripts/ieduplicates/ieduplicate_test_script.do @@ -142,3 +142,69 @@ if "`c(username)'" == "Saori" { rm "$base\iedupreport.xlsx" +*ieduplicate listing of variables that differs + use "$cd\ieduplicates_test", clear + + gen this_a_fake_long_named_variable1 =. + replace this_a_fake_long_named_variable1=1 if unique_id==80865 + + gen this_a_fake_long_named_variable2 =. + replace this_a_fake_long_named_variable2=1 if unique_id==80865 + + gen this_a_fake_long_named_variable3 =. + replace this_a_fake_long_named_variable3=1 if unique_id==80865 + + gen this_a_fake_long_named_variable4 =. + replace this_a_fake_long_named_variable4=1 if unique_id==80865 + + gen this_a_fake_long_named_variable5 =. + replace this_a_fake_long_named_variable5=1 if unique_id==80865 + + gen this_a_fake_long_named_variable6 =. + replace this_a_fake_long_named_variable6=1 if unique_id==80865 + + gen this_a_fake_long_named_variable7 =. 
+ replace this_a_fake_long_named_variable7=1 if unique_id==80865 + + gen this_a_fake_long_named_variable8 =. + replace this_a_fake_long_named_variable8=1 if unique_id==80865 + + gen this_a_fake_long_named_variable9 =. + replace this_a_fake_long_named_variable9=1 if unique_id==80865 + + save "$base\test3", replace + use "$base\test3", clear + + ieduplicates iid , folder("C:\Users\Saori\Desktop\Semester 4\z Other\DIME\iedup test") uniquevars(unique_id) /// + duplistid(" DuplicateID") datefixed(notes_enumerators) /// + tostringok droprest nodaily + + +* Test with duplicate IDs where all variables differ + use "$base\test3", clear + replace iid=80865 if iid==80866 + + save "$base\test4", replace + use "$base\test4", clear + + ieduplicates iid , folder("C:\Users\Saori\Desktop\Semester 4\z Other\DIME\iedup test") uniquevars(unique_id) /// + duplistid(" DuplicateID") datefixed(notes_enumerators) /// + tostringok droprest nodaily + + +* Test with duplicate IDs where only one variable differs + use "$base\test3", clear + expand 2 if iid==1 + replace unique_id=125224 if _n==125224 + + save "$base\test4", replace + use "$base\test4", clear + + ieduplicates iid , folder("C:\Users\Saori\Desktop\Semester 4\z Other\DIME\iedup test") uniquevars(unique_id) /// + duplistid(" DuplicateID") datefixed(notes_enumerators) /// + tostringok droprest nodaily + +*****************SI_NOTE: The report after importing the Excel Report, the variable "listofdiffs" include excel file's variables as well as the data set's. 
+ +*No error + rm "$base\iedupreport.xlsx" From 87aba77238a2a9b07432d32906fc185f9deb8ab6 Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 16 Apr 2019 09:39:05 -0400 Subject: [PATCH 19/39] changed test code option --- src/test_scripts/ieduplicates/ieduplicate_test_script.do | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test_scripts/ieduplicates/ieduplicate_test_script.do b/src/test_scripts/ieduplicates/ieduplicate_test_script.do index c92e87d2..26608672 100644 --- a/src/test_scripts/ieduplicates/ieduplicate_test_script.do +++ b/src/test_scripts/ieduplicates/ieduplicate_test_script.do @@ -189,7 +189,7 @@ if "`c(username)'" == "Saori" { ieduplicates iid , folder("C:\Users\Saori\Desktop\Semester 4\z Other\DIME\iedup test") uniquevars(unique_id) /// duplistid(" DuplicateID") datefixed(notes_enumerators) /// - tostringok droprest nodaily + tostringok droprest * Test with duplicate IDs where only one variable differs @@ -202,7 +202,7 @@ if "`c(username)'" == "Saori" { ieduplicates iid , folder("C:\Users\Saori\Desktop\Semester 4\z Other\DIME\iedup test") uniquevars(unique_id) /// duplistid(" DuplicateID") datefixed(notes_enumerators) /// - tostringok droprest nodaily + tostringok droprest *****************SI_NOTE: The report after importing the Excel Report, the variable "listofdiffs" include excel file's variables as well as the data set's. 
From d8cc16bf08b400cf57e6bcc835b8bf4162ca0e15 Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 16 Apr 2019 13:42:51 -0400 Subject: [PATCH 20/39] Code fix: for problem that excel var appeared on listofdiffs --- src/ado_files/ieduplicates.ado | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index a281b16e..078e4378 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -494,25 +494,31 @@ } else { - + *Get the list of variables that are different between the two duplicated id value qui iecompdup `idvar', id(`id') + local diffvars "`r(diffvars)'" + + * Only checking variables in the original data set and not variables in Excel report. + local diffvars: list diffvars - excelVars + *SI_NOTE: limit lenght if very long *255-29 (characters for " :see iecompdup for full list")= 226 - if strlen("`r(diffvars)'") > 256 { + if strlen("`diffvars'") > 256 { local difflist_`id' = substr("`r(diffvars)'" ,1 ,226) + " :see iecompdup for full list" } else { - local difflist_`id' "`r(diffvars)'" + local difflist_`id' "`diffvars'" //replace `listofdiffs' = "`r(diffvars)'" if `idvar' == `id' } + } } From 6f9359441e0b8d652384fa1832c17842abdeded5 Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 16 Apr 2019 13:44:06 -0400 Subject: [PATCH 21/39] Deleting note for fix from test file --- src/test_scripts/ieduplicates/ieduplicate_test_script.do | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/test_scripts/ieduplicates/ieduplicate_test_script.do b/src/test_scripts/ieduplicates/ieduplicate_test_script.do index 26608672..3c7524cc 100644 --- a/src/test_scripts/ieduplicates/ieduplicate_test_script.do +++ b/src/test_scripts/ieduplicates/ieduplicate_test_script.do @@ -204,7 +204,5 @@ if "`c(username)'" == "Saori" { duplistid(" DuplicateID") datefixed(notes_enumerators) /// tostringok droprest -*****************SI_NOTE: The report after importing the Excel Report, the 
variable "listofdiffs" include excel file's variables as well as the data set's. - *No error rm "$base\iedupreport.xlsx" From 0b0ce4c199f617a790676e3ec0989483a50c110e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Tue, 14 May 2019 11:38:25 -0400 Subject: [PATCH 22/39] ieduplicates : update error message for name conflict show that there is an option to solve this with now --- src/ado_files/ieduplicates.ado | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index 078e4378..cc5e32f6 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -101,7 +101,7 @@ cap confirm variable `excelvar' if _rc == 0 { *Variable exist, output error - noi display as error "{phang}The data set already have a variable called {inp:`excelvar'} which is a name that this command requires to be availible. Please change the name of the variable already in the data set. Future versions of this command will allow the user to rename the variables used by this command.{p_end}" + noi di as error "{phang}The Excel report variable [``excelvar''] cannot be created as that variable already exist in the data set. 
Use the option called ``excelvar''() to change the name of the variable to be created in the report.{p_end}" noi di "" error 198 exit From 4cb354c8a87c8d86911d76edf4d877a2c27f18f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Tue, 14 May 2019 11:42:52 -0400 Subject: [PATCH 23/39] ieduplicates : backward compatibility - create list of difference var --- src/ado_files/ieduplicates.ado | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index cc5e32f6..84c5c217 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -194,13 +194,20 @@ * Check if the variable name in the excel spreadsheet remain unchanged from the original report outputted. foreach excelvar of local excelVars { cap confirm variable `excelvar' - if _rc !=0 { - *Variable does notexist, output error - noi display as error "{phang}The original spreadsheet variable name {inp:`excelvar'} no longer exist. Please change the spreadsheet variable name back to the origional name.{p_end}" + + *The listofdiffs var was not a part of the original vars so old report might not have it. So create it if it does not exits. + if _rc !=0 & "`excelvar'" == "listofdiffs" { + gen `excelvar' = "" + } + + *Required variable does not exist in Excel file, output error + else if _rc !=0 { + + noi display as error "{phang}The spreadsheet variable {inp:`excelvar'} does not exist. Either you changed the name of the variables in the spreadsheet, or you are using options to ieduplicates that are changing the variable names expected in the spreadsheet. Change the names in the spreadsheet, or use the command options to change the name of variable {inp:`excelvar'}.{p_end}" error 198 - exit + exit } - + ** All excelVars but duplistid and newid should be string. 
duplistid * should be numeric and the type of newid should be based on the user input if !inlist("`excelvar'", "`duplistid'", "`newid'") { From 0fd51334ab62961f8afaac5ac19fbb30e6f1f5b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Tue, 14 May 2019 11:43:19 -0400 Subject: [PATCH 24/39] ieduplicates : backward compatibility : old excel reports to lower --- src/ado_files/ieduplicates.ado | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index 84c5c217..448784c7 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -184,7 +184,7 @@ if `fileExists' { *Load excel file. Load all vars as string and use metadata from Section 1 - import excel "`folder'/iedupreport`suffix'.xlsx" , clear firstrow + import excel "`folder'/iedupreport`suffix'.xlsx" , clear firstrow case(lower) *Drop empty rows that otherwise create error in merge that requires unique key tempvar count_nonmissing_values From 0545bb37f431b9331b918eca8a123648b0077dd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Tue, 14 May 2019 11:44:45 -0400 Subject: [PATCH 25/39] ieduplicates : change truncated string --- src/ado_files/ieduplicates.ado | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index 448784c7..3cd4428c 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -511,11 +511,11 @@ local diffvars: list diffvars - excelVars *SI_NOTE: limit lenght if very long - - *255-29 (characters for " :see iecompdup for full list")= 226 + + *255-29 (characters for " ||| List truncated, use iecompdup for full list")= 226 if strlen("`diffvars'") > 256 { - - local difflist_`id' = substr("`r(diffvars)'" ,1 ,226) + " :see iecompdup for full list" + + local difflist_`id' = substr("`r(diffvars)'" ,1 ,207) + " ||| List truncated, use iecompdup for 
full list" } else { From 97bb2520ecc6f155b4b55fbc884276a1d27f4114 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Tue, 14 May 2019 11:52:22 -0400 Subject: [PATCH 26/39] ieduplicates : update N>3 list of diff message --- src/ado_files/ieduplicates.ado | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index 3cd4428c..88089938 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -493,15 +493,13 @@ count if `idvar' == `id' - *Check if duplicated id has more than 2 duplicates + *Check if duplicated id has more than 2 duplicates, as iecompdup must be run manually to check difference when there is more than 2 observations with same ID if `r(N)' > 2 { - - local difflist_`id' "Cannot list variables for IDs with more than 2 duplicates" - //replace `listofdiffs' = "Cannot list variables for IDs with more than 2 duplicates" if `idvar' == `id' + local difflist_`id' "Cannot list differences for duplicates for which 3 or more observations has the same ID, use command iecompdup instead." } else { - + *Get the list of variables that are different between the two duplicated id value qui iecompdup `idvar', id(`id') From 3a03ccaa09ca2e3339a81a4cc7fe3c9b3b129653 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Tue, 14 May 2019 11:54:06 -0400 Subject: [PATCH 27/39] ieduplicates : improve comments and remove some dev notes --- src/ado_files/ieduplicates.ado | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index 88089938..8f20b815 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -506,27 +506,19 @@ local diffvars "`r(diffvars)'" * Only checking variables in the original data set and not variables in Excel report. 
- local diffvars: list diffvars - excelVars - - *SI_NOTE: limit lenght if very long + local diffvars: list diffvars - excelVars + *Truncate list when longer than 256 to fit in old Stata string formats. *255-29 (characters for " ||| List truncated, use iecompdup for full list")= 226 if strlen("`diffvars'") > 256 { - local difflist_`id' = substr("`r(diffvars)'" ,1 ,207) + " ||| List truncated, use iecompdup for full list" - } else { - - local difflist_`id' "`diffvars'" - //replace `listofdiffs' = "`r(diffvars)'" if `idvar' == `id' - + *List of diff is short enough to show in its entirety + local difflist_`id' "`diffvars'" } - - } - - } + } *Test if there are any duplicates cap assert `dup'==0 From c7ae287138edc56281da29178025c61bc55fc2f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Tue, 14 May 2019 11:54:24 -0400 Subject: [PATCH 28/39] ieduplicates : remove excessive white space --- src/ado_files/ieduplicates.ado | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index 8f20b815..b618da45 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -60,15 +60,15 @@ *For optioin to change var names. Setting a default name of columns (in case user did not specify the variable name) local deafultvars duplistid datelisted datefixed correct drop newid initials notes listofdiffs foreach deafultvar of local deafultvars { - + *trim user input. 
If no input string is empty, which returns an empty string local `deafultvar' = trim("``deafultvar''") //trim() is older syntax, compare to strtrim() in Stata 15 and newer *If no user input for this var, assign default name - if "``deafultvar''" == "" local `deafultvar' = "`deafultvar'" + if "``deafultvar''" == "" local `deafultvar' = "`deafultvar'" *Check for space in varname (only possible when user assign names manually) - if strpos("``deafultvar''", " ") != 0 { + if strpos("``deafultvar''", " ") != 0 { noi di as error "{phang}The Excel report variable name [``deafultvar''] should not contain any space. Please change the variable name.{p_end}" noi di "" @@ -77,7 +77,6 @@ } } - ******************** * Excel variables values are ok on their own, test in relation to each other and varaiblaes already in the data @@ -148,7 +147,6 @@ } - /*********************************************************************** ************************************************************************ @@ -191,7 +189,7 @@ egen `count_nonmissing_values' = rownonmiss(_all), strok drop if `count_nonmissing_values' == 0 - * Check if the variable name in the excel spreadsheet remain unchanged from the original report outputted. + * Check if the variable name in the excel spreadsheet remain unchanged from the original report outputted. 
foreach excelvar of local excelVars { cap confirm variable `excelvar' @@ -274,7 +272,7 @@ replace `drop' = "yes" if `drop' == "y" *Check that variables are either empty or "yes" - gen `inputNotYes' = !((`correct' == "yes" | `correct' == "") & (`drop' == "yes" | `drop' == "")) + gen `inputNotYes' = !((`correct' == "yes" | `correct' == "") & (`drop' == "yes" | `drop' == "")) *Set local to 1 if error should be outputted cap assert `inputNotYes' == 0 @@ -415,7 +413,6 @@ *Save imported data set with all corrections save `preppedReport' - } @@ -491,7 +488,7 @@ foreach id of local list_dup_ids { - count if `idvar' == `id' + count if `idvar' == `id' *Check if duplicated id has more than 2 duplicates, as iecompdup must be run manually to check difference when there is more than 2 observations with same ID if `r(N)' > 2 { @@ -536,7 +533,7 @@ * If Excel file exists keep excel vars and * variables passed as arguments in the * command - keep `argumentVars' `excelVars' + keep `argumentVars' `excelVars' } else { * Keep only variables passed as arguments in @@ -554,14 +551,14 @@ gen `excelvar' = "" } } - + } - + //Assign the listdiff values - foreach id of local list_dup_ids { + foreach id of local list_dup_ids { replace `listofdiffs' = "`difflist_`id''" if `idvar' == `id' } From 206d853314a13b8856e26f25d3d799c670041991 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Tue, 14 May 2019 12:03:56 -0400 Subject: [PATCH 29/39] ieduplicates help : section on excel var names - set up format --- src/help_files/ieduplicates.sthlp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/help_files/ieduplicates.sthlp b/src/help_files/ieduplicates.sthlp index f48c9e9a..d1fb85da 100644 --- a/src/help_files/ieduplicates.sthlp +++ b/src/help_files/ieduplicates.sthlp @@ -41,7 +41,11 @@ command please see the {browse "https://dimewiki.worldbank.org/wiki/Ieduplicates {synopt :{cmdab:nodaily}}disables daily back-up copies of the Excel report{p_end} 
{synopt :{cmdab:suf:fix(}{it:string}{cmd:)}}allows the user to add a suffix to the filename of the Excel report{p_end} -{synopt :{cmdab:duplistid(}{it:string}{cmd:)}}allows the user to customize the variable name "dupListID" in the the Excel report{p_end} +{pstd}{it: {ul:{hi:Excel variable name options:}}}{p_end} + +{pstd}SAORI: Explain these options here, and make{p_end} + +{synopt :{cmdab:duplistid(}{it:string}{cmd:)}}customizes variable {it:duplistid}{p_end} {synopt :{cmdab:datelisted(}{it:string}{cmd:)}}allows the user to customize the variable name "dateListed" in the the Excel report{p_end} {synopt :{cmdab:datefixed(}{it:string}{cmd:)}}allows the user to customize the variable name "dateFixed" in the the Excel report{p_end} {synopt :{cmdab:correct(}{it:string}{cmd:)}}allows the user to customize the variable name "correct" in the the Excel report{p_end} From 3a6203fd27dbe61ed7a1dbabce77b4b910590a11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Tue, 14 May 2019 12:42:29 -0400 Subject: [PATCH 30/39] ieduplicates help : add name options here too --- src/help_files/ieduplicates.sthlp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/help_files/ieduplicates.sthlp b/src/help_files/ieduplicates.sthlp index d1fb85da..4b434904 100644 --- a/src/help_files/ieduplicates.sthlp +++ b/src/help_files/ieduplicates.sthlp @@ -95,6 +95,10 @@ to keep track of who decided how to correct that duplicate and to use {it:notes} the correction was chosen. If {it:initials} and {it:notes} are used, then the Excel report also functions as an excellent documentation of the correction made. 
+{pstd}{it: {ul:{hi:Excel variable name options:}}}{p_end} + +{pstd}SAORI: Explain these options here, and make{p_end} + {space 4}{hline} {title:Options} From 8383351ba6ac2b0b309f8e1da4cc94ba56e01397 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Tue, 14 May 2019 14:10:31 -0400 Subject: [PATCH 31/39] ieduplicates : do not allow upper case in custom names --- src/ado_files/ieduplicates.ado | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index b618da45..c2f146a6 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -64,6 +64,18 @@ *trim user input. If no input string is empty, which returns an empty string local `deafultvar' = trim("``deafultvar''") //trim() is older syntax, compare to strtrim() in Stata 15 and newer + *Test that the customized names only include lower case. This was a compromise + * needed to allow backward compatibility when all excel variable names are + * imported in lower case. 
This follows from the Excel variable names in first + * version had upper case letters, but Stata options cannot use upper case, and + * we want the two to be the same + if "``deafultvar''" != lower("``deafultvar''") { + noi di as error "{phang}For the puprpose of backward version compatibility, the names in option `deafultvar'(``deafultvar'') must not include any upper case letters.{p_end}" + noi di "" + error 198 + exit + } + *If no user input for this var, assign default name if "``deafultvar''" == "" local `deafultvar' = "`deafultvar'" From db8ac19ef430d0f31c9ed0427d3078f6b8d42762 Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 14 May 2019 15:01:34 -0400 Subject: [PATCH 32/39] changed test file options to lower case, changed directory specification --- .../ieduplicates/ieduplicate_test_script.do | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/test_scripts/ieduplicates/ieduplicate_test_script.do b/src/test_scripts/ieduplicates/ieduplicate_test_script.do index 3c7524cc..3efaba7e 100644 --- a/src/test_scripts/ieduplicates/ieduplicate_test_script.do +++ b/src/test_scripts/ieduplicates/ieduplicate_test_script.do @@ -9,9 +9,9 @@ clear all di "The user for this case is: " c(username) if "`c(username)'" == "Saori" { - global base "C:\Users\Saori\Desktop\Semester 4\z Other\DIME\iedup test" + global base "{directory}\iedup test" cd "$base" - qui do "C:\Users\Saori\Documents\Github\iefieldkit\src\ado_files\ieduplicates.ado" + qui do "{directory}\Github\iefieldkit\src\ado_files\ieduplicates.ado" } else { // *.... 
add other people's global here @@ -91,10 +91,10 @@ if "`c(username)'" == "Saori" { *Suffix use "$base\ieduplicates_test", clear ieduplicates iid , folder("$base") uniquevars(unique_id) /// - suffix(_test) + suffix(_test) use "$base\ieduplicates_test", clear - ieduplicates iid , folder("$base") uniquevars(unique_id) + ieduplicates iid , folder("$base") uniquevars(unique_id) *No error rm "$base\iedupreport_test.xlsx" rm "$base\iedupreport.xlsx" @@ -103,11 +103,12 @@ if "`c(username)'" == "Saori" { *Excel var name specification use "$base\ieduplicates_test", clear ieduplicates iid , folder("$base") uniquevars(unique_id) /// - duplistid(" DuplicateID") datelisted("Date_Listed ") datefixed("DateFixed") correct(Keep_this) drop("This_is_mistake ") newid(ID) initials(Signature) notes(Remarks) + duplistid(" duplicate_id") datelisted("date_listed ") datefixed("date_of_fixed") correct(keep_this) drop("this_is_mistake ") newid(id) initials(signature) notes(remarks) use "$base\ieduplicates_test", clear ieduplicates iid , folder("$base") uniquevars(unique_id) /// - duplistid(" DuplicateID") datelisted("Date_Listed ") datefixed("DateFixed") correct(Keep_this) drop("This_is_mistake ") newid(ID) initials(Signature) notes(Remarks) + duplistid(" duplicate_id") datelisted("date_listed ") datefixed("date_of_fixed") correct(keep_this) drop("this_is_mistake ") newid(id) initials(signature) notes(remarks) + *No error rm "$base\iedupreport.xlsx" @@ -115,15 +116,15 @@ if "`c(username)'" == "Saori" { *3) With multiiple options *All of the options - use "$base\ieduplicates_test2", clear + use "$base\ieduplicates_test", clear ieduplicates iid , folder("$base") uniquevars(unique_id) /// keepvars(rep rate age wgt) tostringok droprest nodaily suffix(_test) /// - duplistid(" DuplicateID") datelisted("Date_Listed ") datefixed("DateFixed") correct(Keep_this) drop("This_is_mistake ") newid(ID) initials(Signature) notes(Remarks) + duplistid(" duplicate_id") datelisted("date_listed ") 
datefixed("date_of_fixed") correct(keep_this) drop("this_is_mistake ") newid(id) initials(signature) notes(remarks) - use "$base\ieduplicates_test2", clear + use "$base\ieduplicates_test", clear ieduplicates iid , folder("$base") uniquevars(unique_id) /// keepvars(rep rate age wgt) tostringok droprest nodaily suffix(_test) /// - duplistid(" DuplicateID") datelisted("Date_Listed ") datefixed("DateFixed") correct(Keep_this) drop("This_is_mistake ") newid(ID) initials(Signature) notes(Remarks) + duplistid(" duplicate_id") datelisted("date_listed ") datefixed("date_of_fixed") correct(keep_this) drop("this_is_mistake ") newid(id) initials(signature) notes(remarks) *No error rm "$base\iedupreport_test.xlsx" @@ -132,18 +133,18 @@ if "`c(username)'" == "Saori" { use "$base\ieduplicates_test2", clear ieduplicates iid , folder("$base") uniquevars(unique_id) /// tostringok droprest nodaily /// - duplistid(" DuplicateID") + duplistid(" duplicate_id") use "$base\ieduplicates_test2", clear ieduplicates iid , folder("$base") uniquevars(unique_id) /// tostringok droprest nodaily /// - duplistid(" DuplicateID") + duplistid(" duplicate_id") *No error rm "$base\iedupreport.xlsx" *ieduplicate listing of variables that differs - use "$cd\ieduplicates_test", clear + use "$base\ieduplicates_test", clear gen this_a_fake_long_named_variable1 =. 
replace this_a_fake_long_named_variable1=1 if unique_id==80865 @@ -176,9 +177,10 @@ if "`c(username)'" == "Saori" { use "$base\test3", clear ieduplicates iid , folder("C:\Users\Saori\Desktop\Semester 4\z Other\DIME\iedup test") uniquevars(unique_id) /// - duplistid(" DuplicateID") datefixed(notes_enumerators) /// + duplistid(" duplicate_id") datefixed(notes_enumerators) /// tostringok droprest nodaily + rm "$base\iedupreport.xlsx" * Test with duplicate IDs where all variables differ use "$base\test3", clear @@ -188,9 +190,10 @@ if "`c(username)'" == "Saori" { use "$base\test4", clear ieduplicates iid , folder("C:\Users\Saori\Desktop\Semester 4\z Other\DIME\iedup test") uniquevars(unique_id) /// - duplistid(" DuplicateID") datefixed(notes_enumerators) /// + duplistid(" duplicate_id") datefixed(notes_enumerators) /// tostringok droprest + rm "$base\iedupreport.xlsx" * Test with duplicate IDs where only one variable differs use "$base\test3", clear @@ -201,7 +204,7 @@ if "`c(username)'" == "Saori" { use "$base\test4", clear ieduplicates iid , folder("C:\Users\Saori\Desktop\Semester 4\z Other\DIME\iedup test") uniquevars(unique_id) /// - duplistid(" DuplicateID") datefixed(notes_enumerators) /// + duplistid(" duplicate_id") datefixed(notes_enumerators) /// tostringok droprest *No error From 51434df07efde13800a1bd81baa84f1e39d1bd3c Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 14 May 2019 15:35:55 -0400 Subject: [PATCH 33/39] helpfile update for excelvar options (lower case, simplifying the explanation on the top) --- src/help_files/ieduplicates.sthlp | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/help_files/ieduplicates.sthlp b/src/help_files/ieduplicates.sthlp index 4b434904..98838a40 100644 --- a/src/help_files/ieduplicates.sthlp +++ b/src/help_files/ieduplicates.sthlp @@ -43,16 +43,16 @@ command please see the {browse "https://dimewiki.worldbank.org/wiki/Ieduplicates {pstd}{it: {ul:{hi:Excel variable name 
options:}}}{p_end} -{pstd}SAORI: Explain these options here, and make{p_end} +{pstd}This option allows users to customize the column names in the report Excel spreadsheet. This option is intended for situations when the dataset already has variable(s) named the same as the default Excel spreadsheet name. {p_end} {synopt :{cmdab:duplistid(}{it:string}{cmd:)}}customizes variable {it:duplistid}{p_end} -{synopt :{cmdab:datelisted(}{it:string}{cmd:)}}allows the user to customize the variable name "dateListed" in the the Excel report{p_end} -{synopt :{cmdab:datefixed(}{it:string}{cmd:)}}allows the user to customize the variable name "dateFixed" in the the Excel report{p_end} -{synopt :{cmdab:correct(}{it:string}{cmd:)}}allows the user to customize the variable name "correct" in the the Excel report{p_end} -{synopt :{cmdab:drop(}{it:string}{cmd:)}}allows the user to customize the variable name "drop" in the the Excel report{p_end} -{synopt :{cmdab:newid(}{it:string}{cmd:)}}allows the user to customize the variable name "newID" in the the Excel report{p_end} -{synopt :{cmdab:initials(}{it:string}{cmd:)}}allows the user to customize the variable name "initials" in the the Excel report{p_end} -{synopt :{cmdab:notes(}{it:string}{cmd:)}}allows the user to customize the variable name "notes" in the the Excel report{p_end} +{synopt :{cmdab:datelisted(}{it:string}{cmd:)}}customizes variable {it:datelisted}{p_end} +{synopt :{cmdab:datefixed(}{it:string}{cmd:)}}customizes variable {it:datefixed}{p_end} +{synopt :{cmdab:correct(}{it:string}{cmd:)}}customizes variable {it:correct}{p_end} +{synopt :{cmdab:drop(}{it:string}{cmd:)}}customizes variable {it:drop}{p_end} +{synopt :{cmdab:newid(}{it:string}{cmd:)}}customizes variable {it:newid}{p_end} +{synopt :{cmdab:initials(}{it:string}{cmd:)}}customizes variable {it:initials}{p_end} +{synopt :{cmdab:notes(}{it:string}{cmd:)}}customizes variable {it:notes}{p_end} {synoptline} @@ -95,10 +95,6 @@ to keep track of who decided how to correct 
that duplicate and to use {it:notes} the correction was chosen. If {it:initials} and {it:notes} are used, then the Excel report also functions as an excellent documentation of the correction made. -{pstd}{it: {ul:{hi:Excel variable name options:}}}{p_end} - -{pstd}SAORI: Explain these options here, and make{p_end} - {space 4}{hline} {title:Options} @@ -157,12 +153,14 @@ report in a sub-folder called Daily in the folder specified in {cmdab:folder()}. the folder /Daily/ does not exist, then it is created unless the option {cmdab:nodaily} is used. +{title:Excel variable name options:} + {phang}{cmdab:duplistid(}{it:string}{cmd:)} {cmdab:datelisted(}{it:string}{cmd:)} {cmdab:datefixed(}{it:string}{cmd:)} {cmdab:correct(}{it:string}{cmd:)} {cmdab:drop(}{it:string}{cmd:)} {cmdab:newid(}{it:string}{cmd:)} {cmdab:initials(}{it:string}{cmd:)} {cmdab:notes(}{it:string}{cmd:)} -allow the user to set a unique name for each default variable names (e.g. "dupListID", "dateListed", etc.) in the Excel report. -This is meant to be used when the variable name already exists in the dataset. To avoid error, the command offers a way to modify the variable name in the Excel Report. {p_end} +allow the user to set a unique name for each default variable names (e.g. "dupListid", "datelisted", etc.) in the Excel report spreadsheet. +This is meant to be used when the variable name already exists in the dataset. To avoid error, the command offers a way to modify the variable name in the Excel Report spreadsheet. 
{p_end} {title:The Excel Report} From 72cb88e264a41c88f7890b6f1882113d2e70f275 Mon Sep 17 00:00:00 2001 From: Unknown Date: Tue, 14 May 2019 16:04:56 -0400 Subject: [PATCH 34/39] helpfile changing old variable names to lowercase --- src/help_files/ieduplicates.sthlp | 46 +++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/help_files/ieduplicates.sthlp b/src/help_files/ieduplicates.sthlp index 98838a40..6fcef50d 100644 --- a/src/help_files/ieduplicates.sthlp +++ b/src/help_files/ieduplicates.sthlp @@ -76,11 +76,11 @@ needs to be specified in order to have a unique reference for each row in the Ex observations in the Excel report, either on its own or together with {it:ID_varname}. {cmd:ieduplicates} then returns the data set without these duplicates. -{pstd}The Excel report includes three columns called {it:correct}, {it:drop} and {it:newID}. +{pstd}The Excel report includes three columns called {it:correct}, {it:drop} and {it:newid}. Each of them represents one way to correct the duplicates. If {it:correct} is indicated with a "Yes" then that observation is kept unchanged, if {it:drop} is indicated with a "Yes" then -that observation is deleted and if {it:newID} is indicated then that observation is assigned -a new ID using the value in column {it:newID}. After corrections are entered, the report should +that observation is deleted and if {it:newid} is indicated then that observation is assigned +a new ID using the value in column {it:newid}. After corrections are entered, the report should be saved in the same location {cmdab:fol:der(}{it:string}{cmd:)} without any changes to its name. {pstd}Before outputting a new report {cmd:ieduplicates} always checks if there already is an @@ -120,7 +120,7 @@ drop and assign a new ID to. For data integrity reasons, be careful not to expor Excel files including both identifying variables and names together with {it:ID_varname}. 
{phang}{cmdab:tostringok} allows {it:ID_varname} to be turned into a string variable in case -{it:ID_varname} is numeric but a value listed in {it:newID} is non-numeric. Otherwise an error is generated. +{it:ID_varname} is numeric but a value listed in {it:newid} is non-numeric. Otherwise an error is generated. {phang}{cmdab:droprest} disables the requirement that duplicates must be explicitly deleted. The default is that if one of the duplicates in a group of duplicates has a @@ -128,7 +128,7 @@ correction, then that correction is only valid if all other duplicates in that group have a correction as well. For example, if there are four observations with the same value for {it:ID_varname} and one is correct, one needs a new ID and two are incorrect and should be deleted. Then the first one is indicated to be -kept in the {it:correct} column, the second one is given a new ID in {it:newID} +kept in the {it:correct} column, the second one is given a new ID in {it:newid} and the other two observations must be indicated for deletion in {it:drop} unless {cmdab:droprest}. The first two corrections are not considered valid and will cause an error in case if {cmdab:droprest} is not specified and the other @@ -159,7 +159,7 @@ option {cmdab:nodaily} is used. {cmdab:datefixed(}{it:string}{cmd:)} {cmdab:correct(}{it:string}{cmd:)} {cmdab:drop(}{it:string}{cmd:)} {cmdab:newid(}{it:string}{cmd:)} {cmdab:initials(}{it:string}{cmd:)} {cmdab:notes(}{it:string}{cmd:)} -allow the user to set a unique name for each default variable names (e.g. "dupListid", "datelisted", etc.) in the Excel report spreadsheet. +allow the user to set a unique name for each default variable names (e.g. {it:duplistid}, {it:datelisted}, etc.) in the Excel report spreadsheet. This is meant to be used when the variable name already exists in the dataset. To avoid error, the command offers a way to modify the variable name in the Excel Report spreadsheet. 
{p_end} @@ -173,19 +173,19 @@ that day, then that report will be overwritten. {pstd}All duplicates in a group of duplicates must have a correction indicated. If one or more duplicates are indicated as correct in {it:correct} or assigned a new -ID in {it:newID}, then all other duplicates with the same value in {it:ID_varname} must +ID in {it:newid}, then all other duplicates with the same value in {it:ID_varname} must be explicitly indicated for deletion. This requirement may (but probably shouldn't) be disabled by option {cmdab:droprest}. {dlgtab:Columns in Excel Report filled in automatically:} -{phang}{it:dupListID} stores an auto incremented duplicate list ID that is used +{phang}{it:duplistid} stores an auto incremented duplicate list ID that is used to maintain the sort order in the Excel Report regardless of how the data in memory is sorted at the time {cmd:ieduplicates} is executed. -{phang}{it:dateListed} stores the date the duplicate was first identified. +{phang}{it:datelisted} stores the date the duplicate was first identified. -{phang}{it:dateFixed} stores the date a valid correction was imported the first +{phang}{it:datefixed} stores the date a valid correction was imported the first time for that duplicate. {dlgtab:Columns in Excel Report to be filled in manually by a user:} @@ -193,20 +193,20 @@ time for that duplicate. {phang}{it:correct} is used to indicate that the duplicate should be kept. Valid values are restricted to "yes" and "y" to reduce the risk of unintended entries. The values are not sensitive to case. All valid values are changed to "yes" lower case when -imported. If {it:correct} is indicated then both {it:drop} and {it:newID} must be +imported. If {it:correct} is indicated then both {it:drop} and {it:newid} must be left empty. {phang}{it:drop} is used to indicate that the duplicate should be deleted. Valid values are restricted to "yes" and "y" to reduce the risk of unintended entries. The values are not sensitive to case. 
All valid values are changed to "yes" lower case when -imported. If {it:drop} is indicated then both {it:correct} and {it:newID} must be +imported. If {it:drop} is indicated then both {it:correct} and {it:newid} must be left empty. -{phang}{it:newID} is used to assign a new ID values to a duplicate. If {it:ID_varname} -is a string then all values are valid for {it:newID}. If {it:ID_varname} is numeric then +{phang}{it:newid} is used to assign a new ID values to a duplicate. If {it:ID_varname} +is a string then all values are valid for {it:newid}. If {it:ID_varname} is numeric then only digits are valid, unless the option {cmdab:tostringok} is specified. -If {cmdab:tostringok} is specified and {it:newID} is non-numeric, then {it:ID_varname} -is recasted to a string variable. If {it:newID} is indicated then both {it:correct} and {it:drop} must be +If {cmdab:tostringok} is specified and {it:newid} is non-numeric, then {it:ID_varname} +is recasted to a string variable. If {it:newid} is indicated then both {it:correct} and {it:drop} must be left empty. {phang}{it:initials} allows the team working with this data to keep track on who @@ -276,7 +276,7 @@ unresolved duplicates were found {hi:Example 4.} Using the Excel file. The table below could be the report generated in Example 2 above. Make the viewer window wider and reload the page if the table below does not display properly! 
{col 3}{c TLC}{hline 116}{c TRC} -{col 3}{c |}{col 4}HHID{col 10}dupListID{col 21}dateListed{col 33}dateFixed{col 44}correct{col 53}drop{col 59}newID{col 65}initials{col 75}notes{col 94}KEY{col 107}enumerator{col 120}{c |} +{col 3}{c |}{col 4}HHID{col 10}duplistid{col 21}datelisted{col 33}datefixed{col 44}correct{col 53}drop{col 59}newid{col 65}initials{col 75}notes{col 94}KEY{col 107}enumerator{col 120}{c |} {col 3}{c LT}{hline 116}{c RT} {col 3}{c |}{col 4}4321{col 10}1{col 21}27Dec2015{col 33}02Jan2016{col 44}yes{col 53} {col 59} {col 65}KB{col 75}double submission{col 94}{it:uniquevalue}{col 107}{it:keepvarvalue}{col 120}{c |} {col 3}{c |}{col 4}4321{col 10}2{col 21}27Dec2015{col 33}02Jan2016{col 44} {col 53}yes{col 59} {col 65}KB{col 75}double submission{col 94}{it:uniquevalue}{col 107}{it:keepvarvalue}{col 120}{c |} @@ -291,15 +291,15 @@ unresolved duplicates were found {pmore}The table above shows an example of an Excel report with 4 duplicates groups with two duplicates in each groups. The duplicates in 4321 and in 1145 have both been corrected but 7365 and 9834 are still unresolved. Before any observation was corrected, all observations had -{it:dateFixed}, {it:correct}, {it:drop}, {it:newID}, {it:initials} and {it:note} empty just like the observations for ID 7365 and 9834. {it:dateFixed} +{it:datefixed}, {it:correct}, {it:drop}, {it:newid}, {it:initials} and {it:notes} empty just like the observations for ID 7365 and 9834. {it:datefixed} is not updated by the user, the command adds this date the first time the correction is made. -{pmore}Observation with dupListID == 5 was found to have been -assigned the incorrect ID while the data was collected. This observation is assigned the correct ID in {it:newID} -and observation dupListID == 6 is indicated to be correct. Someone with initials IB made this +{pmore}Observation with duplistid == 5 was found to have been +assigned the incorrect ID while the data was collected. 
This observation is assigned the correct ID in {it:newid} +and observation duplistid == 6 is indicated to be correct. Someone with initials IB made this correction and made a note. This note can and should be more descriptive but is kept short in this example. -{pmore}Observations with dupListID == 1 and dupListID == 2 were identified as a duplicate submissions of the same +{pmore}Observations with duplistid == 1 and duplistid == 2 were identified as duplicate submissions of the same observation. One is kept and one is dropped, usually it does not matter which you keep and which you drop, but that should be confirmed. {pmore}Both corrections described in the example would have been easily identified using this command's sister command {help iecompdup}. @@ -310,7 +310,7 @@ observation. One is kept and one is dropped, usually it does not matter which yo {hi:Example 5.} {inp:ieduplicates HHID, folder(C:\myImpactEvaluation\baseline\data) uniquevars(KEY) drop(out) notes(notes_enumerators)} {col 3}{c TLC}{hline 103}{c TRC} -{col 3}{c |}{col 4}HHID{col 10}dupListID{col 21}dateListed{col 33}dateFixed{col 44}correct{col 53}out{col 59}newID{col 65}initials{col 75}notes_enumerators{col 94}KEY{col 107}{c |} +{col 3}{c |}{col 4}HHID{col 10}duplistid{col 21}datelisted{col 33}datefixed{col 44}correct{col 53}out{col 59}newid{col 65}initials{col 75}notes_enumerators{col 94}KEY{col 107}{c |} {col 3}{c LT}{hline 103}{c RT} {col 3}{c |}{col 4}4321{col 10}1{col 21}27Dec2015{col 33}02Jan2016{col 44}yes{col 53} {col 59} {col 65}KB{col 75}double submission{col 94}{it:uniquevalue}{col 107}{c |} {col 3}{c |}{col 4}4321{col 10}2{col 21}27Dec2015{col 33}02Jan2016{col 44} {col 53}yes{col 59} {col 65}KB{col 75}double submission{col 94}{it:uniquevalue}{col 107}{c |} From 644cdebe67148e0b7a29aeffdc11aa230e33c399 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Tue, 14 May 2019 13:27:15 -0400 Subject: [PATCH 35/39] ietestform : small typo in option name for long field 
names --- src/ado_files/ietestform.ado | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ado_files/ietestform.ado b/src/ado_files/ietestform.ado index 74cb3eb8..95c8b13c 100644 --- a/src/ado_files/ietestform.ado +++ b/src/ado_files/ietestform.ado @@ -970,7 +970,7 @@ qui { local error_msg "These variable names are longer then 32 characters. That is allowed in the data formats used in SurveyCTO - and is therefore allowed in their test - but will cause an error when the data is imported to Stata. The following names should be shortened:" - noi report_file add , report_tempfile("`report_tempfile'") testname("TOO LONG FIELD NAMES") message("`error_msg'") wikifragment("Field_Name_Length") able("list row type name if longname == 1") + noi report_file add , report_tempfile("`report_tempfile'") testname("TOO LONG FIELD NAMES") message("`error_msg'") wikifragment("Field_Name_Length") table("list row type name if longname == 1") } From 349d3a916089d586d56bd6afbd5e8164490605eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Tue, 14 May 2019 19:05:25 -0400 Subject: [PATCH 36/39] ietestform : do not list empty endtype name twice --- src/ado_files/ietestform.ado | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ado_files/ietestform.ado b/src/ado_files/ietestform.ado index 95c8b13c..2e740876 100644 --- a/src/ado_files/ietestform.ado +++ b/src/ado_files/ietestform.ado @@ -750,7 +750,7 @@ qui { local beginrow = subinstr("`beginrow'","#","", 1) //Remove the parse char "#" *If the name are not the same it is most likely a different group or repeat group that is incorrectly being closed - if "`endname'" != "`beginname'" { + if "`endname'" != "`beginname'" & !missing("`endname'") { local error_msg "begin_`begintype' [`beginname'] on row `beginrow' and end_`endtype' [`endname'] on row `endrow'" From b68a3c65dce6ccc8117260f14ef432332daca71b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Mon, 
20 May 2019 16:39:25 -0400 Subject: [PATCH 37/39] Update file version #71 - checkbox 3 --- src/ado_files/iecodebook.ado | 2 +- src/ado_files/iecompdup.ado | 2 +- src/ado_files/ieduplicates.ado | 2 +- src/ado_files/iefieldkit.ado | 2 +- src/ado_files/ietestform.ado | 6 +++--- src/help_files/iecodebook.sthlp | 2 +- src/help_files/iecompdup.sthlp | 2 +- src/help_files/ieduplicates.sthlp | 12 ++++++------ src/help_files/iefieldkit.sthlp | 2 +- src/help_files/ietestform.sthlp | 2 +- 10 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/ado_files/iecodebook.ado b/src/ado_files/iecodebook.ado index 010bbbaf..05d41617 100644 --- a/src/ado_files/iecodebook.ado +++ b/src/ado_files/iecodebook.ado @@ -1,4 +1,4 @@ -*! version 1.0 31JAN2018 DIME Analytics dimeanalytics@worldbank.org +*! version 1.1 20MAY2019 DIME Analytics dimeanalytics@worldbank.org // Main syntax ********************************************************************************* diff --git a/src/ado_files/iecompdup.ado b/src/ado_files/iecompdup.ado index 9528b74a..48d3541d 100644 --- a/src/ado_files/iecompdup.ado +++ b/src/ado_files/iecompdup.ado @@ -1,4 +1,4 @@ -*! version 1.0 31JAN2019 DIME Analytics dimeanalytics@worldbank.org +*! version 1.1 20MAY2019 DIME Analytics dimeanalytics@worldbank.org capture program drop iecompdup program iecompdup , rclass diff --git a/src/ado_files/ieduplicates.ado b/src/ado_files/ieduplicates.ado index c2f146a6..a620e22f 100644 --- a/src/ado_files/ieduplicates.ado +++ b/src/ado_files/ieduplicates.ado @@ -1,4 +1,4 @@ -*! version 1.0 31JAN2019 DIME Analytics dimeanalytics@worldbank.org +*! version 1.1 20MAY2019 DIME Analytics dimeanalytics@worldbank.org capture program drop ieduplicates program ieduplicates , rclass diff --git a/src/ado_files/iefieldkit.ado b/src/ado_files/iefieldkit.ado index d5949c26..f676944c 100644 --- a/src/ado_files/iefieldkit.ado +++ b/src/ado_files/iefieldkit.ado @@ -1,4 +1,4 @@ -*! 
version 1.0 31JAN2018 DIME Analytics dimeanalytics@worldbank.org +*! version 1.1 20MAY2019 DIME Analytics dimeanalytics@worldbank.org capture program drop iefieldkit program iefieldkit, rclass diff --git a/src/ado_files/ietestform.ado b/src/ado_files/ietestform.ado index 2e740876..50b7ff59 100644 --- a/src/ado_files/ietestform.ado +++ b/src/ado_files/ietestform.ado @@ -1,4 +1,4 @@ -*! version 1.0 31JAN2018 DIME Analytics dimeanalytics@worldbank.org +*! version 1.1 20MAY2019 DIME Analytics dimeanalytics@worldbank.org capture program drop ietestform program ietestform , rclass @@ -638,7 +638,7 @@ qui { local error_msg "Fields of type note creates an impassable view that are impossible for the enumerator to sweep pass. Make sure that is the inentional behavior for the following fields:" noi report_file add , report_tempfile("`report_tempfile'") wikifragment("Required_Column") message("`error_msg'") table("list row type name if note_required == 1") testname("REQUIRED NOTE TYPE FIELD") } - + *List and output required label fields in field-list groups gen label_required = (req_relevant == 1 & appearance == "label" & lower(required) == "yes") count if label_required == 1 @@ -647,7 +647,7 @@ qui { *Prepare message and write it local error_msg "Fields with appearance [label] (inside a field-list group) must not be required. 
Label fields are currently required in the following rows:" noi report_file add , report_tempfile("`report_tempfile'") wikifragment("Required_Column") message("`error_msg'") table("list row type name if label_required == 1") testname("REQUIRED LABEL FIELD") - } + } } end diff --git a/src/help_files/iecodebook.sthlp b/src/help_files/iecodebook.sthlp index 9e45c0d7..9a759823 100644 --- a/src/help_files/iecodebook.sthlp +++ b/src/help_files/iecodebook.sthlp @@ -1,5 +1,5 @@ {smcl} -{* 31 Jan 2019}{...} +{* 20 May 2019}{...} {hline} help for {hi:iecodebook} {hline} diff --git a/src/help_files/iecompdup.sthlp b/src/help_files/iecompdup.sthlp index 8234aa81..ff87349e 100644 --- a/src/help_files/iecompdup.sthlp +++ b/src/help_files/iecompdup.sthlp @@ -1,5 +1,5 @@ {smcl} -{* 31 Jan 2019}{...} +{* 20 May 2019}{...} {hline} help for {hi:iecompdup} {hline} diff --git a/src/help_files/ieduplicates.sthlp b/src/help_files/ieduplicates.sthlp index 6fcef50d..77e0e2cc 100644 --- a/src/help_files/ieduplicates.sthlp +++ b/src/help_files/ieduplicates.sthlp @@ -1,5 +1,5 @@ {smcl} -{* 31 Jan 2019}{...} +{* 20 May 2019}{...} {hline} help for {hi:ieduplicates} {hline} @@ -19,7 +19,7 @@ command please see the {browse "https://dimewiki.worldbank.org/wiki/Ieduplicates {it:ID_varname} , {cmdab:fol:der(}{it:string}{cmd:)} {cmdab:unique:vars(}{it:varlist}{cmd:)} [{cmdab:keep:vars(}{it:varlist}{cmd:)} {cmdab:tostringok} {cmdab:droprest} -{cmdab:nodaily} {cmdab:suf:fix(}{it:string}{cmd:)} +{cmdab:nodaily} {cmdab:suf:fix(}{it:string}{cmd:)} {cmdab:duplistid(}{it:string}{cmd:)} {cmdab:datelisted(}{it:string}{cmd:)} {cmdab:datefixed(}{it:string}{cmd:)} {cmdab:correct(}{it:string}{cmd:)} {cmdab:drop(}{it:string}{cmd:)} {cmdab:newid(}{it:string}{cmd:)} @@ -155,10 +155,10 @@ option {cmdab:nodaily} is used. 
{title:Excel variable name options:} -{phang}{cmdab:duplistid(}{it:string}{cmd:)} {cmdab:datelisted(}{it:string}{cmd:)} -{cmdab:datefixed(}{it:string}{cmd:)} {cmdab:correct(}{it:string}{cmd:)} -{cmdab:drop(}{it:string}{cmd:)} {cmdab:newid(}{it:string}{cmd:)} -{cmdab:initials(}{it:string}{cmd:)} {cmdab:notes(}{it:string}{cmd:)} +{phang}{cmdab:duplistid(}{it:string}{cmd:)} {cmdab:datelisted(}{it:string}{cmd:)} +{cmdab:datefixed(}{it:string}{cmd:)} {cmdab:correct(}{it:string}{cmd:)} +{cmdab:drop(}{it:string}{cmd:)} {cmdab:newid(}{it:string}{cmd:)} +{cmdab:initials(}{it:string}{cmd:)} {cmdab:notes(}{it:string}{cmd:)} allow the user to set a unique name for each default variable names (e.g. {it:duplistid}, {it:datelisted}, etc.) in the Excel report spreadsheet. This is meant to be used when the variable name already exists in the dataset. To avoid error, the command offers a way to modify the variable name in the Excel Report spreadsheet. {p_end} diff --git a/src/help_files/iefieldkit.sthlp b/src/help_files/iefieldkit.sthlp index 559818b5..819fc423 100644 --- a/src/help_files/iefieldkit.sthlp +++ b/src/help_files/iefieldkit.sthlp @@ -1,5 +1,5 @@ {smcl} -{* 31 Jan 2019}{...} +{* 20 May 2019}{...} {hline} help for {hi:iefieldkit} {hline} diff --git a/src/help_files/ietestform.sthlp b/src/help_files/ietestform.sthlp index 3da2eddf..1bb76108 100644 --- a/src/help_files/ietestform.sthlp +++ b/src/help_files/ietestform.sthlp @@ -1,5 +1,5 @@ {smcl} -{* 31 Jan 2019}{...} +{* 20 May 2019}{...} {hline} help for {hi:ietestform} {hline} From 09f57cb34fd805cba9036ceacea11989165ca147 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Mon, 20 May 2019 16:40:23 -0400 Subject: [PATCH 38/39] Update version in iefieldkit local - #71 checkbox 4 --- src/ado_files/iefieldkit.ado | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ado_files/iefieldkit.ado b/src/ado_files/iefieldkit.ado index f676944c..180d8def 100644 --- 
a/src/ado_files/iefieldkit.ado +++ b/src/ado_files/iefieldkit.ado @@ -4,8 +4,8 @@ capture program drop iefieldkit program iefieldkit, rclass * UPDATE THESE LOCALS FOR EACH NEW VERSION PUBLISHED - local version "1.0" - local versionDate "31JAN2019" + local version "1.1" + local versionDate "20MAY2019" syntax [anything] From aca09b43aecfc4c2e89379b0921cb6d66c803c82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Bj=C3=A4rkefur?= Date: Mon, 20 May 2019 16:43:27 -0400 Subject: [PATCH 39/39] test script should not be part of source --- .../ieduplicates/ieduplicate_test_script.do | 0 {src/test_scripts => test_scripts}/mastertestfile.do | 0 {src/test_scripts => test_scripts}/testfile.do | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {src/test_scripts => test_scripts}/ieduplicates/ieduplicate_test_script.do (100%) rename {src/test_scripts => test_scripts}/mastertestfile.do (100%) rename {src/test_scripts => test_scripts}/testfile.do (100%) diff --git a/src/test_scripts/ieduplicates/ieduplicate_test_script.do b/test_scripts/ieduplicates/ieduplicate_test_script.do similarity index 100% rename from src/test_scripts/ieduplicates/ieduplicate_test_script.do rename to test_scripts/ieduplicates/ieduplicate_test_script.do diff --git a/src/test_scripts/mastertestfile.do b/test_scripts/mastertestfile.do similarity index 100% rename from src/test_scripts/mastertestfile.do rename to test_scripts/mastertestfile.do diff --git a/src/test_scripts/testfile.do b/test_scripts/testfile.do similarity index 100% rename from src/test_scripts/testfile.do rename to test_scripts/testfile.do