Skip to content

Commit

Permalink
Adds unit tests and fixes for the find duplicates check
Browse files Browse the repository at this point in the history
  • Loading branch information
fredkingham committed Nov 16, 2022
1 parent b5b9c0c commit 56eeda7
Show file tree
Hide file tree
Showing 2 changed files with 188 additions and 7 deletions.
18 changes: 11 additions & 7 deletions plugins/data_quality/checks/find_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@ def find_exact_duplicates():
# something unexpected has happened.
raise ValueError(f'We have found {len(patients)} for hn {duplicate_hn}')
dup_patients.append(patients)
cleanable_duplicates, uncleanable_duplicates = calculate_deletable_undeletable()
cleanable_duplicates, uncleanable_duplicates = calculate_deletable_undeletable()
cleanable_duplicates, uncleanable_duplicates = calculate_deletable_undeletable(dup_patients)
email_title = f'{len(cleanable_duplicates) + len (uncleanable_duplicates)} Exact hospital number duplicates'
email_context = {
"title": email_title,
Expand Down Expand Up @@ -75,15 +74,18 @@ def find_zero_leading_duplicates():
without_zero_patients = list(Patient.objects.filter(
demographics__hospital_number=without_zero
))
if len(with_zero_patients) > 1 or len(without_zero_patient) > 1:
if len(with_zero_patients) > 1 or len(without_zero_patients) > 1:
# we expect these patients to be covered by the find_exact_duplicates
# check
continue
if without_zero_patients == 0:
if len(without_zero_patients) == 0:
# we have no patients with the zero stripped so lets continue
continue
dup_patients.append(with_zero[0], without_zero_patients[0])
cleanable_duplicates, uncleanable_duplicates = calculate_deletable_undeletable()
dup_patients.append((with_zero_patients[0], without_zero_patients[0],))
if len(dup_patients) == 0:
# if there are no duplicate patients, return
return
cleanable_duplicates, uncleanable_duplicates = calculate_deletable_undeletable(dup_patients)
email_title = f'{len(cleanable_duplicates) + len (uncleanable_duplicates)} Exact hospital number duplicates'
email_context = {
"title": email_title,
Expand Down Expand Up @@ -141,10 +143,12 @@ def calculate_deletable_undeletable(patient_tuples):
for patients in patient_tuples:
patient_1 = patients[0]
patient_1_native_records = has_records(patient_1)
patient_1_hn = patient_1.demographics_set.get().hospital_number
patient_2 = patients[1]
patient_2_native_records = has_records(patient_2)
patient_2_hn = patient_2.demographics_set.get().hospital_number
if len(patient_1_native_records) and len(patient_2_native_records):
logger.info(f'====== Unable to merge hn {duplicate_hn} as they both have duplicate records')
logger.info(f'====== Unable to merge hn {patient_1_hn}/{patient_2_hn} as they both have duplicate records')
logger.info(f'=== Patient 1 ({patient_1.id}) has:')
for record in patient_1_native_records:
logger.info(f'{record.__class__.__name__} {record.id}')
Expand Down
177 changes: 177 additions & 0 deletions plugins/data_quality/tests/test_find_duplicates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
from unittest import mock
from opal.core.test import OpalTestCase
from plugins.data_quality.checks import find_duplicates


@mock.patch('plugins.data_quality.utils.send_email')
class FindExactDuplicatesTestCase(OpalTestCase):
def test_finds_exact_cleanable_duplicates(self, send_email):
"""
Cleanable duplicates are when we can delete one without issue.
In this case only patient 1 has a subrecord so
patient 2 can be deleted.
"""
patient_1, episode = self.new_patient_and_episode_please()
patient_1.demographics_set.update(
hospital_number='111111',
)
diagnosis = episode.diagnosis_set.create()
diagnosis.condition = "Cough"
diagnosis.save()
patient_2, _ = self.new_patient_and_episode_please()
patient_2.demographics_set.update(
hospital_number='111111',
)
find_duplicates.find_exact_duplicates()
call_args = send_email.call_args[0]
self.assertEqual(
call_args[0], '1 Exact hospital number duplicates'
)
self.assertEqual(
call_args[2]["title"], '1 Exact hospital number duplicates'
)
self.assertEqual(
call_args[2]["uncleanable_duplicates"], []
)
self.assertEqual(
call_args[2]["cleanable_duplicates"], [(patient_2, patient_1,)]
)
self.assertEqual(
call_args[2]["uncleanable_duplicates"], []
)

def test_finds_exact_uncleanable_duplicates(self, send_email):
"""
Cleanable duplicates are when we can delete one without issue.
In this case only patient 1 has a subrecord so
patient 2 can be deleted.
"""
patient_1, episode_1 = self.new_patient_and_episode_please()
patient_1.demographics_set.update(
hospital_number='111111',
)
diagnosis = episode_1.diagnosis_set.create()
diagnosis.condition = "Cough"
diagnosis.save()
patient_2, episode_2 = self.new_patient_and_episode_please()
patient_2.demographics_set.update(
hospital_number='111111',
)
diagnosis = episode_2.diagnosis_set.create()
diagnosis.condition = "Fever"
diagnosis.save()
find_duplicates.find_exact_duplicates()
call_args = send_email.call_args[0]
self.assertEqual(
call_args[0], '1 Exact hospital number duplicates'
)
self.assertEqual(
call_args[2]["title"], '1 Exact hospital number duplicates'
)
self.assertEqual(
call_args[2]["cleanable_duplicates"], []
)
self.assertEqual(
call_args[2]["uncleanable_duplicates"], [(patient_1, patient_2,)]
)

def test_does_not_find_exact_duplicates(self, send_email):
patient_1, _ = self.new_patient_and_episode_please()
patient_1.demographics_set.update(
hospital_number='111111',
)
patient_2, _ = self.new_patient_and_episode_please()
patient_2.demographics_set.update(
hospital_number='22222222',
)
find_duplicates.find_exact_duplicates()
self.assertFalse(send_email.called)


@mock.patch('plugins.data_quality.utils.send_email')
class FindLeadingZeroDuplicatesTestCase(OpalTestCase):
def test_finds_leading_zero_cleanable_duplicates(self, send_email):
"""
Cleanable duplicates are when we can delete one without issue.
In this case only patient 1 has a subrecord so
patient 2 can be deleted.
"""
patient_1, episode = self.new_patient_and_episode_please()
patient_1.demographics_set.update(
hospital_number='111111',
)
diagnosis = episode.diagnosis_set.create()
diagnosis.condition = "Cough"
diagnosis.save()
patient_2, _ = self.new_patient_and_episode_please()
patient_2.demographics_set.update(
hospital_number='0111111',
)
find_duplicates.find_zero_leading_duplicates()
call_args = send_email.call_args[0]
self.assertEqual(
call_args[0], '1 Exact hospital number duplicates'
)
self.assertEqual(
call_args[2]["title"], '1 Exact hospital number duplicates'
)
self.assertEqual(
call_args[2]["uncleanable_duplicates"], []
)
self.assertEqual(
call_args[2]["cleanable_duplicates"], [(patient_2, patient_1,)]
)
self.assertEqual(
call_args[2]["uncleanable_duplicates"], []
)

def test_finds_leading_zero_uncleanable_duplicates(self, send_email):
"""
Cleanable duplicates are when we can delete one without issue.
In this case only patient 1 has a subrecord so
patient 2 can be deleted.
"""
patient_1, episode_1 = self.new_patient_and_episode_please()
patient_1.demographics_set.update(
hospital_number='111111',
)
diagnosis = episode_1.diagnosis_set.create()
diagnosis.condition = "Cough"
diagnosis.save()
patient_2, episode_2 = self.new_patient_and_episode_please()
patient_2.demographics_set.update(
hospital_number='0111111',
)
diagnosis = episode_2.diagnosis_set.create()
diagnosis.condition = "Fever"
diagnosis.save()
find_duplicates.find_zero_leading_duplicates()
call_args = send_email.call_args[0]
self.assertEqual(
call_args[0], '1 Exact hospital number duplicates'
)
self.assertEqual(
call_args[2]["title"], '1 Exact hospital number duplicates'
)
self.assertEqual(
call_args[2]["cleanable_duplicates"], []
)
self.assertEqual(
call_args[2]["uncleanable_duplicates"], [(patient_2,patient_1,)]
)

def test_does_not_find_leading_zero_duplicates(self, send_email):
patient_1, _ = self.new_patient_and_episode_please()
patient_1.demographics_set.update(
hospital_number='111111',
)
patient_2, _ = self.new_patient_and_episode_please()
patient_2.demographics_set.update(
hospital_number='022222222',
)
find_duplicates.find_zero_leading_duplicates()
self.assertFalse(send_email.called)

0 comments on commit 56eeda7

Please sign in to comment.