Skip to content

Commit

Permalink
Merge pull request freelawproject#2821 from freelawproject/2499-opini…
Browse files Browse the repository at this point in the history
…on-order

2499 Add Opinion Ordering
  • Loading branch information
mlissner authored Aug 7, 2024
2 parents 043c55f + 98bf786 commit c7d7078
Show file tree
Hide file tree
Showing 13 changed files with 386 additions and 37 deletions.
115 changes: 115 additions & 0 deletions cl/corpus_importer/management/commands/update_opinions_order.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import argparse
import time

from django.db import transaction
from django.db.models import Count

from cl.lib.command_utils import VerboseCommand, logger
from cl.search.models import SOURCES, Opinion, OpinionCluster


def sort_harvard_opinions(options) -> None:
    """Fill the ordering_key field of the sub opinions of harvard clusters

    We assume that harvard data is already ordered, we just need to fill
    the order field in each opinion. The harvard importer created the
    opinions in order of appearance in the file, so sorting the sub opinions
    of a cluster by id gives the order to store. Combined opinions are
    skipped and keep their current ordering_key.

    :param options: dict of arguments passed to the command. The optional
    keys "skip_until" (lowest cluster id to process) and "limit" (maximum
    number of clusters to process) are read with .get(); the key "delay"
    (seconds to sleep after each updated cluster) must be present.
    :return: None
    """

    skip_until = options.get("skip_until", None)
    limit = options.get("limit", None)

    # The filepath_json_harvard field can only be filled by the harvard
    # importer, this helps us confirm that it was imported from a Harvard
    # json. We exclude clusters merged with columbia because those may need
    # some extra verification.
    # No prefetch_related("sub_opinions") here: the loop below re-queries the
    # relation with an explicit order_by(), which never reads the prefetch
    # cache, so prefetching would only load every opinion a second time.
    harvard_clusters = (
        OpinionCluster.objects.exclude(filepath_json_harvard="")
        .annotate(opinions_count=Count("sub_opinions"))
        .filter(opinions_count__gt=1)
        .exclude(source__contains=SOURCES.COLUMBIA_ARCHIVE)
        .order_by("id")
    )
    if skip_until:
        harvard_clusters = harvard_clusters.filter(pk__gte=skip_until)

    if limit:
        harvard_clusters = harvard_clusters[:limit]

    for cluster in harvard_clusters:
        logger.info(f"Processing cluster id: {cluster.id}")
        updated_count = 0
        # All the opinions of a cluster are renumbered together or not at all
        with transaction.atomic():
            # We need to make sure they are ordered by id
            for cluster_op in cluster.sub_opinions.all().order_by("id"):
                if cluster_op.type == Opinion.COMBINED:
                    continue
                updated_count += 1
                # Keys start at 1 and follow the id order
                cluster_op.ordering_key = updated_count
                cluster_op.save()

        if not updated_count:
            # We want to know if you found anything unexpected, like for
            # example only having combined opinions
            logger.info(
                f"No sub_opinions updated for cluster id: {cluster.id}"
            )
            continue

        logger.info(f"Opinions reordered for cluster id: {cluster.id}")
        # Wait between each processed cluster to avoid issues with elastic.
        # Sleeping here, after the atomic block has exited, means no
        # transaction is kept open during the delay.
        time.sleep(options["delay"])


class Command(VerboseCommand):
    """Management command that runs one of the opinion ordering actions.

    The action is chosen with --action and receives the full dict of parsed
    options as its only argument.
    """

    help = "Add ordering Key for sub opinions"

    # Maps each value accepted by --action to the function implementing it
    VALID_ACTIONS = {"sort-harvard": sort_harvard_opinions}

    def valid_actions(self, s):
        """Argparse type callback: turn an action name into its function

        The lookup is case-insensitive: the name is lowercased once and that
        same normalized value is used for both the membership check and the
        lookup.

        :param s: action name received from the command line
        :return: the callable registered for that action in VALID_ACTIONS
        :raises argparse.ArgumentTypeError: if the name is not a valid action
        """
        action_name = s.lower()
        if action_name not in self.VALID_ACTIONS:
            raise argparse.ArgumentTypeError(
                "Unable to parse action. Valid actions are: "
                f"{', '.join(self.VALID_ACTIONS)}"
            )

        return self.VALID_ACTIONS[action_name]

    def add_arguments(self, parser):
        """Register the command line arguments of the command

        :param parser: argument parser provided by the command framework
        :return: None
        """
        parser.add_argument(
            "--skip-until",
            help="Specific cluster id to skip until",
            type=int,
            required=False,
        )
        parser.add_argument(
            "--limit",
            type=int,
            help="Number of clusters to sort",
            required=False,
        )
        parser.add_argument(
            "--action",
            type=self.valid_actions,
            required=True,
            help="The action you wish to take. Valid choices are: "
            f"{', '.join(self.VALID_ACTIONS)}",
        )
        parser.add_argument(
            "--delay",
            type=float,
            default=0.2,
            help="How long to wait after updating each cluster (in seconds, "
            "allows floating numbers).",
        )

    def handle(self, *args, **options):
        """Run the selected action with the parsed options

        :param args: positional arguments forwarded to the parent handle()
        :param options: parsed command options; options["action"] is already
        the callable returned by valid_actions
        :return: None
        """
        super().handle(*args, **options)
        options["action"](options)
12 changes: 8 additions & 4 deletions cl/search/fixtures/functest_opinions.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@
"date_created": "2015-08-15T14:10:56.801Z",
"html_lawbox": "",
"per_curiam": false,
"type": "010combined"
"type": "010combined",
"ordering_key": null
},
"model": "search.opinion",
"pk": 10
Expand Down Expand Up @@ -134,7 +135,8 @@
"date_created": "2015-08-15T14:10:56.801Z",
"html_lawbox": "",
"per_curiam": false,
"type": "010combined"
"type": "010combined",
"ordering_key": null
},
"model": "search.opinion",
"pk": 11
Expand Down Expand Up @@ -184,7 +186,8 @@
"date_created": "2015-08-15T14:10:56.801Z",
"html_lawbox": "",
"per_curiam": false,
"type": "010combined"
"type": "010combined",
"ordering_key": null
},
"model": "search.opinion",
"pk": 12
Expand Down Expand Up @@ -254,7 +257,8 @@
"date_created": "2015-08-15T14:10:56.801Z",
"html_lawbox": "",
"per_curiam": false,
"type": "010combined"
"type": "010combined",
"ordering_key": null
},
"model": "search.opinion",
"pk": 12
Expand Down
6 changes: 4 additions & 2 deletions cl/search/fixtures/opinions-issue-412.json
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@
"date_created": "2015-08-15T14:10:56.801Z",
"html_lawbox": "",
"per_curiam": false,
"type": "010combined"
"type": "010combined",
"ordering_key": null
},
"model": "search.opinion",
"pk": 10
Expand Down Expand Up @@ -134,7 +135,8 @@
"date_created": "2015-08-15T14:10:56.801Z",
"html_lawbox": "",
"per_curiam": false,
"type": "010combined"
"type": "010combined",
"ordering_key": null
},
"model": "search.opinion",
"pk": 11
Expand Down
18 changes: 12 additions & 6 deletions cl/search/fixtures/test_objects_query_counts.json
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,8 @@
"date_created":"2015-08-15T14:10:56.801Z",
"html_lawbox":"",
"per_curiam":false,
"type":"020lead"
"type":"020lead",
"ordering_key": null
},
"model":"search.opinion",
"pk":1
Expand All @@ -324,7 +325,8 @@
"date_created":"2015-08-15T14:10:56.801Z",
"html_lawbox":"",
"per_curiam":false,
"type":"010combined"
"type":"010combined",
"ordering_key": null
},
"model":"search.opinion",
"pk":2
Expand All @@ -348,7 +350,8 @@
"date_created":"2015-08-15T14:10:56.801Z",
"html_lawbox":"",
"per_curiam":false,
"type":"010combined"
"type":"010combined",
"ordering_key": null
},
"model":"search.opinion",
"pk":3
Expand All @@ -371,7 +374,8 @@
"date_created":"2015-08-15T14:10:56.801Z",
"html_lawbox":"",
"per_curiam":false,
"type":"010combined"
"type":"010combined",
"ordering_key": null
},
"model":"search.opinion",
"pk":4
Expand All @@ -395,7 +399,8 @@
"date_created":"2015-08-15T14:10:56.801Z",
"html_lawbox":"",
"per_curiam":false,
"type":"010combined"
"type":"010combined",
"ordering_key": null
},
"model":"search.opinion",
"pk":5
Expand All @@ -418,7 +423,8 @@
"date_created":"2015-08-15T14:10:56.801Z",
"html_lawbox":"",
"per_curiam":false,
"type":"010combined"
"type":"010combined",
"ordering_key": null
},
"model":"search.opinion",
"pk":6
Expand Down
18 changes: 12 additions & 6 deletions cl/search/fixtures/test_objects_search.json
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,8 @@
"date_created": "2015-08-15T14:10:56.801Z",
"html_lawbox": "",
"per_curiam": false,
"type": "020lead"
"type": "020lead",
"ordering_key": null
},
"model": "search.opinion",
"pk": 1
Expand All @@ -261,7 +262,8 @@
"date_created": "2015-08-15T14:10:56.801Z",
"html_lawbox": "",
"per_curiam": false,
"type": "010combined"
"type": "010combined",
"ordering_key": null
},
"model": "search.opinion",
"pk": 2
Expand All @@ -283,7 +285,8 @@
"date_created": "2015-08-15T14:10:56.801Z",
"html_lawbox": "",
"per_curiam": false,
"type": "010combined"
"type": "010combined",
"ordering_key": null
},
"model": "search.opinion",
"pk": 3
Expand All @@ -305,7 +308,8 @@
"date_created": "2015-08-15T14:10:56.801Z",
"html_lawbox": "",
"per_curiam": false,
"type": "010combined"
"type": "010combined",
"ordering_key": null
},
"model": "search.opinion",
"pk": 4
Expand All @@ -327,7 +331,8 @@
"date_created": "2015-08-15T14:10:56.801Z",
"html_lawbox": "",
"per_curiam": false,
"type": "010combined"
"type": "010combined",
"ordering_key": null
},
"model": "search.opinion",
"pk": 5
Expand All @@ -349,7 +354,8 @@
"date_created": "2015-08-15T14:10:56.801Z",
"html_lawbox": "",
"per_curiam": false,
"type": "010combined"
"type": "010combined",
"ordering_key": null
},
"model": "search.opinion",
"pk": 6
Expand Down
72 changes: 72 additions & 0 deletions cl/search/migrations/0033_order_opinions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Generated by Django 5.0.7 on 2024-08-05 20:19

import pgtrigger.compiler
import pgtrigger.migrations
from django.db import migrations, models


class Migration(migrations.Migration):
    # Adds the nullable integer column "ordering_key" to the opinion and
    # opinionevent models, re-creates the two snapshot triggers of the opinion
    # model so they include the new column, and adds a unique constraint on
    # (cluster_id, ordering_key) to the opinion model.
    dependencies = [
        (
            "people_db",
            "0016_remove_abarating_update_or_delete_snapshot_update_and_more",
        ),
        ("search", "0032_update_docket_numbering_fields"),
    ]

    operations = [
        # Drop both snapshot triggers first; they are added back further down
        # with "ordering_key" in their column lists.
        pgtrigger.migrations.RemoveTrigger(
            model_name="opinion",
            name="update_or_delete_snapshot_delete",
        ),
        pgtrigger.migrations.RemoveTrigger(
            model_name="opinion",
            name="update_or_delete_snapshot_update",
        ),
        # New column on the main table: optional (blank/null allowed) integer.
        migrations.AddField(
            model_name="opinion",
            name="ordering_key",
            field=models.IntegerField(blank=True, null=True),
        ),
        # Same column on the event table that the triggers insert into.
        migrations.AddField(
            model_name="opinionevent",
            name="ordering_key",
            field=models.IntegerField(blank=True, null=True),
        ),
        # AFTER UPDATE trigger: when any column listed in the condition
        # (now including "ordering_key") differs between OLD and NEW, the OLD
        # row values are inserted into search_opinionevent.
        pgtrigger.migrations.AddTrigger(
            model_name="opinion",
            trigger=pgtrigger.compiler.Trigger(
                name="update_or_delete_snapshot_update",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    condition='WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr") OR OLD."ordering_key" IS DISTINCT FROM (NEW."ordering_key"))',
                    func='INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "ordering_key", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."ordering_key", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL;',
                    hash="7137855274503cc2c50a17729f82e150d2b7d872",
                    operation="UPDATE",
                    pgid="pgtrigger_update_or_delete_snapshot_update_67ecd",
                    table="search_opinion",
                    when="AFTER",
                ),
            ),
        ),
        # AFTER DELETE trigger: no condition, the same INSERT of the OLD row
        # values into search_opinionevent runs for every deleted row.
        pgtrigger.migrations.AddTrigger(
            model_name="opinion",
            trigger=pgtrigger.compiler.Trigger(
                name="update_or_delete_snapshot_delete",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    func='INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "ordering_key", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."ordering_key", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL;',
                    hash="98fb52aa60fd8e89a83f8f7ac77ba5892739fb37",
                    operation="DELETE",
                    pgid="pgtrigger_update_or_delete_snapshot_delete_1f4fd",
                    table="search_opinion",
                    when="AFTER",
                ),
            ),
        ),
        # An ordering_key value may appear only once per cluster.
        # NOTE(review): rows whose ordering_key is NULL are presumably not
        # treated as duplicates of each other by the database - confirm.
        migrations.AddConstraint(
            model_name="opinion",
            constraint=models.UniqueConstraint(
                fields=("cluster_id", "ordering_key"),
                name="unique_opinion_ordering_key",
            ),
        ),
    ]
14 changes: 14 additions & 0 deletions cl/search/migrations/0033_order_opinions.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- Schema changes for the ordering_key column; all three statements run in a
-- single transaction.
BEGIN;
--
-- Add field ordering_key to opinion
-- (nullable integer, so existing rows need no value)
--
ALTER TABLE "search_opinion" ADD COLUMN "ordering_key" integer NULL;
--
-- Add field ordering_key to opinionevent
-- (same nullable integer column on the event table)
--
ALTER TABLE "search_opinionevent" ADD COLUMN "ordering_key" integer NULL;
--
-- Create constraint unique_opinion_ordering_key on model opinion
-- (an ordering_key value may appear only once per cluster_id)
-- NOTE(review): rows with a NULL ordering_key are presumably not treated as
-- duplicates of each other - confirm for the target database.
--
ALTER TABLE "search_opinion" ADD CONSTRAINT "unique_opinion_ordering_key" UNIQUE ("cluster_id", "ordering_key");
COMMIT;
10 changes: 10 additions & 0 deletions cl/search/migrations/0033_order_opinions_customers.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Schema changes for the ordering_key column; both statements run in a single
-- transaction. This script only alters search_opinion.
BEGIN;
--
-- Add field ordering_key to opinion
-- (nullable integer, so existing rows need no value)
--
ALTER TABLE "search_opinion" ADD COLUMN "ordering_key" integer NULL;
--
-- Create constraint unique_opinion_ordering_key on model opinion
-- (an ordering_key value may appear only once per cluster_id)
-- NOTE(review): rows with a NULL ordering_key are presumably not treated as
-- duplicates of each other - confirm for the target database.
--
ALTER TABLE "search_opinion" ADD CONSTRAINT "unique_opinion_ordering_key" UNIQUE ("cluster_id", "ordering_key");
COMMIT;
Loading

0 comments on commit c7d7078

Please sign in to comment.