forked from freelawproject/courtlistener
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request freelawproject#2821 from freelawproject/2499-opinion-order
2499 Add Opinion Ordering
- Loading branch information
Showing
13 changed files
with
386 additions
and
37 deletions.
There are no files selected for viewing
115 changes: 115 additions & 0 deletions
115
cl/corpus_importer/management/commands/update_opinions_order.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
import argparse | ||
import time | ||
|
||
from django.db import transaction | ||
from django.db.models import Count | ||
|
||
from cl.lib.command_utils import VerboseCommand, logger | ||
from cl.search.models import SOURCES, Opinion, OpinionCluster | ||
|
||
|
||
def sort_harvard_opinions(options) -> None:
    """Fill the ``ordering_key`` field of opinions in Harvard clusters

    We assume that harvard data is already ordered, we just need to fill
    the order field in each opinion. The harvard importer created the
    opinions in order of appearance in the file, so ordering the
    sub opinions of a cluster by id gives their order of appearance.

    Only clusters with more than one sub opinion are processed. Combined
    opinions are skipped and receive no ordering key.

    :param options: dict of arguments passed to the command. Reads the
        optional keys "skip_until" (lowest cluster id to process) and "limit"
        (maximum number of clusters to process), and the required key "delay"
        (seconds to sleep after each updated cluster)
    :return: None
    """

    skip_until = options.get("skip_until", None)
    limit = options.get("limit", None)

    # The filepath_json_harvard field can only be filled by the harvard importer,
    # this helps us confirm that it was imported from a Harvard json. We exclude
    # clusters merged with columbia because those may need some extra verification
    harvard_clusters = (
        OpinionCluster.objects.exclude(filepath_json_harvard="")
        .prefetch_related("sub_opinions")
        .annotate(opinions_count=Count("sub_opinions"))
        .filter(opinions_count__gt=1)
        .exclude(source__contains=SOURCES.COLUMBIA_ARCHIVE)
        .order_by("id")
    )
    if skip_until:
        harvard_clusters = harvard_clusters.filter(pk__gte=skip_until)

    # Slice last: a sliced queryset cannot be filtered any further
    if limit:
        harvard_clusters = harvard_clusters[:limit]

    for cluster in harvard_clusters:
        logger.info(f"Processing cluster id: {cluster.id}")
        # Number of opinions updated so far; also the last key handed out
        opinion_order = 0
        # Update all the opinions of a cluster together or not at all
        with transaction.atomic():
            # We need to make sure they are ordered by id
            for cluster_op in cluster.sub_opinions.all().order_by("id"):
                if cluster_op.type == Opinion.COMBINED:
                    continue
                opinion_order += 1
                cluster_op.ordering_key = opinion_order
                cluster_op.save()

        if not opinion_order:
            # We want to know if you found anything unexpected, like for example
            # only having combined opinions
            logger.info(
                f"No sub_opinions updated for cluster id: {cluster.id}"
            )
            continue

        logger.info(msg=f"Opinions reordered for cluster id: {cluster.id}")
        # Wait between each processed cluster to avoid issues with elastic.
        # This happens after the atomic block so the transaction is not kept
        # open while sleeping
        time.sleep(options["delay"])
|
||
|
||
class Command(VerboseCommand):
    """Management command that fills the ordering key of sub opinions

    The work to run is selected with ``--action``; each valid action name maps
    to a function that receives the parsed options dict.
    """

    help = "Add ordering Key for sub opinions"

    # Maps the value accepted by --action to the function that implements it
    VALID_ACTIONS = {"sort-harvard": sort_harvard_opinions}

    def valid_actions(self, s):
        """Convert the value of ``--action`` into its handler function

        The name is matched case-insensitively.

        :param s: action name as typed on the command line
        :return: the callable registered for that action in VALID_ACTIONS
        :raises argparse.ArgumentTypeError: if the name is not a valid action
        """
        action_name = s.lower()
        if action_name not in self.VALID_ACTIONS:
            raise argparse.ArgumentTypeError(
                "Unable to parse action. Valid actions are: %s"
                % (", ".join(self.VALID_ACTIONS.keys()))
            )

        # Look up the lowercased name: the check above is case-insensitive, so
        # indexing with the raw value would raise KeyError on mixed-case input
        return self.VALID_ACTIONS[action_name]

    def add_arguments(self, parser):
        """Register the command line arguments of the command

        :param parser: argument parser to add the arguments to
        :return: None
        """
        parser.add_argument(
            "--skip-until",
            help="Specific cluster id to skip until",
            type=int,
            required=False,
        )
        parser.add_argument(
            "--limit",
            type=int,
            help="Number of files to sort",
            required=False,
        )
        parser.add_argument(
            "--action",
            type=self.valid_actions,
            required=True,
            help="The action you wish to take. Valid choices are: %s"
            % (", ".join(self.VALID_ACTIONS.keys())),
        )
        parser.add_argument(
            "--delay",
            type=float,
            default=0.2,
            help="How long to wait to update each opinion (in seconds, allows "
            "floating numbers).",
        )

    def handle(self, *args, **options):
        """Run the selected action

        :param args: positional arguments, forwarded to the parent class
        :param options: parsed command line options; "action" already holds
            the callable returned by valid_actions
        :return: None
        """
        super().handle(*args, **options)
        options["action"](options)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
# Generated by Django 5.0.7 on 2024-08-05 20:19 | ||
|
||
import pgtrigger.compiler | ||
import pgtrigger.migrations | ||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Add the nullable ``ordering_key`` field to opinions.

    The two history-snapshot triggers on ``opinion`` are removed and then
    re-added so that their SQL includes the new column, the column is added
    to both ``opinion`` and ``opinionevent``, and a unique constraint on
    (cluster_id, ordering_key) is created on ``opinion``.
    """

    dependencies = [
        (
            "people_db",
            "0016_remove_abarating_update_or_delete_snapshot_update_and_more",
        ),
        ("search", "0032_update_docket_numbering_fields"),
    ]

    operations = [
        # Drop the existing snapshot triggers; they are re-created below with
        # SQL that lists ordering_key
        pgtrigger.migrations.RemoveTrigger(
            model_name="opinion",
            name="update_or_delete_snapshot_delete",
        ),
        pgtrigger.migrations.RemoveTrigger(
            model_name="opinion",
            name="update_or_delete_snapshot_update",
        ),
        # New column on the model table and on its event (history) table
        migrations.AddField(
            model_name="opinion",
            name="ordering_key",
            field=models.IntegerField(blank=True, null=True),
        ),
        migrations.AddField(
            model_name="opinionevent",
            name="ordering_key",
            field=models.IntegerField(blank=True, null=True),
        ),
        # AFTER UPDATE trigger: when any of the listed columns (now including
        # ordering_key) changes, the OLD row is inserted into
        # search_opinionevent.
        # NOTE(review): the hash presumably fingerprints this SQL — do not edit
        # the strings by hand; confirm against django-pgtrigger docs
        pgtrigger.migrations.AddTrigger(
            model_name="opinion",
            trigger=pgtrigger.compiler.Trigger(
                name="update_or_delete_snapshot_update",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    condition='WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr") OR OLD."ordering_key" IS DISTINCT FROM (NEW."ordering_key"))',
                    func='INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "ordering_key", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."ordering_key", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL;',
                    hash="7137855274503cc2c50a17729f82e150d2b7d872",
                    operation="UPDATE",
                    pgid="pgtrigger_update_or_delete_snapshot_update_67ecd",
                    table="search_opinion",
                    when="AFTER",
                ),
            ),
        ),
        # AFTER DELETE trigger: unconditionally inserts the OLD row, including
        # ordering_key, into search_opinionevent
        pgtrigger.migrations.AddTrigger(
            model_name="opinion",
            trigger=pgtrigger.compiler.Trigger(
                name="update_or_delete_snapshot_delete",
                sql=pgtrigger.compiler.UpsertTriggerSql(
                    func='INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "ordering_key", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."ordering_key", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL;',
                    hash="98fb52aa60fd8e89a83f8f7ac77ba5892739fb37",
                    operation="DELETE",
                    pgid="pgtrigger_update_or_delete_snapshot_delete_1f4fd",
                    table="search_opinion",
                    when="AFTER",
                ),
            ),
        ),
        # An ordering key may be used only once within a cluster
        migrations.AddConstraint(
            model_name="opinion",
            constraint=models.UniqueConstraint(
                fields=("cluster_id", "ordering_key"),
                name="unique_opinion_ordering_key",
            ),
        ),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
-- Schema changes for the opinion ordering key, run as a single transaction:
-- adds a nullable integer "ordering_key" column to "search_opinion" and
-- "search_opinionevent", then makes ("cluster_id", "ordering_key") unique
-- on "search_opinion".
-- NOTE(review): this script contains no trigger statements; presumably the
-- snapshot triggers are handled separately -- confirm before running by hand.
BEGIN;
--
-- Add field ordering_key to opinion
--
ALTER TABLE "search_opinion" ADD COLUMN "ordering_key" integer NULL;
--
-- Add field ordering_key to opinionevent
--
ALTER TABLE "search_opinionevent" ADD COLUMN "ordering_key" integer NULL;
--
-- Create constraint unique_opinion_ordering_key on model opinion
--
ALTER TABLE "search_opinion" ADD CONSTRAINT "unique_opinion_ordering_key" UNIQUE ("cluster_id", "ordering_key");
COMMIT;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
-- Schema changes for the opinion ordering key, run as a single transaction:
-- adds a nullable integer "ordering_key" column to "search_opinion" and makes
-- ("cluster_id", "ordering_key") unique on that table.
-- NOTE(review): unlike the script that also alters "search_opinionevent",
-- this one leaves that table alone; presumably it targets databases that do
-- not have the event table -- confirm.
BEGIN;
--
-- Add field ordering_key to opinion
--
ALTER TABLE "search_opinion" ADD COLUMN "ordering_key" integer NULL;
--
-- Create constraint unique_opinion_ordering_key on model opinion
--
ALTER TABLE "search_opinion" ADD CONSTRAINT "unique_opinion_ordering_key" UNIQUE ("cluster_id", "ordering_key");
COMMIT;
Oops, something went wrong.