From b701bc05516840bf9eb1793f304674c4085595e5 Mon Sep 17 00:00:00 2001 From: William Palin Date: Thu, 15 Jun 2023 14:05:55 -0400 Subject: [PATCH 01/50] feat(search.models): Add django-ordered-model Add django-ordered-model Add django-ordered-model to Opinions Update poetry --- cl/search/migrations/0019_order_opinions.py | 71 ++++++++++ cl/search/migrations/0019_order_opinions.sql | 129 +++++++++++++++++++ cl/search/models.py | 4 +- cl/settings/django.py | 1 + poetry.lock | 13 +- pyproject.toml | 1 + 6 files changed, 217 insertions(+), 2 deletions(-) create mode 100644 cl/search/migrations/0019_order_opinions.py create mode 100644 cl/search/migrations/0019_order_opinions.sql diff --git a/cl/search/migrations/0019_order_opinions.py b/cl/search/migrations/0019_order_opinions.py new file mode 100644 index 0000000000..5e446056cc --- /dev/null +++ b/cl/search/migrations/0019_order_opinions.py @@ -0,0 +1,71 @@ +# Generated by Django 4.2.1 on 2023-06-15 17:56 + +from django.db import migrations, models +import pgtrigger.compiler +import pgtrigger.migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("search", "0018_update_cluster_model"), + ] + + operations = [ + migrations.AlterModelOptions( + name="opinion", + options={"ordering": ("order",)}, + ), + pgtrigger.migrations.RemoveTrigger( + model_name="opinion", + name="update_or_delete_snapshot_delete", + ), + pgtrigger.migrations.RemoveTrigger( + model_name="opinion", + name="update_or_delete_snapshot_update", + ), + migrations.AddField( + model_name="opinion", + name="order", + field=models.PositiveIntegerField( + db_index=True, default=1, editable=False, verbose_name="order" + ), + preserve_default=False, + ), + migrations.AddField( + model_name="opinionevent", + name="order", + field=models.PositiveIntegerField( + default=1, editable=False, verbose_name="order" + ), + preserve_default=False, + ), + pgtrigger.migrations.AddTrigger( + model_name="opinion", + trigger=pgtrigger.compiler.Trigger( + name="update_or_delete_snapshot_update", + sql=pgtrigger.compiler.UpsertTriggerSql( + condition='WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."order" IS DISTINCT FROM (NEW."order") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr"))', + func='INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", 
"page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL;', + hash="bcac41027f469bbd394e8671cb0b2fa33e7035f3", + operation="UPDATE", + pgid="pgtrigger_update_or_delete_snapshot_update_67ecd", + table="search_opinion", + when="AFTER", + ), + ), + ), + pgtrigger.migrations.AddTrigger( + model_name="opinion", + trigger=pgtrigger.compiler.Trigger( + name="update_or_delete_snapshot_delete", + sql=pgtrigger.compiler.UpsertTriggerSql( + func='INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL;', + hash="79bebd7cda3c6ed3bc40f28799cf9c0f2638e2ad", + operation="DELETE", + pgid="pgtrigger_update_or_delete_snapshot_delete_1f4fd", + table="search_opinion", + when="AFTER", + ), + ), + ), + ] diff --git a/cl/search/migrations/0019_order_opinions.sql b/cl/search/migrations/0019_order_opinions.sql new file mode 100644 index 0000000000..3226cb510b --- /dev/null +++ b/cl/search/migrations/0019_order_opinions.sql @@ -0,0 +1,129 @@ +BEGIN; +-- +-- Change Meta options on opinion +-- +-- (no-op) +-- +-- Remove trigger update_or_delete_snapshot_delete from model opinion +-- +DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion"; +-- +-- Remove trigger update_or_delete_snapshot_update from model opinion +-- +DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion"; +-- +-- Add field order to opinion +-- +ALTER TABLE "search_opinion" ADD COLUMN "order" integer DEFAULT 1 NOT NULL CHECK ("order" >= 0); +ALTER TABLE "search_opinion" ALTER COLUMN "order" DROP DEFAULT; +-- +-- Add field order to opinionevent +-- +ALTER TABLE "search_opinionevent" ADD COLUMN "order" integer DEFAULT 1 NOT NULL CHECK ("order" >= 0); +ALTER TABLE "search_opinionevent" ALTER COLUMN "order" DROP DEFAULT; +-- +-- Create trigger update_or_delete_snapshot_update on model opinion +-- + + CREATE OR REPLACE FUNCTION "public"._pgtrigger_should_ignore( + trigger_name NAME + ) + RETURNS BOOLEAN AS $$ + DECLARE + _pgtrigger_ignore TEXT[]; + _result BOOLEAN; + BEGIN + BEGIN + SELECT INTO _pgtrigger_ignore + CURRENT_SETTING('pgtrigger.ignore'); + EXCEPTION WHEN OTHERS THEN + END; + IF 
_pgtrigger_ignore IS NOT NULL THEN + SELECT trigger_name = ANY(_pgtrigger_ignore) + INTO _result; + RETURN _result; + ELSE + RETURN FALSE; + END IF; + END; + $$ LANGUAGE plpgsql; + + CREATE OR REPLACE FUNCTION pgtrigger_update_or_delete_snapshot_update_67ecd() + RETURNS TRIGGER AS $$ + + BEGIN + IF ("public"._pgtrigger_should_ignore(TG_NAME) IS TRUE) THEN + IF (TG_OP = 'DELETE') THEN + RETURN OLD; + ELSE + RETURN NEW; + END IF; + END IF; + INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), 'update_or_delete_snapshot', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL; + END; + $$ LANGUAGE plpgsql; + + DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion"; + CREATE TRIGGER pgtrigger_update_or_delete_snapshot_update_67ecd + AFTER UPDATE ON "search_opinion" + + + FOR EACH ROW WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."order" IS DISTINCT FROM (NEW."order") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr")) + EXECUTE PROCEDURE pgtrigger_update_or_delete_snapshot_update_67ecd(); + + COMMENT ON TRIGGER pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion" IS 'bcac41027f469bbd394e8671cb0b2fa33e7035f3'; + +-- +-- Create trigger update_or_delete_snapshot_delete on model opinion +-- + + CREATE OR REPLACE FUNCTION "public"._pgtrigger_should_ignore( + trigger_name NAME + ) + RETURNS BOOLEAN AS $$ + DECLARE + _pgtrigger_ignore TEXT[]; + _result BOOLEAN; + BEGIN + BEGIN + SELECT INTO _pgtrigger_ignore + CURRENT_SETTING('pgtrigger.ignore'); + EXCEPTION WHEN OTHERS THEN + END; + IF _pgtrigger_ignore IS NOT NULL THEN + SELECT trigger_name = ANY(_pgtrigger_ignore) + INTO _result; + RETURN _result; + ELSE + RETURN FALSE; + END IF; + END; + $$ LANGUAGE plpgsql; + + CREATE OR 
REPLACE FUNCTION pgtrigger_update_or_delete_snapshot_delete_1f4fd() + RETURNS TRIGGER AS $$ + + BEGIN + IF ("public"._pgtrigger_should_ignore(TG_NAME) IS TRUE) THEN + IF (TG_OP = 'DELETE') THEN + RETURN OLD; + ELSE + RETURN NEW; + END IF; + END IF; + INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), 'update_or_delete_snapshot', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL; + END; + $$ LANGUAGE plpgsql; + + DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion"; + CREATE TRIGGER pgtrigger_update_or_delete_snapshot_delete_1f4fd + AFTER DELETE ON "search_opinion" + + + FOR EACH ROW + EXECUTE PROCEDURE pgtrigger_update_or_delete_snapshot_delete_1f4fd(); + + COMMENT ON TRIGGER pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion" IS '79bebd7cda3c6ed3bc40f28799cf9c0f2638e2ad'; + +CREATE INDEX "search_opinion_order_d54dd126" ON "search_opinion" ("order"); +COMMIT; diff --git a/cl/search/models.py b/cl/search/models.py index d04587edef..fc6aa75414 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -14,6 +14,7 @@ from django.utils.encoding import force_str from django.utils.text import slugify from eyecite import get_citations +from ordered_model.models import OrderedModel from cl.citations.utils import get_citation_depth_between_clusters from cl.custom_filters.templatetags.text_filters import best_case_name @@ -2815,7 +2816,7 @@ def sort_cites(c): @pghistory.track(AfterUpdateOrDeleteSnapshot()) -class Opinion(AbstractDateTimeModel): +class Opinion(OrderedModel, AbstractDateTimeModel): COMBINED = "010combined" UNANIMOUS = "015unamimous" LEAD = "020lead" @@ -2965,6 +2966,7 @@ class Opinion(AbstractDateTimeModel): default=False, db_index=True, ) + order_with_respect_to = "cluster" @property def siblings(self) -> QuerySet: diff --git a/cl/settings/django.py b/cl/settings/django.py index 21b1ba4a7c..a522d824df 100644 --- a/cl/settings/django.py +++ b/cl/settings/django.py @@ -162,6 +162,7 @@ "admin_cursor_paginator", "pghistory", "pgtrigger", + "ordered_model", # CourtListener Apps "cl.alerts", "cl.audio", diff --git a/poetry.lock b/poetry.lock index 7f91780e6a..b8f82b7eed 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1054,6 +1054,17 @@ files = [ {file = "django_mathfilters-1.0.0-py3-none-any.whl", hash = "sha256:64200a21bb249fbf27be601d4bbb788779e09c6e063170c097cd82c4d18ebb83"}, ] +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -4576,4 +4587,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "674af32861e1e5bf9c31401f02a3af0b698be8b60b9492cd89ab5464218efd3e" +content-hash = "2b4d76ce134a241162a25c9634a4f9fdbf140d261750fdfca63a87ccbac4fcfd" diff --git a/pyproject.toml b/pyproject.toml index 91020cf1e0..ee00cd8366 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,6 +108,7 @@ types-dateparser = "^1.1.4.6" juriscraper = "^2.5.49" uvicorn = {extras = ["standard"], version = "^0.22.0"} daphne = "^4.0.0" +django-ordered-model = "^3.7.4" [tool.poetry.group.dev.dependencies] From 6cf0d7581be8241eda3d0b8b4a46833efb7de979 Mon Sep 17 00:00:00 2001 From: William Palin Date: Thu, 15 Jun 2023 14:57:38 -0400 Subject: [PATCH 02/50] fix(tests): Update fixtures for opinion model --- cl/search/fixtures/functest_opinions.json | 12 +++-- cl/search/fixtures/opinions-issue-412.json | 6 ++- cl/search/fixtures/opinions-issue-550.json | 6 ++- cl/search/fixtures/test_objects_search.json | 18 ++++--- .../fixtures/api_scotus_map_data.json | 6 ++- .../fixtures/scotus_map_data.json | 51 ++++++++++++------- 6 files changed, 66 insertions(+), 33 deletions(-) diff --git a/cl/search/fixtures/functest_opinions.json b/cl/search/fixtures/functest_opinions.json index e4fa89a260..45f5f0b759 100644 --- a/cl/search/fixtures/functest_opinions.json +++ b/cl/search/fixtures/functest_opinions.json @@ -64,7 +64,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 10 @@ -134,7 +135,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 11 @@ -184,7 +186,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 12 @@ -254,7 +257,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 12 diff --git a/cl/search/fixtures/opinions-issue-412.json b/cl/search/fixtures/opinions-issue-412.json index ca6ac33971..2e429ebecf 100644 --- a/cl/search/fixtures/opinions-issue-412.json +++ b/cl/search/fixtures/opinions-issue-412.json @@ -64,7 +64,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 10 @@ -134,7 +135,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 11 diff --git a/cl/search/fixtures/opinions-issue-550.json b/cl/search/fixtures/opinions-issue-550.json index b0163eb8f8..829a94c7d2 100644 --- a/cl/search/fixtures/opinions-issue-550.json +++ b/cl/search/fixtures/opinions-issue-550.json @@ -64,7 
+64,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 10 @@ -86,7 +87,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "020lead" + "type": "020lead", + "order": 1 }, "model": "search.opinion", "pk": 11 diff --git a/cl/search/fixtures/test_objects_search.json b/cl/search/fixtures/test_objects_search.json index 2255c7edcf..9fddb84fca 100644 --- a/cl/search/fixtures/test_objects_search.json +++ b/cl/search/fixtures/test_objects_search.json @@ -239,7 +239,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "020lead" + "type": "020lead", + "order": 1 }, "model": "search.opinion", "pk": 1 @@ -261,7 +262,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 2 @@ -283,7 +285,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 3 @@ -305,7 +308,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 4 @@ -327,7 +331,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 5 @@ -349,7 +354,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 6 diff --git a/cl/visualizations/fixtures/api_scotus_map_data.json b/cl/visualizations/fixtures/api_scotus_map_data.json index 5b4b19fe73..46dc2f9856 100644 --- a/cl/visualizations/fixtures/api_scotus_map_data.json +++ b/cl/visualizations/fixtures/api_scotus_map_data.json @@ -121,7 +121,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "020lead" + "type": "020lead", + "order": 1 }, "model": "search.opinion", "pk": 1 @@ -143,7 +144,8 @@ "date_created": "2015-08-15T14:10:56.801Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 2 diff --git a/cl/visualizations/fixtures/scotus_map_data.json b/cl/visualizations/fixtures/scotus_map_data.json index ce504fe2c9..a885e4df54 100644 --- a/cl/visualizations/fixtures/scotus_map_data.json +++ b/cl/visualizations/fixtures/scotus_map_data.json @@ -902,7 +902,8 @@ "date_created": "2016-02-16T19:49:54.525Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 111014 @@ -924,7 +925,8 @@ "date_created": "2016-02-16T19:49:54.545Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 111113 @@ -946,7 +948,8 @@ "date_created": "2016-02-16T19:49:54.565Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 111464 @@ -968,7 +971,8 @@ "date_created": "2016-02-16T19:49:54.610Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + 
"order": 1 }, "model": "search.opinion", "pk": 111505 @@ -990,7 +994,8 @@ "date_created": "2016-02-16T19:49:54.629Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 111924 @@ -1012,7 +1017,8 @@ "date_created": "2016-02-16T19:49:54.575Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 112331 @@ -1034,7 +1040,8 @@ "date_created": "2016-02-16T19:49:54.537Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 112646 @@ -1056,7 +1063,8 @@ "date_created": "2016-02-16T19:49:54.583Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 112779 @@ -1078,7 +1086,8 @@ "date_created": "2016-02-16T19:49:54.592Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 112874 @@ -1100,7 +1109,8 @@ "date_created": "2016-02-16T19:49:54.602Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 117967 @@ -1122,7 +1132,8 @@ "date_created": "2016-02-16T19:49:54.553Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 118377 @@ -1144,7 +1155,8 @@ "date_created": "2016-02-16T19:49:54.621Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 121168 @@ -1166,7 +1178,8 @@ "date_created": "2016-02-16T19:49:54.658Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 136984 @@ -1188,7 +1201,8 @@ "date_created": "2016-02-16T19:49:54.647Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 142900 @@ -1210,7 +1224,8 @@ "date_created": "2016-02-16T19:49:54.666Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 799990 @@ -1232,7 +1247,8 @@ "date_created": "2016-02-16T19:49:54.636Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 799993 @@ -1254,7 +1270,8 @@ "date_created": "2016-02-16T19:49:54.513Z", "html_lawbox": "", "per_curiam": false, - "type": "010combined" + "type": "010combined", + "order": 1 }, "model": "search.opinion", "pk": 2674862 From 05e9d9856b543579c28de371dcf8823c8ee7e666 Mon Sep 17 00:00:00 2001 From: William Palin Date: Thu, 15 Jun 2023 15:23:23 -0400 Subject: [PATCH 03/50] fix(tests): Update fixtures for opinion model Take 2 --- .../fixtures/test_objects_query_counts.json | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/cl/search/fixtures/test_objects_query_counts.json b/cl/search/fixtures/test_objects_query_counts.json index aa909b2fb2..b51117602a 100644 --- a/cl/search/fixtures/test_objects_query_counts.json +++ b/cl/search/fixtures/test_objects_query_counts.json @@ -300,7 +300,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"020lead" + "type":"020lead", + "order": 1 }, 
"model":"search.opinion", "pk":1 @@ -324,7 +325,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"010combined" + "type":"010combined", + "order": 1 }, "model":"search.opinion", "pk":2 @@ -348,7 +350,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"010combined" + "type":"010combined", + "order": 1 }, "model":"search.opinion", "pk":3 @@ -371,7 +374,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"010combined" + "type":"010combined", + "order": 1 }, "model":"search.opinion", "pk":4 @@ -395,7 +399,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"010combined" + "type":"010combined", + "order": 1 }, "model":"search.opinion", "pk":5 @@ -418,7 +423,8 @@ "date_created":"2015-08-15T14:10:56.801Z", "html_lawbox":"", "per_curiam":false, - "type":"010combined" + "type":"010combined", + "order": 1 }, "model":"search.opinion", "pk":6 From b0fc70a56055699c551b59a3ed38a005459905e3 Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 16 Jun 2023 12:46:45 -0400 Subject: [PATCH 04/50] feat(models): Override django-ordered-model default By default it sorts by order - so if we dont want that feature we simply need to override the django order with a custom ordered manager in on the opinion class. (I think) --- cl/search/models.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cl/search/models.py b/cl/search/models.py index fc6aa75414..7fc2c03458 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -14,7 +14,7 @@ from django.utils.encoding import force_str from django.utils.text import slugify from eyecite import get_citations -from ordered_model.models import OrderedModel +from ordered_model.models import OrderedModel, OrderedModelManager from cl.citations.utils import get_citation_depth_between_clusters from cl.custom_filters.templatetags.text_filters import best_case_name @@ -2815,6 +2815,13 @@ def sort_cites(c): return 8 +class CustomOrderedManager(OrderedModelManager): + """Override the django ordered model default ordering""" + + def get_queryset(self): + return super().get_queryset().order_by() + + @pghistory.track(AfterUpdateOrDeleteSnapshot()) class Opinion(OrderedModel, AbstractDateTimeModel): COMBINED = "010combined" @@ -2968,6 +2975,8 @@ class Opinion(OrderedModel, AbstractDateTimeModel): ) order_with_respect_to = "cluster" + objects = CustomOrderedManager() + @property def siblings(self) -> QuerySet: # These are other sub-opinions of the current cluster. 
From b8fa44563ac4bb42d6ad3020c604da8f8940f187 Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 16 Jun 2023 13:44:51 -0400 Subject: [PATCH 05/50] fix(models): Different override for ordering on OP --- cl/search/models.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/cl/search/models.py b/cl/search/models.py index 7fc2c03458..be645bc5e8 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -14,7 +14,7 @@ from django.utils.encoding import force_str from django.utils.text import slugify from eyecite import get_citations -from ordered_model.models import OrderedModel, OrderedModelManager +from ordered_model.models import OrderedModel from cl.citations.utils import get_citation_depth_between_clusters from cl.custom_filters.templatetags.text_filters import best_case_name @@ -2815,13 +2815,6 @@ def sort_cites(c): return 8 -class CustomOrderedManager(OrderedModelManager): - """Override the django ordered model default ordering""" - - def get_queryset(self): - return super().get_queryset().order_by() - - @pghistory.track(AfterUpdateOrDeleteSnapshot()) class Opinion(OrderedModel, AbstractDateTimeModel): COMBINED = "010combined" @@ -2975,7 +2968,8 @@ class Opinion(OrderedModel, AbstractDateTimeModel): ) order_with_respect_to = "cluster" - objects = CustomOrderedManager() + class Meta: + ordering = () @property def siblings(self) -> QuerySet: From 7429eba0290bc2f931489b5799e90de318cd1512 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 28 Jul 2023 19:30:18 -0600 Subject: [PATCH 06/50] fix(poetry): Fix merge conflicts --- poetry.lock | 13 ++++++++++++- pyproject.toml | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index e034727158..24dc7977e0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1072,6 +1072,17 @@ files = [ {file = "django_mathfilters-1.0.0-py3-none-any.whl", hash = "sha256:64200a21bb249fbf27be601d4bbb788779e09c6e063170c097cd82c4d18ebb83"}, ] +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -4690,4 +4701,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "10446165560282337aada87c0f3a9324dc904777bbfcc0f7e35db5c9d13a10a9" +content-hash = "7c0448e0852dba4f13177892cc0e619e2b58470f4d82707d8069fbeceb1cb919" diff --git a/pyproject.toml b/pyproject.toml index f9d568defa..2caee093ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,6 +110,7 @@ daphne = "^4.0.0" psycopg2 = "^2.9.6" juriscraper = "^2.5.51" httpx = {extras = ["http2"], version = "^0.24.1"} +django-ordered-model = "^3.7.4" [tool.poetry.group.dev.dependencies] From f45a093c6b02ed5ae4a1077062295fa25f1c4894 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 31 Jul 2023 19:43:51 -0600 Subject: [PATCH 07/50] fix(models): Add 'order' field as default ordering for Opinion model Test added for django-ordered-model library Optimize imports in search/tests.py --- cl/search/models.py | 2 +- cl/search/tests.py | 69 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/cl/search/models.py b/cl/search/models.py index 5024bdcc3d..e50987c3f6 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -2986,7 +2986,7 @@ class Opinion(OrderedModel, AbstractDateTimeModel): order_with_respect_to = "cluster" class Meta: - ordering = () + ordering = ("order",) @property def siblings(self) -> QuerySet: diff --git a/cl/search/tests.py b/cl/search/tests.py index b5c71c9b16..c4edb24b31 100644 --- a/cl/search/tests.py +++ b/cl/search/tests.py @@ -5,7 +5,7 @@ from datetime import date from functools import reduce from pathlib import Path -from unittest import mock, skipUnless +from unittest import mock import pytz from asgiref.sync import sync_to_async @@ -19,9 +19,8 @@ from django.db import IntegrityError, transaction from django.http import HttpRequest from django.test import AsyncRequestFactory, override_settings -from django.test.utils import captured_stderr from django.urls import reverse -from elasticsearch_dsl import Q, connections +from elasticsearch_dsl import Q from factory import RelatedFactory from lxml import etree, html from rest_framework.status import HTTP_200_OK @@ -58,6 +57,7 @@ DocketFactory, OpinionClusterFactory, OpinionClusterFactoryWithChildrenAndParents, + OpinionFactory, OpinionsCitedWithParentsFactory, OpinionWithChildrenFactory, OpinionWithParentsFactory, @@ -283,6 +283,69 @@ def test_custom_manager_chained_filter(self) -> None: ) self.assertEqual(cluster_count, expected_count) + def test_opinions_order(self) -> None: + """Test django-ordered-model library""" + + # Create court + court = CourtFactory(id="nyappdiv") + + # Create cluster + cluster = OpinionClusterFactory( + case_name="Foo v. Bar", + case_name_short="Foo v. 
Bar", + docket=DocketFactory( + court=court, + ), + date_filed=date(1978, 3, 10), + source="U", + precedential_status=PRECEDENTIAL_STATUS.PUBLISHED, + ) + + # Create three opinions + op_1 = OpinionFactory( + cluster=cluster, + type="Concurrence Opinion", + ) + + op_2 = OpinionFactory( + cluster=cluster, + type="Dissent", + ) + + op_3 = OpinionFactory( + cluster=cluster, + type="Lead Opinion", + ) + + # Test that the value of the order field matches the order in which + # they were created + self.assertEqual(op_1.order, 0) + self.assertEqual(op_2.order, 1) + self.assertEqual(op_3.order, 2) + + # Use library method to move lead opinion to first position, we can + # use this function to easily reorder existing opinions + op_3.to(0) + + # The position of the elements was modified, we refresh the objects + op_1.refresh_from_db() + op_2.refresh_from_db() + op_3.refresh_from_db() + + # Test new order + self.assertEqual(op_3.order, 0) + self.assertEqual(op_1.order, 1) + self.assertEqual(op_2.order, 2) + + # Add new opinion to cluster + op_4 = OpinionFactory( + cluster=cluster, + type="Dissent", + ) + + # Test that the new opinion is in last place + self.assertEqual(op_4.order, 3) + class DocketValidationTest(TestCase): @classmethod From 37dee19fcfacf95a79aac71c21ccc507d10289b4 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 11:27:50 -0600 Subject: [PATCH 08/50] fix(opinion_order): fix merge conflicts with main --- poetry.lock | 17 ++++++++++++++--- pyproject.toml | 1 + 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index a23818c13e..9b7321deb1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "amqp" @@ -1094,6 +1094,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -2718,7 +2729,7 @@ name = "ndg-httpsclient" version = "0.5.1" description = "Provides enhanced HTTPS support for httplib and urllib2 using PyOpenSSL" optional = false -python-versions = ">=2.7,<3.0.0 || >=3.4.0" +python-versions = ">=2.7,<3.0.dev0 || >=3.4.dev0" files = [ {file = "ndg_httpsclient-0.5.1-py2-none-any.whl", hash = "sha256:d2c7225f6a1c6cf698af4ebc962da70178a99bcde24ee6d1961c4f3338130d57"}, {file = "ndg_httpsclient-0.5.1-py3-none-any.whl", hash = "sha256:dd174c11d971b6244a891f7be2b32ca9853d3797a72edb34fa5d7b07d8fff7d4"}, @@ -5091,4 +5102,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "5257a6d2a26b74054bac82d0c5700a55f1e2e2ec580608921e8a27a76d015f52" +content-hash = "46adbdc75bf4ad70aa4d6531f4d71a8f22f1e85ee9886408e921e7147aab7a36" diff --git a/pyproject.toml b/pyproject.toml index 87d6e90ff9..ef5970143f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -113,6 +113,7 @@ juriscraper = "^2.5.51" httpx = {extras = ["http2"], version = "^0.24.1"} django-model-utils = "^4.3.1" inflection = "^0.5.1" # necessary for DRF schema generation - remove after drf-spectacular +django-ordered-model = "^3.7.4" [tool.poetry.group.dev.dependencies] From 3b4cb06ef8724d5052f9868f8d77388acfe18be1 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 11:55:03 -0600 Subject: [PATCH 09/50] fix(opinion_order): rename migrations --- .../{0019_order_opinions.py => 0020_order_opinions.py} | 2 +- .../{0019_order_opinions.sql => 0020_order_opinions.sql} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cl/search/migrations/{0019_order_opinions.py => 0020_order_opinions.py} (99%) rename cl/search/migrations/{0019_order_opinions.sql => 0020_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0019_order_opinions.py b/cl/search/migrations/0020_order_opinions.py similarity index 99% rename from cl/search/migrations/0019_order_opinions.py rename to cl/search/migrations/0020_order_opinions.py index 5e446056cc..f614156360 100644 --- a/cl/search/migrations/0019_order_opinions.py +++ b/cl/search/migrations/0020_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0018_update_cluster_model"), + ("search", "0019_add_docket_source_noop"), ] operations = [ diff --git a/cl/search/migrations/0019_order_opinions.sql b/cl/search/migrations/0020_order_opinions.sql similarity index 100% rename from cl/search/migrations/0019_order_opinions.sql rename to cl/search/migrations/0020_order_opinions.sql From 878b9479e9c95b429b16c6bd044a2315b6cce3f3 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 17:11:30 -0600 Subject: [PATCH 10/50] feat(opinion_order): management command to update the order of harvard and columbia opinions --- .../commands/update_opinions_order.py | 598 ++++++++++++++++++ 1 file changed, 598 insertions(+) create mode 100644 cl/corpus_importer/management/commands/update_opinions_order.py diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py 
new file mode 100644 index 0000000000..f48de154a0 --- /dev/null +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -0,0 +1,598 @@ +import re +from typing import Any, Optional + +from bs4 import BeautifulSoup, NavigableString, Tag +from django.core.management import BaseCommand +from django.db.models import Count + +from cl.corpus_importer.utils import similarity_scores +from cl.lib.command_utils import logger +from cl.lib.string_diff import get_cosine_similarity +from cl.search.models import Opinion, OpinionCluster + +# TODO Should we add a flag to know that the cluster has been processed? + + +def match_text_lists( + file_opinions_list: list[str], cl_opinions_list: list[str] +) -> dict[int, Any]: + """Generate matching lists above threshold + :param file_opinions_list: Opinions from file + :param cl_opinions_list: CL opinions + :return: Matches if found or False + """ + # We import this here to avoid a circular import + from cl.corpus_importer.management.commands.harvard_opinions import ( + compare_documents, + ) + + scores = similarity_scores(file_opinions_list, cl_opinions_list) + + matches = {} + for i, row in enumerate(scores): + j = row.argmax() # type: ignore + # Lower threshold for small opinions. + if ( + get_cosine_similarity(file_opinions_list[i], cl_opinions_list[j]) + < 0.60 + ): + continue + percent_match = compare_documents( + file_opinions_list[i], cl_opinions_list[j] + ) + if percent_match < 60: + continue + matches[i] = j + + # Key is opinion position from file, Value is opinion position from cl opinion + # e.g. matches {0: 1, 1: 2} 0 is file opinion and 1 in cl opinion, 1 is file + # opinion and 2 is cl opinion + return matches + + +def get_opinion_content( + cluster_id, +) -> tuple[Optional[str], list[dict], int, bool]: + """Get the opinions content for a cluster object + :param cluster_id: Cluster ID for a set of opinions + :return: (xml path, list of extracted opinions, start position, True if combined + opinions exists in cluster) + """ + cl_cleaned_opinions = [] + # by default the opinions are ordered by pk + opinions_from_cluster = Opinion.objects.filter( + cluster_id=cluster_id + ).order_by("id") + combined_opinions_cluster = opinions_from_cluster.filter( + type="010combined" + ) + xml_path = None + combined_opinion = False + if combined_opinions_cluster: + # the combined opinion will be displayed at beginning + start_position = combined_opinions_cluster.count() + combined_opinion = True + else: + # we don't have combined opinions, we start ordering from 0 to n + start_position = 0 + + for i, op in enumerate(opinions_from_cluster.exclude(type="010combined")): + if op.local_path and not xml_path: + xml_path = op.local_path + content = None + if len(op.html_with_citations) > 1: + content = op.html_with_citations + elif len(op.html_columbia) > 1: + content = op.html_columbia + elif len(op.html_lawbox) > 1: + content = op.html_lawbox + elif len(op.plain_text) > 1: + content = op.plain_text + elif len(op.html) > 1: + content = op.html + elif len(op.xml_harvard) > 1: + content = op.xml_harvard + if content: + soup = BeautifulSoup(content, features="html.parser") + prep_text = re.sub( + r"[^a-zA-Z0-9 ]", "", soup.getText(separator=" ").lower() + ) + prep_text = re.sub(" +", " ", prep_text) + cl_cleaned_opinions.append( + { + "id": op.id, + "byline": op.author_str, + "type": op.type, + "opinion": prep_text, + "order": i, + } + ) + + return xml_path, cl_cleaned_opinions, start_position, combined_opinion + + +def get_opinions_columbia_xml(xml_filepath: 
str) -> list: + """Convert xml data into dict + :param xml_filepath: path of xml file + :return: dict with data + """ + + SIMPLE_TAGS = [ + "attorneys", + "caption", + "citation", + "court", + "date", + "docket", + "hearing_date", + "panel", + "posture", + "reporter_caption", + ] + + data = {} # type: dict + + with open(xml_filepath, "r", encoding="utf-8") as f: + file_content = f.read() + + data["unpublished"] = False + + if "" in file_content: + file_content = file_content.replace( + "", "" + ) + file_content = file_content.replace("", "").replace( + "", "" + ) + + data["unpublished"] = True + + # Sometimes opening and ending tag mismatch (e.g. c6b39dcb29c9c.xml) + file_content = file_content.replace( + "", "" + ) + + soup = BeautifulSoup(file_content, "lxml") + + # Find the outer tag to have all elements inside + find_opinion = soup.find("opinion") + + step_one_opinions = [] # type: list + opinions = [] # type: list + order = 0 + + if find_opinion: + untagged_content = [] + + # We iterate all content, with and without tags + # STEP 1: Extract all content in multiple dict elements + for i, content in enumerate(find_opinion): # type: int, Tag + if type(content) == NavigableString: + # We found a raw string, store it + untagged_content.append(str(content)) + + else: + if content.name in SIMPLE_TAGS + [ + "citation_line", + "opinion_byline", + "dissent_byline", + "concurrence_byline", + ]: + # Ignore these tags, it will be processed later + continue + elif content.name in [ + "opinion_text", + "dissent_text", + "concurrence_text", + ]: + if untagged_content: + # We found something other than a navigable string that is + # not an opinion, but now we have found an opinion, + # let's create this content first + + # default type + op_type = "opinion" + if step_one_opinions: + if step_one_opinions[-1].get("type"): + # use type of previous opinion if exists + op_type = step_one_opinions[-1].get("type") + + # Get rid of double spaces + opinion_content = re.sub( + " +", " ", "\n".join(untagged_content) + ).strip() # type: str + if opinion_content: + step_one_opinions.append( + { + "opinion": opinion_content, + "order": order, + "byline": "", + "type": op_type, + } + ) + order = order + 1 + untagged_content = [] + + byline = content.find_previous_sibling() + opinion_author = "" + if byline and "_byline" in byline.name: + opinion_author = byline.get_text() + + opinion_content = re.sub( + " +", " ", content.decode_contents() + ).strip() + if opinion_content: + step_one_opinions.append( + { + "opinion": opinion_content, + "order": order, + "byline": opinion_author, + "type": content.name.replace("_text", ""), + } + ) + order = order + 1 + + else: + # Content not inside _text tag, we store it + untagged_content.append(str(content)) + + if untagged_content: + # default type + op_type = "opinion" + if step_one_opinions: + if step_one_opinions[-1].get("type"): + # use type of previous opinion if exists + op_type = step_one_opinions[-1].get("type") + + opinion_content = re.sub( + " +", " ", "\n".join(untagged_content) + ).strip() + if opinion_content: + step_one_opinions.append( + { + "opinion": opinion_content, + "order": order, + "byline": "", + "type": op_type, + } + ) + + # Step 2: Merge found content in the xml file + new_order = 0 + authorless_content = [] + + for i, found_content in enumerate(step_one_opinions, start=1): + byline = found_content.get("byline") + if not byline: + # Opinion has no byline, store it + authorless_content.append(found_content) + + if byline: + # Opinion has byline + 
opinion_type = found_content.get("type") + opinion_content = found_content.get("opinion", "") + # Store content that doesn't match the current type + alternative_authorless_content = [ + z + for z in authorless_content + if z.get("type") != opinion_type + ] + # Keep content that matches the current type + authorless_content = [ + z + for z in authorless_content + if z.get("type") == opinion_type + ] + + if alternative_authorless_content: + # Keep floating text that are not from the same type, + # we need to create a separate opinion for those, + # for example: in 2713f39c5a8e8684.xml we have an opinion + # without an author, and the next opinion with an author is + # a dissent opinion, we can't combine both + + # We check if the previous stored opinion matches the type of the + # content + relevant_opinions = ( + [opinions[-1]] + if opinions + and opinions[-1]["type"] + == alternative_authorless_content[0].get("type") + else [] + ) + + if relevant_opinions: + previous_opinion = relevant_opinions[-1] + if previous_opinion.get( + "type" + ) == alternative_authorless_content[0].get("type"): + # Merge last opinion with previous opinion, it probably + # belongs the same author + relevant_opinions[-1][ + "opinion" + ] += "\n" + "\n".join( + [ + f.get("opinion") + for f in alternative_authorless_content + if f.get("opinion") + ] + ) + authorless_content = [] + + else: + # No relevant opinions found, create a new opinion + new_opinion = { + "byline": None, + "type": alternative_authorless_content[0].get( + "type" + ), + "opinion": "\n".join( + [ + f.get("opinion") + for f in alternative_authorless_content + if f.get("opinion") + ] + ), + "order": new_order, + } + new_order = new_order + 1 + opinions.append(new_opinion) + + # Add new opinion + new_opinion = { + "byline": byline, + "type": opinion_type, + "opinion": "\n".join( + [ + f.get("opinion") + for f in authorless_content + if f.get("type") == opinion_type + ] + ) + + "\n\n" + + opinion_content, + "order": new_order, + } + + opinions.append(new_opinion) + new_order = new_order + 1 + authorless_content = [] + + if len(step_one_opinions) == i and authorless_content: + # If is the last opinion, and we still have opinions without + # byline, create an opinion without an author and the contents + # that couldn't be merged + + # We check if the previous stored opinion matches the type of the + # content + relevant_opinions = ( + [opinions[-1]] + if opinions + and opinions[-1]["type"] + == authorless_content[0].get("type") + else [] + ) + + if relevant_opinions: + previous_opinion = relevant_opinions[-1] + if previous_opinion.get("type") == authorless_content[ + 0 + ].get("type"): + # Merge last opinion with previous opinion, it probably + # belongs the same author + relevant_opinions[-1]["opinion"] += "\n" + "\n".join( + [ + f.get("opinion") + for f in authorless_content + if f.get("opinion") + ] + ) + + else: + # Create last floating opinion + new_opinion = { + "byline": None, + "type": authorless_content[0].get("type"), + "opinion": "\n".join( + [ + f.get("opinion") + for f in authorless_content + if f.get("opinion") + ] + ), + "order": new_order, + } + opinions.append(new_opinion) + + for op in opinions: + opinion_content = op.get("opinion") + opinion_content = BeautifulSoup( + opinion_content, "html.parser" + ).getText() + opinion_content = re.sub(r"[^a-zA-Z0-9 ]", "", opinion_content.lower()) + op["opinion"] = opinion_content + + return opinions + + +def run_harvard(): + """ + We assume that harvard data is already ordered, we just need to 
fill the order + field in each opinion + """ + + # Get all harvard clusters with more than one opinion + clusters = ( + OpinionCluster.objects.prefetch_related("sub_opinions") + .annotate(opinions_count=Count("sub_opinions")) + .filter(opinions_count__gt=1, source="U") + ) + # print(clusters.query) + print("clusters", len(clusters)) + + # cluster_id: 4697264, the combined opinion will go to the last position + for oc in clusters: + combined_opinions_cluster = oc.sub_opinions.filter( + type="010combined" + ).order_by("id") + if combined_opinions_cluster: + # the combined opinion will be displayed at first + start_position = combined_opinions_cluster.count() + else: + # we don't have combined opinions, we start ordering from 0 to n + start_position = 0 + + print("combined_opinions_cluster", combined_opinions_cluster) + for opinion_order, cluster_op in enumerate( + oc.sub_opinions.exclude(type="010combined").order_by("id"), + start=start_position, + ): + cluster_op.order = opinion_order + cluster_op.save() + + # Show combined opinions at beginning + for opinion_order, cluster_op in enumerate(combined_opinions_cluster): + cluster_op.order = opinion_order + cluster_op.save() + + logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") + + +def run_columbia(): + """ + Update opinion order for columbia clusters + """ + + # Get all columbia cluster ids with more than one opinion + clusters = ( + OpinionCluster.objects.annotate(opinions_count=Count("sub_opinions")) + .filter(opinions_count__gt=1, source="Z") + .order_by("id") + .values_list("id") + ) + + for cluster_id in clusters: + logger.info(f"Processing cluster id: {cluster_id}") + ( + xml_path, + cl_cleaned_opinions, + start_position, + combined_opinion, + ) = get_opinion_content(cluster_id) + + columbia_opinions = None + if xml_path: + columbia_opinions = get_opinions_columbia_xml(xml_path) + + if cl_cleaned_opinions and columbia_opinions: + matches = match_text_lists( + [op.get("opinion") for op in columbia_opinions], + [op.get("opinion") for op in cl_cleaned_opinions], + ) + + if matches: + if len(matches.values()) != len(set(matches.values())): + # We don't have a unique match for each opinion, they were + # probably combined incorrectly + logger.info( + f"We can't infer opinions order for cluster id: {cluster_id}" + ) + # Go to next cluster id + continue + + if len(cl_cleaned_opinions) > len(set(matches.values())): + # We have more opinions than matches + logger.info( + f"We couldn't match all cl opinions to the file's " + f"content, cluster id: {cluster_id}" + ) + # Go to next cluster id + continue + + failed = False + for file_pos, cl_pos in matches.items(): + # file_pos is the correct index to find the opinion id to update + file_opinion = columbia_opinions[file_pos] + # the order was calculated using the xml file + file_order = file_opinion.get("order") + start_position + cl_opinion = cl_cleaned_opinions[cl_pos] + opinion_id_to_update = cl_opinion.get("id") + + if opinion_id_to_update: + try: + # Save opinion + op = Opinion.objects.get(id=opinion_id_to_update) + op.order = file_order + op.save() + logger.info( + f"Cluster id processed: {cluster_id} Update opinion id: {opinion_id_to_update} with position: {file_order}" + ) + except Opinion.DoesNotExist: + logger.warning( + f"We can't update opinion, opinion doesn't exist with " + f"id: {opinion_id_to_update}" + ) + failed = True + break + else: + logger.warning( + f"We can't update opinion, empty opinion id " + f"from cluster: {cluster_id}" + ) + failed = True + break + + if 
combined_opinion and not failed: + combined_opinions_cluster = Opinion.objects.filter( + cluster_id=cluster_id, type="010combined" + ).order_by("id") + + # Show combined opinions at beginning + for opinion_order, cluster_op in enumerate( + combined_opinions_cluster + ): + cluster_op.order = opinion_order + cluster_op.save() + + else: + # No matches found + logger.warning( + f"Failed to match opinions from cluster id: {cluster_id}" + ) + continue + + +class Command(BaseCommand): + help = "Fill order field in Opinion objects" + + def __init__(self, *args, **kwargs): + super(Command, self).__init__(*args, **kwargs) + + def add_arguments(self, parser): + parser.add_argument( + "--process-harvard", + action="store_true", + help="Fix harvard opinions order", + ) + + parser.add_argument( + "--process-columbia", + action="store_true", + help="Fix columbia opinions order", + ) + + def handle(self, *args, **options): + print("harvard", options["process_harvard"]) + print("columbia", options["process_columbia"]) + + if options["process_harvard"] and options["process_columbia"]: + print( + "You can only select one option process-harvard or process-columbia" + ) + return + + if options["process_harvard"]: + run_harvard() + + if options["process_columbia"]: + run_columbia() From c3a5c4a2a0ad002b075ea69b3a0757bbef684a1f Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 19:13:02 -0600 Subject: [PATCH 11/50] feat(opinion_order): exception when xml file not found --- .../commands/update_opinions_order.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index f48de154a0..0560c506ba 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -4,6 +4,7 @@ from bs4 import BeautifulSoup, NavigableString, Tag from django.core.management import BaseCommand from django.db.models import Count +from django.db.models.fields.files import FieldFile from cl.corpus_importer.utils import similarity_scores from cl.lib.command_utils import logger @@ -52,7 +53,7 @@ def match_text_lists( def get_opinion_content( cluster_id, -) -> tuple[Optional[str], list[dict], int, bool]: +) -> tuple[Optional[FieldFile], list[dict], int, bool]: """Get the opinions content for a cluster object :param cluster_id: Cluster ID for a set of opinions :return: (xml path, list of extracted opinions, start position, True if combined @@ -78,6 +79,8 @@ def get_opinion_content( for i, op in enumerate(opinions_from_cluster.exclude(type="010combined")): if op.local_path and not xml_path: + # We store the field because we are using S3 for storage and that backend + # doesn't support absolute paths xml_path = op.local_path content = None if len(op.html_with_citations) > 1: @@ -111,7 +114,7 @@ def get_opinion_content( return xml_path, cl_cleaned_opinions, start_position, combined_opinion -def get_opinions_columbia_xml(xml_filepath: str) -> list: +def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: """Convert xml data into dict :param xml_filepath: path of xml file :return: dict with data @@ -132,8 +135,8 @@ def get_opinions_columbia_xml(xml_filepath: str) -> list: data = {} # type: dict - with open(xml_filepath, "r", encoding="utf-8") as f: - file_content = f.read() + with xml_filepath.open("r") as f: + file_content = f.read().decode("utf-8") data["unpublished"] = False @@ 
-432,6 +435,7 @@ def run_harvard(): # cluster_id: 4697264, the combined opinion will go to the last position for oc in clusters: + logger.info(f"Processing cluster id: {oc}") combined_opinions_cluster = oc.sub_opinions.filter( type="010combined" ).order_by("id") @@ -468,7 +472,7 @@ def run_columbia(): OpinionCluster.objects.annotate(opinions_count=Count("sub_opinions")) .filter(opinions_count__gt=1, source="Z") .order_by("id") - .values_list("id") + .values_list("id", flat=True) ) for cluster_id in clusters: @@ -482,7 +486,11 @@ def run_columbia(): columbia_opinions = None if xml_path: - columbia_opinions = get_opinions_columbia_xml(xml_path) + try: + columbia_opinions = get_opinions_columbia_xml(xml_path) + except FileNotFoundError: + logger.warning(f"Xml file not found, cluster id: {cluster_id}") + continue if cl_cleaned_opinions and columbia_opinions: matches = match_text_lists( From 6ba8d3d3b1048ba4dfaf79ef60b72bf5fff8e55f Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 19:37:30 -0600 Subject: [PATCH 12/50] feat(opinion_order): add param to resume command to order opinions --- .../commands/update_opinions_order.py | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 0560c506ba..d4d915695d 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -82,6 +82,7 @@ def get_opinion_content( # We store the field because we are using S3 for storage and that backend # doesn't support absolute paths xml_path = op.local_path + # print("url", op.local_path.url) content = None if len(op.html_with_citations) > 1: content = op.html_with_citations @@ -136,7 +137,7 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: data = {} # type: dict with xml_filepath.open("r") as f: - file_content = f.read().decode("utf-8") + file_content = f.read() data["unpublished"] = False @@ -418,10 +419,11 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: return opinions -def run_harvard(): +def run_harvard(start_id: int): """ We assume that harvard data is already ordered, we just need to fill the order field in each opinion + :param start_id: skip any id lower than this value """ # Get all harvard clusters with more than one opinion @@ -429,9 +431,11 @@ def run_harvard(): OpinionCluster.objects.prefetch_related("sub_opinions") .annotate(opinions_count=Count("sub_opinions")) .filter(opinions_count__gt=1, source="U") + .order_by("id") ) - # print(clusters.query) - print("clusters", len(clusters)) + + if start_id: + clusters = clusters.filter(pk__gte=start_id) # cluster_id: 4697264, the combined opinion will go to the last position for oc in clusters: @@ -446,7 +450,6 @@ def run_harvard(): # we don't have combined opinions, we start ordering from 0 to n start_position = 0 - print("combined_opinions_cluster", combined_opinions_cluster) for opinion_order, cluster_op in enumerate( oc.sub_opinions.exclude(type="010combined").order_by("id"), start=start_position, @@ -462,9 +465,10 @@ def run_harvard(): logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") -def run_columbia(): +def run_columbia(start_id: int): """ Update opinion order for columbia clusters + :param start_id: skip any id lower than this value """ # Get all columbia cluster ids with more than one opinion @@ -475,6 +479,9 @@ def run_columbia(): 
.values_list("id", flat=True) ) + if start_id: + clusters = filter(lambda x: x >= start_id, clusters) + for cluster_id in clusters: logger.info(f"Processing cluster id: {cluster_id}") ( @@ -589,10 +596,14 @@ def add_arguments(self, parser): help="Fix columbia opinions order", ) - def handle(self, *args, **options): - print("harvard", options["process_harvard"]) - print("columbia", options["process_columbia"]) + parser.add_argument( + "--start-id", + type=int, + default=0, + help="Skip any id lower than this value", + ) + def handle(self, *args, **options): if options["process_harvard"] and options["process_columbia"]: print( "You can only select one option process-harvard or process-columbia" @@ -600,7 +611,7 @@ def handle(self, *args, **options): return if options["process_harvard"]: - run_harvard() + run_harvard(options["start_id"]) if options["process_columbia"]: - run_columbia() + run_columbia(options["start_id"]) From 71ec6241cc0c06d4aaebfb71a0cec188eb39a11a Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 30 Aug 2023 19:47:56 -0600 Subject: [PATCH 13/50] feat(opinion_order): add new param for command --- .../commands/update_opinions_order.py | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index d4d915695d..7a46530a82 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -11,8 +11,6 @@ from cl.lib.string_diff import get_cosine_similarity from cl.search.models import Opinion, OpinionCluster -# TODO Should we add a flag to know that the cluster has been processed? - def match_text_lists( file_opinions_list: list[str], cl_opinions_list: list[str] @@ -419,11 +417,12 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: return opinions -def run_harvard(start_id: int): +def run_harvard(start_id: int, end_id: int): """ We assume that harvard data is already ordered, we just need to fill the order field in each opinion :param start_id: skip any id lower than this value + :param end_id: skip any id greater than this value """ # Get all harvard clusters with more than one opinion @@ -437,6 +436,9 @@ def run_harvard(start_id: int): if start_id: clusters = clusters.filter(pk__gte=start_id) + if end_id: + clusters = clusters.filter(pk__lte=end_id) + # cluster_id: 4697264, the combined opinion will go to the last position for oc in clusters: logger.info(f"Processing cluster id: {oc}") @@ -465,10 +467,11 @@ def run_harvard(start_id: int): logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") -def run_columbia(start_id: int): +def run_columbia(start_id: int, end_id: int): """ Update opinion order for columbia clusters :param start_id: skip any id lower than this value + :param end_id: skip any id greater than this value """ # Get all columbia cluster ids with more than one opinion @@ -482,6 +485,9 @@ def run_columbia(start_id: int): if start_id: clusters = filter(lambda x: x >= start_id, clusters) + if end_id: + clusters = filter(lambda x: x <= end_id, clusters) + for cluster_id in clusters: logger.info(f"Processing cluster id: {cluster_id}") ( @@ -600,7 +606,14 @@ def add_arguments(self, parser): "--start-id", type=int, default=0, - help="Skip any id lower than this value", + help="Start id for a range of clusters (inclusive)", + ) + + parser.add_argument( + "--end-id", + type=int, + default=0, + help="End id 
for a range of clusters (inclusive)", ) def handle(self, *args, **options): @@ -611,7 +624,7 @@ def handle(self, *args, **options): return if options["process_harvard"]: - run_harvard(options["start_id"]) + run_harvard(options["start_id"], options["end_id"]) if options["process_columbia"]: - run_columbia(options["start_id"]) + run_columbia(options["start_id"], options["end_id"]) From f4615b07d931f93b7a2409438d17f85d6582f4a9 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 31 Aug 2023 13:35:04 -0600 Subject: [PATCH 14/50] feat(opinion_order): update typing --- .../commands/update_opinions_order.py | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 7a46530a82..480f2ef6d6 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -1,5 +1,5 @@ import re -from typing import Any, Optional +from typing import Any, List, Optional from bs4 import BeautifulSoup, NavigableString, Tag from django.core.management import BaseCommand @@ -13,12 +13,12 @@ def match_text_lists( - file_opinions_list: list[str], cl_opinions_list: list[str] -) -> dict[int, Any]: + file_opinions_list: List[Any], cl_opinions_list: List[Any] +) -> dict[int, int]: """Generate matching lists above threshold :param file_opinions_list: Opinions from file :param cl_opinions_list: CL opinions - :return: Matches if found or False + :return: Matches if found or empty dict """ # We import this here to avoid a circular import from cl.corpus_importer.management.commands.harvard_opinions import ( @@ -507,8 +507,16 @@ def run_columbia(start_id: int, end_id: int): if cl_cleaned_opinions and columbia_opinions: matches = match_text_lists( - [op.get("opinion") for op in columbia_opinions], - [op.get("opinion") for op in cl_cleaned_opinions], + [ + op.get("opinion") + for op in columbia_opinions + if op.get("opinion") + ], + [ + op.get("opinion") + for op in cl_cleaned_opinions + if op.get("opinion") + ], ) if matches: From 3ceff218c23c77201b3b78fd7bda838db09a2706 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 31 Aug 2023 14:30:00 -0600 Subject: [PATCH 15/50] feat(opinion_order): temporary read xml files from s3 it requires to change the AWS_STORAGE_BUCKET_NAME env variable to read files from private storage --- .../management/commands/update_opinions_order.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 480f2ef6d6..0b96a5dae1 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -134,6 +134,16 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: data = {} # type: dict + if "/home/mlissner" in str(xml_filepath): + # Temporary replace the path with the correct from S3, this way we read them + # directly from S3, we need the files in /sources/columbia/opinions/ in + # com-courtlistener-storage bucket + # TODO discuss this + xml_filepath.name = xml_filepath.name.replace( + "/home/mlissner", "/sources" + ) + + # print(f"Opening {xml_filepath.url}") with xml_filepath.open("r") as f: file_content = f.read() @@ -502,7 +512,9 @@ def run_columbia(start_id: int, end_id: int): try: columbia_opinions = 
get_opinions_columbia_xml(xml_path) except FileNotFoundError: - logger.warning(f"Xml file not found, cluster id: {cluster_id}") + logger.warning( + f"Xml file not found in {xml_path}, cluster id: {cluster_id}" + ) continue if cl_cleaned_opinions and columbia_opinions: From 0bd9b9ac3bf2a511633d93de0bdebc49da06ca5d Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Tue, 5 Sep 2023 15:52:59 -0600 Subject: [PATCH 16/50] feat(update_opinions_order): argument added to point to the mounted directory with xml files --- .../commands/update_opinions_order.py | 47 +++++++++++++------ 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 0b96a5dae1..f6c72811d8 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -1,3 +1,4 @@ +import os.path import re from typing import Any, List, Optional @@ -80,7 +81,6 @@ def get_opinion_content( # We store the field because we are using S3 for storage and that backend # doesn't support absolute paths xml_path = op.local_path - # print("url", op.local_path.url) content = None if len(op.html_with_citations) > 1: content = op.html_with_citations @@ -113,9 +113,10 @@ def get_opinion_content( return xml_path, cl_cleaned_opinions, start_position, combined_opinion -def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: +def get_opinions_columbia_xml(xml_filepath: FieldFile, xml_dir: str) -> list: """Convert xml data into dict :param xml_filepath: path of xml file + :param xml_dir: absolute path to the directory with columbia xml files :return: dict with data """ @@ -134,17 +135,17 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile) -> list: data = {} # type: dict - if "/home/mlissner" in str(xml_filepath): - # Temporary replace the path with the correct from S3, this way we read them - # directly from S3, we need the files in /sources/columbia/opinions/ in - # com-courtlistener-storage bucket - # TODO discuss this - xml_filepath.name = xml_filepath.name.replace( - "/home/mlissner", "/sources" + if "/home/mlissner/columbia/opinions/" in str(xml_filepath): + filepath = str( + xml_filepath.name.replace("/home/mlissner/columbia/opinions/", "") ) + # fix file path temporarily + new_xml_filepath = os.path.join(xml_dir, filepath) + else: + logger.info(f"Can't fix xml file path: {xml_filepath}") + raise FileNotFoundError - # print(f"Opening {xml_filepath.url}") - with xml_filepath.open("r") as f: + with open(new_xml_filepath, "r", encoding="utf-8") as f: file_content = f.read() data["unpublished"] = False @@ -477,11 +478,12 @@ def run_harvard(start_id: int, end_id: int): logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") -def run_columbia(start_id: int, end_id: int): +def run_columbia(start_id: int, end_id: int, xml_dir: str): """ Update opinion order for columbia clusters :param start_id: skip any id lower than this value :param end_id: skip any id greater than this value + :param xml_dir: absolute path to the directory with columbia xml files """ # Get all columbia cluster ids with more than one opinion @@ -510,7 +512,9 @@ def run_columbia(start_id: int, end_id: int): columbia_opinions = None if xml_path: try: - columbia_opinions = get_opinions_columbia_xml(xml_path) + columbia_opinions = get_opinions_columbia_xml( + xml_path, xml_dir + ) except FileNotFoundError: logger.warning( f"Xml file not found in {xml_path}, 
cluster id: {cluster_id}" @@ -622,6 +626,12 @@ def add_arguments(self, parser): help="Fix columbia opinions order", ) + parser.add_argument( + "--xml-dir", + required=False, + help="The absolute path to the directory with columbia xml files", + ) + parser.add_argument( "--start-id", type=int, @@ -646,5 +656,12 @@ def handle(self, *args, **options): if options["process_harvard"]: run_harvard(options["start_id"], options["end_id"]) - if options["process_columbia"]: - run_columbia(options["start_id"], options["end_id"]) + if options["process_columbia"] and options["xml_dir"]: + run_columbia( + options["start_id"], options["end_id"], options["xml_dir"] + ) + + if options["process_columbia"] and not options["xml_dir"]: + print( + "Argument --xml-dir required to read xml files from mounted directory" + ) From 7b16b42d99c1f8b9076d47ddb0ba916df21b564a Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Tue, 5 Sep 2023 16:09:04 -0600 Subject: [PATCH 17/50] feat(update_opinions_order): fix mypy error --- .../management/commands/update_opinions_order.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index f6c72811d8..05a1bdb7f5 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -136,8 +136,8 @@ def get_opinions_columbia_xml(xml_filepath: FieldFile, xml_dir: str) -> list: data = {} # type: dict if "/home/mlissner/columbia/opinions/" in str(xml_filepath): - filepath = str( - xml_filepath.name.replace("/home/mlissner/columbia/opinions/", "") + filepath = str(xml_filepath).replace( + "/home/mlissner/columbia/opinions/", "" ) # fix file path temporarily new_xml_filepath = os.path.join(xml_dir, filepath) From d49708adfacecfa075dd3a298a8cdc867532c008 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 21 Sep 2023 13:27:05 -0600 Subject: [PATCH 18/50] fix(opinion_order): Update poetry.lock --- poetry.lock | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 90d12b08bf..a7f14d94cb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "amqp" @@ -1097,6 +1097,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -2577,6 +2588,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -2721,7 +2742,7 @@ name = "ndg-httpsclient" version = "0.5.1" description = "Provides enhanced HTTPS support for httplib and urllib2 using PyOpenSSL" optional = false -python-versions = ">=2.7,<3.0.dev0 || >=3.4.dev0" +python-versions = ">=2.7,<3.0.0 || >=3.4.0" files = [ {file = "ndg_httpsclient-0.5.1-py2-none-any.whl", hash = "sha256:d2c7225f6a1c6cf698af4ebc962da70178a99bcde24ee6d1961c4f3338130d57"}, {file = "ndg_httpsclient-0.5.1-py3-none-any.whl", hash = "sha256:dd174c11d971b6244a891f7be2b32ca9853d3797a72edb34fa5d7b07d8fff7d4"}, @@ -5096,4 +5117,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = 
"2.0" python-versions = ">=3.11, <3.12" -content-hash = "96bb211d8a53b99b00d7d118fd7f90f35dcf27b9a940532d8ea814eecc5cbd6b" +content-hash = "6ce30a4f34302d7e0ca29bf1f9794ad2fc1759cef8312bcfebb5550a33cb0019" From 9ae8dc891f764a471729dc8131e0e071bd9f9e7c Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 29 Sep 2023 17:15:04 -0600 Subject: [PATCH 19/50] fix(opinion_order): Update poetry.lock --- poetry.lock | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index c9b8295b88..a7fe6b3511 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1097,6 +1097,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." +optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -2577,6 +2588,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -5096,4 +5117,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "4b906615444a53e1a26780aa6a3742c0e7844c307c6a991b059ee4de0cb177a8" +content-hash = "6da7f3d3b926ac02caf9720eda2b6c81ae71fe04aafb6a0a35f83e52b4c412cc" From 7702a082063ed2b80b6f803a1a6afa7af6347887 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 29 Sep 2023 18:03:45 -0600 Subject: [PATCH 20/50] fix(opinion_order): Rename migrations --- .../{0020_order_opinions.py => 0022_order_opinions.py} | 2 +- .../{0020_order_opinions.sql => 0022_order_opinions.sql} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cl/search/migrations/{0020_order_opinions.py => 0022_order_opinions.py} (99%) rename cl/search/migrations/{0020_order_opinions.sql => 0022_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0020_order_opinions.py b/cl/search/migrations/0022_order_opinions.py similarity index 99% rename from cl/search/migrations/0020_order_opinions.py rename to cl/search/migrations/0022_order_opinions.py index f614156360..763c98e8fc 100644 --- a/cl/search/migrations/0020_order_opinions.py +++ b/cl/search/migrations/0022_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0019_add_docket_source_noop"), + ("search", "0021_add_pghistory_courthouse"), ] operations = [ diff --git a/cl/search/migrations/0020_order_opinions.sql b/cl/search/migrations/0022_order_opinions.sql similarity index 100% rename from cl/search/migrations/0020_order_opinions.sql rename to cl/search/migrations/0022_order_opinions.sql From 3f173fef6ac191d2c1a0b43f38de3917a9f9b9bf Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 29 Nov 2023 13:08:52 -0600 Subject: [PATCH 21/50] fix(opinions_order): rename migrations update poetry.lock --- ...der_opinions.py => 0024_order_opinions.py} | 2 +- ...r_opinions.sql => 0024_order_opinions.sql} | 0 poetry.lock | 30 ++++++++----------- 3 files changed, 14 insertions(+), 18 deletions(-) rename cl/search/migrations/{0022_order_opinions.py => 0024_order_opinions.py} (99%) rename cl/search/migrations/{0022_order_opinions.sql => 0024_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0022_order_opinions.py b/cl/search/migrations/0024_order_opinions.py similarity index 99% rename from cl/search/migrations/0022_order_opinions.py rename to cl/search/migrations/0024_order_opinions.py index 763c98e8fc..1abaed4d76 100644 --- a/cl/search/migrations/0022_order_opinions.py +++ b/cl/search/migrations/0024_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0021_add_pghistory_courthouse"), + ("search", "0023_add_docket_sources_noop"), ] operations = [ diff --git a/cl/search/migrations/0022_order_opinions.sql b/cl/search/migrations/0024_order_opinions.sql similarity index 100% rename from cl/search/migrations/0022_order_opinions.sql rename to cl/search/migrations/0024_order_opinions.sql diff --git a/poetry.lock b/poetry.lock index cdb46a7ef7..30080de3f7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1101,6 +1101,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be 
ordered and provides a simple admin interface for reordering them." +optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -1493,18 +1504,6 @@ files = [ {file = "fast_diff_match_patch-2.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c4cb3aa60664bcafd070915cc0f148c63da3a20babeca29bdf24e6aee80ff481"}, {file = "fast_diff_match_patch-2.0.1-cp310-cp310-win32.whl", hash = "sha256:3423c373c168fcbc56fa488960248ce086dd686402817aa5d4d967537fff1203"}, {file = "fast_diff_match_patch-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:f8b5595277f99b4908ae9bab33548bfe7497a99a1f5dc5c277a4f36051dcf993"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a682a72b93e07902b9af3bc591fe365da4024888cceb308f04cdec59eeb3602d"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d30e7fb0de87e02db88cda54f6c57a9f7d789e4d0922cfed41f61a1d4415408b"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:58b273cecb941bef392bda622a534de03e6ea8d3186d4d07745375cce9db0833"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0e39bb9ca0b7632a15e85cb6b0c4c575010e6fb6e43e5714ee53c7cef1aa4135"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-win32.whl", hash = "sha256:b4d4e6aa5c6a4af0b6c66be593021579f4693c94b848084b89e6783180361db6"}, - {file = "fast_diff_match_patch-2.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:c1154830dbcb83d1c9ed24f43b1e8226cafc7ce46b6e0971e866bdf513ecc216"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c6723cfba7bd9fb712e179acbc9c6cb526076612c0325ad4f1066f3bd176064a"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:378251cc37cd21d14802669a3453f026ed3aa07c07a8aa2daabeefd14a0e0a36"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:7a2e1ce344438b14400a91b65c79c39345b0ce70a0a8797e88b14485577b5fc0"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cc7285d9a1fbf8990361ce37728202fd6ebee6ddc6cfe6fb15a19905e562f304"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-win32.whl", hash = "sha256:3aaeb207fe586979ecb194ecc2c81ba979d351cd0bdaba8489ce4be0f55206dc"}, - {file = "fast_diff_match_patch-2.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:4d759ec2d79c638407f32c29dc348fcef6e6a1659927056527b0939a1ab31ca5"}, {file = "fast_diff_match_patch-2.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:e5205e4f3b820f65138947e0d42959b6910fd959c8e5e8f4fc72472f6fec9d8b"}, {file = "fast_diff_match_patch-2.0.1-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fa1212d0200169e93392805957ca6ae351bfc51282c5119fb231f968c7e12fbc"}, {file = "fast_diff_match_patch-2.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d30a9db041dfee960a9c8a35fa99685b1f29530f52f69fef1e3cc02867f0b9"}, @@ -1545,9 +1544,6 @@ files = [ {file = "fast_diff_match_patch-2.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:58ada748637821445df3cfcb21df412136fb69b8e677ea364aa9ca7a8facb048"}, {file = "fast_diff_match_patch-2.0.1-cp39-cp39-win32.whl", hash = "sha256:b07808e98f0bfcd557281126135b24729a30ee10ccc2db4d3358fb2f18ac1879"}, {file = "fast_diff_match_patch-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:6f2202d1e9d225918ea3803f66ca9c99d080c8ba5094c438680eb2c8dfd2e48c"}, - {file = "fast_diff_match_patch-2.0.1-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ecff01b3d10d6bed965a1591e37597df118ab0bcc98a3f59a724a0d9bd63fb1"}, - {file = "fast_diff_match_patch-2.0.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a92ba0d543524234a17ea2da4892a9752273cfdfed528e581f0f76cbd78cf991"}, - {file = "fast_diff_match_patch-2.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dd5b3b99bb7c14ce8ea5ab184afb2cc6796dac71439b2cfc6fb6227a6846aef3"}, {file = "fast_diff_match_patch-2.0.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:daa821a8dcbc1026f7f8cc177ca599bcfbaaddccdf90bc1ad1e44255b1c239e1"}, {file = "fast_diff_match_patch-2.0.1-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27de6dc97e7d6dc207585d778ace58e7cc364b8383e5412164224d52ad4099b5"}, {file = "fast_diff_match_patch-2.0.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec27f797b1ecee79c3d76c9a081a6c20fd89068b41ba3b84a6ebe48317c5c46c"}, @@ -2750,7 +2746,7 @@ name = "ndg-httpsclient" version = "0.5.1" description = "Provides enhanced HTTPS support for httplib and urllib2 using PyOpenSSL" optional = false -python-versions = ">=2.7,<3.0.dev0 || >=3.4.dev0" +python-versions = ">=2.7,<3.0.0 || >=3.4.0" files = [ {file = "ndg_httpsclient-0.5.1-py2-none-any.whl", hash = "sha256:d2c7225f6a1c6cf698af4ebc962da70178a99bcde24ee6d1961c4f3338130d57"}, {file = "ndg_httpsclient-0.5.1-py3-none-any.whl", hash = "sha256:dd174c11d971b6244a891f7be2b32ca9853d3797a72edb34fa5d7b07d8fff7d4"}, @@ -5212,4 +5208,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.11, <3.12" -content-hash = "f3edde54a6877b5506669d8d8354b28d8b7c6dffbb08c4b0954079680cec63dc" +content-hash = "ce20135f86ae0bc9264359886c298076a90c74d5a30256f7db4541812ffb4f76" From 9dedd433ca589f7db5f4d71edd7318fbd34e3aa8 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 29 Nov 2023 19:14:00 -0600 Subject: [PATCH 22/50] fix(opinions_order): code refactored NOTE: functions found in columbia_utils.py and utils.py, were temporarily added in the command,when the necessary changes are combined we need to remove the functions and import them from the utils. 
--- .../commands/update_opinions_order.py | 833 ++++++++++-------- 1 file changed, 461 insertions(+), 372 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 05a1bdb7f5..ae931ba4b7 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -5,18 +5,356 @@ from bs4 import BeautifulSoup, NavigableString, Tag from django.core.management import BaseCommand from django.db.models import Count -from django.db.models.fields.files import FieldFile from cl.corpus_importer.utils import similarity_scores from cl.lib.command_utils import logger from cl.lib.string_diff import get_cosine_similarity -from cl.search.models import Opinion, OpinionCluster +from cl.search.models import SOURCES, Opinion, OpinionCluster + +VALID_COLUMBIA_SOURCES = [ + key + for key in dict(SOURCES.NAMES).keys() + if SOURCES.COLUMBIA_ARCHIVE in key +] + +VALID_HARVARD_SOURCES = [ + key for key in dict(SOURCES.NAMES).keys() if SOURCES.HARVARD_CASELAW in key +] + + +# TODO remove the funcitions below and import them from utils.py and columbia_utils.py when those changes get merged + + +SIMPLE_TAGS = [ + "attorneys", + "caption", + "citation", + "court", + "date", + "docket", + "hearing_date", + "panel", + "posture", + "reporter_caption", +] + + +class EmptyOpinionException(Exception): + """An exception for opinions that raise a ZeroDivisionError Exception due empty + opinion tag or empty opinion content in cl""" + + def __init__(self, message: str) -> None: + self.message = message + + +def read_xml_to_soup(filepath: str) -> BeautifulSoup: + """This function reads the xml file, fixes the bad tags in columbia xml + files and returns a BeautifulSoup object + + :param filepath: path to xml file + :return: BeautifulSoup object of parsed content + """ + with open(filepath, "r", encoding="utf-8") as f: + file_content = f.read() + # Sometimes opening and ending tag mismatch (e.g. 
ed7c6b39dcb29c9c.xml) + file_content = file_content.replace( + "", "" + ) + # Fix opinion with invalid attribute + if "" in file_content: + file_content = file_content.replace( + "", "" + ) + file_content = file_content.replace("", "").replace( + "", "" + ) + return BeautifulSoup(file_content, "lxml") + + +def add_floating_opinion( + opinions: list, floating_content: list, opinion_order: int +) -> list: + """We have found floating opinions in bs object, we keep the opinion + content as a new opinion + + :param opinions: a list with opinions found + :param floating_content: content that is not in known non-opinion tags + :param opinion_order: opinion position + :return: updated list of opinions + """ + op_type = "opinion" + if opinions: + if opinions[-1].get("type"): + # Use type of previous opinion if exists + op_type = opinions[-1].get("type") + + # Get rid of double spaces from floating content + opinion_content = re.sub( + " +", " ", "\n".join(floating_content) + ).strip() # type: str + if opinion_content: + opinions.append( + { + "opinion": opinion_content, + "order": opinion_order, + "byline": "", + "type": op_type, + } + ) + return opinions + + +def extract_columbia_opinions( + outer_opinion: BeautifulSoup, +) -> list[Optional[dict]]: + """We extract all possible opinions from BeautifulSoup, with and without + author, and we create new opinions if floating content exists(content that + is not explicitly defined within an opinion tag or doesn't have an author) + + :param outer_opinion: element containing all xml tags + :return: list of opinion dicts + """ + opinions: list = [] + floating_content = [] + order = 0 + + # We iterate all content to look for all possible opinions + for i, content in enumerate(outer_opinion): # type: int, Tag + if isinstance(content, NavigableString): + # We found a raw string, store it + floating_content.append(str(content)) + else: + if content.name in SIMPLE_TAGS + [ + "citation_line", + "opinion_byline", + "dissent_byline", + "concurrence_byline", + ]: + # Ignore these tags, it will be processed later + continue + elif content.name in [ + "opinion_text", + "dissent_text", + "concurrence_text", + ]: + if floating_content: + # We have found an opinion, but there is floating + # content, we create a dict with the opinion using the + # floating content with default type = "opinion" + opinions = add_floating_opinion( + opinions, floating_content, order + ) + floating_content = [] + + byline = content.find_previous_sibling() + opinion_author = "" + if byline and "_byline" in byline.name: + opinion_author = byline.get_text() + + opinion_content = re.sub( + " +", " ", content.decode_contents() + ).strip() + if opinion_content: + # Now we create a dict with current opinion + opinions.append( + { + "opinion": opinion_content, + "order": order, + "byline": opinion_author, + "type": content.name.replace("_text", ""), + } + ) + order = order + 1 + + else: + if content.name not in SIMPLE_TAGS + ["syllabus"]: + # We store content that is not inside _text tag and is + # not in one of the known non-opinion tags + floating_content.append(str(content)) + + # Combine the new content into another opinion. great. 
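+    # Example of an entry accumulated in `opinions` at this point (values
+    # are illustrative only):
+    #   {"opinion": "the cleaned text", "order": 2, "byline": "", "type": "dissent"}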
+ if floating_content: + # If we end to go through all the found opinions and if we still + # have floating content out there, we create a new opinion with the + # last type of opinion + opinions = add_floating_opinion(opinions, floating_content, order) + return opinions + + +def is_per_curiam_opinion( + content: Optional[str], byline: Optional[str] +) -> bool: + """Check if opinion author is per curiam + :param content: opinion content + :param byline: opinion text author + :return: True if opinion author is per curiam + """ + if byline and "per curiam" in byline[:1000].lower(): + return True + if content and "per curiam" in content[:1000].lower(): + return True + return False + + +def merge_opinions( + opinions: list, content: list, current_order: int +) -> tuple[list, int]: + """Merge last and previous opinion if are the same type or create a new + opinion if merge is not possible + + :param opinions: list of opinions that is being updated constantly + :param content: list of opinions without an author + :param current_order: opinion position + :return: updated list of opinions + """ + + # We check if the previous stored opinion matches the type of the + # content, and we store the opinion dict temporary + relevant_opinions = ( + [opinions[-1]] + if opinions and opinions[-1]["type"] == content[0].get("type") + else [] + ) + + if relevant_opinions: + relevant_opinions[-1]["opinion"] += "\n" + "\n".join( + [f.get("opinion") for f in content if f.get("opinion")] + ) + + else: + # No relevant opinions found, create a new opinion with the content + opinion_content = "\n".join( + [f.get("opinion") for f in content if f.get("opinion")] + ) + new_opinion = { + "byline": None, + "type": content[0].get("type"), + "opinion": opinion_content, + "order": current_order, + "per_curiam": is_per_curiam_opinion(opinion_content, None), + } + opinions.append(new_opinion) + current_order = current_order + 1 + + return opinions, current_order + + +def process_extracted_opinions(extracted_opinions: list) -> list: + """We read the extracted data in extract_opinions function to merge all + possible floating opinions (it is not explicitly defined within an opinion + tag or doesn't have an author) + + :param extracted_opinions: list of opinions obtained from xml file + :return: a list with extracted and processed opinions + """ + + opinions: list = [] + authorless_content = [] + order = 0 + + for i, found_content in enumerate(extracted_opinions, start=1): + byline = found_content.get("byline") + if not byline: + # Opinion has no byline, store opinion content + authorless_content.append(found_content) + + if byline: + # Opinion has byline, get opinion type and content + opinion_type = found_content.get("type") + opinion_content = found_content.get("opinion", "") + # Store content that doesn't match the current opinion type + alternative_authorless_content = [ + content + for content in authorless_content + if content.get("type") != opinion_type + ] + # Keep content that matches the current type + authorless_content = [ + op_content + for op_content in authorless_content + if op_content.get("type") == opinion_type + ] + + if alternative_authorless_content: + # Keep floating text that are not from the same type, + # we need to create a separate opinion for those, + # for example: in 2713f39c5a8e8684.xml we have an opinion + # without an author, and the next opinion with an author is + # a dissent opinion, we can't combine both + opinions, order = merge_opinions( + opinions, alternative_authorless_content, order + ) 
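+
+            # The same-type authorless entries kept above are prepended to
+            # this bylined opinion's text below, so floating text stays with
+            # the opinion that follows it.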
+ + opinion_content = ( + "\n".join( + [ + f.get("opinion") + for f in authorless_content + if f.get("type") == opinion_type + ] + ) + + "\n\n" + + opinion_content + ) + + # Add new opinion + new_opinion = { + "byline": byline, + "type": opinion_type, + "opinion": opinion_content, + "order": order, + "per_curiam": is_per_curiam_opinion(opinion_content, byline), + } + + opinions.append(new_opinion) + order = order + 1 + authorless_content = [] + + if len(extracted_opinions) == i and authorless_content: + # If is the last opinion, and we still have opinions without + # byline, create an opinion without an author and the contents + # that couldn't be merged + opinions, order = merge_opinions( + opinions, authorless_content, order + ) + + return opinions + + +def map_opinion_types(opinions=None) -> None: + """Map opinion type to model field choice + + :param opinions: a list that contains all opinions as dict elements + :return: None + """ + + if opinions is None: + opinions = [] + lead = False + for op in opinions: + op_type = op.get("type") + # Only first opinion with "opinion" type is a lead opinion, the next + # opinion with "opinion" type is an addendum + if not lead and op_type and op_type == "opinion": + lead = True + op["type"] = "020lead" + continue + elif lead and op_type and op_type == "opinion": + op["type"] = "050addendum" + elif op_type and op_type == "dissent": + op["type"] = "040dissent" + elif op_type and op_type == "concurrence": + op["type"] = "030concurrence" + + +# TODO ------------------------ remove until here ------------------------------- def match_text_lists( file_opinions_list: List[Any], cl_opinions_list: List[Any] ) -> dict[int, int]: """Generate matching lists above threshold + :param file_opinions_list: Opinions from file :param cl_opinions_list: CL opinions :return: Matches if found or empty dict @@ -50,10 +388,11 @@ def match_text_lists( return matches -def get_opinion_content( +def get_opinions_cleaned_content( cluster_id, -) -> tuple[Optional[FieldFile], list[dict], int, bool]: - """Get the opinions content for a cluster object +) -> tuple[Optional[str], list[dict], int, bool]: + """Get cleaned opinions content for a cluster object + :param cluster_id: Cluster ID for a set of opinions :return: (xml path, list of extracted opinions, start position, True if combined opinions exists in cluster) @@ -67,380 +406,108 @@ def get_opinion_content( type="010combined" ) xml_path = None - combined_opinion = False + cluster_has_combined_opinion = False if combined_opinions_cluster: # the combined opinion will be displayed at beginning start_position = combined_opinions_cluster.count() - combined_opinion = True + cluster_has_combined_opinion = True else: # we don't have combined opinions, we start ordering from 0 to n start_position = 0 for i, op in enumerate(opinions_from_cluster.exclude(type="010combined")): if op.local_path and not xml_path: - # We store the field because we are using S3 for storage and that backend - # doesn't support absolute paths - xml_path = op.local_path - content = None - if len(op.html_with_citations) > 1: - content = op.html_with_citations - elif len(op.html_columbia) > 1: - content = op.html_columbia - elif len(op.html_lawbox) > 1: - content = op.html_lawbox - elif len(op.plain_text) > 1: - content = op.plain_text - elif len(op.html) > 1: - content = op.html - elif len(op.xml_harvard) > 1: - content = op.xml_harvard - if content: - soup = BeautifulSoup(content, features="html.parser") - prep_text = re.sub( - r"[^a-zA-Z0-9 ]", "", 
soup.getText(separator=" ").lower() - ) - prep_text = re.sub(" +", " ", prep_text) - cl_cleaned_opinions.append( - { - "id": op.id, - "byline": op.author_str, - "type": op.type, - "opinion": prep_text, - "order": i, - } - ) - - return xml_path, cl_cleaned_opinions, start_position, combined_opinion - + xml_path = str(op.local_path) -def get_opinions_columbia_xml(xml_filepath: FieldFile, xml_dir: str) -> list: - """Convert xml data into dict - :param xml_filepath: path of xml file - :param xml_dir: absolute path to the directory with columbia xml files - :return: dict with data - """ - - SIMPLE_TAGS = [ - "attorneys", - "caption", - "citation", - "court", - "date", - "docket", - "hearing_date", - "panel", - "posture", - "reporter_caption", - ] - - data = {} # type: dict - - if "/home/mlissner/columbia/opinions/" in str(xml_filepath): - filepath = str(xml_filepath).replace( - "/home/mlissner/columbia/opinions/", "" - ) - # fix file path temporarily - new_xml_filepath = os.path.join(xml_dir, filepath) - else: - logger.info(f"Can't fix xml file path: {xml_filepath}") - raise FileNotFoundError - - with open(new_xml_filepath, "r", encoding="utf-8") as f: - file_content = f.read() + content = None - data["unpublished"] = False + # We can only use columbia's content to infer the ordering + if len(op.html_columbia) > 1: + content = op.html_columbia - if "" in file_content: - file_content = file_content.replace( - "", "" - ) - file_content = file_content.replace("", "").replace( - "", "" + if not content: + raise EmptyOpinionException( + "There is no content in html_columbia field" ) - data["unpublished"] = True + soup = BeautifulSoup(content, features="html.parser") + opinion_text = soup.getText(separator=" ", strip=True) + prep_text = re.sub( + " +", " ", " ".join(opinion_text.split("\n")) + ).strip() + prep_text = re.sub(r"[^a-zA-Z0-9 ]", "", prep_text.lower()) + + cl_cleaned_opinions.append( + { + "id": op.id, + "byline": op.author_str, + "type": op.type, + "opinion": prep_text, + "order": i, + } + ) - # Sometimes opening and ending tag mismatch (e.g. 
c6b39dcb29c9c.xml) - file_content = file_content.replace( - "", "" + return ( + xml_path, + cl_cleaned_opinions, + start_position, + cluster_has_combined_opinion, ) - soup = BeautifulSoup(file_content, "lxml") - - # Find the outer tag to have all elements inside - find_opinion = soup.find("opinion") - - step_one_opinions = [] # type: list - opinions = [] # type: list - order = 0 - - if find_opinion: - untagged_content = [] - # We iterate all content, with and without tags - # STEP 1: Extract all content in multiple dict elements - for i, content in enumerate(find_opinion): # type: int, Tag - if type(content) == NavigableString: - # We found a raw string, store it - untagged_content.append(str(content)) +def fix_filepath(filepath: str) -> str: + """Fix filepath from file field - else: - if content.name in SIMPLE_TAGS + [ - "citation_line", - "opinion_byline", - "dissent_byline", - "concurrence_byline", - ]: - # Ignore these tags, it will be processed later - continue - elif content.name in [ - "opinion_text", - "dissent_text", - "concurrence_text", - ]: - if untagged_content: - # We found something other than a navigable string that is - # not an opinion, but now we have found an opinion, - # let's create this content first - - # default type - op_type = "opinion" - if step_one_opinions: - if step_one_opinions[-1].get("type"): - # use type of previous opinion if exists - op_type = step_one_opinions[-1].get("type") - - # Get rid of double spaces - opinion_content = re.sub( - " +", " ", "\n".join(untagged_content) - ).strip() # type: str - if opinion_content: - step_one_opinions.append( - { - "opinion": opinion_content, - "order": order, - "byline": "", - "type": op_type, - } - ) - order = order + 1 - untagged_content = [] - - byline = content.find_previous_sibling() - opinion_author = "" - if byline and "_byline" in byline.name: - opinion_author = byline.get_text() - - opinion_content = re.sub( - " +", " ", content.decode_contents() - ).strip() - if opinion_content: - step_one_opinions.append( - { - "opinion": opinion_content, - "order": order, - "byline": opinion_author, - "type": content.name.replace("_text", ""), - } - ) - order = order + 1 - - else: - # Content not inside _text tag, we store it - untagged_content.append(str(content)) - - if untagged_content: - # default type - op_type = "opinion" - if step_one_opinions: - if step_one_opinions[-1].get("type"): - # use type of previous opinion if exists - op_type = step_one_opinions[-1].get("type") - - opinion_content = re.sub( - " +", " ", "\n".join(untagged_content) - ).strip() - if opinion_content: - step_one_opinions.append( - { - "opinion": opinion_content, - "order": order, - "byline": "", - "type": op_type, - } - ) + :param filepath: path from file field + :return: new file path + """ + if "/home/mlissner/columbia/opinions/" in filepath: + filepath = filepath.replace("/home/mlissner/columbia/opinions/", "") + return filepath - # Step 2: Merge found content in the xml file - new_order = 0 - authorless_content = [] - - for i, found_content in enumerate(step_one_opinions, start=1): - byline = found_content.get("byline") - if not byline: - # Opinion has no byline, store it - authorless_content.append(found_content) - - if byline: - # Opinion has byline - opinion_type = found_content.get("type") - opinion_content = found_content.get("opinion", "") - # Store content that doesn't match the current type - alternative_authorless_content = [ - z - for z in authorless_content - if z.get("type") != opinion_type - ] - # Keep content that matches 
the current type - authorless_content = [ - z - for z in authorless_content - if z.get("type") == opinion_type - ] - - if alternative_authorless_content: - # Keep floating text that are not from the same type, - # we need to create a separate opinion for those, - # for example: in 2713f39c5a8e8684.xml we have an opinion - # without an author, and the next opinion with an author is - # a dissent opinion, we can't combine both - - # We check if the previous stored opinion matches the type of the - # content - relevant_opinions = ( - [opinions[-1]] - if opinions - and opinions[-1]["type"] - == alternative_authorless_content[0].get("type") - else [] - ) - if relevant_opinions: - previous_opinion = relevant_opinions[-1] - if previous_opinion.get( - "type" - ) == alternative_authorless_content[0].get("type"): - # Merge last opinion with previous opinion, it probably - # belongs the same author - relevant_opinions[-1][ - "opinion" - ] += "\n" + "\n".join( - [ - f.get("opinion") - for f in alternative_authorless_content - if f.get("opinion") - ] - ) - authorless_content = [] +def get_opinions_columbia_file(xml_filepath: str) -> list: + """Get opinions from columbia xml file and convert it into dict - else: - # No relevant opinions found, create a new opinion - new_opinion = { - "byline": None, - "type": alternative_authorless_content[0].get( - "type" - ), - "opinion": "\n".join( - [ - f.get("opinion") - for f in alternative_authorless_content - if f.get("opinion") - ] - ), - "order": new_order, - } - new_order = new_order + 1 - opinions.append(new_opinion) - - # Add new opinion - new_opinion = { - "byline": byline, - "type": opinion_type, - "opinion": "\n".join( - [ - f.get("opinion") - for f in authorless_content - if f.get("type") == opinion_type - ] - ) - + "\n\n" - + opinion_content, - "order": new_order, - } - - opinions.append(new_opinion) - new_order = new_order + 1 - authorless_content = [] - - if len(step_one_opinions) == i and authorless_content: - # If is the last opinion, and we still have opinions without - # byline, create an opinion without an author and the contents - # that couldn't be merged - - # We check if the previous stored opinion matches the type of the - # content - relevant_opinions = ( - [opinions[-1]] - if opinions - and opinions[-1]["type"] - == authorless_content[0].get("type") - else [] - ) + :param xml_filepath: path of xml file + :return: dict with data + """ + soup = read_xml_to_soup(xml_filepath) - if relevant_opinions: - previous_opinion = relevant_opinions[-1] - if previous_opinion.get("type") == authorless_content[ - 0 - ].get("type"): - # Merge last opinion with previous opinion, it probably - # belongs the same author - relevant_opinions[-1]["opinion"] += "\n" + "\n".join( - [ - f.get("opinion") - for f in authorless_content - if f.get("opinion") - ] - ) + # Find the outer tag to have all elements inside + outer_opinion = soup.find("opinion") - else: - # Create last floating opinion - new_opinion = { - "byline": None, - "type": authorless_content[0].get("type"), - "opinion": "\n".join( - [ - f.get("opinion") - for f in authorless_content - if f.get("opinion") - ] - ), - "order": new_order, - } - opinions.append(new_opinion) + extracted_opinions = extract_columbia_opinions(outer_opinion) + opinions = process_extracted_opinions(extracted_opinions) + map_opinion_types(opinions) for op in opinions: opinion_content = op.get("opinion") - opinion_content = BeautifulSoup( - opinion_content, "html.parser" - ).getText() - opinion_content = re.sub(r"[^a-zA-Z0-9 ]", 
"", opinion_content.lower()) - op["opinion"] = opinion_content + soup = BeautifulSoup(opinion_content, "html.parser") + opinion_text = soup.getText(separator=" ", strip=True) + opinion_text = re.sub( + " +", " ", " ".join(opinion_text.split("\n")) + ).strip() + cleaned_opinion = re.sub(r"[^a-zA-Z0-9 ]", "", opinion_text.lower()) + op["opinion"] = cleaned_opinion return opinions -def run_harvard(start_id: int, end_id: int): - """ - We assume that harvard data is already ordered, we just need to fill the order +def sort_harvard_opinions(start_id: int, end_id: int) -> None: + """We assume that harvard data is already ordered, we just need to fill the order field in each opinion + :param start_id: skip any id lower than this value :param end_id: skip any id greater than this value + :return: None """ # Get all harvard clusters with more than one opinion clusters = ( OpinionCluster.objects.prefetch_related("sub_opinions") .annotate(opinions_count=Count("sub_opinions")) - .filter(opinions_count__gt=1, source="U") + .filter(opinions_count__gt=1, source__in=VALID_HARVARD_SOURCES) .order_by("id") ) @@ -478,18 +545,19 @@ def run_harvard(start_id: int, end_id: int): logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") -def run_columbia(start_id: int, end_id: int, xml_dir: str): - """ - Update opinion order for columbia clusters +def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: + """Update opinion ordering for columbia clusters + :param start_id: skip any id lower than this value :param end_id: skip any id greater than this value :param xml_dir: absolute path to the directory with columbia xml files + :return: None """ # Get all columbia cluster ids with more than one opinion clusters = ( OpinionCluster.objects.annotate(opinions_count=Count("sub_opinions")) - .filter(opinions_count__gt=1, source="Z") + .filter(opinions_count__gt=1, source__in=VALID_COLUMBIA_SOURCES) .order_by("id") .values_list("id", flat=True) ) @@ -502,37 +570,53 @@ def run_columbia(start_id: int, end_id: int, xml_dir: str): for cluster_id in clusters: logger.info(f"Processing cluster id: {cluster_id}") - ( - xml_path, - cl_cleaned_opinions, - start_position, - combined_opinion, - ) = get_opinion_content(cluster_id) - - columbia_opinions = None + + try: + ( + xml_path, + cl_cleaned_opinions, + start_position, + cluster_has_combined_opinion, + ) = get_opinions_cleaned_content(cluster_id) + except EmptyOpinionException: + logger.warning( + f"At least one of the opinions from cluster id: {cluster_id} is empty." 
+ ) + continue + + extracted_columbia_opinions = None if xml_path: - try: - columbia_opinions = get_opinions_columbia_xml( - xml_path, xml_dir - ) - except FileNotFoundError: + fixed_xml_filepath = os.path.join(xml_dir, fix_filepath(xml_path)) + + if not os.path.exists(fixed_xml_filepath): logger.warning( - f"Xml file not found in {xml_path}, cluster id: {cluster_id}" + f"Xml file not found in {fixed_xml_filepath}, cluster id: {cluster_id}" + ) + continue + + try: + extracted_columbia_opinions = get_opinions_columbia_file( + fixed_xml_filepath ) + except UnicodeDecodeError: + logger.warning(f"Cannot decode file: {fixed_xml_filepath}") continue - if cl_cleaned_opinions and columbia_opinions: + if cl_cleaned_opinions and extracted_columbia_opinions: + columbia_opinions_content = [ + op.get("opinion") + for op in extracted_columbia_opinions + if op.get("opinion") + ] + cl_opinions_content = [ + op.get("opinion") + for op in cl_cleaned_opinions + if op.get("opinion") + ] + matches = match_text_lists( - [ - op.get("opinion") - for op in columbia_opinions - if op.get("opinion") - ], - [ - op.get("opinion") - for op in cl_cleaned_opinions - if op.get("opinion") - ], + columbia_opinions_content, + cl_opinions_content, ) if matches: @@ -557,7 +641,7 @@ def run_columbia(start_id: int, end_id: int, xml_dir: str): failed = False for file_pos, cl_pos in matches.items(): # file_pos is the correct index to find the opinion id to update - file_opinion = columbia_opinions[file_pos] + file_opinion = extracted_columbia_opinions[file_pos] # the order was calculated using the xml file file_order = file_opinion.get("order") + start_position cl_opinion = cl_cleaned_opinions[cl_pos] @@ -587,7 +671,7 @@ def run_columbia(start_id: int, end_id: int, xml_dir: str): failed = True break - if combined_opinion and not failed: + if cluster_has_combined_opinion and not failed: combined_opinions_cluster = Opinion.objects.filter( cluster_id=cluster_id, type="010combined" ).order_by("id") @@ -628,6 +712,7 @@ def add_arguments(self, parser): parser.add_argument( "--xml-dir", + default="/opt/courtlistener/_columbia", required=False, help="The absolute path to the directory with columbia xml files", ) @@ -653,11 +738,15 @@ def handle(self, *args, **options): ) return + if not options["process_harvard"] and not options["process_columbia"]: + print("One option required: process-harvard or process-columbia") + return + if options["process_harvard"]: - run_harvard(options["start_id"], options["end_id"]) + sort_harvard_opinions(options["start_id"], options["end_id"]) if options["process_columbia"] and options["xml_dir"]: - run_columbia( + sort_columbia_opinions( options["start_id"], options["end_id"], options["xml_dir"] ) From f808b95b68487580b3d24be400afee91dcd4f938 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 30 Nov 2023 11:43:58 -0600 Subject: [PATCH 23/50] fix(opinions_order): code refactored NOTE: functions found in columbia_utils.py and utils.py, were temporarily added in the command,when the necessary changes are combined we need to remove the functions and import them from the utils. 
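
For reference, a minimal usage sketch of the renamed matcher introduced
below (the input lists are assumed to be already-cleaned opinion texts):

    matches = match_opinion_lists(file_texts, cl_texts)
    # e.g. {0: 1, 1: 2}: file opinion 0 matched CL opinion 1, and so on.
    # A candidate pair is now rejected only when both signals are weak,
    # i.e. cosine_sim < 0.60 and percent_match < 60.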
---
 .../commands/update_opinions_order.py         | 231 +++++++++++-------
 1 file changed, 147 insertions(+), 84 deletions(-)

diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py
index ae931ba4b7..5b86c98130 100644
--- a/cl/corpus_importer/management/commands/update_opinions_order.py
+++ b/cl/corpus_importer/management/commands/update_opinions_order.py
@@ -1,12 +1,13 @@
 import os.path
 import re
-from typing import Any, List, Optional
+from typing import Any, Optional
 
 from bs4 import BeautifulSoup, NavigableString, Tag
 from django.core.management import BaseCommand
+from django.db import transaction
 from django.db.models import Count
 
-from cl.corpus_importer.utils import similarity_scores
+from cl.corpus_importer.utils import compare_documents, similarity_scores
 from cl.lib.command_utils import logger
 from cl.lib.string_diff import get_cosine_similarity
 from cl.search.models import SOURCES, Opinion, OpinionCluster
@@ -24,7 +25,6 @@
 # TODO remove the funcitions below and import them from utils.py and columbia_utils.py when those changes get merged
 
-
 SIMPLE_TAGS = [
     "attorneys",
     "caption",
@@ -347,47 +347,86 @@ def map_opinion_types(opinions=None) -> None:
             op["type"] = "030concurrence"
 
 
-# TODO ------------------------ remove until here -------------------------------
-
-
-def match_text_lists(
-    file_opinions_list: List[Any], cl_opinions_list: List[Any]
+def match_opinion_lists(
+    file_opinions_list: list[Any], cl_opinions_list: list[Any]
 ) -> dict[int, int]:
-    """Generate matching lists above threshold
+    """Try to match the opinions on two lists and generate a dict with position of
+    matching opinions
+
+    Remove non-alphanumeric and non-whitespace characters from lowercased text,
+    this tries to make both texts in equal conditions to prove if both are similar or
+    equal
+
+    get_cosine_similarity works great when both texts are almost the same with very
+    small variations
+
+    Sometimes cosine similarity fails when there are small variations in text,
+    such as parties, attorneys, case name, or court that are included in the content
+    of the opinion, compare_documents() checks the percentage of the file opinion
+    text that is in the courtlistener opinion, having a large percentage means that
+    almost all the file opinion is in the courtlistener opinion, but there is a
+    possibility that the courtlistener opinion contains some additional data in the
+    opinion content (such as case name, parties, etc.)
+
+    compare_documents works well when the opinion from the file is a subset of the
+    opinion in CL, the percentage represents how much of the opinion of the file is
+    in the opinion from cl (content in cl opinion can have other data in the body
+    like posture, attorneys, etc. e.g. in cluster id: 7643871 we have the posture and
+    the opinion text but in the xml file we only have the opinion text, cosine_sim:
+    0.1639075094124459 and percent_match: 73)
+
+    Sometimes one algorithm performs better than the other, this is due to some
+    additional text, such as editor's notes, or the author, page number or posture
+    added to the opinion
+
+    Key is opinion position from file, Value is opinion position from cl opinion e.g.
+ matches {0: 1, 1: 2} 0 is file opinion and 1 in cl opinion, 1 is file opinion and + 2 is cl opinion :param file_opinions_list: Opinions from file :param cl_opinions_list: CL opinions :return: Matches if found or empty dict """ - # We import this here to avoid a circular import - from cl.corpus_importer.management.commands.harvard_opinions import ( - compare_documents, - ) scores = similarity_scores(file_opinions_list, cl_opinions_list) matches = {} for i, row in enumerate(scores): j = row.argmax() # type: ignore - # Lower threshold for small opinions. - if ( - get_cosine_similarity(file_opinions_list[i], cl_opinions_list[j]) - < 0.60 - ): - continue - percent_match = compare_documents( - file_opinions_list[i], cl_opinions_list[j] + file_opinion = re.sub( + r"[^a-zA-Z0-9 ]", "", file_opinions_list[i].lower() ) - if percent_match < 60: + cl_opinion = re.sub(r"[^a-zA-Z0-9 ]", "", cl_opinions_list[j].lower()) + + cosine_sim = get_cosine_similarity(file_opinion, cl_opinion) + + percent_match = compare_documents(file_opinion, cl_opinion) + + if cosine_sim < 0.60 and percent_match < 60: continue + matches[i] = j - # Key is opinion position from file, Value is opinion position from cl opinion - # e.g. matches {0: 1, 1: 2} 0 is file opinion and 1 in cl opinion, 1 is file - # opinion and 2 is cl opinion return matches +def clean_opinion_content(text: str) -> str: + """Clean opinion content + + :param text: text to clean + :return: cleaned text + """ + + # Replace line breaks with spaces and get rid of double spaces + text = re.sub(" +", " ", " ".join(text.split("\n"))).strip() + + # Remove non-alphanumeric and non-whitespace characters from lowercased text + return re.sub(r"[^a-zA-Z0-9 ]", "", text.lower()) + + +# TODO ------------------------ remove until here ------------------------------- + + def get_opinions_cleaned_content( cluster_id, ) -> tuple[Optional[str], list[dict], int, bool]: @@ -432,10 +471,7 @@ def get_opinions_cleaned_content( soup = BeautifulSoup(content, features="html.parser") opinion_text = soup.getText(separator=" ", strip=True) - prep_text = re.sub( - " +", " ", " ".join(opinion_text.split("\n")) - ).strip() - prep_text = re.sub(r"[^a-zA-Z0-9 ]", "", prep_text.lower()) + prep_text = clean_opinion_content(opinion_text) cl_cleaned_opinions.append( { @@ -485,10 +521,7 @@ def get_opinions_columbia_file(xml_filepath: str) -> list: opinion_content = op.get("opinion") soup = BeautifulSoup(opinion_content, "html.parser") opinion_text = soup.getText(separator=" ", strip=True) - opinion_text = re.sub( - " +", " ", " ".join(opinion_text.split("\n")) - ).strip() - cleaned_opinion = re.sub(r"[^a-zA-Z0-9 ]", "", opinion_text.lower()) + cleaned_opinion = clean_opinion_content(opinion_text) op["opinion"] = cleaned_opinion return opinions @@ -545,6 +578,78 @@ def sort_harvard_opinions(start_id: int, end_id: int) -> None: logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") +def update_opinions( + cluster_id: int, + cl_opinions: list, + columbia_opinions: list, + matches: dict, + cluster_has_combined_opinion: bool, + start_position: int, +): + """Update opinions with correct order + + :param cluster_id: + :param cl_opinions: a list with cleaned opinions from cl + :param columbia_opinions: a ordered list with cleaned opinions from xml file + :param matches: a dict with the matches of each opinion of both lists + :param cluster_has_combined_opinion: True if the cluster has combined opinions + :param start_position: the number from where the order should begin for + non-combined 
opinions + :return: None + """ + update_failed = False + + with transaction.atomic(): + for file_pos, cl_pos in matches.items(): + # file_pos is the correct index to find the opinion id to update + file_opinion = columbia_opinions[file_pos] + # the order was calculated using the xml file + file_order = file_opinion.get("order") + start_position + cl_opinion = cl_opinions[cl_pos] + opinion_id_to_update = cl_opinion.get("id") + + if opinion_id_to_update: + try: + # Update opinion order + op = Opinion.objects.get(id=opinion_id_to_update) + op.order = file_order + op.save() + except Opinion.DoesNotExist: + # This should not happen, but it is better to be + # cautious + logger.warning( + f"We can't update opinion, opinion doesn't exist " + f"with id: {opinion_id_to_update}" + ) + update_failed = True + break + + if cluster_has_combined_opinion and not update_failed: + combined_opinions_cluster = Opinion.objects.filter( + cluster_id=cluster_id, type="010combined" + ).order_by("id") + + # Show combined opinions at beginning + for opinion_order, cluster_op in enumerate( + combined_opinions_cluster + ): + cluster_op.order = opinion_order + cluster_op.save() + + if update_failed: + # There was an error updating an opinion, rollback all changes for + # cluster's opinions + logger.warning( + f"There was an error updating the order of opinions of the " + f"cluster id: {cluster_id}" + ) + transaction.set_rollback(True) + else: + logger.info( + f"The order of opinions was updated, cluster id: {cluster_id}" + ) + + def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: """Update opinion ordering for columbia clusters @@ -614,7 +719,7 @@ def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: if op.get("opinion") ] - matches = match_text_lists( + matches = match_opinion_lists( columbia_opinions_content, cl_opinions_content, ) @@ -638,57 +743,15 @@ def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: # Go to next cluster id continue - failed = False - for file_pos, cl_pos in matches.items(): - # file_pos is the correct index to find the opinion id to update - file_opinion = extracted_columbia_opinions[file_pos] - # the order was calculated using the xml file - file_order = file_opinion.get("order") + start_position - cl_opinion = cl_cleaned_opinions[cl_pos] - opinion_id_to_update = cl_opinion.get("id") - - if opinion_id_to_update: - try: - # Save opinion - op = Opinion.objects.get(id=opinion_id_to_update) - op.order = file_order - op.save() - logger.info( - f"Cluster id processed: {cluster_id} Update opinion id: {opinion_id_to_update} with position: {file_order}" - ) - except Opinion.DoesNotExist: - logger.warning( - f"We can't update opinion, opinion doesn't exist with " - f"id: {opinion_id_to_update}" - ) - failed = True - break - else: - logger.warning( - f"We can't update opinion, empty opinion id " - f"from cluster: {cluster_id}" - ) - failed = True - break - - if cluster_has_combined_opinion and not failed: - combined_opinions_cluster = Opinion.objects.filter( - cluster_id=cluster_id, type="010combined" - ).order_by("id") - - # Show combined opinions at beginning - for opinion_order, cluster_op in enumerate( - combined_opinions_cluster - ): - cluster_op.order = opinion_order - cluster_op.save() - - else: - # No matches found - logger.warning( - f"Failed to match opinions from cluster id: {cluster_id}" + # Update all opinions order + update_opinions( + cluster_id, + cl_cleaned_opinions, + extracted_columbia_opinions, + matches, + 
cluster_has_combined_opinion, + start_position, ) - continue class Command(BaseCommand): From f928aa021fe9de812f9e82b64a044582b5ffda78 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 19 Feb 2024 18:13:46 -0600 Subject: [PATCH 24/50] fix(opinion_order): update poetry.lock and pyproject.toml --- poetry.lock | 13 ++++++++++++- pyproject.toml | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 24a1c45791..25db969843 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1062,6 +1062,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." +optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -5105,4 +5116,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.12, <3.13" -content-hash = "d0cb9ebf26ba111318df8c00976f71ad6b18ffc1aafab1df3b506bfe5128611d" +content-hash = "a8dfd3edc2209cb2d357696b751508ebd0c249be0b1b408f2f7225884a5e7b2a" diff --git a/pyproject.toml b/pyproject.toml index 32afda8f5f..e8d88a61f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,6 +112,7 @@ httpx = {extras = ["http2"], version = "^0.26.0"} django-model-utils = "^4.3.1" juriscraper = "*" django-permissions-policy = "^4.19.0" +django-ordered-model = "^3.7.4" [tool.poetry.group.dev.dependencies] From d46b42fd39b6abacf301ae3ce46ed090d5cb5446 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 19 Feb 2024 18:21:48 -0600 Subject: [PATCH 25/50] fix(opinion_order): rename migrations --- .../{0024_order_opinions.py => 0027_order_opinions.py} | 2 +- .../{0024_order_opinions.sql => 0027_order_opinions.sql} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cl/search/migrations/{0024_order_opinions.py => 0027_order_opinions.py} (98%) rename cl/search/migrations/{0024_order_opinions.sql => 0027_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0024_order_opinions.py b/cl/search/migrations/0027_order_opinions.py similarity index 98% rename from cl/search/migrations/0024_order_opinions.py rename to cl/search/migrations/0027_order_opinions.py index 1abaed4d76..e1c602e2e5 100644 --- a/cl/search/migrations/0024_order_opinions.py +++ b/cl/search/migrations/0027_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0023_add_docket_sources_noop"), + ("search", "0026_drop_docket_unique_together_and_more"), ] operations = [ diff --git a/cl/search/migrations/0024_order_opinions.sql b/cl/search/migrations/0027_order_opinions.sql similarity index 100% rename from cl/search/migrations/0024_order_opinions.sql rename to cl/search/migrations/0027_order_opinions.sql From cefb8482ed586e65526f59818901eca56ca26e7d Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 6 May 2024 18:12:54 -0600 Subject: [PATCH 26/50] feat(opinion_order): resolve merge conflict, rename migrations --- ..._order_opinions.py => 0031_order_opinions.py} | 2 +- ...rder_opinions.sql => 0031_order_opinions.sql} | 0 poetry.lock | 16 +++++++++++++--- 3 files changed, 14 insertions(+), 
4 deletions(-) rename cl/search/migrations/{0027_order_opinions.py => 0031_order_opinions.py} (98%) rename cl/search/migrations/{0027_order_opinions.sql => 0031_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0027_order_opinions.py b/cl/search/migrations/0031_order_opinions.py similarity index 98% rename from cl/search/migrations/0027_order_opinions.py rename to cl/search/migrations/0031_order_opinions.py index e1c602e2e5..9e7774203d 100644 --- a/cl/search/migrations/0027_order_opinions.py +++ b/cl/search/migrations/0031_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0026_drop_docket_unique_together_and_more"), + ("search", "0030_recapdocument_pacer_doc_id_idx"), ] operations = [ diff --git a/cl/search/migrations/0027_order_opinions.sql b/cl/search/migrations/0031_order_opinions.sql similarity index 100% rename from cl/search/migrations/0027_order_opinions.sql rename to cl/search/migrations/0031_order_opinions.sql diff --git a/poetry.lock b/poetry.lock index f22583b490..109cadc2d3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "amqp" @@ -1062,6 +1062,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." +optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -2467,7 +2478,6 @@ files = [ {file = "lxml-5.2.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:9e2addd2d1866fe112bc6f80117bcc6bc25191c5ed1bfbcf9f1386a884252ae8"}, {file = "lxml-5.2.1-cp37-cp37m-win32.whl", hash = "sha256:f51969bac61441fd31f028d7b3b45962f3ecebf691a510495e5d2cd8c8092dbd"}, {file = "lxml-5.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:b0b58fbfa1bf7367dde8a557994e3b1637294be6cf2169810375caf8571a085c"}, - {file = "lxml-5.2.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3e183c6e3298a2ed5af9d7a356ea823bccaab4ec2349dc9ed83999fd289d14d5"}, {file = "lxml-5.2.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:804f74efe22b6a227306dd890eecc4f8c59ff25ca35f1f14e7482bbce96ef10b"}, {file = "lxml-5.2.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:08802f0c56ed150cc6885ae0788a321b73505d2263ee56dad84d200cab11c07a"}, {file = "lxml-5.2.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f8c09ed18ecb4ebf23e02b8e7a22a05d6411911e6fabef3a36e4f371f4f2585"}, @@ -5259,4 +5269,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.12, <3.13" -content-hash = "994213014ffbb4387604c85fddd76e01112f4e3b66a1be6bc77f601b5b1de1b8" +content-hash = "c6a4dd1a9c6ecf961e254a3d6d0387f4d5e6f6fdb4181c33e2c55174e68d4454" From d1a1708f363764056e4c6f9e0159e460675ad3da Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 5 Jun 2024 12:58:05 -0600 Subject: [PATCH 27/50] fix(opinion_order): update poetry.lock to 
solve merge conflicts --- poetry.lock | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 6d7f85852a..cbc5ec2cc3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1058,6 +1058,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." +optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -5254,4 +5265,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.12, <3.13" -content-hash = "814ca0b0dc8db689f83e391fc58b494de48f6321085872bfaa8e37b7a7fc0e99" +content-hash = "a64d61d094d3896cb204e882ff2471b4f3b69def7416a2b50cdcedc9acf6455e" From 754d71fda6d7a12d4dfc6dddf121399d6d0582c9 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 5 Jun 2024 13:06:55 -0600 Subject: [PATCH 28/50] fix(opinion_order): rename migration --- .../{0031_order_opinions.py => 0032_order_opinions.py} | 2 +- .../{0031_order_opinions.sql => 0032_order_opinions.sql} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cl/search/migrations/{0031_order_opinions.py => 0032_order_opinions.py} (98%) rename cl/search/migrations/{0031_order_opinions.sql => 0032_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0031_order_opinions.py b/cl/search/migrations/0032_order_opinions.py similarity index 98% rename from cl/search/migrations/0031_order_opinions.py rename to cl/search/migrations/0032_order_opinions.py index 9e7774203d..b34bb01d48 100644 --- a/cl/search/migrations/0031_order_opinions.py +++ b/cl/search/migrations/0032_order_opinions.py @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("search", "0030_recapdocument_pacer_doc_id_idx"), + ("search", "0031_alter_opinion_type_alter_opinioncluster_source_noop"), ] operations = [ diff --git a/cl/search/migrations/0031_order_opinions.sql b/cl/search/migrations/0032_order_opinions.sql similarity index 100% rename from cl/search/migrations/0031_order_opinions.sql rename to cl/search/migrations/0032_order_opinions.sql From d7132ec90bf778ae9f28855b31724b4a21bee33e Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 19 Jul 2024 10:57:20 -0600 Subject: [PATCH 29/50] fix(opinion_order): update poetry.lock --- poetry.lock | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index f7f6c67e40..4d48c0c2ed 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1116,6 +1116,17 @@ files = [ [package.dependencies] Django = ">=3.2" +[[package]] +name = "django-ordered-model" +version = "3.7.4" +description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." 
+optional = false +python-versions = "*" +files = [ + {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, + {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, +] + [[package]] name = "django-override-storage" version = "0.3.2" @@ -5461,4 +5472,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.12, <3.13" -content-hash = "e6d34875888f1687912d03d33ea68038bba6c6d487037c6454d5b18449ec6d0c" +content-hash = "5334f16d006f7486a5f9b905906f2a9a68e7f524684c04af3d0994ebd0999384" From 2013633d8c5b87d6fc7dfc4dfc4c701ad0fb18c2 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Fri, 19 Jul 2024 12:22:47 -0600 Subject: [PATCH 30/50] refactor(update_opinions_order): refactor code --- .../commands/update_opinions_order.py | 425 +----------------- 1 file changed, 20 insertions(+), 405 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 5b86c98130..85ed93e0e2 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -1,15 +1,20 @@ import os.path import re -from typing import Any, Optional +from typing import Optional -from bs4 import BeautifulSoup, NavigableString, Tag +from bs4 import BeautifulSoup from django.core.management import BaseCommand from django.db import transaction from django.db.models import Count -from cl.corpus_importer.utils import compare_documents, similarity_scores +from cl.corpus_importer.import_columbia.columbia_utils import ( + extract_columbia_opinions, + map_opinion_types, + process_extracted_opinions, + read_xml_to_soup, +) +from cl.corpus_importer.utils import EmptyOpinionException, match_opinion_lists from cl.lib.command_utils import logger -from cl.lib.string_diff import get_cosine_similarity from cl.search.models import SOURCES, Opinion, OpinionCluster VALID_COLUMBIA_SOURCES = [ @@ -23,393 +28,6 @@ ] -# TODO remove the funcitions below and import them from utils.py and columbia_utils.py when those changes get merged - -SIMPLE_TAGS = [ - "attorneys", - "caption", - "citation", - "court", - "date", - "docket", - "hearing_date", - "panel", - "posture", - "reporter_caption", -] - - -class EmptyOpinionException(Exception): - """An exception for opinions that raise a ZeroDivisionError Exception due empty - opinion tag or empty opinion content in cl""" - - def __init__(self, message: str) -> None: - self.message = message - - -def read_xml_to_soup(filepath: str) -> BeautifulSoup: - """This function reads the xml file, fixes the bad tags in columbia xml - files and returns a BeautifulSoup object - - :param filepath: path to xml file - :return: BeautifulSoup object of parsed content - """ - with open(filepath, "r", encoding="utf-8") as f: - file_content = f.read() - # Sometimes opening and ending tag mismatch (e.g. 
ed7c6b39dcb29c9c.xml) - file_content = file_content.replace( - "", "" - ) - # Fix opinion with invalid attribute - if "" in file_content: - file_content = file_content.replace( - "", "" - ) - file_content = file_content.replace("", "").replace( - "", "" - ) - return BeautifulSoup(file_content, "lxml") - - -def add_floating_opinion( - opinions: list, floating_content: list, opinion_order: int -) -> list: - """We have found floating opinions in bs object, we keep the opinion - content as a new opinion - - :param opinions: a list with opinions found - :param floating_content: content that is not in known non-opinion tags - :param opinion_order: opinion position - :return: updated list of opinions - """ - op_type = "opinion" - if opinions: - if opinions[-1].get("type"): - # Use type of previous opinion if exists - op_type = opinions[-1].get("type") - - # Get rid of double spaces from floating content - opinion_content = re.sub( - " +", " ", "\n".join(floating_content) - ).strip() # type: str - if opinion_content: - opinions.append( - { - "opinion": opinion_content, - "order": opinion_order, - "byline": "", - "type": op_type, - } - ) - return opinions - - -def extract_columbia_opinions( - outer_opinion: BeautifulSoup, -) -> list[Optional[dict]]: - """We extract all possible opinions from BeautifulSoup, with and without - author, and we create new opinions if floating content exists(content that - is not explicitly defined within an opinion tag or doesn't have an author) - - :param outer_opinion: element containing all xml tags - :return: list of opinion dicts - """ - opinions: list = [] - floating_content = [] - order = 0 - - # We iterate all content to look for all possible opinions - for i, content in enumerate(outer_opinion): # type: int, Tag - if isinstance(content, NavigableString): - # We found a raw string, store it - floating_content.append(str(content)) - else: - if content.name in SIMPLE_TAGS + [ - "citation_line", - "opinion_byline", - "dissent_byline", - "concurrence_byline", - ]: - # Ignore these tags, it will be processed later - continue - elif content.name in [ - "opinion_text", - "dissent_text", - "concurrence_text", - ]: - if floating_content: - # We have found an opinion, but there is floating - # content, we create a dict with the opinion using the - # floating content with default type = "opinion" - opinions = add_floating_opinion( - opinions, floating_content, order - ) - floating_content = [] - - byline = content.find_previous_sibling() - opinion_author = "" - if byline and "_byline" in byline.name: - opinion_author = byline.get_text() - - opinion_content = re.sub( - " +", " ", content.decode_contents() - ).strip() - if opinion_content: - # Now we create a dict with current opinion - opinions.append( - { - "opinion": opinion_content, - "order": order, - "byline": opinion_author, - "type": content.name.replace("_text", ""), - } - ) - order = order + 1 - - else: - if content.name not in SIMPLE_TAGS + ["syllabus"]: - # We store content that is not inside _text tag and is - # not in one of the known non-opinion tags - floating_content.append(str(content)) - - # Combine the new content into another opinion. great. 
- if floating_content: - # If we end to go through all the found opinions and if we still - # have floating content out there, we create a new opinion with the - # last type of opinion - opinions = add_floating_opinion(opinions, floating_content, order) - return opinions - - -def is_per_curiam_opinion( - content: Optional[str], byline: Optional[str] -) -> bool: - """Check if opinion author is per curiam - :param content: opinion content - :param byline: opinion text author - :return: True if opinion author is per curiam - """ - if byline and "per curiam" in byline[:1000].lower(): - return True - if content and "per curiam" in content[:1000].lower(): - return True - return False - - -def merge_opinions( - opinions: list, content: list, current_order: int -) -> tuple[list, int]: - """Merge last and previous opinion if are the same type or create a new - opinion if merge is not possible - - :param opinions: list of opinions that is being updated constantly - :param content: list of opinions without an author - :param current_order: opinion position - :return: updated list of opinions - """ - - # We check if the previous stored opinion matches the type of the - # content, and we store the opinion dict temporary - relevant_opinions = ( - [opinions[-1]] - if opinions and opinions[-1]["type"] == content[0].get("type") - else [] - ) - - if relevant_opinions: - relevant_opinions[-1]["opinion"] += "\n" + "\n".join( - [f.get("opinion") for f in content if f.get("opinion")] - ) - - else: - # No relevant opinions found, create a new opinion with the content - opinion_content = "\n".join( - [f.get("opinion") for f in content if f.get("opinion")] - ) - new_opinion = { - "byline": None, - "type": content[0].get("type"), - "opinion": opinion_content, - "order": current_order, - "per_curiam": is_per_curiam_opinion(opinion_content, None), - } - opinions.append(new_opinion) - current_order = current_order + 1 - - return opinions, current_order - - -def process_extracted_opinions(extracted_opinions: list) -> list: - """We read the extracted data in extract_opinions function to merge all - possible floating opinions (it is not explicitly defined within an opinion - tag or doesn't have an author) - - :param extracted_opinions: list of opinions obtained from xml file - :return: a list with extracted and processed opinions - """ - - opinions: list = [] - authorless_content = [] - order = 0 - - for i, found_content in enumerate(extracted_opinions, start=1): - byline = found_content.get("byline") - if not byline: - # Opinion has no byline, store opinion content - authorless_content.append(found_content) - - if byline: - # Opinion has byline, get opinion type and content - opinion_type = found_content.get("type") - opinion_content = found_content.get("opinion", "") - # Store content that doesn't match the current opinion type - alternative_authorless_content = [ - content - for content in authorless_content - if content.get("type") != opinion_type - ] - # Keep content that matches the current type - authorless_content = [ - op_content - for op_content in authorless_content - if op_content.get("type") == opinion_type - ] - - if alternative_authorless_content: - # Keep floating text that are not from the same type, - # we need to create a separate opinion for those, - # for example: in 2713f39c5a8e8684.xml we have an opinion - # without an author, and the next opinion with an author is - # a dissent opinion, we can't combine both - opinions, order = merge_opinions( - opinions, alternative_authorless_content, order - ) 
-
-            opinion_content = (
-                "\n".join(
-                    [
-                        f.get("opinion")
-                        for f in authorless_content
-                        if f.get("type") == opinion_type
-                    ]
-                )
-                + "\n\n"
-                + opinion_content
-            )
-
-            # Add new opinion
-            new_opinion = {
-                "byline": byline,
-                "type": opinion_type,
-                "opinion": opinion_content,
-                "order": order,
-                "per_curiam": is_per_curiam_opinion(opinion_content, byline),
-            }
-
-            opinions.append(new_opinion)
-            order = order + 1
-            authorless_content = []
-
-        if len(extracted_opinions) == i and authorless_content:
-            # If is the last opinion, and we still have opinions without
-            # byline, create an opinion without an author and the contents
-            # that couldn't be merged
-            opinions, order = merge_opinions(
-                opinions, authorless_content, order
-            )
-
-    return opinions
-
-
-def map_opinion_types(opinions=None) -> None:
-    """Map opinion type to model field choice
-
-    :param opinions: a list that contains all opinions as dict elements
-    :return: None
-    """
-
-    if opinions is None:
-        opinions = []
-    lead = False
-    for op in opinions:
-        op_type = op.get("type")
-        # Only first opinion with "opinion" type is a lead opinion, the next
-        # opinion with "opinion" type is an addendum
-        if not lead and op_type and op_type == "opinion":
-            lead = True
-            op["type"] = "020lead"
-            continue
-        elif lead and op_type and op_type == "opinion":
-            op["type"] = "050addendum"
-        elif op_type and op_type == "dissent":
-            op["type"] = "040dissent"
-        elif op_type and op_type == "concurrence":
-            op["type"] = "030concurrence"
-
-
-def match_opinion_lists(
-    file_opinions_list: list[Any], cl_opinions_list: list[Any]
-) -> dict[int, int]:
-    """Try to match the opinions on two lists and generate a dict with position of
-    matching opinions
-
-    Remove non-alphanumeric and non-whitespace characters from lowercased text,
-    this tries to make both texts in equal conditions to prove if both are similar or
-    equal
-
-    get_cosine_similarity works great when both texts are almost the same with very
-    small variations
-
-    Sometimes cosine similarity fails when there are small variations in text,
-    such as parties, attorneys, case name, or court that are included in the content
-    of the opinion, compare_documents() checks the percentage of the file opinion
-    text that is in the courtlistener opinion, having a large percentage means that
-    almost all the file opinion is in the courtlistener opinion, but there is a
-    possibility that the courtlistener opinion contains some additional data in the
-    opinion content (such as case name, parties, etc.)
-
-    compare_documents works well when the opinion from the file is a subset of the
-    opinion in CL, the percentage represents how much of the opinion of the file is
-    in the opinion from cl (content in cl opinion can have other data in the body
-    like posture, attorneys, etc. e.g. in cluster id: 7643871 we have the posture and
-    the opinion text but in the xml file we only have the opinion text, cosine_sim:
-    0.1639075094124459 and percent_match: 73)
-
-    Sometimes one algorithm performs better than the other, this is due to some
-    additional text, such as editor's notes, or the author, page number or posture
-    added to the opinion
-
-    Key is opinion position from file, Value is opinion position from cl opinion e.g.
- matches {0: 1, 1: 2} 0 is file opinion and 1 in cl opinion, 1 is file opinion and - 2 is cl opinion - - :param file_opinions_list: Opinions from file - :param cl_opinions_list: CL opinions - :return: Matches if found or empty dict - """ - - scores = similarity_scores(file_opinions_list, cl_opinions_list) - - matches = {} - for i, row in enumerate(scores): - j = row.argmax() # type: ignore - file_opinion = re.sub( - r"[^a-zA-Z0-9 ]", "", file_opinions_list[i].lower() - ) - cl_opinion = re.sub(r"[^a-zA-Z0-9 ]", "", cl_opinions_list[j].lower()) - - cosine_sim = get_cosine_similarity(file_opinion, cl_opinion) - - percent_match = compare_documents(file_opinion, cl_opinion) - - if cosine_sim < 0.60 and percent_match < 60: - continue - - matches[i] = j - - return matches - - def clean_opinion_content(text: str) -> str: """Clean opinion content @@ -424,9 +42,6 @@ def clean_opinion_content(text: str) -> str: return re.sub(r"[^a-zA-Z0-9 ]", "", text.lower()) -# TODO ------------------------ remove until here ------------------------------- - - def get_opinions_cleaned_content( cluster_id, ) -> tuple[Optional[str], list[dict], int, bool]: @@ -531,6 +146,8 @@ def sort_harvard_opinions(start_id: int, end_id: int) -> None: """We assume that harvard data is already ordered, we just need to fill the order field in each opinion + The harvard importer created the opinions in order of appearance in the file + :param start_id: skip any id lower than this value :param end_id: skip any id greater than this value :return: None @@ -795,25 +412,23 @@ def add_arguments(self, parser): ) def handle(self, *args, **options): - if options["process_harvard"] and options["process_columbia"]: - print( - "You can only select one option process-harvard or process-columbia" + + if not options["process_harvard"] and not options["process_columbia"]: + logger.info( + "One option required: process-harvard or process-columbia" ) return - if not options["process_harvard"] and not options["process_columbia"]: - print("One option required: process-harvard or process-columbia") + if options["process_harvard"] and options["process_columbia"]: + logger.info( + "You can only select one option process-harvard or process-columbia" + ) return if options["process_harvard"]: sort_harvard_opinions(options["start_id"], options["end_id"]) - if options["process_columbia"] and options["xml_dir"]: + if options["process_columbia"]: sort_columbia_opinions( options["start_id"], options["end_id"], options["xml_dir"] ) - - if options["process_columbia"] and not options["xml_dir"]: - print( - "Argument --xml-dir required to read xml files from mounted directory" - ) From 1e47ff4b503459047caeda15f5fac4b03e77b59a Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 25 Jul 2024 12:56:00 -0600 Subject: [PATCH 31/50] feat(opinion_order): remove django-ordered-model add order field to opinion model add unique_together for cluster and order --- cl/search/migrations/0032_order_opinions.py | 26 ++++++-------- cl/search/migrations/0032_order_opinions.sql | 37 +++++++++----------- cl/search/models.py | 7 ++-- cl/settings/django.py | 1 - poetry.lock | 13 +------ pyproject.toml | 1 - 6 files changed, 31 insertions(+), 54 deletions(-) diff --git a/cl/search/migrations/0032_order_opinions.py b/cl/search/migrations/0032_order_opinions.py index b34bb01d48..dbbe4707d0 100644 --- a/cl/search/migrations/0032_order_opinions.py +++ b/cl/search/migrations/0032_order_opinions.py @@ -1,8 +1,8 @@ -# Generated by Django 4.2.1 on 2023-06-15 17:56 +# Generated by Django 5.0.7 
on 2024-07-25 17:13 -from django.db import migrations, models import pgtrigger.compiler import pgtrigger.migrations +from django.db import migrations, models class Migration(migrations.Migration): @@ -11,10 +11,6 @@ class Migration(migrations.Migration): ] operations = [ - migrations.AlterModelOptions( - name="opinion", - options={"ordering": ("order",)}, - ), pgtrigger.migrations.RemoveTrigger( model_name="opinion", name="update_or_delete_snapshot_delete", @@ -26,27 +22,25 @@ class Migration(migrations.Migration): migrations.AddField( model_name="opinion", name="order", - field=models.PositiveIntegerField( - db_index=True, default=1, editable=False, verbose_name="order" - ), - preserve_default=False, + field=models.IntegerField(blank=True, null=True), ), migrations.AddField( model_name="opinionevent", name="order", - field=models.PositiveIntegerField( - default=1, editable=False, verbose_name="order" - ), - preserve_default=False, + field=models.IntegerField(blank=True, null=True), + ), + migrations.AlterUniqueTogether( + name="opinion", + unique_together={("cluster", "order")}, ), pgtrigger.migrations.AddTrigger( model_name="opinion", trigger=pgtrigger.compiler.Trigger( name="update_or_delete_snapshot_update", sql=pgtrigger.compiler.UpsertTriggerSql( - condition='WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."order" IS DISTINCT FROM (NEW."order") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr"))', + condition='WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR 
OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr") OR OLD."order" IS DISTINCT FROM (NEW."order"))', func='INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL;', - hash="bcac41027f469bbd394e8671cb0b2fa33e7035f3", + hash="89fec08f03e567ec8ecc7cd1e8ec5f665abf9d3b", operation="UPDATE", pgid="pgtrigger_update_or_delete_snapshot_update_67ecd", table="search_opinion", diff --git a/cl/search/migrations/0032_order_opinions.sql b/cl/search/migrations/0032_order_opinions.sql index 3226cb510b..71161b2370 100644 --- a/cl/search/migrations/0032_order_opinions.sql +++ b/cl/search/migrations/0032_order_opinions.sql @@ -1,9 +1,5 @@ BEGIN; -- --- Change Meta options on opinion --- --- (no-op) --- -- Remove trigger update_or_delete_snapshot_delete from model opinion -- DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion"; @@ -14,13 +10,15 @@ DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_update_67ecd ON "sear -- -- Add field order to opinion -- -ALTER TABLE "search_opinion" ADD COLUMN "order" integer DEFAULT 1 NOT NULL CHECK ("order" >= 0); -ALTER TABLE "search_opinion" ALTER COLUMN "order" DROP DEFAULT; +ALTER TABLE "search_opinion" ADD COLUMN "order" integer NULL; -- -- Add field order to opinionevent -- -ALTER TABLE "search_opinionevent" ADD COLUMN "order" integer DEFAULT 1 NOT NULL CHECK ("order" >= 0); -ALTER TABLE "search_opinionevent" ALTER COLUMN "order" DROP DEFAULT; +ALTER TABLE "search_opinionevent" ADD COLUMN "order" integer NULL; +-- +-- Alter unique_together for opinion (1 constraint(s)) +-- +ALTER TABLE "search_opinion" ADD CONSTRAINT "search_opinion_cluster_id_order_8426d97d_uniq" UNIQUE ("cluster_id", "order"); -- -- Create trigger update_or_delete_snapshot_update on model opinion -- @@ -50,7 +48,7 @@ ALTER TABLE "search_opinionevent" ALTER COLUMN "order" DROP DEFAULT; CREATE OR REPLACE FUNCTION pgtrigger_update_or_delete_snapshot_update_67ecd() RETURNS TRIGGER AS $$ - + BEGIN IF ("public"._pgtrigger_should_ignore(TG_NAME) IS TRUE) THEN IF (TG_OP = 'DELETE') THEN @@ -66,13 +64,13 @@ ALTER TABLE "search_opinionevent" ALTER COLUMN "order" DROP DEFAULT; DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion"; CREATE TRIGGER pgtrigger_update_or_delete_snapshot_update_67ecd AFTER UPDATE ON "search_opinion" - - - FOR EACH ROW WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."order" IS DISTINCT FROM (NEW."order") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT 
FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr")) + + + FOR EACH ROW WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr") OR OLD."order" IS DISTINCT FROM (NEW."order")) EXECUTE PROCEDURE pgtrigger_update_or_delete_snapshot_update_67ecd(); - COMMENT ON TRIGGER pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion" IS 'bcac41027f469bbd394e8671cb0b2fa33e7035f3'; - + COMMENT ON TRIGGER pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion" IS '89fec08f03e567ec8ecc7cd1e8ec5f665abf9d3b'; + -- -- Create trigger update_or_delete_snapshot_delete on model opinion -- @@ -102,7 +100,7 @@ ALTER TABLE "search_opinionevent" ALTER COLUMN "order" DROP DEFAULT; CREATE OR REPLACE FUNCTION pgtrigger_update_or_delete_snapshot_delete_1f4fd() RETURNS TRIGGER AS $$ - + BEGIN IF ("public"._pgtrigger_should_ignore(TG_NAME) IS TRUE) THEN IF (TG_OP = 'DELETE') THEN @@ -118,12 +116,11 @@ ALTER TABLE "search_opinionevent" ALTER COLUMN "order" DROP DEFAULT; DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion"; CREATE TRIGGER pgtrigger_update_or_delete_snapshot_delete_1f4fd AFTER DELETE ON "search_opinion" - - - FOR EACH ROW + + + FOR EACH ROW EXECUTE PROCEDURE pgtrigger_update_or_delete_snapshot_delete_1f4fd(); COMMENT ON TRIGGER pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion" IS '79bebd7cda3c6ed3bc40f28799cf9c0f2638e2ad'; - -CREATE INDEX "search_opinion_order_d54dd126" ON "search_opinion" ("order"); + COMMIT; diff --git a/cl/search/models.py b/cl/search/models.py index 59ad525e88..9c04940e3e 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -22,7 +22,6 @@ from localflavor.us.models import 
USPostalCodeField, USZipCodeField from localflavor.us.us_states import OBSOLETE_STATES, USPS_CHOICES from model_utils import FieldTracker -from ordered_model.models import OrderedModel from cl.citations.utils import get_citation_depth_between_clusters from cl.custom_filters.templatetags.text_filters import best_case_name @@ -3149,7 +3148,7 @@ def sort_cites(c): @pghistory.track(AfterUpdateOrDeleteSnapshot()) -class Opinion(OrderedModel, AbstractDateTimeModel): +class Opinion(AbstractDateTimeModel): COMBINED = "010combined" UNANIMOUS = "015unamimous" LEAD = "020lead" @@ -3321,10 +3320,10 @@ class Opinion(OrderedModel, AbstractDateTimeModel): "sha1", ] ) - order_with_respect_to = "cluster" + order = models.IntegerField(null=True, blank=True) class Meta: - ordering = ("order",) + unique_together = ("cluster", "order") @property def siblings(self) -> QuerySet: diff --git a/cl/settings/django.py b/cl/settings/django.py index e6d74c3949..968323bcb3 100644 --- a/cl/settings/django.py +++ b/cl/settings/django.py @@ -159,7 +159,6 @@ "django_elasticsearch_dsl", "pghistory", "pgtrigger", - "ordered_model", # CourtListener Apps "cl.alerts", "cl.audio", diff --git a/poetry.lock b/poetry.lock index 65e8c26f25..a769a59f6a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1116,17 +1116,6 @@ files = [ [package.dependencies] Django = ">=3.2" -[[package]] -name = "django-ordered-model" -version = "3.7.4" -description = "Allows Django models to be ordered and provides a simple admin interface for reordering them." -optional = false -python-versions = "*" -files = [ - {file = "django-ordered-model-3.7.4.tar.gz", hash = "sha256:f258b9762525c00a53009e82f8b8bf2a3aa315e8b453e281e8fdbbfe2b8cb3ba"}, - {file = "django_ordered_model-3.7.4-py3-none-any.whl", hash = "sha256:dfcd3183fe0749dad1c9971cba1d6240ce7328742a30ddc92feca41107bb241d"}, -] - [[package]] name = "django-override-storage" version = "0.3.2" @@ -5472,4 +5461,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.12, <3.13" -content-hash = "5334f16d006f7486a5f9b905906f2a9a68e7f524684c04af3d0994ebd0999384" +content-hash = "e6d34875888f1687912d03d33ea68038bba6c6d487037c6454d5b18449ec6d0c" diff --git a/pyproject.toml b/pyproject.toml index 44839b4a8d..33efc0846a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -115,7 +115,6 @@ tiktoken = "^0.6.0" hyperscan = "^0.7.7" openai = "^1.31.1" seal-rookery = "^2.2.3" -django-ordered-model = "^3.7.4" [tool.poetry.group.dev.dependencies] From 32821a5914afd2a96c139047e336f7fb293b2e65 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 25 Jul 2024 17:32:05 -0600 Subject: [PATCH 32/50] feat(opinion_order): update tests update fixtures add unique constraint update migrations --- cl/search/fixtures/test_objects_search.json | 4 +- cl/search/migrations/0032_order_opinions.py | 16 ++++--- cl/search/migrations/0032_order_opinions.sql | 8 ++-- cl/search/models.py | 12 ++++- cl/search/tests/tests.py | 47 ++++++++++---------- 5 files changed, 52 insertions(+), 35 deletions(-) diff --git a/cl/search/fixtures/test_objects_search.json b/cl/search/fixtures/test_objects_search.json index 9fddb84fca..7ae3da4163 100644 --- a/cl/search/fixtures/test_objects_search.json +++ b/cl/search/fixtures/test_objects_search.json @@ -332,7 +332,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "order": 2 }, "model": "search.opinion", "pk": 5 @@ -355,7 +355,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "order": 
3 }, "model": "search.opinion", "pk": 6 diff --git a/cl/search/migrations/0032_order_opinions.py b/cl/search/migrations/0032_order_opinions.py index dbbe4707d0..9c7f3fa5d3 100644 --- a/cl/search/migrations/0032_order_opinions.py +++ b/cl/search/migrations/0032_order_opinions.py @@ -1,4 +1,4 @@ -# Generated by Django 5.0.7 on 2024-07-25 17:13 +# Generated by Django 5.0.7 on 2024-07-25 23:17 import pgtrigger.compiler import pgtrigger.migrations @@ -7,6 +7,10 @@ class Migration(migrations.Migration): dependencies = [ + ( + "people_db", + "0016_remove_abarating_update_or_delete_snapshot_update_and_more", + ), ("search", "0031_alter_opinion_type_alter_opinioncluster_source_noop"), ] @@ -29,10 +33,6 @@ class Migration(migrations.Migration): name="order", field=models.IntegerField(blank=True, null=True), ), - migrations.AlterUniqueTogether( - name="opinion", - unique_together={("cluster", "order")}, - ), pgtrigger.migrations.AddTrigger( model_name="opinion", trigger=pgtrigger.compiler.Trigger( @@ -62,4 +62,10 @@ class Migration(migrations.Migration): ), ), ), + migrations.AddConstraint( + model_name="opinion", + constraint=models.UniqueConstraint( + fields=("cluster_id", "order"), name="unique_opinion_order" + ), + ), ] diff --git a/cl/search/migrations/0032_order_opinions.sql b/cl/search/migrations/0032_order_opinions.sql index 71161b2370..01cac8adf7 100644 --- a/cl/search/migrations/0032_order_opinions.sql +++ b/cl/search/migrations/0032_order_opinions.sql @@ -16,10 +16,6 @@ ALTER TABLE "search_opinion" ADD COLUMN "order" integer NULL; -- ALTER TABLE "search_opinionevent" ADD COLUMN "order" integer NULL; -- --- Alter unique_together for opinion (1 constraint(s)) --- -ALTER TABLE "search_opinion" ADD CONSTRAINT "search_opinion_cluster_id_order_8426d97d_uniq" UNIQUE ("cluster_id", "order"); --- -- Create trigger update_or_delete_snapshot_update on model opinion -- @@ -123,4 +119,8 @@ ALTER TABLE "search_opinion" ADD CONSTRAINT "search_opinion_cluster_id_order_842 COMMENT ON TRIGGER pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion" IS '79bebd7cda3c6ed3bc40f28799cf9c0f2638e2ad'; +-- +-- Create constraint unique_opinion_order on model opinion +-- +ALTER TABLE "search_opinion" ADD CONSTRAINT "unique_opinion_order" UNIQUE ("cluster_id", "order"); COMMIT; diff --git a/cl/search/models.py b/cl/search/models.py index 9c04940e3e..d6c17ba4f8 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -3323,7 +3323,11 @@ class Opinion(AbstractDateTimeModel): order = models.IntegerField(null=True, blank=True) class Meta: - unique_together = ("cluster", "order") + constraints = [ + models.UniqueConstraint( + fields=["cluster_id", "order"], name="unique_opinion_order" + ) + ] @property def siblings(self) -> QuerySet: @@ -3350,6 +3354,12 @@ def save( *args: List, **kwargs: Dict, ) -> None: + if self.pk is None and self.order is None: + # Add order in new opinions with no defined order value + last_position = Opinion.objects.filter( + cluster=self.cluster + ).aggregate(models.Max("order"))["order__max"] + self.order = (last_position or 0) + 1 super().save(*args, **kwargs) if index: from cl.search.tasks import add_items_to_solr diff --git a/cl/search/tests/tests.py b/cl/search/tests/tests.py index 740bcb3156..5c57cf72bd 100644 --- a/cl/search/tests/tests.py +++ b/cl/search/tests/tests.py @@ -65,7 +65,6 @@ OpinionClusterFactory, OpinionClusterFactoryWithChildrenAndParents, OpinionFactory, - OpinionsCitedWithParentsFactory, OpinionWithChildrenFactory, OpinionWithParentsFactory, 
RECAPDocumentFactory, @@ -301,7 +300,7 @@ def test_custom_manager_chained_filter(self) -> None: self.assertEqual(cluster_count, expected_count) def test_opinions_order(self) -> None: - """Test django-ordered-model library""" + """Test opinions order""" # Create court court = CourtFactory(id="nyappdiv") @@ -336,32 +335,34 @@ def test_opinions_order(self) -> None: # Test that the value of the order field matches the order in which # they were created - self.assertEqual(op_1.order, 0) - self.assertEqual(op_2.order, 1) - self.assertEqual(op_3.order, 2) + self.assertEqual(op_1.order, 1) + self.assertEqual(op_2.order, 2) + self.assertEqual(op_3.order, 3) - # Use library method to move lead opinion to first position, we can - # use this function to easily reorder existing opinions - op_3.to(0) + # Can we update an opinion using an existing position? + with transaction.atomic(): + with self.assertRaises(IntegrityError): + op_3.order = 2 + op_3.save() - # The position of the elements was modified, we refresh the objects - op_1.refresh_from_db() - op_2.refresh_from_db() - op_3.refresh_from_db() + # Can we create an opinion using an existing position? + with transaction.atomic(): + with self.assertRaises(IntegrityError): + op_4 = OpinionFactory( + cluster=cluster, type="Lead Opinion", order=1 + ) - # Test new order - self.assertEqual(op_3.order, 0) - self.assertEqual(op_1.order, 1) - self.assertEqual(op_2.order, 2) + # Can we use negative positions? + op_4 = OpinionFactory(cluster=cluster, type="Lead Opinion", order=-1) + self.assertEqual(op_4.order, -1) - # Add new opinion to cluster - op_4 = OpinionFactory( - cluster=cluster, - type="Dissent", + # Can we order the opinions from a cluster using the field? + qs = ( + cluster.sub_opinions.all() + .order_by("order") + .values_list("order", flat=True) ) - - # Test that the new opinion is in last place - self.assertEqual(op_4.order, 3) + self.assertEqual(list(qs), [-1, 1, 2, 3]) class DocketValidationTest(TestCase): From 5a6764e1bf45b99eda5b5f265e713a1665784e72 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 25 Jul 2024 17:51:30 -0600 Subject: [PATCH 33/50] feat(opinion_order): update fixture --- cl/search/fixtures/test_objects_search.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cl/search/fixtures/test_objects_search.json b/cl/search/fixtures/test_objects_search.json index 7ae3da4163..e9a89f1ea5 100644 --- a/cl/search/fixtures/test_objects_search.json +++ b/cl/search/fixtures/test_objects_search.json @@ -309,7 +309,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "order": 2 }, "model": "search.opinion", "pk": 4 @@ -332,7 +332,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 2 + "order": 3 }, "model": "search.opinion", "pk": 5 @@ -355,7 +355,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 3 + "order": 4 }, "model": "search.opinion", "pk": 6 From 37eb6bc82109dca97ed55b90575ba9a8e892def1 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 25 Jul 2024 18:03:03 -0600 Subject: [PATCH 34/50] feat(opinion_order): update fixture test_objects_query_counts.json --- cl/search/fixtures/test_objects_query_counts.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cl/search/fixtures/test_objects_query_counts.json b/cl/search/fixtures/test_objects_query_counts.json index b51117602a..af8b7f3e54 100644 --- a/cl/search/fixtures/test_objects_query_counts.json +++ 
b/cl/search/fixtures/test_objects_query_counts.json @@ -375,7 +375,7 @@ "html_lawbox":"", "per_curiam":false, "type":"010combined", - "order": 1 + "order": 2 }, "model":"search.opinion", "pk":4 @@ -400,7 +400,7 @@ "html_lawbox":"", "per_curiam":false, "type":"010combined", - "order": 1 + "order": 3 }, "model":"search.opinion", "pk":5 @@ -424,7 +424,7 @@ "html_lawbox":"", "per_curiam":false, "type":"010combined", - "order": 1 + "order": 4 }, "model":"search.opinion", "pk":6 From 4b4d97fdaaac94d69bf5815abd358bd080a7064c Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Thu, 25 Jul 2024 18:13:01 -0600 Subject: [PATCH 35/50] feat(opinion_order): update fixture opinions-issue-550.json and functest_opinions.json --- cl/search/fixtures/functest_opinions.json | 2 +- cl/search/fixtures/opinions-issue-550.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cl/search/fixtures/functest_opinions.json b/cl/search/fixtures/functest_opinions.json index 45f5f0b759..6bc9333003 100644 --- a/cl/search/fixtures/functest_opinions.json +++ b/cl/search/fixtures/functest_opinions.json @@ -187,7 +187,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "order": 2 }, "model": "search.opinion", "pk": 12 diff --git a/cl/search/fixtures/opinions-issue-550.json b/cl/search/fixtures/opinions-issue-550.json index 829a94c7d2..c5f07cea17 100644 --- a/cl/search/fixtures/opinions-issue-550.json +++ b/cl/search/fixtures/opinions-issue-550.json @@ -88,7 +88,7 @@ "html_lawbox": "", "per_curiam": false, "type": "020lead", - "order": 1 + "order": 2 }, "model": "search.opinion", "pk": 11 From 0050caa417378a0969c8310e7f66b02728eeb0da Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Tue, 30 Jul 2024 18:44:53 -0600 Subject: [PATCH 36/50] feat(opinion_order): rename order to ordering_key ignore combined opinions, don't add order number update fixtures update tests --- .../import_columbia/columbia_utils.py | 4 +- .../commands/update_opinions_order.py | 99 ++++++------------- cl/search/fixtures/functest_opinions.json | 8 +- cl/search/fixtures/opinions-issue-412.json | 4 +- cl/search/fixtures/opinions-issue-550.json | 4 +- .../fixtures/test_objects_query_counts.json | 12 +-- cl/search/fixtures/test_objects_search.json | 12 +-- cl/search/migrations/0032_order_opinions.py | 19 ++-- cl/search/migrations/0032_order_opinions.sql | 22 ++--- cl/search/models.py | 5 +- cl/search/tests/tests.py | 14 +-- .../fixtures/api_scotus_map_data.json | 4 +- .../fixtures/scotus_map_data.json | 34 +++---- 13 files changed, 102 insertions(+), 139 deletions(-) diff --git a/cl/corpus_importer/import_columbia/columbia_utils.py b/cl/corpus_importer/import_columbia/columbia_utils.py index b1a62cfd6c..dec91fc1da 100644 --- a/cl/corpus_importer/import_columbia/columbia_utils.py +++ b/cl/corpus_importer/import_columbia/columbia_utils.py @@ -224,7 +224,7 @@ def extract_columbia_opinions( """ opinions: list = [] floating_content = [] - order = 0 + order = 1 # The opinion count starts from 1 # We iterate all content to look for all possible opinions for i, content in enumerate(outer_opinion): # type: int, Tag @@ -363,7 +363,7 @@ def process_extracted_opinions(extracted_opinions: list) -> list: opinions: list = [] authorless_content = [] - order = 0 + order = 1 # The opinion count starts from 1 for i, found_content in enumerate(extracted_opinions, start=1): byline = found_content.get("byline") diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py 
b/cl/corpus_importer/management/commands/update_opinions_order.py index 85ed93e0e2..5c91d0e4b1 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -38,38 +38,29 @@ def clean_opinion_content(text: str) -> str: # Replace line breaks with spaces and get rid of double spaces text = re.sub(" +", " ", " ".join(text.split("\n"))).strip() - # Remove non-alphanumeric and non-whitespace characters from lowercased text + # Remove non-alphanumeric and non-whitespace characters from lowercase text return re.sub(r"[^a-zA-Z0-9 ]", "", text.lower()) def get_opinions_cleaned_content( cluster_id, -) -> tuple[Optional[str], list[dict], int, bool]: +) -> tuple[Optional[str], list[dict]]: """Get cleaned opinions content for a cluster object :param cluster_id: Cluster ID for a set of opinions - :return: (xml path, list of extracted opinions, start position, True if combined - opinions exists in cluster) + :return: (xml path, list of extracted opinions) """ cl_cleaned_opinions = [] # by default the opinions are ordered by pk - opinions_from_cluster = Opinion.objects.filter( - cluster_id=cluster_id - ).order_by("id") - combined_opinions_cluster = opinions_from_cluster.filter( - type="010combined" + opinions_from_cluster = ( + Opinion.objects.filter(cluster_id=cluster_id) + .order_by("id") + .exclude(type="010combined") ) + xml_path = None - cluster_has_combined_opinion = False - if combined_opinions_cluster: - # the combined opinion will be displayed at beginning - start_position = combined_opinions_cluster.count() - cluster_has_combined_opinion = True - else: - # we don't have combined opinions, we start ordering from 0 to n - start_position = 0 - - for i, op in enumerate(opinions_from_cluster.exclude(type="010combined")): + + for i, op in enumerate(opinions_from_cluster): if op.local_path and not xml_path: xml_path = str(op.local_path) @@ -101,8 +92,6 @@ def get_opinions_cleaned_content( return ( xml_path, cl_cleaned_opinions, - start_position, - cluster_has_combined_opinion, ) @@ -170,26 +159,12 @@ def sort_harvard_opinions(start_id: int, end_id: int) -> None: # cluster_id: 4697264, the combined opinion will go to the last position for oc in clusters: logger.info(f"Processing cluster id: {oc}") - combined_opinions_cluster = oc.sub_opinions.filter( - type="010combined" - ).order_by("id") - if combined_opinions_cluster: - # the combined opinion will be displayed at first - start_position = combined_opinions_cluster.count() - else: - # we don't have combined opinions, we start ordering from 0 to n - start_position = 0 for opinion_order, cluster_op in enumerate( oc.sub_opinions.exclude(type="010combined").order_by("id"), - start=start_position, + start=1, ): - cluster_op.order = opinion_order - cluster_op.save() - - # Show combined opinions at beginning - for opinion_order, cluster_op in enumerate(combined_opinions_cluster): - cluster_op.order = opinion_order + cluster_op.ordering_key = opinion_order cluster_op.save() logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") @@ -200,18 +175,13 @@ def update_opinions( cl_opinions: list, columbia_opinions: list, matches: dict, - cluster_has_combined_opinion: bool, - start_position: int, ): """Update opinions with correct order :param cluster_id: :param cl_opinions: a list with cleaned opinions from cl - :param columbia_opinions: a ordered list with cleaned opinions from xml file + :param columbia_opinions: an ordered list with cleaned opinions from xml file :param 
matches: a dict with the matches of each opinion of both lists - :param cluster_has_combined_opinion: True if the cluster has combined opinions - :param start_position: the number from where the order should begin for - non-combined opinions :return: None """ update_failed = False @@ -221,7 +191,7 @@ def update_opinions( # file_pos is the correct index to find the opinion id to update file_opinion = columbia_opinions[file_pos] # the order was calculated using the xml file - file_order = file_opinion.get("order") + start_position + file_order = file_opinion.get("order") cl_opinion = cl_opinions[cl_pos] opinion_id_to_update = cl_opinion.get("id") @@ -229,11 +199,10 @@ def update_opinions( try: # Update opinion order op = Opinion.objects.get(id=opinion_id_to_update) - op.order = file_order + op.ordering_key = file_order op.save() except Opinion.DoesNotExist: - # This should not happen, but it is better to be - # cautious + # This should not happen, but it is better to be cautious logger.warning( f"We can't update opinion, opinion doesn't exist " f"with id: {opinion_id_to_update}" @@ -241,18 +210,6 @@ def update_opinions( update_failed = True break - if cluster_has_combined_opinion and not update_failed: - combined_opinions_cluster = Opinion.objects.filter( - cluster_id=cluster_id, type="010combined" - ).order_by("id") - - # Show combined opinions at beginning - for opinion_order, cluster_op in enumerate( - combined_opinions_cluster - ): - cluster_op.order = opinion_order - cluster_op.save() - if update_failed: # There was an error updating an opinion, rollback all changes for # cluster's opinions @@ -294,12 +251,9 @@ def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: logger.info(f"Processing cluster id: {cluster_id}") try: - ( - xml_path, - cl_cleaned_opinions, - start_position, - cluster_has_combined_opinion, - ) = get_opinions_cleaned_content(cluster_id) + xml_path, cl_cleaned_opinions = get_opinions_cleaned_content( + cluster_id + ) except EmptyOpinionException: logger.warning( f"At least one of the opinions from cluster id: {cluster_id} is empty." 
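For context, update_opinions() applies all of a cluster's ordering updates inside a single transaction and rolls the whole batch back if any one of them fails. A minimal sketch of that Django idiom, separate from the patch itself (the helper name and the dict shape are illustrative assumptions, not code from this series):

    from django.db import transaction

    from cl.search.models import Opinion

    def apply_ordering_all_or_nothing(ordering_updates: dict[int, int]) -> bool:
        """Apply every {opinion_id: ordering_key} update, or none of them."""
        with transaction.atomic():
            for opinion_id, new_key in ordering_updates.items():
                try:
                    op = Opinion.objects.get(id=opinion_id)
                except Opinion.DoesNotExist:
                    # Mark the enclosing atomic block for rollback so that
                    # no partial ordering is ever committed.
                    transaction.set_rollback(True)
                    return False
                op.ordering_key = new_key
                op.save()
        return True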
@@ -321,7 +275,9 @@ def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: fixed_xml_filepath ) except UnicodeDecodeError: - logger.warning(f"Cannot decode file: {fixed_xml_filepath}") + logger.warning( + f"Cannot decode file: {fixed_xml_filepath}, cluster id: {cluster_id}" + ) continue if cl_cleaned_opinions and extracted_columbia_opinions: @@ -336,6 +292,13 @@ def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: if op.get("opinion") ] + if len(columbia_opinions_content) != len(cl_opinions_content): + logger.warning( + f"The number of opinions in cl and the number of opinions in the xml is different, cluster id: {cluster_id}" + ) + continue + + # Try to match content between cl and xml matches = match_opinion_lists( columbia_opinions_content, cl_opinions_content, @@ -360,14 +323,12 @@ def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: # Go to next cluster id continue - # Update all opinions order + # All opinions matched, update all opinions order update_opinions( cluster_id, cl_cleaned_opinions, extracted_columbia_opinions, matches, - cluster_has_combined_opinion, - start_position, ) diff --git a/cl/search/fixtures/functest_opinions.json b/cl/search/fixtures/functest_opinions.json index 6bc9333003..2cc992a633 100644 --- a/cl/search/fixtures/functest_opinions.json +++ b/cl/search/fixtures/functest_opinions.json @@ -65,7 +65,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 10 @@ -136,7 +136,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 11 @@ -187,7 +187,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 2 + "ordering_key": 2 }, "model": "search.opinion", "pk": 12 @@ -258,7 +258,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 12 diff --git a/cl/search/fixtures/opinions-issue-412.json b/cl/search/fixtures/opinions-issue-412.json index 2e429ebecf..0e7fbdc7e6 100644 --- a/cl/search/fixtures/opinions-issue-412.json +++ b/cl/search/fixtures/opinions-issue-412.json @@ -65,7 +65,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 10 @@ -136,7 +136,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 11 diff --git a/cl/search/fixtures/opinions-issue-550.json b/cl/search/fixtures/opinions-issue-550.json index c5f07cea17..3e359b044d 100644 --- a/cl/search/fixtures/opinions-issue-550.json +++ b/cl/search/fixtures/opinions-issue-550.json @@ -65,7 +65,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 10 @@ -88,7 +88,7 @@ "html_lawbox": "", "per_curiam": false, "type": "020lead", - "order": 2 + "ordering_key": 2 }, "model": "search.opinion", "pk": 11 diff --git a/cl/search/fixtures/test_objects_query_counts.json b/cl/search/fixtures/test_objects_query_counts.json index af8b7f3e54..6a3f97da23 100644 --- a/cl/search/fixtures/test_objects_query_counts.json +++ b/cl/search/fixtures/test_objects_query_counts.json @@ -301,7 +301,7 @@ "html_lawbox":"", "per_curiam":false, "type":"020lead", - "order": 1 + "ordering_key": 1 }, "model":"search.opinion", "pk":1 @@ -326,7 +326,7 @@ "html_lawbox":"", 
"per_curiam":false, "type":"010combined", - "order": 1 + "ordering_key": 1 }, "model":"search.opinion", "pk":2 @@ -351,7 +351,7 @@ "html_lawbox":"", "per_curiam":false, "type":"010combined", - "order": 1 + "ordering_key": 1 }, "model":"search.opinion", "pk":3 @@ -375,7 +375,7 @@ "html_lawbox":"", "per_curiam":false, "type":"010combined", - "order": 2 + "ordering_key": 2 }, "model":"search.opinion", "pk":4 @@ -400,7 +400,7 @@ "html_lawbox":"", "per_curiam":false, "type":"010combined", - "order": 3 + "ordering_key": 3 }, "model":"search.opinion", "pk":5 @@ -424,7 +424,7 @@ "html_lawbox":"", "per_curiam":false, "type":"010combined", - "order": 4 + "ordering_key": 4 }, "model":"search.opinion", "pk":6 diff --git a/cl/search/fixtures/test_objects_search.json b/cl/search/fixtures/test_objects_search.json index e9a89f1ea5..542d297d54 100644 --- a/cl/search/fixtures/test_objects_search.json +++ b/cl/search/fixtures/test_objects_search.json @@ -240,7 +240,7 @@ "html_lawbox": "", "per_curiam": false, "type": "020lead", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 1 @@ -263,7 +263,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 2 @@ -286,7 +286,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 3 @@ -309,7 +309,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 2 + "ordering_key": 2 }, "model": "search.opinion", "pk": 4 @@ -332,7 +332,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 3 + "ordering_key": 3 }, "model": "search.opinion", "pk": 5 @@ -355,7 +355,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 4 + "ordering_key": 4 }, "model": "search.opinion", "pk": 6 diff --git a/cl/search/migrations/0032_order_opinions.py b/cl/search/migrations/0032_order_opinions.py index 9c7f3fa5d3..9b4db9fbe7 100644 --- a/cl/search/migrations/0032_order_opinions.py +++ b/cl/search/migrations/0032_order_opinions.py @@ -1,4 +1,4 @@ -# Generated by Django 5.0.7 on 2024-07-25 23:17 +# Generated by Django 5.0.7 on 2024-07-30 18:59 import pgtrigger.compiler import pgtrigger.migrations @@ -25,12 +25,12 @@ class Migration(migrations.Migration): ), migrations.AddField( model_name="opinion", - name="order", + name="ordering_key", field=models.IntegerField(blank=True, null=True), ), migrations.AddField( model_name="opinionevent", - name="order", + name="ordering_key", field=models.IntegerField(blank=True, null=True), ), pgtrigger.migrations.AddTrigger( @@ -38,9 +38,9 @@ class Migration(migrations.Migration): trigger=pgtrigger.compiler.Trigger( name="update_or_delete_snapshot_update", sql=pgtrigger.compiler.UpsertTriggerSql( - condition='WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS 
DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr") OR OLD."order" IS DISTINCT FROM (NEW."order"))', - func='INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL;', - hash="89fec08f03e567ec8ecc7cd1e8ec5f665abf9d3b", + condition='WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr") OR OLD."ordering_key" IS DISTINCT FROM (NEW."ordering_key"))', + func='INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "ordering_key", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."ordering_key", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL;', + 
hash="7137855274503cc2c50a17729f82e150d2b7d872", operation="UPDATE", pgid="pgtrigger_update_or_delete_snapshot_update_67ecd", table="search_opinion", @@ -53,8 +53,8 @@ class Migration(migrations.Migration): trigger=pgtrigger.compiler.Trigger( name="update_or_delete_snapshot_delete", sql=pgtrigger.compiler.UpsertTriggerSql( - func='INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL;', - hash="79bebd7cda3c6ed3bc40f28799cf9c0f2638e2ad", + func='INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "ordering_key", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."ordering_key", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), \'update_or_delete_snapshot\', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL;', + hash="98fb52aa60fd8e89a83f8f7ac77ba5892739fb37", operation="DELETE", pgid="pgtrigger_update_or_delete_snapshot_delete_1f4fd", table="search_opinion", @@ -65,7 +65,8 @@ class Migration(migrations.Migration): migrations.AddConstraint( model_name="opinion", constraint=models.UniqueConstraint( - fields=("cluster_id", "order"), name="unique_opinion_order" + fields=("cluster_id", "ordering_key"), + name="unique_opinion_ordering_key", ), ), ] diff --git a/cl/search/migrations/0032_order_opinions.sql b/cl/search/migrations/0032_order_opinions.sql index 01cac8adf7..e02c150f4d 100644 --- a/cl/search/migrations/0032_order_opinions.sql +++ b/cl/search/migrations/0032_order_opinions.sql @@ -8,13 +8,13 @@ DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "sear -- DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion"; -- --- Add field order to opinion +-- Add field ordering_key to opinion -- -ALTER TABLE "search_opinion" ADD COLUMN "order" integer NULL; +ALTER TABLE "search_opinion" ADD COLUMN "ordering_key" integer NULL; -- --- Add field order to opinionevent +-- Add field ordering_key to opinionevent -- -ALTER TABLE "search_opinionevent" ADD COLUMN "order" integer NULL; +ALTER TABLE "search_opinionevent" ADD COLUMN "ordering_key" integer NULL; -- -- Create trigger 
update_or_delete_snapshot_update on model opinion -- @@ -53,7 +53,7 @@ ALTER TABLE "search_opinionevent" ADD COLUMN "order" integer NULL; RETURN NEW; END IF; END IF; - INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), 'update_or_delete_snapshot', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL; + INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "ordering_key", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."ordering_key", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), 'update_or_delete_snapshot', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL; END; $$ LANGUAGE plpgsql; @@ -62,10 +62,10 @@ ALTER TABLE "search_opinionevent" ADD COLUMN "order" integer NULL; AFTER UPDATE ON "search_opinion" - FOR EACH ROW WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr") OR OLD."order" IS DISTINCT FROM (NEW."order")) + FOR EACH ROW WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR 
OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr") OR OLD."ordering_key" IS DISTINCT FROM (NEW."ordering_key")) EXECUTE PROCEDURE pgtrigger_update_or_delete_snapshot_update_67ecd(); - COMMENT ON TRIGGER pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion" IS '89fec08f03e567ec8ecc7cd1e8ec5f665abf9d3b'; + COMMENT ON TRIGGER pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion" IS '7137855274503cc2c50a17729f82e150d2b7d872'; -- -- Create trigger update_or_delete_snapshot_delete on model opinion @@ -105,7 +105,7 @@ ALTER TABLE "search_opinionevent" ADD COLUMN "order" integer NULL; RETURN NEW; END IF; END IF; - INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "order", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."order", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), 'update_or_delete_snapshot', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL; + INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "ordering_key", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."ordering_key", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), 'update_or_delete_snapshot', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL; END; $$ LANGUAGE plpgsql; @@ -117,10 +117,10 @@ ALTER TABLE "search_opinionevent" ADD COLUMN "order" integer NULL; FOR EACH ROW EXECUTE PROCEDURE pgtrigger_update_or_delete_snapshot_delete_1f4fd(); - COMMENT ON TRIGGER 
pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion" IS '79bebd7cda3c6ed3bc40f28799cf9c0f2638e2ad'; + COMMENT ON TRIGGER pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion" IS '98fb52aa60fd8e89a83f8f7ac77ba5892739fb37'; -- --- Create constraint unique_opinion_order on model opinion +-- Create constraint unique_opinion_ordering_key on model opinion -- -ALTER TABLE "search_opinion" ADD CONSTRAINT "unique_opinion_order" UNIQUE ("cluster_id", "order"); +ALTER TABLE "search_opinion" ADD CONSTRAINT "unique_opinion_ordering_key" UNIQUE ("cluster_id", "ordering_key"); COMMIT; diff --git a/cl/search/models.py b/cl/search/models.py index d6c17ba4f8..a0c9fa7eef 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -3320,12 +3320,13 @@ class Opinion(AbstractDateTimeModel): "sha1", ] ) - order = models.IntegerField(null=True, blank=True) + ordering_key = models.IntegerField(null=True, blank=True) class Meta: constraints = [ models.UniqueConstraint( - fields=["cluster_id", "order"], name="unique_opinion_order" + fields=["cluster_id", "ordering_key"], + name="unique_opinion_ordering_key", ) ] diff --git a/cl/search/tests/tests.py b/cl/search/tests/tests.py index 5c57cf72bd..ca5c384651 100644 --- a/cl/search/tests/tests.py +++ b/cl/search/tests/tests.py @@ -335,14 +335,14 @@ def test_opinions_order(self) -> None: # Test that the value of the order field matches the order in which # they were created - self.assertEqual(op_1.order, 1) - self.assertEqual(op_2.order, 2) - self.assertEqual(op_3.order, 3) + self.assertEqual(op_1.ordering_key, 1) + self.assertEqual(op_2.ordering_key, 2) + self.assertEqual(op_3.ordering_key, 3) # Can we update an opinion using an existing position? with transaction.atomic(): with self.assertRaises(IntegrityError): - op_3.order = 2 + op_3.ordering_key = 2 op_3.save() # Can we create an opinion using an existing position? @@ -354,13 +354,13 @@ def test_opinions_order(self) -> None: # Can we use negative positions? op_4 = OpinionFactory(cluster=cluster, type="Lead Opinion", order=-1) - self.assertEqual(op_4.order, -1) + self.assertEqual(op_4.ordering_key, -1) # Can we order the opinions from a cluster using the field? 
qs = ( cluster.sub_opinions.all() - .order_by("order") - .values_list("order", flat=True) + .order_by("ordering_key") + .values_list("ordering_key", flat=True) ) self.assertEqual(list(qs), [-1, 1, 2, 3]) diff --git a/cl/visualizations/fixtures/api_scotus_map_data.json b/cl/visualizations/fixtures/api_scotus_map_data.json index 46dc2f9856..3a13c3e4e7 100644 --- a/cl/visualizations/fixtures/api_scotus_map_data.json +++ b/cl/visualizations/fixtures/api_scotus_map_data.json @@ -122,7 +122,7 @@ "html_lawbox": "", "per_curiam": false, "type": "020lead", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 1 @@ -145,7 +145,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 2 diff --git a/cl/visualizations/fixtures/scotus_map_data.json b/cl/visualizations/fixtures/scotus_map_data.json index a885e4df54..e0760f42bf 100644 --- a/cl/visualizations/fixtures/scotus_map_data.json +++ b/cl/visualizations/fixtures/scotus_map_data.json @@ -903,7 +903,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 111014 @@ -926,7 +926,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 111113 @@ -949,7 +949,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 111464 @@ -972,7 +972,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 111505 @@ -995,7 +995,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 111924 @@ -1018,7 +1018,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 112331 @@ -1041,7 +1041,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 112646 @@ -1064,7 +1064,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 112779 @@ -1087,7 +1087,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 112874 @@ -1110,7 +1110,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 117967 @@ -1133,7 +1133,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 118377 @@ -1156,7 +1156,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 121168 @@ -1179,7 +1179,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 136984 @@ -1202,7 +1202,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 142900 @@ -1225,7 +1225,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 799990 @@ -1248,7 +1248,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 
1 }, "model": "search.opinion", "pk": 799993 @@ -1271,7 +1271,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "order": 1 + "ordering_key": 1 }, "model": "search.opinion", "pk": 2674862 From 3eeaafe572121c230459f8f9b36637bd21c4392d Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Tue, 30 Jul 2024 18:56:56 -0600 Subject: [PATCH 37/50] feat(opinion_order): update model --- cl/search/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cl/search/models.py b/cl/search/models.py index a0c9fa7eef..2c73363836 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -3355,12 +3355,12 @@ def save( *args: List, **kwargs: Dict, ) -> None: - if self.pk is None and self.order is None: + if self.pk is None and self.ordering_key is None: # Add order in new opinions with no defined order value last_position = Opinion.objects.filter( cluster=self.cluster ).aggregate(models.Max("order"))["order__max"] - self.order = (last_position or 0) + 1 + self.ordering_key = (last_position or 0) + 1 super().save(*args, **kwargs) if index: from cl.search.tasks import add_items_to_solr From ac98d938cef121df8fe7cac8fff12ae58a11a08f Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Tue, 30 Jul 2024 19:56:53 -0600 Subject: [PATCH 38/50] feat(opinion_order): update model --- cl/search/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cl/search/models.py b/cl/search/models.py index 2c73363836..5e755f5062 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -3359,7 +3359,7 @@ def save( # Add order in new opinions with no defined order value last_position = Opinion.objects.filter( cluster=self.cluster - ).aggregate(models.Max("order"))["order__max"] + ).aggregate(models.Max("ordering_key"))["ordering_key__max"] self.ordering_key = (last_position or 0) + 1 super().save(*args, **kwargs) if index: From c22eb04c0de730962727f3b596498211274544a1 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Tue, 30 Jul 2024 19:57:40 -0600 Subject: [PATCH 39/50] feat(opinion_order): update model --- cl/search/models.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/cl/search/models.py b/cl/search/models.py index 5e755f5062..a6b54b9819 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -3355,12 +3355,6 @@ def save( *args: List, **kwargs: Dict, ) -> None: - if self.pk is None and self.ordering_key is None: - # Add order in new opinions with no defined order value - last_position = Opinion.objects.filter( - cluster=self.cluster - ).aggregate(models.Max("ordering_key"))["ordering_key__max"] - self.ordering_key = (last_position or 0) + 1 super().save(*args, **kwargs) if index: from cl.search.tasks import add_items_to_solr From 60744a19e7dca4757ccf692c29deded2cf32185a Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Tue, 30 Jul 2024 20:22:47 -0600 Subject: [PATCH 40/50] feat(opinion_order): update tests --- cl/search/tests/tests.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/cl/search/tests/tests.py b/cl/search/tests/tests.py index ca5c384651..a32ffe8868 100644 --- a/cl/search/tests/tests.py +++ b/cl/search/tests/tests.py @@ -321,16 +321,19 @@ def test_opinions_order(self) -> None: op_1 = OpinionFactory( cluster=cluster, type="Concurrence Opinion", + ordering_key=1, ) op_2 = OpinionFactory( cluster=cluster, type="Dissent", + ordering_key=2, ) op_3 = OpinionFactory( cluster=cluster, type="Lead Opinion", + ordering_key=3, ) # Test that the value of the order field matches the order in which @@ -349,11 +352,13 
@@ def test_opinions_order(self) -> None: with transaction.atomic(): with self.assertRaises(IntegrityError): op_4 = OpinionFactory( - cluster=cluster, type="Lead Opinion", order=1 + cluster=cluster, type="Lead Opinion", ordering_key=1 ) # Can we use negative positions? - op_4 = OpinionFactory(cluster=cluster, type="Lead Opinion", order=-1) + op_4 = OpinionFactory( + cluster=cluster, type="Lead Opinion", ordering_key=-1 + ) self.assertEqual(op_4.ordering_key, -1) # Can we order the opinions from a cluster using the field? @@ -364,6 +369,10 @@ def test_opinions_order(self) -> None: ) self.assertEqual(list(qs), [-1, 1, 2, 3]) + # Order default value is null + op_5 = OpinionFactory(cluster=cluster, type="Lead Opinion") + self.assertEqual(op_5.ordering_key, None) + class DocketValidationTest(TestCase): @classmethod From e8a9c68169fd6d376154590e90c573069438385d Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Wed, 31 Jul 2024 19:14:20 -0600 Subject: [PATCH 41/50] feat(opinion_order): update code for harvard source --- .../commands/update_opinions_order.py | 66 ++++++++++++------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 5c91d0e4b1..dc00b24818 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -5,7 +5,7 @@ from bs4 import BeautifulSoup from django.core.management import BaseCommand from django.db import transaction -from django.db.models import Count +from django.db.models import Count, Q from cl.corpus_importer.import_columbia.columbia_utils import ( extract_columbia_opinions, @@ -142,28 +142,38 @@ def sort_harvard_opinions(start_id: int, end_id: int) -> None: :return: None """ + # The filepath_json_harvard field can only be filled by the harvard importer, + # this helps us confirm that it was imported from a Harvard json + base_filter = Q( + opinions_count__gt=1, source__in=VALID_HARVARD_SOURCES + ) & ~Q(filepath_json_harvard="") + + if start_id: + base_filter &= Q(pk__gte=start_id) + + if end_id: + base_filter &= Q(pk__lte=end_id) + # Get all harvard clusters with more than one opinion clusters = ( OpinionCluster.objects.prefetch_related("sub_opinions") .annotate(opinions_count=Count("sub_opinions")) - .filter(opinions_count__gt=1, source__in=VALID_HARVARD_SOURCES) + .filter(base_filter) .order_by("id") ) - if start_id: - clusters = clusters.filter(pk__gte=start_id) - - if end_id: - clusters = clusters.filter(pk__lte=end_id) - - # cluster_id: 4697264, the combined opinion will go to the last position for oc in clusters: logger.info(f"Processing cluster id: {oc}") - for opinion_order, cluster_op in enumerate( - oc.sub_opinions.exclude(type="010combined").order_by("id"), - start=1, - ): + cluster_opinions = oc.sub_opinions.exclude( + type="010combined" + ).order_by("id") + + if not cluster_opinions: + logger.info(f"No opinions left to order for cluster id: {oc}") + continue + + for opinion_order, cluster_op in enumerate(cluster_opinions, start=1): cluster_op.ordering_key = opinion_order cluster_op.save() @@ -344,27 +354,23 @@ def add_arguments(self, parser): action="store_true", help="Fix harvard opinions order", ) - parser.add_argument( "--process-columbia", action="store_true", help="Fix columbia opinions order", ) - parser.add_argument( "--xml-dir", default="/opt/courtlistener/_columbia", required=False, help="The absolute path to the 
directory with columbia xml files", ) - parser.add_argument( "--start-id", type=int, default=0, help="Start id for a range of clusters (inclusive)", ) - parser.add_argument( "--end-id", type=int, @@ -372,18 +378,32 @@ def add_arguments(self, parser): help="End id for a range of clusters (inclusive)", ) - def handle(self, *args, **options): + def validate_args(self, opts): + """Validate arguments passed to the command - if not options["process_harvard"] and not options["process_columbia"]: - logger.info( + :param opts: dictionary with arguments from the command + :return: True if validations are satisfied, else False + """ + if opts["end_id"] and opts["end_id"] < opts["start_id"]: + logger.error("end-id should be greater than or equal to start-id") + return False + + if not opts["process_harvard"] and not opts["process_columbia"]: + logger.error( "One option required: process-harvard or process-columbia" ) - return + return False - if options["process_harvard"] and options["process_columbia"]: - logger.info( + if opts["process_harvard"] and opts["process_columbia"]: + logger.error( "You can only select one option process-harvard or process-columbia" ) + return False + return True + + def handle(self, *args, **options): + + if not self.validate_args(options): return if options["process_harvard"]: From cb2a1d398d75bc353c153ce569b58f7c3496f2bc Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 2 Aug 2024 15:31:11 -0400 Subject: [PATCH 42/50] feat(search.models): Add validation for ordering key Don't allow negative or 0 as a key Add validation in save Make check explicit --- cl/search/models.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cl/search/models.py b/cl/search/models.py index a6b54b9819..1bde2ebad0 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -3347,6 +3347,11 @@ def get_absolute_url(self) -> str: def clean(self) -> None: if self.type == "": raise ValidationError("'type' is a required field.") + if self.ordering_key is not None and self.ordering_key != "": + if self.ordering_key < 1: + raise ValidationError( + {"ordering_key": "Ordering key cannot be zero or negative"} + ) def save( self, @@ -3355,6 +3360,7 @@ def save( *args: List, **kwargs: Dict, ) -> None: + self.clean() super().save(*args, **kwargs) if index: from cl.search.tasks import add_items_to_solr From 18ba5421355682b1e9813b5322e86cf2061fd4f9 Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 2 Aug 2024 15:32:36 -0400 Subject: [PATCH 43/50] refactor(update_opinion_order): Drop columbia Drop columbia from opinion ordering Refactor the argparse to be more CL-ish using skip-until and limit Update filtering commands --- .../commands/update_opinions_order.py | 427 +++--------------- cl/search/tests/tests.py | 40 +- 2 files changed, 80 insertions(+), 387 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index dc00b24818..ab445a1491 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -1,415 +1,96 @@ +import argparse -import os.path -import re -from typing import Optional - -from bs4 import BeautifulSoup -from django.core.management import BaseCommand -from django.db import transaction from django.db.models import Count, Q -from cl.corpus_importer.import_columbia.columbia_utils import ( extract_columbia_opinions, map_opinion_types, process_extracted_opinions, read_xml_to_soup, ) -from cl.corpus_importer.utils import
EmptyOpinionException, match_opinion_lists -from cl.lib.command_utils import logger +from cl.lib.command_utils import VerboseCommand, logger from cl.search.models import SOURCES, Opinion, OpinionCluster -VALID_COLUMBIA_SOURCES = [ - key - for key in dict(SOURCES.NAMES).keys() - if SOURCES.COLUMBIA_ARCHIVE in key -] - -VALID_HARVARD_SOURCES = [ - key for key in dict(SOURCES.NAMES).keys() if SOURCES.HARVARD_CASELAW in key -] - - -def clean_opinion_content(text: str) -> str: - """Clean opinion content - - :param text: text to clean - :return: cleaned text - """ - - # Replace line breaks with spaces and get rid of double spaces - text = re.sub(" +", " ", " ".join(text.split("\n"))).strip() - - # Remove non-alphanumeric and non-whitespace characters from lowercase text - return re.sub(r"[^a-zA-Z0-9 ]", "", text.lower()) - - -def get_opinions_cleaned_content( - cluster_id, -) -> tuple[Optional[str], list[dict]]: - """Get cleaned opinions content for a cluster object - - :param cluster_id: Cluster ID for a set of opinions - :return: (xml path, list of extracted opinions) - """ - cl_cleaned_opinions = [] - # by default the opinions are ordered by pk - opinions_from_cluster = ( - Opinion.objects.filter(cluster_id=cluster_id) - .order_by("id") - .exclude(type="010combined") - ) - - xml_path = None - for i, op in enumerate(opinions_from_cluster): - if op.local_path and not xml_path: - xml_path = str(op.local_path) +def sort_harvard_opinions(options) -> None: + """Sort harvard opinions - content = None - - # We can only use columbia's content to infer the ordering - if len(op.html_columbia) > 1: - content = op.html_columbia - - if not content: - raise EmptyOpinionException( - "There is no content in html_columbia field" - ) - - soup = BeautifulSoup(content, features="html.parser") - opinion_text = soup.getText(separator=" ", strip=True) - prep_text = clean_opinion_content(opinion_text) - - cl_cleaned_opinions.append( - { - "id": op.id, - "byline": op.author_str, - "type": op.type, - "opinion": prep_text, - "order": i, - } - ) - - return ( - xml_path, - cl_cleaned_opinions, - ) - - -def fix_filepath(filepath: str) -> str: - """Fix filepath from file field - - :param filepath: path from file field - :return: new file path - """ - if "/home/mlissner/columbia/opinions/" in filepath: - filepath = filepath.replace("/home/mlissner/columbia/opinions/", "") - return filepath - - -def get_opinions_columbia_file(xml_filepath: str) -> list: - """Get opinions from columbia xml file and convert it into dict - - :param xml_filepath: path of xml file - :return: dict with data - """ - soup = read_xml_to_soup(xml_filepath) - - # Find the outer tag to have all elements inside - outer_opinion = soup.find("opinion") - - extracted_opinions = extract_columbia_opinions(outer_opinion) - opinions = process_extracted_opinions(extracted_opinions) - map_opinion_types(opinions) - - for op in opinions: - opinion_content = op.get("opinion") - soup = BeautifulSoup(opinion_content, "html.parser") - opinion_text = soup.getText(separator=" ", strip=True) - cleaned_opinion = clean_opinion_content(opinion_text) - op["opinion"] = cleaned_opinion - - return opinions - - -def sort_harvard_opinions(start_id: int, end_id: int) -> None: - """We assume that harvard data is already ordered, we just need to fill the order - field in each opinion + We assume that harvard data is already ordered, we just need to fill + the order field in each opinion The harvard importer created the opinions in order of appearance in the file - :param start_id: skip 
any id lower than this value - :param end_id: skip any id greater than this value + :param options: dict of command options; may include skip_until and limit :return: None """ - # The filepath_json_harvard field can only be filled by the harvard importer, - # this helps us confirm that it was imported from a Harvard json - base_filter = Q( - opinions_count__gt=1, source__in=VALID_HARVARD_SOURCES - ) & ~Q(filepath_json_harvard="") - - if start_id: - base_filter &= Q(pk__gte=start_id) - - if end_id: - base_filter &= Q(pk__lte=end_id) + skip_until = options.get("skip_until", None) + limit = options.get("limit", None) - # Get all harvard clusters with more than one opinion - clusters = ( - OpinionCluster.objects.prefetch_related("sub_opinions") - .annotate(opinions_count=Count("sub_opinions")) - .filter(base_filter) - .order_by("id") - ) + # Keep this as a Q expression so skip_until can be combined in below and + # the whole filter passed to .filter() after the opinions_count annotation + base_filter = Q(opinions_count__gt=1) & ~Q(filepath_json_harvard="") - for oc in clusters: - logger.info(f"Processing cluster id: {oc}") - - cluster_opinions = oc.sub_opinions.exclude( - type="010combined" - ).order_by("id") - - if not cluster_opinions: - logger.info(f"No opinions left to order for cluster id: {oc}") - continue - - for opinion_order, cluster_op in enumerate(cluster_opinions, start=1): - cluster_op.ordering_key = opinion_order - cluster_op.save() - - logger.info(msg=f"Opinions reordered for cluster id: {oc.id}") - - -def update_opinions( - cluster_id: int, - cl_opinions: list, - columbia_opinions: list, - matches: dict, -): - """Update opinions with correct order + if skip_until: + base_filter &= Q(pk__gte=skip_until) - :param cluster_id: - :param cl_opinions: a list with cleaned opinions from cl - :param columbia_opinions: an ordered list with cleaned opinions from xml file - :param matches: a dict with the matches of each opinion of both lists - :return: None - """ - update_failed = False - - with transaction.atomic(): - for file_pos, cl_pos in matches.items(): - # file_pos is the correct index to find the opinion id to update - file_opinion = columbia_opinions[file_pos] - # the order was calculated using the xml file - file_order = file_opinion.get("order") - cl_opinion = cl_opinions[cl_pos] - opinion_id_to_update = cl_opinion.get("id") - - if opinion_id_to_update: - try: - # Update opinion order - op = Opinion.objects.get(id=opinion_id_to_update) - op.ordering_key = file_order - op.save() - except Opinion.DoesNotExist: - # This should not happen, but it is better to be cautious - logger.warning( - f"We can't update opinion, opinion doesn't exist " - f"with id: {opinion_id_to_update}" - ) - update_failed = True - break - - if update_failed: - # There was an error updating an opinion, rollback all changes for - # cluster's opinions - logger.warning( - f"There was an error updating the order of opinions of the " - f"cluster id: {cluster_id}" - ) - transaction.set_rollback(True) - else: - logger.info( - f"The order of opinions was updated, cluster id: {cluster_id}" - ) - - -def sort_columbia_opinions(start_id: int, end_id: int, xml_dir: str) -> None: - """Update opinion ordering for columbia clusters - - :param start_id: skip any id lower than this value - :param end_id: skip any id greater than this value - :param xml_dir: absolute path to the directory with columbia xml files - :return: None - """ - - # Get all columbia cluster ids with more than one opinion - clusters = ( + harvard_clusters = ( OpinionCluster.objects.annotate(opinions_count=Count("sub_opinions")) - .filter(opinions_count__gt=1,
source__in=VALID_COLUMBIA_SOURCES) + .filter(base_filter) .order_by("id") - .values_list("id", flat=True) ) + if limit: + harvard_clusters = harvard_clusters[:limit] - if start_id: - clusters = filter(lambda x: x >= start_id, clusters) - - if end_id: - clusters = filter(lambda x: x <= end_id, clusters) - - for cluster_id in clusters: - logger.info(f"Processing cluster id: {cluster_id}") - - try: - xml_path, cl_cleaned_opinions = get_opinions_cleaned_content( - cluster_id - ) - except EmptyOpinionException: - logger.warning( - f"At least one of the opinions from cluster id: {cluster_id} is empty." + for cluster in harvard_clusters: + logger.info(f"Processing cluster id: {cluster}") + sub_opinions = cluster.sub_opinions.exclude( + type=Opinion.COMBINED, + ).order_by("id") + if not sub_opinions: + logger.info( + f"No sub_opinions left to order for cluster id: {cluster}" ) continue + for opinion_order, cluster_op in enumerate(sub_opinions, start=1): + cluster_op.ordering_key = opinion_order + cluster_op.save() + logger.info(msg=f"Opinions reordered for cluster id: {cluster.id}") - extracted_columbia_opinions = None - if xml_path: - fixed_xml_filepath = os.path.join(xml_dir, fix_filepath(xml_path)) - - if not os.path.exists(fixed_xml_filepath): - logger.warning( - f"Xml file not found in {fixed_xml_filepath}, cluster id: {cluster_id}" - ) - continue - - try: - extracted_columbia_opinions = get_opinions_columbia_file( - fixed_xml_filepath - ) - except UnicodeDecodeError: - logger.warning( - f"Cannot decode file: {fixed_xml_filepath}, cluster id: {cluster_id}" - ) - continue - if cl_cleaned_opinions and extracted_columbia_opinions: - columbia_opinions_content = [ - op.get("opinion") - for op in extracted_columbia_opinions - if op.get("opinion") - ] - cl_opinions_content = [ - op.get("opinion") - for op in cl_cleaned_opinions - if op.get("opinion") - ] +class Command(VerboseCommand): + help = "Add ordering Key for sub opinions" - if len(columbia_opinions_content) != len(cl_opinions_content): - logger.warning( - f"The number of opinions in cl and the number of opinions in the xml is different, cluster id: {cluster_id}" - ) - continue + def __init__(self, *args, **kwargs): + super(Command, self).__init__(*args, **kwargs) - # Try to match content between cl and xml - matches = match_opinion_lists( - columbia_opinions_content, - cl_opinions_content, + def valid_actions(self, s): + if s.lower() not in self.VALID_ACTIONS: + raise argparse.ArgumentTypeError( + "Unable to parse action. 
Valid actions are: %s" % (", ".join(self.VALID_ACTIONS.keys())) ) return self.VALID_ACTIONS[s.lower()] def add_arguments(self, parser): parser.add_argument( - "--process-harvard", - action="store_true", - help="Fix harvard opinions order", - ) - parser.add_argument( - "--process-columbia", - action="store_true", - help="Fix columbia opinions order", - ) - parser.add_argument( - "--xml-dir", - default="/opt/courtlistener/_columbia", + "--skip-until", + help="Specific cluster id to skip until", + type=int, required=False, - help="The absolute path to the directory with columbia xml files", ) parser.add_argument( - "--start-id", + "--limit", type=int, - default=0, - help="Start id for a range of clusters (inclusive)", + help="Number of clusters to sort", + required=False, ) + parser.add_argument( - "--end-id", - type=int, - default=0, - help="End id for a range of clusters (inclusive)", + "--action", + type=self.valid_actions, + required=True, + help="The action you wish to take.
Valid choices are: %s" % (", ".join(self.VALID_ACTIONS.keys())), ) - def validate_args(self, opts): - """Validate arguments passed to the command - - :param opts: dictionary with arguments from the command - :return: true if validations are satisfied else false - """ - if opts["end_id"] > opts["start_id"]: - logger.error("end-id should be greater or equal than start-id") - return False - - if not opts["process_harvard"] and not opts["process_columbia"]: - logger.error( - "One option required: process-harvard or process-columbia" - ) - return False - - if opts["process_harvard"] and opts["process_columbia"]: - logger.error( - "You can only select one option process-harvard or process-columbia" - ) - return False - return True - def handle(self, *args, **options): + super().handle(*args, **options) + options["action"](options) - if not self.validate_args(options): - return - - if options["process_harvard"]: - sort_harvard_opinions(options["start_id"], options["end_id"]) - - if options["process_columbia"]: - sort_columbia_opinions( - options["start_id"], options["end_id"], options["xml_dir"] - ) + VALID_ACTIONS = {"sort-harvard": sort_harvard_opinions}
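For reference, the "--action" flag above leans on argparse's type= hook doing double duty: the callable both validates the raw string and converts it into the function to run, so handle() can simply call options["action"](options). A minimal standalone sketch of the pattern (illustrative names, not the actual command module):

    import argparse

    def sort_harvard_opinions(options: dict) -> None:
        print("sorting harvard opinions with", options)

    VALID_ACTIONS = {"sort-harvard": sort_harvard_opinions}

    def valid_action(s: str):
        # validate, then convert the flag value into a callable
        if s.lower() not in VALID_ACTIONS:
            raise argparse.ArgumentTypeError(
                "Unable to parse action. Valid actions are: %s"
                % ", ".join(VALID_ACTIONS)
            )
        return VALID_ACTIONS[s.lower()]

    parser = argparse.ArgumentParser()
    parser.add_argument("--action", type=valid_action, required=True)
    options = vars(parser.parse_args(["--action", "sort-harvard"]))
    options["action"](options)  # dispatches to sort_harvard_opinions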
diff --git a/cl/search/tests/tests.py b/cl/search/tests/tests.py index ff7ed177b7..b8f85f719d 100644 --- a/cl/search/tests/tests.py +++ b/cl/search/tests/tests.py @@ -321,19 +321,17 @@ def test_opinions_order(self) -> None: # Create three opinions op_1 = OpinionFactory( cluster=cluster, - type="Concurrence Opinion", + type=Opinion.LEAD, ordering_key=1, ) - op_2 = OpinionFactory( cluster=cluster, - type="Dissent", + type=Opinion.CONCURRENCE, ordering_key=2, ) - op_3 = OpinionFactory( cluster=cluster, - type="Lead Opinion", + type=Opinion.DISSENT, ordering_key=3, ) @@ -343,24 +341,38 @@ def test_opinions_order(self) -> None: self.assertEqual(op_2.ordering_key, 2) self.assertEqual(op_3.ordering_key, 3) + # Can we swap orders? + op_1.ordering_key = None + op_1.save() + + op_2.ordering_key = 1 + op_2.save() + + op_1.ordering_key = 2 + op_1.save() + # Can we update an opinion using an existing position? with transaction.atomic(): with self.assertRaises(IntegrityError): op_3.ordering_key = 2 op_3.save() - # Can we create an opinion using an existing position? + # Validate unique cluster/order with transaction.atomic(): with self.assertRaises(IntegrityError): - op_4 = OpinionFactory( - cluster=cluster, type="Lead Opinion", ordering_key=1 + op = OpinionFactory( + cluster=cluster, + type=Opinion.ADDENDUM, ) + op.ordering_key = 3 + op.save() - # Can we use negative positions? - op_4 = OpinionFactory( - cluster=cluster, type="Lead Opinion", ordering_key=-1 - ) - self.assertEqual(op_4.ordering_key, -1) + # Are negative positions rejected? + with transaction.atomic(): + with self.assertRaises(ValidationError): + op = OpinionFactory(cluster=cluster, type=Opinion.LEAD) + op.ordering_key = -1 + op.save() # Can we order the opinions from a cluster using the field? qs = ( @@ -368,7 +380,7 @@ def test_opinions_order(self) -> None: .order_by("ordering_key") .values_list("ordering_key", flat=True) ) - self.assertEqual(list(qs), [-1, 1, 2, 3]) + self.assertEqual(list(qs), [1, 2, 3, None]) # Order default value is null op_5 = OpinionFactory(cluster=cluster, type="Lead Opinion") From aae5840b7e43344fa1d6ce4f357f619805b537be Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 2 Aug 2024 15:45:59 -0400 Subject: [PATCH 44/50] tests(fixtures): Set fixture values to null None of the fixtures are ordered. I also removed the opinions-issue-550 fixture: I couldn't find where it is used or why it exists, so there is no reason to keep it. --- cl/search/fixtures/functest_opinions.json | 8 +- cl/search/fixtures/opinions-issue-412.json | 4 +- cl/search/fixtures/opinions-issue-550.json | 96 ------------------- .../fixtures/test_objects_query_counts.json | 12 +-- cl/search/fixtures/test_objects_search.json | 12 +-- .../fixtures/api_scotus_map_data.json | 4 +- .../fixtures/scotus_map_data.json | 34 +++---- 7 files changed, 37 insertions(+), 133 deletions(-) delete mode 100644 cl/search/fixtures/opinions-issue-550.json diff --git a/cl/search/fixtures/functest_opinions.json b/cl/search/fixtures/functest_opinions.json index 2cc992a633..f1e6f2da44 100644 --- a/cl/search/fixtures/functest_opinions.json +++ b/cl/search/fixtures/functest_opinions.json @@ -65,7 +65,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 10 @@ -136,7 +136,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 11 @@ -187,7 +187,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 2 + "ordering_key": null }, "model": "search.opinion", "pk": 12 @@ -258,7 +258,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 12 diff --git a/cl/search/fixtures/opinions-issue-412.json b/cl/search/fixtures/opinions-issue-412.json index 0e7fbdc7e6..fa7d716ccb 100644 --- a/cl/search/fixtures/opinions-issue-412.json +++ b/cl/search/fixtures/opinions-issue-412.json @@ -65,7 +65,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 10 @@ -136,7 +136,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 11 diff --git a/cl/search/fixtures/opinions-issue-550.json b/cl/search/fixtures/opinions-issue-550.json deleted file mode 100644 index 3e359b044d..0000000000 --- a/cl/search/fixtures/opinions-issue-550.json +++ /dev/null @@ -1,96 +0,0 @@ -[ - { - "fields": { - "date_blocked": null, - "date_reargument_denied": "2015-08-15", - "court": "ca1", - "date_reargued": "2015-08-15", - "case_name_full": "Voutila v. Bonvini", - "date_argued": "2015-08-15", - "date_modified": "2015-08-15T13:55:03.669Z", - "case_name": "case name docket 10", - "date_created": "2015-08-15T13:55:03.669Z", - "case_name_short": "short name for Voutila v. Bonvini", - "docket_number": "1337-np", - "slug": "case-name", - "source": 0, - "blocked": false - }, - "model": "search.docket", - "pk": 10 - }, - { - "fields": { - "date_blocked": null, - "case_name_full": "Reference to Voutila v. Bonvini", - "case_name_short": "Case name in short for Voutila v. 
Bonvini", - "blocked": false, - "syllabus": "some rando syllabus", - "date_filed": "2015-12-20", - "procedural_history": "some rando history", - "source": "C", - "panel": [], - "judges": "", - "case_name": "Voutila v. Bonvini", - "attorneys": "a bunch of crooks!", - "slug": "case-name-cluster", - "posture": "", - "date_modified": "2015-08-15T14:10:56.801Z", - "precedential_status": "Published", - "citation_count": 1, - "scdb_id": "", - "nature_of_suit": "", - "non_participating_judges": [], - "date_created": "2015-08-15T14:10:56.801Z", - "docket": 10 - }, - "model": "search.opinioncluster", - "pk": 10 - }, - { - "fields": { - "sha1": "asdfasdfasdfasdfasdfasddf", - "date_modified": "2015-12-20T14:20:00.801Z", - "extracted_by_ocr": false, - "author": null, - "plain_text": "This is a combined opinion.", - "html": "", - "download_url": null, - "cluster": 10, - "html_with_citations": "", - "local_path": "doc/2005/05/04/state_of_indiana_v._charles_barker.doc", - "html_columbia": "", - "joined_by": [], - "date_created": "2015-08-15T14:10:56.801Z", - "html_lawbox": "", - "per_curiam": false, - "type": "010combined", - "ordering_key": 1 - }, - "model": "search.opinion", - "pk": 10 - }, - { - "fields": { - "sha1": "asdfasdfasdfasdfasdfasddf", - "date_modified": "2015-12-20T14:20:00.801Z", - "extracted_by_ocr": false, - "author": null, - "plain_text": "This is a lead opinion too.", - "html": "", - "download_url": null, - "cluster": 10, - "html_with_citations": "", - "local_path": "txt/2015/12/28/opinion_text.txt", - "html_columbia": "", - "joined_by": [], - "date_created": "2015-08-15T14:10:56.801Z", - "html_lawbox": "", - "per_curiam": false, - "type": "020lead", - "ordering_key": 2 - }, - "model": "search.opinion", - "pk": 11 - } -] diff --git a/cl/search/fixtures/test_objects_query_counts.json b/cl/search/fixtures/test_objects_query_counts.json index 6a3f97da23..ca69a08ccc 100644 --- a/cl/search/fixtures/test_objects_query_counts.json +++ b/cl/search/fixtures/test_objects_query_counts.json @@ -301,7 +301,7 @@ "html_lawbox":"", "per_curiam":false, "type":"020lead", - "ordering_key": 1 + "ordering_key": null }, "model":"search.opinion", "pk":1 @@ -326,7 +326,7 @@ "html_lawbox":"", "per_curiam":false, "type":"010combined", - "ordering_key": 1 + "ordering_key": null }, "model":"search.opinion", "pk":2 @@ -351,7 +351,7 @@ "html_lawbox":"", "per_curiam":false, "type":"010combined", - "ordering_key": 1 + "ordering_key": null }, "model":"search.opinion", "pk":3 @@ -375,7 +375,7 @@ "html_lawbox":"", "per_curiam":false, "type":"010combined", - "ordering_key": 2 + "ordering_key": null }, "model":"search.opinion", "pk":4 @@ -400,7 +400,7 @@ "html_lawbox":"", "per_curiam":false, "type":"010combined", - "ordering_key": 3 + "ordering_key": null }, "model":"search.opinion", "pk":5 @@ -424,7 +424,7 @@ "html_lawbox":"", "per_curiam":false, "type":"010combined", - "ordering_key": 4 + "ordering_key": null }, "model":"search.opinion", "pk":6 diff --git a/cl/search/fixtures/test_objects_search.json b/cl/search/fixtures/test_objects_search.json index 542d297d54..66c9915581 100644 --- a/cl/search/fixtures/test_objects_search.json +++ b/cl/search/fixtures/test_objects_search.json @@ -240,7 +240,7 @@ "html_lawbox": "", "per_curiam": false, "type": "020lead", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 1 @@ -263,7 +263,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 2 @@ -286,7 +286,7 @@ 
"html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 3 @@ -309,7 +309,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 2 + "ordering_key": null }, "model": "search.opinion", "pk": 4 @@ -332,7 +332,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 3 + "ordering_key": null }, "model": "search.opinion", "pk": 5 @@ -355,7 +355,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 4 + "ordering_key": null }, "model": "search.opinion", "pk": 6 diff --git a/cl/visualizations/fixtures/api_scotus_map_data.json b/cl/visualizations/fixtures/api_scotus_map_data.json index 3a13c3e4e7..3bce46e664 100644 --- a/cl/visualizations/fixtures/api_scotus_map_data.json +++ b/cl/visualizations/fixtures/api_scotus_map_data.json @@ -122,7 +122,7 @@ "html_lawbox": "", "per_curiam": false, "type": "020lead", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 1 @@ -145,7 +145,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 2 diff --git a/cl/visualizations/fixtures/scotus_map_data.json b/cl/visualizations/fixtures/scotus_map_data.json index e0760f42bf..bf97605525 100644 --- a/cl/visualizations/fixtures/scotus_map_data.json +++ b/cl/visualizations/fixtures/scotus_map_data.json @@ -903,7 +903,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 111014 @@ -926,7 +926,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 111113 @@ -949,7 +949,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 111464 @@ -972,7 +972,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 111505 @@ -995,7 +995,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 111924 @@ -1018,7 +1018,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 112331 @@ -1041,7 +1041,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 112646 @@ -1064,7 +1064,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 112779 @@ -1087,7 +1087,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 112874 @@ -1110,7 +1110,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 117967 @@ -1133,7 +1133,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 118377 @@ -1156,7 +1156,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 121168 @@ -1179,7 +1179,7 @@ 
"html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 136984 @@ -1202,7 +1202,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 142900 @@ -1225,7 +1225,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 799990 @@ -1248,7 +1248,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 799993 @@ -1271,7 +1271,7 @@ "html_lawbox": "", "per_curiam": false, "type": "010combined", - "ordering_key": 1 + "ordering_key": null }, "model": "search.opinion", "pk": 2674862 From 611a174c61f741028efd511cbdb0f29c9c24d035 Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 2 Aug 2024 15:49:06 -0400 Subject: [PATCH 45/50] refactor(columbia_utils): remove ordering from utils columbia Unwind the rest of the columbia order --- cl/corpus_importer/import_columbia/columbia_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cl/corpus_importer/import_columbia/columbia_utils.py b/cl/corpus_importer/import_columbia/columbia_utils.py index dec91fc1da..57bac9a66d 100644 --- a/cl/corpus_importer/import_columbia/columbia_utils.py +++ b/cl/corpus_importer/import_columbia/columbia_utils.py @@ -224,7 +224,6 @@ def extract_columbia_opinions( """ opinions: list = [] floating_content = [] - order = 1 # The opinion count starts from 1 # We iterate all content to look for all possible opinions for i, content in enumerate(outer_opinion): # type: int, Tag @@ -363,7 +362,6 @@ def process_extracted_opinions(extracted_opinions: list) -> list: opinions: list = [] authorless_content = [] - order = 1 # The opinion count starts from 1 for i, found_content in enumerate(extracted_opinions, start=1): byline = found_content.get("byline") From 7d86408acd6338485fe2bec16ec5200155842bd1 Mon Sep 17 00:00:00 2001 From: William Palin Date: Fri, 2 Aug 2024 15:50:47 -0400 Subject: [PATCH 46/50] refactor(columbia_utils): Reset order utils - line --- cl/corpus_importer/import_columbia/columbia_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cl/corpus_importer/import_columbia/columbia_utils.py b/cl/corpus_importer/import_columbia/columbia_utils.py index 57bac9a66d..b1a62cfd6c 100644 --- a/cl/corpus_importer/import_columbia/columbia_utils.py +++ b/cl/corpus_importer/import_columbia/columbia_utils.py @@ -224,6 +224,7 @@ def extract_columbia_opinions( """ opinions: list = [] floating_content = [] + order = 0 # We iterate all content to look for all possible opinions for i, content in enumerate(outer_opinion): # type: int, Tag @@ -362,6 +363,7 @@ def process_extracted_opinions(extracted_opinions: list) -> list: opinions: list = [] authorless_content = [] + order = 0 for i, found_content in enumerate(extracted_opinions, start=1): byline = found_content.get("byline") From 57182e80f4feb83be70cbdb5bdd90c1968634128 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 5 Aug 2024 14:07:40 -0600 Subject: [PATCH 47/50] feat(opinion_order): update clean method in Opinion model update command to order harvard opinions --- .../commands/update_opinions_order.py | 65 ++++++++++++------- cl/search/models.py | 9 ++- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py 
diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index ab445a1491..592848c3e4 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -1,7 +1,11 @@ -from django.db.models import Count, Q +import argparse +import time + +from django.db import transaction +from django.db.models import Count from cl.lib.command_utils import VerboseCommand, logger -from cl.search.models import SOURCES, Opinion, OpinionCluster +from cl.search.models import Opinion, OpinionCluster def sort_harvard_opinions(options) -> None: @@ -12,44 +16,51 @@ def sort_harvard_opinions(options) -> None: The harvard importer created the opinions in order of appearance in the file - :param options: dict of arguments skip until and limit if given + :param options: dict of arguments passed to the command :return: None """ skip_until = options.get("skip_until", None) limit = options.get("limit", None) - base_filter = ( + # The filepath_json_harvard field can only be filled by the Harvard importer, + # this helps us confirm that it was imported from a Harvard JSON + harvard_clusters = ( OpinionCluster.objects.exclude(filepath_json_harvard="") + .prefetch_related("sub_opinions") .annotate(opinions_count=Count("sub_opinions")) .filter(opinions_count__gt=1) + .order_by("id") ) - if skip_until: - base_filter &= Q(pk__gte=skip_until) + harvard_clusters = harvard_clusters.filter(pk__gte=skip_until) - harvard_clusters = ( - OpinionCluster.objects.annotate(opinions_count=Count("sub_opinions")) - .filter(base_filter) - .order_by("id") - ) if limit: harvard_clusters = harvard_clusters[:limit] for cluster in harvard_clusters: logger.info(f"Processing cluster id: {cluster.id}") - sub_opinions = cluster.sub_opinions.exclude( - type=Opinion.COMBINED, - ).order_by("id") - if not sub_opinions: - logger.info( - f"No sub_opinions left to order for cluster id: {cluster.id}" - ) - continue - for opinion_order, cluster_op in enumerate(sub_opinions, start=1): - cluster_op.ordering_key = opinion_order - cluster_op.save() - logger.info(msg=f"Opinions reordered for cluster id: {cluster.id}") + opinion_order = 1 + any_update = False + with transaction.atomic(): + # We need to make sure they are ordered by id + for cluster_op in cluster.sub_opinions.all().order_by("id"): + if cluster_op.type == Opinion.COMBINED: + continue + cluster_op.ordering_key = opinion_order + cluster_op.save() + opinion_order = opinion_order + 1 + any_update = True + if not any_update: + # We want to know if we found anything unexpected, for example + # a cluster with only combined opinions + logger.info( + f"No sub_opinions updated for cluster id: {cluster.id}" + ) + continue + logger.info(msg=f"Opinions reordered for cluster id: {cluster.id}") + # Wait between each processed cluster to avoid issues with Elasticsearch + time.sleep(options["delay"])
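Two details of this rewrite are worth calling out. Each cluster's updates run inside transaction.atomic(), so a cluster is either fully renumbered or left untouched if anything fails (relevant given the unique (cluster_id, ordering_key) constraint), and a configurable sleep throttles the backfill so downstream consumers such as the Elasticsearch indexer can keep up. The pattern reduced to a sketch (not the command itself; the real loop also skips combined opinions):

    import time

    from django.db import transaction

    def renumber(clusters, delay: float = 0.2) -> None:
        for cluster in clusters:
            with transaction.atomic():
                # all ordering_key writes for one cluster commit together
                for i, op in enumerate(
                    cluster.sub_opinions.all().order_by("id"), start=1
                ):
                    op.ordering_key = i
                    op.save()
            time.sleep(delay)  # throttle between clusters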
 class Command(VerboseCommand): @@ -80,7 +91,6 @@ def add_arguments(self, parser): help="Maximum number of clusters to sort", required=False, ) - parser.add_argument( "--action", type=self.valid_actions, required=True, help="The action you wish to take. Valid choices are: %s" % (", ".join(self.VALID_ACTIONS.keys())), ) + parser.add_argument( + "--delay", + type=float, + default=0.2, + help="How long to wait between processed clusters (in seconds; " + "floats are allowed).", + ) def handle(self, *args, **options): super().handle(*args, **options) options["action"](options) diff --git a/cl/search/models.py b/cl/search/models.py index 1bde2ebad0..e2cdedc905 100644 --- a/cl/search/models.py +++ b/cl/search/models.py @@ -3347,11 +3347,10 @@ def get_absolute_url(self) -> str: def clean(self) -> None: if self.type == "": raise ValidationError("'type' is a required field.") - if self.ordering_key is not None and self.ordering_key != "": - if self.ordering_key < 1: - raise ValidationError( - {"ordering_key": "Ordering key cannot be zero or negative"} - ) + if isinstance(self.ordering_key, int) and self.ordering_key < 1: + raise ValidationError( + {"ordering_key": "Ordering key cannot be zero or negative"} + ) def save( self, From d1e2e004f24f326d96b0d0754965de72b28b8590 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 5 Aug 2024 14:20:41 -0600 Subject: [PATCH 48/50] feat(opinion_order): rename migrations --- .../{0032_order_opinions.py => 0033_order_opinions.py} | 4 ++-- .../{0032_order_opinions.sql => 0033_order_opinions.sql} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename cl/search/migrations/{0032_order_opinions.py => 0033_order_opinions.py} (97%) rename cl/search/migrations/{0032_order_opinions.sql => 0033_order_opinions.sql} (100%) diff --git a/cl/search/migrations/0032_order_opinions.py b/cl/search/migrations/0033_order_opinions.py similarity index 97% rename from cl/search/migrations/0032_order_opinions.py rename to cl/search/migrations/0033_order_opinions.py index 9b4db9fbe7..ce5ea91c13 100644 --- a/cl/search/migrations/0032_order_opinions.py +++ b/cl/search/migrations/0033_order_opinions.py @@ -1,4 +1,4 @@ -# Generated by Django 5.0.7 on 2024-07-30 18:59 +# Generated by Django 5.0.7 on 2024-08-05 20:19 import pgtrigger.compiler import pgtrigger.migrations @@ -11,7 +11,7 @@ class Migration(migrations.Migration): "people_db", "0016_remove_abarating_update_or_delete_snapshot_update_and_more", ), - ("search", "0031_alter_opinion_type_alter_opinioncluster_source_noop"), + ("search", "0032_update_docket_numbering_fields"), ] operations = [ diff --git a/cl/search/migrations/0032_order_opinions.sql b/cl/search/migrations/0033_order_opinions.sql similarity index 100% rename from cl/search/migrations/0032_order_opinions.sql rename to cl/search/migrations/0033_order_opinions.sql From 5958f54bd5a23b21a5bbd28682e50b98984492ad Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Mon, 5 Aug 2024 15:03:17 -0600 Subject: [PATCH 49/50] feat(opinion_order): add customers sql; update replica sql --- cl/search/migrations/0033_order_opinions.sql | 112 ------------------- .../0033_order_opinions_customers.sql | 10 ++ 2 files changed, 10 insertions(+), 112 deletions(-) create mode 100644 cl/search/migrations/0033_order_opinions_customers.sql diff --git a/cl/search/migrations/0033_order_opinions.sql b/cl/search/migrations/0033_order_opinions.sql index e02c150f4d..e2e07aee39 100644 --- a/cl/search/migrations/0033_order_opinions.sql +++ b/cl/search/migrations/0033_order_opinions.sql @@ -1,13 +1,5 @@ BEGIN; -- --- Remove trigger update_or_delete_snapshot_delete from model opinion --- -DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion"; --- --- Remove trigger update_or_delete_snapshot_update from model opinion --- -DROP TRIGGER IF EXISTS 
pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion"; --- -- Add field ordering_key to opinion -- ALTER TABLE "search_opinion" ADD COLUMN "ordering_key" integer NULL; @@ -15,110 +7,6 @@ ALTER TABLE "search_opinion" ADD COLUMN "ordering_key" integer NULL; -- Add field ordering_key to opinionevent -- ALTER TABLE "search_opinionevent" ADD COLUMN "ordering_key" integer NULL; --- --- Create trigger update_or_delete_snapshot_update on model opinion --- - - CREATE OR REPLACE FUNCTION "public"._pgtrigger_should_ignore( - trigger_name NAME - ) - RETURNS BOOLEAN AS $$ - DECLARE - _pgtrigger_ignore TEXT[]; - _result BOOLEAN; - BEGIN - BEGIN - SELECT INTO _pgtrigger_ignore - CURRENT_SETTING('pgtrigger.ignore'); - EXCEPTION WHEN OTHERS THEN - END; - IF _pgtrigger_ignore IS NOT NULL THEN - SELECT trigger_name = ANY(_pgtrigger_ignore) - INTO _result; - RETURN _result; - ELSE - RETURN FALSE; - END IF; - END; - $$ LANGUAGE plpgsql; - - CREATE OR REPLACE FUNCTION pgtrigger_update_or_delete_snapshot_update_67ecd() - RETURNS TRIGGER AS $$ - - BEGIN - IF ("public"._pgtrigger_should_ignore(TG_NAME) IS TRUE) THEN - IF (TG_OP = 'DELETE') THEN - RETURN OLD; - ELSE - RETURN NEW; - END IF; - END IF; - INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "ordering_key", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."ordering_key", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), 'update_or_delete_snapshot', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL; - END; - $$ LANGUAGE plpgsql; - - DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion"; - CREATE TRIGGER pgtrigger_update_or_delete_snapshot_update_67ecd - AFTER UPDATE ON "search_opinion" - - - FOR EACH ROW WHEN (OLD."id" IS DISTINCT FROM (NEW."id") OR OLD."date_created" IS DISTINCT FROM (NEW."date_created") OR OLD."cluster_id" IS DISTINCT FROM (NEW."cluster_id") OR OLD."author_id" IS DISTINCT FROM (NEW."author_id") OR OLD."author_str" IS DISTINCT FROM (NEW."author_str") OR OLD."per_curiam" IS DISTINCT FROM (NEW."per_curiam") OR OLD."joined_by_str" IS DISTINCT FROM (NEW."joined_by_str") OR OLD."type" IS DISTINCT FROM (NEW."type") OR OLD."sha1" IS DISTINCT FROM (NEW."sha1") OR OLD."page_count" IS DISTINCT FROM (NEW."page_count") OR OLD."download_url" IS DISTINCT FROM (NEW."download_url") OR OLD."local_path" IS DISTINCT FROM (NEW."local_path") OR OLD."plain_text" IS DISTINCT FROM (NEW."plain_text") OR OLD."html" IS DISTINCT FROM (NEW."html") OR OLD."html_lawbox" IS DISTINCT FROM (NEW."html_lawbox") OR OLD."html_columbia" IS DISTINCT FROM (NEW."html_columbia") OR OLD."html_anon_2020" IS DISTINCT FROM (NEW."html_anon_2020") OR OLD."xml_harvard" IS DISTINCT FROM (NEW."xml_harvard") OR OLD."html_with_citations" IS DISTINCT FROM (NEW."html_with_citations") OR OLD."extracted_by_ocr" IS DISTINCT FROM (NEW."extracted_by_ocr") OR OLD."ordering_key" IS DISTINCT FROM 
(NEW."ordering_key")) - EXECUTE PROCEDURE pgtrigger_update_or_delete_snapshot_update_67ecd(); - - COMMENT ON TRIGGER pgtrigger_update_or_delete_snapshot_update_67ecd ON "search_opinion" IS '7137855274503cc2c50a17729f82e150d2b7d872'; - --- --- Create trigger update_or_delete_snapshot_delete on model opinion --- - - CREATE OR REPLACE FUNCTION "public"._pgtrigger_should_ignore( - trigger_name NAME - ) - RETURNS BOOLEAN AS $$ - DECLARE - _pgtrigger_ignore TEXT[]; - _result BOOLEAN; - BEGIN - BEGIN - SELECT INTO _pgtrigger_ignore - CURRENT_SETTING('pgtrigger.ignore'); - EXCEPTION WHEN OTHERS THEN - END; - IF _pgtrigger_ignore IS NOT NULL THEN - SELECT trigger_name = ANY(_pgtrigger_ignore) - INTO _result; - RETURN _result; - ELSE - RETURN FALSE; - END IF; - END; - $$ LANGUAGE plpgsql; - - CREATE OR REPLACE FUNCTION pgtrigger_update_or_delete_snapshot_delete_1f4fd() - RETURNS TRIGGER AS $$ - - BEGIN - IF ("public"._pgtrigger_should_ignore(TG_NAME) IS TRUE) THEN - IF (TG_OP = 'DELETE') THEN - RETURN OLD; - ELSE - RETURN NEW; - END IF; - END IF; - INSERT INTO "search_opinionevent" ("author_id", "author_str", "cluster_id", "date_created", "date_modified", "download_url", "extracted_by_ocr", "html", "html_anon_2020", "html_columbia", "html_lawbox", "html_with_citations", "id", "joined_by_str", "local_path", "ordering_key", "page_count", "per_curiam", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id", "plain_text", "sha1", "type", "xml_harvard") VALUES (OLD."author_id", OLD."author_str", OLD."cluster_id", OLD."date_created", OLD."date_modified", OLD."download_url", OLD."extracted_by_ocr", OLD."html", OLD."html_anon_2020", OLD."html_columbia", OLD."html_lawbox", OLD."html_with_citations", OLD."id", OLD."joined_by_str", OLD."local_path", OLD."ordering_key", OLD."page_count", OLD."per_curiam", _pgh_attach_context(), NOW(), 'update_or_delete_snapshot', OLD."id", OLD."plain_text", OLD."sha1", OLD."type", OLD."xml_harvard"); RETURN NULL; - END; - $$ LANGUAGE plpgsql; - - DROP TRIGGER IF EXISTS pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion"; - CREATE TRIGGER pgtrigger_update_or_delete_snapshot_delete_1f4fd - AFTER DELETE ON "search_opinion" - - - FOR EACH ROW - EXECUTE PROCEDURE pgtrigger_update_or_delete_snapshot_delete_1f4fd(); - - COMMENT ON TRIGGER pgtrigger_update_or_delete_snapshot_delete_1f4fd ON "search_opinion" IS '98fb52aa60fd8e89a83f8f7ac77ba5892739fb37'; - -- -- Create constraint unique_opinion_ordering_key on model opinion -- diff --git a/cl/search/migrations/0033_order_opinions_customers.sql b/cl/search/migrations/0033_order_opinions_customers.sql new file mode 100644 index 0000000000..e7158e3002 --- /dev/null +++ b/cl/search/migrations/0033_order_opinions_customers.sql @@ -0,0 +1,10 @@ +BEGIN; +-- +-- Add field ordering_key to opinion +-- +ALTER TABLE "search_opinion" ADD COLUMN "ordering_key" integer NULL; +-- +-- Create constraint unique_opinion_ordering_key on model opinion +-- +ALTER TABLE "search_opinion" ADD CONSTRAINT "unique_opinion_ordering_key" UNIQUE ("cluster_id", "ordering_key"); +COMMIT; From ed564f932b4a0170f374afccceeab869269e4bc9 Mon Sep 17 00:00:00 2001 From: Kevin Ramirez Date: Tue, 6 Aug 2024 12:46:16 -0600 Subject: [PATCH 50/50] feat(opinion_order): exclude columbia from clusters --- .../management/commands/update_opinions_order.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cl/corpus_importer/management/commands/update_opinions_order.py b/cl/corpus_importer/management/commands/update_opinions_order.py index 
592848c3e4..1f1e5308e9 100644 --- a/cl/corpus_importer/management/commands/update_opinions_order.py +++ b/cl/corpus_importer/management/commands/update_opinions_order.py @@ -5,7 +5,7 @@ from django.db.models import Count from cl.lib.command_utils import VerboseCommand, logger -from cl.search.models import Opinion, OpinionCluster +from cl.search.models import SOURCES, Opinion, OpinionCluster def sort_harvard_opinions(options) -> None: @@ -24,12 +24,14 @@ def sort_harvard_opinions(options) -> None: limit = options.get("limit", None) # The filepath_json_harvard field can only be filled by the Harvard importer, - # this helps us confirm that it was imported from a Harvard JSON + # this helps us confirm that it was imported from a Harvard JSON. We exclude + # clusters merged with Columbia because those may need some extra verification harvard_clusters = ( OpinionCluster.objects.exclude(filepath_json_harvard="") .prefetch_related("sub_opinions") .annotate(opinions_count=Count("sub_opinions")) .filter(opinions_count__gt=1) + .exclude(source__contains=SOURCES.COLUMBIA_ARCHIVE) .order_by("id") ) if skip_until:
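A closing note on the new exclusion: OpinionCluster.source is a composite string of per-source codes, which is why the filter uses source__contains rather than equality, and __contains compiles to a SQL LIKE substring match. Any cluster whose source string includes the Columbia archive code is therefore skipped even when Harvard data is also present. A quick sanity check from a Django shell (output abbreviated; the actual code value comes from SOURCES):

    qs = OpinionCluster.objects.exclude(
        source__contains=SOURCES.COLUMBIA_ARCHIVE
    )
    print(qs.query)  # ... WHERE NOT (source LIKE %<columbia code>%) ...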