freelawproject · flooie · Feb 1, 2023 · Feb 1, 2023 · Feb 1, 2023 · Feb 1, 2023
diff --git a/juriscraper/opinions/united_states/__init__.py b/juriscraper/opinions/united_states/__init__.py
@@ -1,5 +1,6 @@
 __all__ = [
     "administrative_agency",
+    "attorney_general",
     "federal_appellate",
     "federal_district",
     "federal_bankruptcy",

diff --git a/juriscraper/opinions/united_states/attorney_general/__init__.py b/juriscraper/opinions/united_states/attorney_general/__init__.py
@@ -0,0 +1,47 @@
+__all__ = [  # submodule names exposed by `from <this package> import *`; presumably one scraper module per attorney-general office
+    "alaag",
+    "alaskaag",
+    "azag",
+    "arkag",
+    "calag",
+    "coloag",
+    "connag",
+    "dcag",
+    "delag",
+    "flaag",
+    "gaag",
+    "hawag",
+    "idahoag",
+    "illinoisag",
+    "indianaag",
+    "ksag",
+    "kyag",
+    "laag",
+    "maineag",
+    "mdag",
+    "michag",
+    "minnag",
+    "moag",
+    "montag",
+    "ncag",
+    "ndag",
+    "nebag",
+    "nevag",
+    "nhag",
+    "njag",
+    "nmiag",
+    "nyag",
+    "ohag",
+    "oklaag",
+    "orag",
+    "paag",
+    "scag",
+    "sdag",
+    "tennag",
+    "texag",
+    "vaag",
+    "vtag",
+    "waag",
+    "wiscag",
+    "wvaag",
+]
diff --git a/juriscraper/opinions/united_states/attorney_general/alaag.py b/juriscraper/opinions/united_states/attorney_general/alaag.py
@@ -0,0 +1,101 @@
+"""
+Scraper for Alabama AG
+CourtID: alaag
+Court Short Name: Ala AG
+Author: William E. Palin
+History:
+ - 2023-01-29: Created.
+"""
+import re
+from datetime import date, timedelta
+
+from lxml.html import fromstring
+
+from juriscraper.lib.html_utils import get_html_parsed_text
+from juriscraper.OpinionSiteLinear import OpinionSiteLinear
+
+
+class Site(OpinionSiteLinear):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.court_id = self.__module__
+        self.status = "Published"
+        self.url = "https://www.alabamaag.gov/opinions"
+        self.td = date.today()
+        today = self.td.strftime("%Y-%m-%d")
+        last_month = (self.td - timedelta(days=61)).strftime("%Y-%m-%d")
+        self.headers = {
+            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.2 Safari/605.1.15",
+        }
+        self.parameters = {
+            "ctl00$ContentFullBody$ContentBody$ucSearchOpinions1$ScriptManager1": "ctl00$ContentFullBody$ContentBody$ucSearchOpinions1$UpdatePanel1|ctl00$ContentFullBody$ContentBody$ucSearchOpinions1$btnSearch",
+            "ctl00$txtSearch": "",
+            "ctl00$ContentFullBody$ContentBody$ucSearchOpinions1$txtSearchName": "20",
+            "ctl00$ContentFullBody$ContentBody$ucSearchOpinions1$rbdSearchType": "1",
+            "ctl00$ContentFullBody$ContentBody$ucSearchOpinions1$txtSearchAfterDate": last_month,
+            "ctl00$ContentFullBody$ContentBody$ucSearchOpinions1$txtSearchBeforeDate": today,
+            "__LASTFOCUS": "",
+            "__VIEWSTATEENCRYPTED": "",
+            "__ASYNCPOST": "true",
+            "ctl00$ContentFullBody$ContentBody$ucSearchOpinions1$btnSearch": "Search",
+        }
+
+    def _download(self, request_dict={}):
+        """Custom download method
+
+        :param request_dict: Empty dict
+        :return: HTML content
+        """
+        if self.test_mode_enabled():
+            self.html = get_html_parsed_text(open(self.url).read())
+            return self.html
+        if self.html:
+            self._update_query_params()
+            r = self.request["session"].post(
+                self.url, headers=self.headers, data=self.parameters
+            )
+            self.html = fromstring(r.text)
+        else:
+            html = super()._download(request_dict)
+            return html
+
+    def _update_query_params(self):
+        """Update the query parameters for next page
+
+        :return: None
+        """
+        vs_xpath = "//input[@name='__VIEWSTATE']"
+        ev_xpath = "//input[@name='__EVENTVALIDATION']"
+        vsg_xpath = "//input[@name='__VIEWSTATEGENERATOR']"
+
+        self.parameters["__VIEWSTATE"] = self.html.xpath(vs_xpath)[0].attrib[
+            "value"
+        ]
+        self.parameters["__EVENTVALIDATION"] = self.html.xpath(ev_xpath)[
+            0
+        ].attrib["value"]
+        self.parameters["__VIEWSTATEGENERATOR"] = self.html.xpath(vsg_xpath)[
+            0
+        ].attrib["value"]
+
+    def _process_html(self):
+        """Process the html
+
+        :return: None
+        """
+        self._download()
+        for row in self.html.xpath(".//tr/td/.."):
+            if not row.xpath(".//td/a/@href"):
+                continue
+            docket = re.sub(
+                r"\r\n", "", row.xpath(".//td[2]")[0].text_content()
+            )
+            self.cases.append(
+                {
+                    "name": f"Untitled AG Opinion: {docket.strip()}",
+                    "docket": docket.strip(),
+                    "url": row.xpath(".//td/a/@href")[0],
+                    "summary": row.xpath(".//td[3]")[0].text_content().strip(),
+                    "date": row.xpath(".//td[4]")[0].text_content().strip(),
+                }
+            )
diff --git a/juriscraper/opinions/united_states/attorney_general/alaskaag.py b/juriscraper/opinions/united_states/attorney_general/alaskaag.py
@@ -0,0 +1,37 @@
+"""
+Scraper for Alaska AG
+CourtID: alaskaag
+Court Short Name: Alaska AG
+Author: William E. Palin
+History:
+ - 2023-01-29: Created.
+"""
+from juriscraper.OpinionSiteLinear import OpinionSiteLinear
+
+
+class Site(OpinionSiteLinear):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.court_id = self.__module__
+        self.status = "Published"
+        self.url = "https://law.alaska.gov/doclibrary/opinions-index/opinions_chron.html"
+
+    def _process_html(self):
+        """Process the html
+
+        :return: None
+        """
+        for row in self.html.xpath(".//li/a[contains(@href, '.pdf')]/.."):
+            date, other = row.text_content().split(" ", 1)
+            name = other.split("(PDF")[0].strip(" -")
+            url = row.xpath(".//a/@href")[0]
+            dn = url.split("_")[-1][:-4]
+            docket = f"AGO No. {dn}"
+            self.cases.append(
+                {
+                    "name": name,
+                    "docket": docket,
+                    "url": url,
+                    "date": date,
+                }
+            )
diff --git a/juriscraper/opinions/united_states/attorney_general/arkag.py b/juriscraper/opinions/united_states/attorney_general/arkag.py
@@ -0,0 +1,65 @@
+"""
+Scraper for Arkansas Attorney General
+CourtID: arkag
+Court Short Name: Arkansas AG
+Author: William E. Palin
+History:
+ - 2023-01-29: Created.
+"""
+import datetime
+import re
+from typing import Optional
+
+from juriscraper.DeferringList import DeferringList
+from juriscraper.OpinionSiteLinear import OpinionSiteLinear
+
+
+class Site(OpinionSiteLinear):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.court_id = self.__module__
+        self.status = "Published"
+        self.url = "https://ag-opinions.ark.org/?type=recent&q=90"
+        self.seeds = []
+
+    def _process_html(self):
+        """Process the html
+
+        :return: None
+        """
+        for row in self.html.xpath(".//div[@class='card']"):
+            name = row.xpath(".//h5/text()")[0]
+            url = row.xpath(".//a[contains(@href, '.pdf')]/@href")[0]
+            html_url = row.xpath(".//a[contains(@href, '.html')]/@href")[0]
+            docket = re.sub(r"\r\n", "", name.split()[1])
+            self.seeds.append(html_url)
+            self.cases.append(
+                {
+                    "name": name,
+                    "docket": docket,
+                    "url": url,
+                    "summary": row.xpath(".//p/text()")[0],
+                    "date": "",
+                }
+            )
+
+    def _get_case_dates(self) -> DeferringList:
+        """Get case names using a deferring list."""
+
+        def get_case_date(link: str) -> Optional[datetime.date]:
+            """Abstract out the case date from the case page."""
+            if self.test_mode_enabled():
+                return datetime.datetime.strptime(
+                    "2022-01-01", "%Y-%m-%d"
+                ).date()
+            html = self._get_html_tree_by_url(link)
+            for p in html.xpath(".//p"):
+                try:
+                    dt = datetime.datetime.strptime(
+                        p.text_content(), "%B %d, %Y"
+                    )
+                    return dt.date()
+                except ValueError:
+                    pass
+
+        return DeferringList(seed=self.seeds, fetcher=get_case_date)
diff --git a/juriscraper/opinions/united_states/attorney_general/azag.py b/juriscraper/opinions/united_states/attorney_general/azag.py
@@ -0,0 +1,45 @@
+"""
+Scraper for Arizona Attorney General
+CourtID: azag
+Court Short Name: Arizona AG
+Author: William E. Palin
+History:
+ - 2023-01-29: Created.
+"""
+import datetime
+
+from juriscraper.OpinionSiteLinear import OpinionSiteLinear
+
+
+class Site(OpinionSiteLinear):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.court_id = self.__module__
+        self.status = "Published"
+        year = datetime.date.today().year
+        # year = 2022
+        self.url = f"https://www.azag.gov/opinions/{year}"
+
+    def _process_html(self):
+        """Process the html
+
+        :return: None
+        """
+        for row in self.html.xpath(".//div/div/span/a/../../.."):
+            url = row.xpath(".//a/@href")[0]
+            name = row.xpath(".//a/text()")[0]
+            date = row.xpath(".//time/text()")
+            if not date:
+                continue
+            date = str(
+                datetime.datetime.strptime(date[0], "%A, %B %d, %Y").date()
+            )
+            docket = url.split("/")[-1].upper()
+            self.cases.append(
+                {
+                    "name": name,
+                    "docket": f"No. {docket}",
+                    "url": url,
+                    "date": date,
+                }
+            )
diff --git a/...per/opinions/united_states/state/calag.py → ...s/united_states/attorney_general/calag.py b/...per/opinions/united_states/state/calag.py → ...s/united_states/attorney_general/calag.py
diff --git a/juriscraper/opinions/united_states/attorney_general/coloag.py b/juriscraper/opinions/united_states/attorney_general/coloag.py
@@ -0,0 +1,73 @@
+"""
+Scraper for Colorado AG
+CourtID: coloag
+Court Short Name: Colorado AG
+Author: William E. Palin
+History:
+ - 2023-01-29: Created.
+"""
+import datetime
+import re
+
+from lxml.html import tostring
+
+from juriscraper.OpinionSiteLinear import OpinionSiteLinear
+
+
+class Site(OpinionSiteLinear):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.court_id = self.__module__
+        self.status = "Published"
+        self.url = "https://coag.gov/attorney-general-opinions/"
+
+    def _process_html(self):
+        """Process the html
+
+        :return: None
+        """
+        if not self.test_mode_enabled():
+            m = re.findall(
+                r"\d+ Formal AG Opinions", tostring(self.html).decode()
+            )
+            if not m:
+                return
+            year = m[0].split()[0]
+            key = m[0].replace(" ", "-")
+            self.url = f"https://coag.gov/attorney-general-opinions/{key}/"
+            self.html = super()._download()
+        else:
+            year = "2021-01-31"
+        for row in self.html.xpath(".//li/a[contains(@href, '.pdf')]/.."):
+            url = row.xpath(".//a/@href")[0].replace("http", "https")
+            name = row.xpath(".//a/text()")[0]
+            self.cases.append(
+                {
+                    "url": url,
+                    "name": name,
+                    "docket": name,
+                    "summary": row.text_content(),
+                    "date": year,
+                    "date_filed_is_approximate": True,
+                }
+            )
+
+    def extract_from_text(self, scraped_text):
+        """Extract date info from text
+
+        :param scraped_text: Scraped text
+        :return: The metadata containing date filed
+        """
+        pattern = re.compile(r"([A-Z][a-z]+ \d{1,2}, \d{4})")
+        match = pattern.search(scraped_text)
+        if match:
+            date_filed = datetime.datetime.strptime(
+                match.group(), "%B %d, %Y"
+            ).strftime("%Y-%m-%d")
+            metadata = {
+                "OpinionCluster": {
+                    "date_filed": date_filed,
+                    "date_filed_is_approximate": False,
+                },
+            }
+            return metadata