Skip to content

Commit

Permalink
feat: paysage optimizations
Browse files Browse the repository at this point in the history
  • Loading branch information
ahonestla committed Oct 23, 2024
1 parent b97ce8b commit e70d579
Show file tree
Hide file tree
Showing 9 changed files with 239 additions and 76 deletions.
90 changes: 84 additions & 6 deletions notebooks/paysage_api_test.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,103 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 30,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import os\n",
"import requests\n",
"import pandas as pd"
"\n",
"import pandas as pd\n",
"from dotenv import load_dotenv\n",
"\n",
"load_dotenv()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"url = \"https://paysage-api.staging.dataesr.ovh/relations?limit=10000&filters[relationTag]=structure-categorie&filters[relatedObjectId]=mCpLW\"\n",
"headers = {\"X-API-KEY\": os.getenv(\"PAYSAGE_API_KEY\")}\n",
"\n",
"response = requests.get(url, headers=headers)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'mCpLW'"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.json().get(\"data\")[0].get(\"relatedObjectId\")"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"url = \"https://paysage-api.staging.dataesr.ovh/relations?limit=10000&filters[relationTag]=structure-categorie&filters[relatedObjectId]=mCpLW&filters[relatedObjectId]=Eg7tX&filters[relatedObjectId]=93BR1&filters[relatedObjectId]=2ZdzP&filters[relatedObjectId]=MTFHZ&filters[relatedObjectId]=UfEnK&filters[relatedObjectId]=Sv5bb&filters[relatedObjectId]=mNJ1Z&filters[relatedObjectId]=WCat8&filters[relatedObjectId]=fQ6GL&filters[relatedObjectId]=WkSgR&filters[relatedObjectId]=YNqFb&filters[relatedObjectId]=iyn79&filters[relatedObjectId]=NsMkU\"\n",
"headers = {\"X-API-KEY\": \"xkeypsg-72b39GvylkDPoEe6AwUz\"}\n",
"id = \"m7K6T\"\n",
"url = f\"https://paysage-api.staging.dataesr.ovh/structures/{id}\"\n",
"url = f\"https://paysage-api.staging.dataesr.ovh/relations?limit=100&filters[relationTag]=structure-categorie&filters[resourceId]={id}\"\n",
"headers = {\"X-API-KEY\": os.getenv(\"PAYSAGE_API_KEY\")}\n",
"\n",
"response = requests.get(url, headers=headers)"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'usualNameFr': 'Université',\n",
" 'priority': 1,\n",
" 'id': 'mCpLW',\n",
" 'displayName': 'Université',\n",
" 'collection': 'categories',\n",
" 'href': '/categories/mCpLW',\n",
" 'usualNameEn': None,\n",
" 'descriptionFr': None,\n",
" 'descriptionEn': None}"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response.json().get(\"data\")[16].get(\"relatedObject\")"
]
}
],
"metadata": {
Expand Down
2 changes: 1 addition & 1 deletion project/client/dist/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/fonts/remixicon.css" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Affiliation Matcher</title>
<script type="module" crossorigin src="/static/index-78f5f2fe.js"></script>
<script type="module" crossorigin src="/static/index-09aab7a4.js"></script>
<link rel="stylesheet" href="/static/index-b7346e81.css">
</head>

Expand Down

Large diffs are not rendered by default.

6 changes: 5 additions & 1 deletion project/client/src/components/results/debug/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ import { MatchDebug } from "../../../types"

type ResultsDebugArgs = {
resultsDebug: MatchDebug
resultsLogs: string
}
export default function ResultsDebug({ resultsDebug }: ResultsDebugArgs) {
export default function ResultsDebug({ resultsDebug, resultsLogs }: ResultsDebugArgs) {
const intl = useIntl()
if (!resultsDebug) return null

Expand Down Expand Up @@ -40,6 +41,9 @@ export default function ResultsDebug({ resultsDebug }: ResultsDebugArgs) {
))}
</Container>
))}
<Container>
<div dangerouslySetInnerHTML={{ __html: resultsLogs }} />
</Container>
</Accordion>
)
}
95 changes: 83 additions & 12 deletions project/client/src/components/results/index.tsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { useEffect, useState } from "react"
import { useIntl } from "react-intl"
import { Container, Text, Badge } from "@dataesr/dsfr-plus"
import { MatchResults } from "../../types"
import { MatchIds, MatchResults } from "../../types"
import Error from "../error"
import Result from "./result"
import Fetching from "../fetching"
Expand All @@ -10,6 +10,81 @@ import useUrl from "../../hooks/useUrl"
import Info from "../info"
import useMatch from "../../hooks/useMatch"

/** Props shared by the MatcherResults and PaysageResults components. */
type MatcherResultsArgs = {
// Ids of the matched entries; MatcherResults renders one Result per id and shows their count.
matchIds: MatchIds
// Full match payload, handed to every Result as `resultData`.
matchResults: MatchResults
// Forwarded as-is to every Result. NOTE(review): typed `any`; presumably a title setter — confirm against Result's props.
setTitle: any
}

/**
 * Flat rendering of match results: a localized match count followed by one
 * `Result` per matched id, in the order given by `matchIds`.
 */
function MatcherResults({ matchIds, matchResults, setTitle }: MatcherResultsArgs) {
  const intl = useIntl()

  // Localized "<n> match(es)" label built from the number of ids.
  const countLabel = intl.formatMessage({ id: "match.count" }, { count: matchIds.length })

  // One Result per id; every Result receives the whole payload plus its own id.
  const resultItems = matchIds.map((matchId, position) => (
    <Result key={position} resultData={matchResults} resultId={matchId} setTitle={setTitle} />
  ))

  return (
    <Container fluid>
      <Container className="fr-mt-3w">
        <Text size="md">{countLabel}</Text>
      </Container>
      <Container fluid className="fr-mt-3w">
        {resultItems}
      </Container>
    </Container>
  )
}

/**
 * Renders paysage match results grouped by category.
 *
 * Each enriched result is filed under the label(s) of its lowest-priority-number
 * category (ties are all kept). Results without any category go to the
 * "Others" group. Groups are displayed by ascending priority, each with its
 * own localized match count.
 *
 * Falls back to the flat `MatcherResults` list when grouping brings nothing:
 * no enriched results, or every result ended up in the default group.
 */
function PaysageResults({ matchIds, matchResults, setTitle }: MatcherResultsArgs) {
  const intl = useIntl()

  const DEFAULT_CATEGORY = "Others"
  const DEFAULT_PRIORITY = 99

  type CategoryGroup = { ids: MatchIds; priority: number }

  // `??` (not `||`) so that an explicit priority of 0 is kept instead of being replaced by the default.
  const priorityOf = (category?: { priority?: number }) => category?.priority ?? DEFAULT_PRIORITY

  const categories = matchResults?.enriched_results?.reduce((groups, res) => {
    // Adds the current result to a group, creating the group on first use.
    // The first priority seen for a label wins; an id is never listed twice in the same group.
    const addToGroup = (label: string, priority: number) => {
      const group = groups[label]
      if (!group) groups[label] = { ids: [res.id], priority }
      else if (!group.ids.includes(res.id)) group.ids.push(res.id)
    }

    const resultCategories = res?.paysage_categories ?? []

    // Missing AND empty category lists both land in the default group.
    // (An empty array is truthy, and Math.min() of nothing is Infinity, which
    // would otherwise match no category and silently drop the result.)
    if (resultCategories.length === 0) {
      addToGroup(DEFAULT_CATEGORY, DEFAULT_PRIORITY)
      return groups
    }

    const lowestPriority = Math.min(...resultCategories.map(priorityOf))
    resultCategories
      .filter((category) => priorityOf(category) === lowestPriority)
      // Empty labels are deliberately folded into the default group as well.
      .forEach((category) => addToGroup(category?.label || DEFAULT_CATEGORY, priorityOf(category)))

    return groups
  }, {} as Record<string, CategoryGroup>)

  // Grouping is only useful when at least one real (non-default) category exists.
  const hasNamedCategory = Object.keys(categories ?? {}).some((label) => label !== DEFAULT_CATEGORY)
  if (!categories || !hasNamedCategory)
    return <MatcherResults matchIds={matchIds} matchResults={matchResults} setTitle={setTitle} />

  return (
    <Container fluid>
      {Object.entries(categories)
        .sort((a, b) => a[1].priority - b[1].priority)
        .map(([label, group]) => (
          // Labels are unique object keys, so they are stable React keys for the groups.
          <Container fluid key={label}>
            <Container className="fr-mt-3w">
              <Text size="md">
                {label}
                {" : "}
                {intl.formatMessage({ id: "match.count" }, { count: group.ids.length })}
              </Text>
            </Container>
            <Container fluid className="fr-mt-3w">
              {group.ids.map((id) => (
                <Result key={id} resultData={matchResults} resultId={id} setTitle={setTitle} />
              ))}
            </Container>
          </Container>
        ))}
    </Container>
  )
}

export default function Results() {
const intl = useIntl()
const { currentQuery, currentMatcher, currentYear } = useUrl()
Expand Down Expand Up @@ -42,7 +117,7 @@ export default function Results() {
<Container className="fr-mt-3w">
<Badge color="error">{`${currentMatcher} : ${intl.formatMessage({ id: "match.count" }, { count: 0 })}`}</Badge>
</Container>
<ResultsDebug resultsDebug={matchResults?.debug} />
<ResultsDebug resultsDebug={matchResults?.debug} resultsLogs={matchResults?.logs} />
</Container>
)

Expand All @@ -51,16 +126,12 @@ export default function Results() {
<Container className="sticky card">
<Text size="lead">{currentTitle}</Text>
</Container>
<Container className="fr-mt-3w">
<Text size="md">{intl.formatMessage({ id: "match.count" }, { count: matchIds.length })}</Text>
</Container>
<Container fluid className="fr-mt-3w">
{matchIds.map((id, index) => {
return <Result key={index} resultData={matchResults} resultId={id} setTitle={setTitle} />
})}
</Container>
<ResultsDebug resultsDebug={matchResults?.debug} />
{/* <div dangerouslySetInnerHTML={{ __html: matchResults?.logs }} /> */}
{currentMatcher === "paysage" ? (
<PaysageResults matchIds={matchIds} matchResults={matchResults} setTitle={setTitle} />
) : (
<MatcherResults matchIds={matchIds} matchResults={matchResults} setTitle={setTitle} />
)}
<ResultsDebug resultsDebug={matchResults?.debug} resultsLogs={matchResults?.logs} />
</Container>
)
}
2 changes: 1 addition & 1 deletion project/client/src/locales/en.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"info.missing.query": "Please enter a text affiliation",
"info.missing.matcher": "Please select a matcher",
"debug.accordion.title": "Matching details",
"debug.accordion.title": "Matching logs",
"match.count": "{count, plural, =0 {# matches} one {# match} other {# matches}}",
"possibility.count": "{count, plural, =0 {# possibilities} one {# possibility} other {# possibilities}}"
}
1 change: 1 addition & 0 deletions project/client/src/types/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export type MatchEnrichedResult = {
acronym?: Array<string>
city?: Array<string>
country?: Array<string>
paysage_categories?: Array<{ id: string; label?: string; priority?: number }>
}
export type MatchEnrichedResults = Array<MatchEnrichedResult>

Expand Down
2 changes: 1 addition & 1 deletion project/server/main/load_paysage.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def load_paysage(index_prefix: str = "matcher") -> dict:
for criterion_value in criterion_values:
if criterion_value not in es_data[criterion]:
es_data[criterion][criterion_value] = []
es_data[criterion][criterion_value].append({"id": data_point["id"], "categories": data_point["categories"]})
es_data[criterion][criterion_value].append({"id": data_point["id"]})

# Bulk insert data into ES
actions = []
Expand Down
Loading

0 comments on commit e70d579

Please sign in to comment.