Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SERVICE PUBLIC]Update service public definition #421

Merged
merged 8 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions helpers/labels/urssaf.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"CDG PAM": "535104756",
"UR ile-de-France (116) Pole GE": "788617793",
"URSSAF ILE-DE-FRANCE": "788617793",
"URSSAF DE CORSE": "782993141",
"Champagne-Ardenne": "752855825",
"URSSAF DE PICARDIE": "753663277",
"Urssaf Centre Val de Loire": "795120039",
"Urssaf de Bourgogne": "794714733",
"Urssaf Normandie": "902097997",
"Urssaf Midi-Pyrénées-TGE": "535146500",
"Nord-Pas-de-Calais": "753673409",
"URSSAF DE LORRAINE": "753334481",
"URSSAF Alsace": "753570043",
"FRANCHE-COMTE": "795070101",
"CNTFS FRANCHE-COMTE": "795070101",
"URSSAF Pays de Loire": "535104756",
"URSSAF Bretagne": "753759570",
"URSSAF DU POITOU-CHARENTES": "753644152",
"URSSAF DU NORD - LILLE - TGE": "783662059",
"URSSAF RHONE ALPES": "794846501",
"URSSAF AQUITAINE": "788778777",
"URSSAF MIDI-PYRENEES": "535146500",
"Urssaf du Limousin": "753919521",
"CNTFS": "794846501",
"URSSAF D AUVERGNE": "535138218",
"URSSAF Languedoc-Roussillon": "753664127",
"URSSAF PACA": "794487231",
"CGSS DE GUADELOUPE": "314572025",
"CGSS DE MARTINIQUE": "314024969",
"CGSS DE LA GUYANE": "315190769",
"CGSS DE LA REUNION": "314635483",
"CGSS DE MAYOTTE": "479281719",
"CENTRE DE GESTION PAM": "314635483"
}
26 changes: 26 additions & 0 deletions tests/unit_tests/test_data_enrichment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
import pytest
from dag_datalake_sirene.workflows.data_pipelines.elasticsearch.data_enrichment import (
is_service_public,
)

# Sample URSSAF SIREN numbers for testing
urssaf_siren_numbers = {"123456789", "987654321"}


@pytest.mark.parametrize(
"nature_juridique, siren, expected",
[
("4711", "123456789", True), # Valid prefix
("72", "987654321", True), # Valid prefix
("1234", "320252489", True), # BPI France
("1234", "123456789", True), # URSSAF SIREN
("4120", "775663438", False), # RATP should be excluded
("1234", "111111111", False), # Non-public SIREN
("", "123456789", False), # Empty nature juridique
(None, "123456789", False), # None nature juridique
],
)
def test_is_service_public(nature_juridique, siren, expected):
assert is_service_public(nature_juridique, siren) == expected
"""
39 changes: 33 additions & 6 deletions workflows/data_pipelines/elasticsearch/data_enrichment.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ def load_file(file_name: str):
# Static lookup tables, read once when this module is imported.
mapping_dep_to_reg = load_file("dep_to_reg.json")
mapping_role_dirigeants = load_file("roles_dirigeants.json")
mapping_commune_to_epci = load_file("epci.json")

# urssaf.json maps an URSSAF / CGSS label to its SIREN; only the SIREN values
# are needed for membership tests, so they are flattened into a set.
urssaf_data = load_file("urssaf.json")
urssaf_siren_numbers = {*urssaf_data.values()}


# Nom complet
Expand Down Expand Up @@ -153,14 +155,39 @@ def is_ess(est_ess_france, ess_insee):

# Service public
def is_service_public(nature_juridique_unite_legale, siren):
    """
    Tell whether a legal unit is classified as a public service.

    Parameters:
    - nature_juridique_unite_legale (str | None): legal-nature code of the
      unit; may be empty or None.
    - siren (str): SIREN number of the unit.

    Returns:
    - bool: True when the unit is classified as a public service,
      False otherwise.

    Rules, in order of precedence:
    - RATP (SIREN 775663438) is never a public service, whatever its
      legal-nature code.
    - A legal-nature code starting with "4", "71", "72", "73" or "74"
      marks a public service.
    - BPI France (SIREN 320252489) and every SIREN present in
      `urssaf_siren_numbers` are public services regardless of their
      legal-nature code.
    """
    # The exclusion wins over every inclusion rule, so it is tested first.
    if siren == "775663438":  # RATP
        return False

    # str.startswith accepts a tuple of alternatives directly.
    public_prefixes = ("4", "71", "72", "73", "74")
    if nature_juridique_unite_legale and nature_juridique_unite_legale.startswith(
        public_prefixes
    ):
        return True

    # SIREN-based inclusions apply even when the legal-nature code is
    # missing or does not match a public prefix.
    return siren == "320252489" or siren in urssaf_siren_numbers


# Association
def is_association(nature_juridique_unite_legale, identifiant_association):
Expand Down
Loading