Skip to content

Commit

Permalink
add a first workaround for the new Zensus zip content type
Browse files Browse the repository at this point in the history
  • Loading branch information
pmayd committed Feb 6, 2024
1 parent b80ae79 commit 3d4d6dc
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 71 deletions.
Empty file added nb/new_zensus.ipynb
Empty file.
116 changes: 76 additions & 40 deletions nb/presentation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 1,
"id": "14555430",
"metadata": {},
"outputs": [],
Expand All @@ -12,6 +12,16 @@
"import pystatis"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "bbf0b1de",
"metadata": {},
"outputs": [],
"source": [
"pystatis.clear_cache()"
]
},
{
"cell_type": "markdown",
"id": "baa7eec1",
Expand Down Expand Up @@ -93,7 +103,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 3,
"id": "27ff0988",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -165,7 +175,7 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 4,
"id": "7af5949f",
"metadata": {},
"outputs": [
Expand All @@ -181,7 +191,7 @@
"| 4 | AI019 | Regionalatlas Deutschland Themenbereich \"Umwelt\" Indikatoren zu \"Haushaltsabfälle\" | |"
]
},
"execution_count": 40,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -200,7 +210,7 @@
},
{
"cell_type": "code",
"execution_count": 41,
"execution_count": 5,
"id": "e4bc3879",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -274,7 +284,7 @@
"4 AI019 Regionalatlas Deutschland Themenbereich \"Umwelt\" Indikatoren zu \"Haushaltsabfälle\" "
]
},
"execution_count": 41,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -293,7 +303,7 @@
},
{
"cell_type": "code",
"execution_count": 42,
"execution_count": 6,
"id": "17564ec7",
"metadata": {},
"outputs": [
Expand All @@ -303,7 +313,7 @@
"['32121-01-02-4', '32121-01-02-4-B', '32151-01-01-4']"
]
},
"execution_count": 42,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -322,7 +332,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 7,
"id": "63123a8f",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -390,7 +400,7 @@
},
{
"cell_type": "code",
"execution_count": 44,
"execution_count": 3,
"id": "66d0e9cf",
"metadata": {},
"outputs": [],
Expand All @@ -409,7 +419,7 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 6,
"id": "927c0a2c",
"metadata": {},
"outputs": [],
Expand All @@ -419,7 +429,7 @@
},
{
"cell_type": "code",
"execution_count": 46,
"execution_count": 7,
"id": "4308ec4d",
"metadata": {},
"outputs": [
Expand All @@ -437,7 +447,7 @@
" '31231;Fortschreibung Wohngebäude- und Wohnungsbestand;STAG;Stichtag;31.12.2022;DINSG;Deutschland insgesamt;DG;Deutschland;43366919;514;3996995;92,2;47,4;189920514;4,4;2,3']"
]
},
"execution_count": 46,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -448,7 +458,7 @@
},
{
"cell_type": "code",
"execution_count": 47,
"execution_count": 8,
"id": "d37138dd",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -669,7 +679,7 @@
"7 31231 Fortschreibung Wohngebäude- und Wohnungsbestand STAG Stichtag 31.12.2022 DINSG Deutschland insgesamt DG Deutschland 43366919 514 3996995 92,2 47,4 189920514 4,4 2,3"
]
},
"execution_count": 47,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -688,7 +698,7 @@
},
{
"cell_type": "code",
"execution_count": 48,
"execution_count": 9,
"id": "6a433995",
"metadata": {},
"outputs": [],
Expand All @@ -698,7 +708,7 @@
},
{
"cell_type": "code",
"execution_count": 49,
"execution_count": 10,
"id": "88cb7561",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -856,7 +866,7 @@
"7 31.12.2022 Deutschland 43366919 514 3996995 92,2 47,4 189920514 4,4 2,3"
]
},
"execution_count": 49,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -875,7 +885,7 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 11,
"id": "f1f4957d",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -996,7 +1006,7 @@
},
{
"cell_type": "code",
"execution_count": 51,
"execution_count": 12,
"id": "533a7e96",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -1611,7 +1621,7 @@
"51 2022 Deutschland Insgesamt 293024354.0 273144778.0 98388835.0 2.395591e+09"
]
},
"execution_count": 51,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1625,16 +1635,16 @@
},
{
"cell_type": "code",
"execution_count": 52,
"execution_count": 13,
"id": "b00f1c76",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/macbookpro/Git Repos/pystatis/src/pystatis/table.py:46: DtypeWarning: Columns (7) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" self.data = pd.read_csv(data_str, sep=\";\", na_values = [\"...\",\".\",\"-\",\"/\",\"x\"])\n"
"/Users/miay/git/github/CorrelAid/pystatis/src/pystatis/table.py:48: DtypeWarning: Columns (7) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" self.data = pd.read_csv(\n"
]
},
{
Expand Down Expand Up @@ -1836,7 +1846,7 @@
"[48420 rows x 10 columns]"
]
},
"execution_count": 52,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -1850,7 +1860,7 @@
},
{
"cell_type": "code",
"execution_count": 53,
"execution_count": 4,
"id": "4ce4c9d5",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -1994,7 +2004,7 @@
"12 2011-05-09 Deutschland Ohne Schulabschluss 3241010"
]
},
"execution_count": 53,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -2016,7 +2026,7 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 5,
"id": "f912518e",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -2183,7 +2193,7 @@
"[1092 rows x 7 columns]"
]
},
"execution_count": 54,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -3066,9 +3076,9 @@
}
],
"source": [
"fig=plt.figure(figsize=(10,5))\n",
"fig = plt.figure(figsize=(10, 5))\n",
"\n",
"ax1=fig.add_subplot(131)\n",
"ax1 = fig.add_subplot(131)\n",
"year = 2015\n",
"plt.title(str(year))\n",
"gdf_merged = pd.merge(\n",
Expand All @@ -3079,11 +3089,19 @@
")\n",
"gdf_merged.ratio_international\n",
"gdf_merged.plot(\n",
" \"ratio_international\", ax=ax1, legend=True, missing_kwds={\"color\": \"lightgrey\"},\n",
" legend_kwds={\"label\": \"ratio of int. students\", \"orientation\": \"horizontal\"}, vmin=0.08,vmax=0.23\n",
" \"ratio_international\",\n",
" ax=ax1,\n",
" legend=True,\n",
" missing_kwds={\"color\": \"lightgrey\"},\n",
" legend_kwds={\n",
" \"label\": \"ratio of int. students\",\n",
" \"orientation\": \"horizontal\",\n",
" },\n",
" vmin=0.08,\n",
" vmax=0.23,\n",
")\n",
"\n",
"ax2=fig.add_subplot(132)\n",
"ax2 = fig.add_subplot(132)\n",
"year = 2018\n",
"plt.title(str(year))\n",
"gdf_merged = pd.merge(\n",
Expand All @@ -3094,11 +3112,19 @@
")\n",
"gdf_merged.ratio_international\n",
"gdf_merged.plot(\n",
" \"ratio_international\", ax=ax2, legend=True, missing_kwds={\"color\": \"lightgrey\"},\n",
" legend_kwds={\"label\": \"ratio of int. students\", \"orientation\": \"horizontal\"}, vmin=0.08,vmax=0.23\n",
" \"ratio_international\",\n",
" ax=ax2,\n",
" legend=True,\n",
" missing_kwds={\"color\": \"lightgrey\"},\n",
" legend_kwds={\n",
" \"label\": \"ratio of int. students\",\n",
" \"orientation\": \"horizontal\",\n",
" },\n",
" vmin=0.08,\n",
" vmax=0.23,\n",
")\n",
"\n",
"ax3=fig.add_subplot(133)\n",
"ax3 = fig.add_subplot(133)\n",
"year = 2021\n",
"plt.title(str(year))\n",
"gdf_merged = pd.merge(\n",
Expand All @@ -3109,8 +3135,16 @@
")\n",
"gdf_merged.ratio_international\n",
"gdf_merged.plot(\n",
" \"ratio_international\", ax=ax3, legend=True, missing_kwds={\"color\": \"lightgrey\"},\n",
" legend_kwds={\"label\": \"ratio of int. students\", \"orientation\": \"horizontal\"}, vmin=0.08,vmax=0.23\n",
" \"ratio_international\",\n",
" ax=ax3,\n",
" legend=True,\n",
" missing_kwds={\"color\": \"lightgrey\"},\n",
" legend_kwds={\n",
" \"label\": \"ratio of int. students\",\n",
" \"orientation\": \"horizontal\",\n",
" },\n",
" vmin=0.08,\n",
" vmax=0.23,\n",
")"
]
},
Expand All @@ -3127,7 +3161,9 @@
"id": "7a119d0e",
"metadata": {},
"source": [
"- `quality=on`"
"- `quality=on` -> handle the different quality identifiers\n",
"- `Find` to work across all databases -> search for and find results over all supported databases with a single query\n",
"- `LLM`? -> ideation: provide some kind of interface that allows to talk with GENESIS via a LLM like ChatGPT"
]
},
{
Expand Down
7 changes: 6 additions & 1 deletion nb/presentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@

import pystatis

# %%
pystatis.clear_cache()

# %% [markdown]
# # CorrelAid
#
Expand Down Expand Up @@ -385,7 +388,9 @@
# ## Outlook

# %% [markdown]
# - `quality=on`
# - `quality=on` -> handle the different quality identifiers
# - `Find` to work across all databases -> search for results across all supported databases with a single query
# - `LLM`? -> ideation: provide some kind of interface that allows users to talk to GENESIS via an LLM like ChatGPT

# %% [markdown]
#
Loading

0 comments on commit 3d4d6dc

Please sign in to comment.