diff --git a/environment.yml b/environment.yml index 3e83829..7213f50 100755 --- a/environment.yml +++ b/environment.yml @@ -49,8 +49,8 @@ dependencies: - python-ternary - python-dotenv - scipy<1.13 - - libbezier - pip: + - bezier - scattertext - adjustText - matplotlib-scalebar diff --git "a/notebooks/vis-course/2024-oto\303\261o/11-estructura.ipynb" "b/notebooks/vis-course/2024-oto\303\261o/11-estructura.ipynb" new file mode 100644 index 0000000..cb01b5c --- /dev/null +++ "b/notebooks/vis-course/2024-oto\303\261o/11-estructura.ipynb" @@ -0,0 +1,600 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "import os\n", + "import sys\n", + "from pathlib import Path\n", + "from aves.config import setup_style\n", + "\n", + "load_dotenv()\n", + "setup_style()\n", + "\n", + "AVES_ROOT = Path(os.environ['AVES_ROOT'])\n", + "EOD_PATH = AVES_ROOT / \"data\" / \"external\" / \"EOD_STGO\"\n", + "CENSUS_GEO_ROOT = Path(os.environ['CENSUS_GEO_ROOT'])" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import numpy as np\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "from aves.data import eod, census\n", + "import matplotlib as mpl" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "viajes = eod.read_trips(EOD_PATH)\n", + "\n", + "# descartamos sectores que no sean relevantes en los orígenes y destinos de los viajes\n", + "viajes = viajes[\n", + " (viajes[\"SectorOrigen\"] != \"Exterior a RM\")\n", + " & (viajes[\"SectorDestino\"] != \"Exterior a RM\")\n", + " & (viajes[\"SectorOrigen\"] != \"Extensión Sur-Poniente\")\n", + " & (viajes[\"SectorDestino\"] != \"Extensión Sur-Poniente\")\n", + " & pd.notnull(viajes[\"SectorOrigen\"])\n", + " & pd.notnull(viajes[\"SectorDestino\"])\n", + "]\n", + "\n", + "print(len(viajes))" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "personas = eod.read_people(EOD_PATH)\n", + "hogares = eod.read_homes(EOD_PATH)\n", + "tabla = viajes.merge(personas).merge(hogares.drop('TipoDia', axis=1))" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "tabla[\"Peso\"] = (\n", + " tabla[\"FactorExpansion\"] * tabla[\"FactorPersona\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "zones = gpd.read_file(AVES_ROOT / 'data' / 'processed' / 'scl_zonas_urbanas.json').set_index('ID')\n", + "zones.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from aves.data.census.loading import read_census_map\n", + "comunas = read_census_map('comuna', path=CENSUS_GEO_ROOT / \"R13\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "comunas_urbanas = comunas[comunas['COMUNA'].isin(zones['Com'].unique())].drop('NOM_COMUNA', axis=1).copy()\n", + "comunas_urbanas['NombreComuna'] = comunas_urbanas['COMUNA'].map(dict(zip(zones['Com'], zones['Comuna'])))\n", + "comunas_urbanas.plot(facecolor=\"none\", edgecolor=\"#abacab\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from aves.features.geo import clip_area_geodataframe\n", + "\n", + "bounding_box = zones.total_bounds\n", + "comunas_urbanas = clip_area_geodataframe(comunas_urbanas, zones.total_bounds, buffer=0.02)\n", + "comunas_urbanas.plot()\n", + "zones.plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from aves.features.utils import normalize_rows\n", + "\n", + "matrix = (\n", + " tabla[\n", + " (tabla[\"Proposito\"] != \"volver a casa\")\n", + " & (tabla[\"ComunaOrigen\"].isin(comunas_urbanas[\"NombreComuna\"]))\n", + " & (tabla[\"ComunaDestino\"].isin(comunas_urbanas[\"NombreComuna\"]))\n", + " & (tabla[\"ComunaOrigen\"] != tabla[\"ComunaDestino\"])\n", + " ]\n", + " .groupby([\"Proposito\", \"ComunaOrigen\", \"ComunaDestino\", \"ZonaOrigen\", \"ZonaDestino\"])\n", + " .agg(n_viajes=(\"Peso\", \"sum\"))\n", + " .sort_values(\"n_viajes\", ascending=False)\n", + " #.assign(cumsum_viajes=lambda x: x[\"n_viajes\"].cumsum() / x[\"n_viajes\"].sum())\n", + " #.pipe(lambda x: x[x[\"cumsum_viajes\"] <= 0.75])\n", + " .reset_index()\n", + ")\n", + "\n", + "matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "matrix['n_viajes'].sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "fixed_zones = zones.reset_index().dissolve(by='ID').reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from aves.models.network import Network\n", + "\n", + "network = Network.from_edgelist(\n", + " matrix[\n", + " (matrix[\"Proposito\"] == \"Al estudio\")\n", + " & (matrix[\"ZonaOrigen\"] != matrix[\"ZonaDestino\"])\n", + " & (matrix[\"ZonaOrigen\"].isin(fixed_zones[\"ID\"]))\n", + " & (matrix[\"ZonaDestino\"].isin(fixed_zones[\"ID\"]))\n", + " ].pipe(\n", + " lambda x: x.sort_values(\"n_viajes\", ascending=False)\n", + " .assign(cumsum_viajes=lambda x: x[\"n_viajes\"].cumsum() / x[\"n_viajes\"].sum())\n", + " .pipe(lambda x: x[x[\"n_viajes\"] >= x['n_viajes'].quantile(0.975)])\n", + " ),\n", + " source=\"ZonaOrigen\",\n", + " target=\"ZonaDestino\",\n", + " weight=\"n_viajes\",\n", + ")\n", + "network" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from aves.visualization.networks import NodeLink\n", + "\n", + "nodelink = NodeLink(network)\n", + "nodelink.layout_nodes(method=\"geographical\", geodataframe=fixed_zones, node_column=\"ID\")" + ] + }, + { + "cell_type": "code", + "execution_count": 203, + "metadata": {}, + "outputs": [], + "source": [ + "nodelink.set_node_drawing(\"plain\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from aves.visualization.figures import figure_from_geodataframe\n", + "\n", + "fig, ax = figure_from_geodataframe(zones, height=7)\n", + "\n", + "# contexto\n", + "zones.plot(ax=ax, facecolor='#efefef', edgecolor='white', zorder=0)\n", + "comunas_urbanas.plot(ax=ax, facecolor='none', edgecolor='#abacab', zorder=1)\n", + "\n", + "nodelink.plot(ax, nodes=dict(palette='PuRd', edgecolor='black', node_size=1, alpha=0.95), edges=dict(alpha=0.5), zorder=2)\n", + "\n", + "ax.set_title('Viajes al trabajo en Santiago (en días laborales, EOD 2012)')\n", + "\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [], + "source": [ + "from aves.features.geo import to_point_geodataframe\n", + "\n", + "origenes_viajes = to_point_geodataframe(\n", + " tabla, \"OrigenCoordX\", \"OrigenCoordY\", crs=\"epsg:32719\"\n", + ").to_crs(zones.crs)\n", + "\n", + "destinos_viajes = to_point_geodataframe(\n", + " tabla, \"DestinoCoordX\", \"DestinoCoordY\", crs=\"epsg:32719\"\n", + ").to_crs(zones.crs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from aves.models.grid import H3Grid\n", + "\n", + "hex_grid = H3Grid.from_geodf(zones, grid_level=7, extra_margin=0.025)\n", + "hex_grid.geodf" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "activities = {'Al estudio': 'Recurrentes',\n", + " 'Al trabajo': 'Recurrentes',\n", + " 'Por trabajo': 'Recurrentes',\n", + " 'Por estudio': 'Recurrentes',\n", + " 'volver a casa': 'N/A',\n", + " 'De compras': 'Mantención',\n", + " 'Trámites': 'Mantención',\n", + " 'De salud': 'Salud',\n", + " 'Buscar o Dejar a alguien': 'Discrecional',\n", + " 'Visitar a alguien': 'Discrecional',\n", + " 'Recreación': 'Discrecional',\n", + " 'Otra actividad (especifique)': 'Discrecional',\n", + " 'Comer o Tomar algo': 'Discrecional',\n", + " 'Buscar o dejar algo': 'Discrecional'}\n", + "\n", + "activities\n" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "metadata": {}, + "outputs": [], + "source": [ + "grid_destinos = (\n", + " gpd.sjoin(destinos_viajes, hex_grid.geodf, predicate=\"within\")\n", + " .pipe(lambda x: x[x[\"Proposito\"].isin(activities.keys())])\n", + " .assign(actividad=lambda x: x[\"Proposito\"].map(activities))\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from aves.features.utils import logodds_ratio_with_uninformative_dirichlet_prior\n", + "\n", + "grid_actividades = (\n", + " grid_destinos.groupby([\"actividad\", \"h3_cell_id\"])[\"Peso\"]\n", + " .sum()\n", + " .unstack()\n", + " .T.fillna(0)\n", + " .pipe(logodds_ratio_with_uninformative_dirichlet_prior)\n", + ")\n", + "\n", + "grid_actividades" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from aves.visualization.figures import small_multiples_from_geodataframe\n", + "from aves.visualization.maps import choropleth_map\n", + "\n", + "fig, axes = small_multiples_from_geodataframe(zones, 4)\n", + "\n", + "activity_order = ['Recurrentes', 'Mantención', 'Salud', 'Discrecional']\n", + "\n", + "for ax, col in zip(axes, activity_order):\n", + " choropleth_map(ax, hex_grid.geodf.join(grid_actividades, on='h3_cell_id'),col, k=7, binning='fisher_jenks')\n", + " ax.set_title(col)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fig, axes = plt.subplots(1, 4, figsize=(9, 4))\n", + "\n", + "for ax, col in zip(axes, activity_order):\n", + " sns.histplot(data=grid_destinos[grid_destinos['actividad'] == col], x='TiempoViaje', weights='Peso', stat='density', bins=[0, 15, 30, 45, 60, 90, 120], ax=ax)\n", + " ax.set_title(col)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.gridspec as gridspec\n", + "\n", + "h_space = 0.1\n", + "w_space = 0.05\n", + "\n", + "bounding_box = zones.total_bounds\n", + "map_aspect_ratio = (bounding_box[2] - bounding_box[0]) / (bounding_box[3] - bounding_box[1])\n", + "\n", + "height_ratios = [1.0, 0.5]\n", + "\n", + "y_size = sum(height_ratios) * 3\n", + "x_size = map_aspect_ratio * len(activity_order) * 3\n", + "\n", + "\n", + "fig = plt.figure(figsize=(x_size, y_size))\n", + "fig.set_facecolor('#efefef')\n", + "\n", + "gs = gridspec.GridSpec(\n", + " len(height_ratios),\n", + " len(activity_order),\n", + " figure=fig,\n", + " hspace=0.05,\n", + " wspace=0.05,\n", + " height_ratios=height_ratios,\n", + ")\n", + "\n", + "for i, col in enumerate(activity_order):\n", + " fig.add_subplot(gs[0,i])\n", + " fig.add_subplot(gs[1,i])\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "height_ratios = [1.1, 0.3]\n", + "\n", + "y_size = sum(height_ratios) * 3\n", + "x_size = map_aspect_ratio * len(activity_order) * 3\n", + "\n", + "fig = plt.figure(figsize=(x_size, y_size))\n", + "fig.set_facecolor('#efefef')\n", + "\n", + "gs = gridspec.GridSpec(\n", + " len(height_ratios),\n", + " len(activity_order),\n", + " figure=fig,\n", + " hspace=0.05,\n", + " wspace=0.05,\n", + " height_ratios=height_ratios,\n", + ")\n", + "\n", + "for i, col in enumerate(activity_order):\n", + " ax_map = fig.add_subplot(gs[0,i])\n", + " ax_hist = fig.add_subplot(gs[1,i])\n", + "\n", + " choropleth_map(ax_map, hex_grid.geodf.join(grid_actividades, on='h3_cell_id'),col, k=7, binning='fisher_jenks', edgecolor='none', cbar_args=dict(\n", + " label=\"log-odds\",\n", + " height=\"60%\",\n", + " width=\"4%\",\n", + " orientation=\"vertical\",\n", + " location=\"center left\",\n", + " label_size=\"small\",\n", + " bbox_to_anchor=(0.0, 0.0, 0.9, 1.0),\n", + " ))\n", + " ax_map.set_title(col)\n", + " ax_map.set_aspect('equal')\n", + " ax_map.set_axis_off()\n", + "\n", + " _xlim = ax_map.get_xlim()\n", + " _ylim = ax_map.get_ylim()\n", + "\n", + " # geopandas cambia los límites\n", + " comunas.plot(ax=ax_map, edgecolor='grey', facecolor='none', linewidth=0.5)\n", + "\n", + " ax_map.set_xlim(_xlim)\n", + " ax_map.set_ylim(_ylim)\n", + "\n", + " sns.histplot(data=grid_destinos[grid_destinos['actividad'] == col], x='TiempoViaje', weights='Peso', stat='density', bins=[0, 15, 30, 45, 60, 90, 120], ax=ax_hist)\n", + " ax_hist.set_ylim([0, 0.025])\n", + "\n", + " ax_hist.set_xticks([0, 15, 30, 45, 60, 90, 120])\n", + "\n", + " if i > 0:\n", + " ax_hist.set_yticklabels([])\n", + " ax_hist.set_ylabel('')\n", + "\n", + " #break\n", + " \n", + "\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 222, + "metadata": {}, + "outputs": [], + "source": [ + "grid_destinos[\"ModoAgregado\"] = grid_destinos[\"ModoDifusion\"].map(\n", + " {\n", + " \"Taxi\": \"Taxi\",\n", + " \"Bip! - Otros Privado\": \"Público\",\n", + " \"Bip!\": \"Público\",\n", + " \"Bip! - Otros Público\": \"Público\",\n", + " \"Taxi Colectivo\": \"Taxi\",\n", + " \"Bicicleta\": \"Activo\",\n", + " \"Caminata\": \"Activo\",\n", + " \"Auto\": \"Auto\",\n", + " \"Otros\": \"Otros\",\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "modo_x_actividad = grid_destinos.groupby([\"actividad\", \"ModoAgregado\", \"Sexo\"])[\"Peso\"].sum().unstack(\n", + " fill_value=0\n", + ").astype(int)\n", + "\n", + "modo_x_actividad.loc[\"Recurrentes\"].sort_index(ascending=False).plot(\n", + " kind=\"barh\", stacked=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "height_ratios = [1.2, 0.3, 0.5]\n", + "\n", + "y_size = sum(height_ratios) * 3\n", + "x_size = map_aspect_ratio * len(activity_order) * 3\n", + "\n", + "fig = plt.figure(figsize=(x_size, y_size))\n", + "fig.set_facecolor('#efefef')\n", + "\n", + "gs = gridspec.GridSpec(\n", + " len(height_ratios),\n", + " len(activity_order),\n", + " figure=fig,\n", + " hspace=0.15,\n", + " wspace=0.05,\n", + " height_ratios=height_ratios,\n", + ")\n", + "\n", + "for i, col in enumerate(activity_order):\n", + " ax_map = fig.add_subplot(gs[0,i])\n", + " ax_hist = fig.add_subplot(gs[1,i])\n", + "\n", + " choropleth_map(ax_map, hex_grid.geodf.join(grid_actividades, on='h3_cell_id'),col, k=7, binning='fisher_jenks', edgecolor='none', cbar_args=dict(\n", + " label=\"log-odds\",\n", + " height=\"60%\",\n", + " width=\"4%\",\n", + " orientation=\"vertical\",\n", + " location=\"center left\",\n", + " label_size=\"small\",\n", + " bbox_to_anchor=(0.0, 0.0, 0.9, 1.0),\n", + " ))\n", + " ax_map.set_title(col)\n", + " ax_map.set_aspect('equal')\n", + " ax_map.set_axis_off()\n", + "\n", + " _xlim = ax_map.get_xlim()\n", + " _ylim = ax_map.get_ylim()\n", + "\n", + " # geopandas cambia los límites\n", + " comunas.plot(ax=ax_map, edgecolor='grey', facecolor='none', linewidth=0.5)\n", + "\n", + " ax_map.set_xlim(_xlim)\n", + " ax_map.set_ylim(_ylim)\n", + "\n", + " sns.histplot(data=grid_destinos[grid_destinos['actividad'] == col], x='TiempoViaje', weights='Peso', stat='density', bins=[0, 15, 30, 45, 60, 90, 120], ax=ax_hist)\n", + " ax_hist.set_ylim([0, 0.025])\n", + "\n", + " ax_hist.set_xticks([0, 15, 30, 45, 60, 90, 120])\n", + " ax_hist.set_xlabel('Tiempo de viaje [minutos]')\n", + " ax_hist.tick_params(top=True, labeltop=True, bottom=False, labelbottom=False)\n", + "\n", + " if i > 0:\n", + " ax_hist.set_yticklabels([])\n", + " ax_hist.set_ylabel('')\n", + "\n", + " #break\n", + "\n", + " ax_modes = fig.add_subplot(gs[2,i])\n", + "\n", + " modo_x_actividad.loc[col].sort_index(ascending=False).div(1000).plot(\n", + " kind=\"barh\", stacked=True, ax=ax_modes, legend=(i == (len(activity_order) - 1)), edgecolor='none'\n", + " )\n", + "\n", + " if i > 0:\n", + " ax_modes.set_ylabel('')\n", + " ax_modes.set_yticklabels([])\n", + "\n", + " ax_modes.set_xlabel('# de viajes [miles]')\n", + " \n", + "fig.tight_layout()\n", + "fig.align_ylabels()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "aves", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}