Skip to content

Commit

Permalink
Scripts and images added
Browse files Browse the repository at this point in the history
  • Loading branch information
haavardaagesen committed May 28, 2021
1 parent a4225ce commit 105aac8
Show file tree
Hide file tree
Showing 66 changed files with 3,841 additions and 0 deletions.
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,22 @@
# twitter-nordic-cross-border-mobility
Processing and analysis steps used in MSc thesis "Understanding Functional Cross-border Regions from Twitter Data in the Nordics".

## Usage

Data used in this thesis is not available in this repo. The Twitter data were collected using the Twitter API; to collect this data, look at the tool *tweetsearcher* created by Tuomo Väisänen: [https://github.com/DigitalGeographyLab/tweetsearcher](https://github.com/DigitalGeographyLab/tweetsearcher).

Data pre-processing and cleaning is found in *nordic_line_creation.py*, *nordic_data_cleaning.py*, and regions are assigned in *nordic_assign_regions.py*.

The Jupyter Notebook *nordic_cross_border_mobility.ipynb* lists out the steps of data exploration, temporal analysis, correlation analysis, and connectedness explorations.

General maps are created with *all_points_maps.py* and *all_lines_maps.py*. The calculations of functional cross-border regions can be found in *functional_area_maps.py*.

Images from the thesis are found in the *imgs* folder.

## Acknowledgements

The thesis is conducted in cooperation with the [Digital Geography Lab](https://www2.helsinki.fi/en/researchgroups/digital-geography-lab) at the University of Helsinki and as a part of the *BORDERSPACE -- Tracing Interactions and Mobilities Beyond State Borders: Towards New Transnational Spaces* [project](https://www2.helsinki.fi/en/researchgroups/digital-geography-lab/mobilities-and-interactions-of-people-crossing-state-borders-big-data-to-reveal-transnational-people-and-spaces).

Collection of data is based upon the tool *tweetsearcher* created by Tuomo Väisänen: [https://github.com/DigitalGeographyLab/tweetsearcher](https://github.com/DigitalGeographyLab/tweetsearcher)

Methodology builds upon the work of Samuli Massinen and his repo: [https://github.com/DigitalGeographyLab/cross-border-mobility-twitter](https://github.com/DigitalGeographyLab/cross-border-mobility-twitter)
175 changes: 175 additions & 0 deletions all_lines_maps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
import contextily as ctx
import csv
from datetime import datetime
import db_connection as db_con
import geojson
import geopandas as gpd
import folium
import io
from io import StringIO
import json
from matplotlib import rcParams
import matplotlib as mpl
import matplotlib.pyplot as plt
import multiprocessing as mp
from multiprocessing import Pool
import numpy as np
import operator
import os
import pandas as pd
import psycopg2
import psycopg2.extras as extras
from shapely import wkt
from shapely.geometry import Point, LineString, Polygon
from sqlalchemy import create_engine, func, distinct
import sys
import tempfile
# Record wall-clock start time; the elapsed time is printed at the end of the script.
starttime = datetime.now()

def read_sql_inmem_uncompressed(query, db_engine):
    """Run *query* via PostgreSQL COPY and return the result as a DataFrame.

    Streams the query output as CSV into an in-memory buffer, which is much
    faster than row-by-row fetching for large result sets.

    Args:
        query: SQL SELECT statement to execute.
        db_engine: SQLAlchemy engine connected to the target database.

    Returns:
        pandas.DataFrame holding the full query result.
    """
    copy_sql = "COPY ({query}) TO STDOUT WITH CSV {head}".format(query=query, head="HEADER")
    conn = db_engine.raw_connection()
    try:
        cur = conn.cursor()
        try:
            store = io.StringIO()
            cur.copy_expert(copy_sql, store)
            store.seek(0)
            df = pd.read_csv(store)
        finally:
            cur.close()
    finally:
        # raw_connection() hands out a connection outside the engine's normal
        # lifecycle; it was previously never closed (connection leak).
        conn.close()
    return df
# Import credential and connection settings
# Build an SQLAlchemy engine from the credentials stored in db_connection.
db_name = db_con.db_name
db_username = db_con.db_username
db_host = db_con.db_host
db_port = db_con.db_port
db_pwd = db_con.db_pwd
engine_string = f"postgresql://{db_username}:{db_pwd}@{db_host}:{db_port}/{db_name}"
db_engine = create_engine(engine_string)


# SQL-query
query = 'SELECT id, row_id, geometry,cb_move FROM pre_covid_lines'
# Read data to dataframe
# NOTE(review): this calls the copy in db_connection, not the identical local
# read_sql_inmem_uncompressed defined above (which is therefore unused here).
pre_lines = db_con.read_sql_inmem_uncompressed(query, db_engine)
# Apply wkt
# Geometry arrives as WKT text from the database; parse into shapely objects.
pre_lines['geometry'] = pre_lines['geometry'].apply(wkt.loads)
# Convert to GeoDataFrame
pre_lines_gdf = gpd.GeoDataFrame(pre_lines, geometry='geometry')
# CRS
# Source coordinates are WGS84; reproject to ETRS89-LAEA (EPSG:3035) for mapping.
pre_lines_gdf.crs = "EPSG:4326"
pre_lines_gdf = pre_lines_gdf.to_crs(epsg=3035)
# Delete dataframe
# Free the plain DataFrame; only the GeoDataFrame is needed from here on.
del pre_lines
# Plot every pre-COVID movement line on a CartoDB Positron basemap.
fig, ax = plt.subplots(ncols = 1, figsize=(20,16))
pre_lines_gdf.to_crs(epsg=3035).plot(ax=ax, color='green', edgecolor='black', linewidth=0.3, alpha=0.5) # 2 - Projected plot

ax.axis("off")
ctx.add_basemap(ax,crs=pre_lines_gdf.crs.to_string(), source=ctx.providers.CartoDB.Positron)

plt.tight_layout()
plt.savefig('imgs/nordic_all_pre_lines.png', dpi=100)
#del pre_lines_gdf

# Pre Heatmaps
# Same lines with very low alpha so overlapping routes build up a heat effect.
fig, ax = plt.subplots(ncols = 1, figsize=(20,16))
pre_lines_gdf.to_crs(epsg=3035).plot(ax=ax, color='blue', edgecolor='black', linewidth=0.2, alpha=0.01) # 2 - Projected plot

ax.axis("off")
ctx.add_basemap(ax,crs=pre_lines_gdf.crs.to_string(), source=ctx.providers.CartoDB.Positron)

plt.tight_layout()
plt.savefig('imgs/nordic_all_pre_lines_heat.png', dpi=100)

# COVID-19 data

# SQL-query
query = 'SELECT id, row_id, geometry, cb_move FROM post_covid_lines'
# Read data to dataframe
post_lines = db_con.read_sql_inmem_uncompressed(query, db_engine)
# Apply wkt
# Parse WKT text into shapely geometries.
post_lines['geometry'] = post_lines['geometry'].apply(wkt.loads)
# Convert to GeoDataFrame
post_lines_gdf = gpd.GeoDataFrame(post_lines, geometry='geometry')
# CRS
# WGS84 input, reprojected to ETRS89-LAEA (EPSG:3035) for mapping.
post_lines_gdf.crs = "EPSG:4326"
post_lines_gdf = post_lines_gdf.to_crs(epsg=3035)
# Delete dataframe
del post_lines

# Plot all COVID-period lines on a CartoDB Positron basemap.
fig, ax = plt.subplots(ncols=1, figsize=(20, 16))
post_lines_gdf.to_crs(epsg=3035).plot(ax=ax, color='green', edgecolor='black', linewidth=0.3, alpha=0.5)  # 2 - Projected plot
# Use the plotted frame's own CRS (previously referenced pre_lines_gdf by
# mistake; output is unchanged since both frames are EPSG:3035).
ctx.add_basemap(ax, crs=post_lines_gdf.crs.to_string(), source=ctx.providers.CartoDB.Positron)

ax.axis("off")
plt.tight_layout()
plt.savefig('imgs/nordic_all_post_lines.png', dpi=100)


fig, axes = plt.subplots(ncols = 2, figsize=(20,16))

ax11 = axes[0]
ax12 = axes[1]

pre_lines_gdf.to_crs(epsg=3035).plot(ax=ax11, color='green', edgecolor='black', linewidth=0.3, alpha=0.5) # 2 - Projected plot
post_lines_gdf.to_crs(epsg=3035).plot(ax=ax12, color='green', edgecolor='black', linewidth=0.3, alpha=0.5) # 2 - Projected plot

ctx.add_basemap(ax11,crs=pre_lines_gdf.crs.to_string(), source=ctx.providers.CartoDB.Positron)
ctx.add_basemap(ax12,crs=pre_lines_gdf.crs.to_string(), source=ctx.providers.CartoDB.Positron)

ax11.axis("off")
ax12.axis("off")
ax11.set_title('Pre-COVID-19',)
ax12.set_title('COVID-19',)
plt.tight_layout()
plt.savefig('imgs/all_lines.png', dpi=100)

# Plot All Travels Heatmap
# Overlay both periods with very low alpha to form a combined density map.
fig, ax = plt.subplots(ncols = 1, figsize=(20,16))

pre_lines_gdf.to_crs(epsg=3035).plot(ax=ax, color='blue', edgecolor='black', linewidth=0.2, alpha=0.01) # 2 - Projected plot
post_lines_gdf.to_crs(epsg=3035).plot(ax=ax, color='blue', edgecolor='black', linewidth=0.2, alpha=0.01) # 2 - Projected plot
xlim = ([pre_lines_gdf.total_bounds[0], pre_lines_gdf.total_bounds[2]])

# Cap the northern (y) extent at a fixed EPSG:3035 coordinate — presumably to
# crop the sparse far-north area; TODO confirm the chosen latitude cut-off.
ylim = ([pre_lines_gdf.total_bounds[1], 5416499.996586122])
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ctx.add_basemap(ax,crs=pre_lines_gdf.crs.to_string(), source=ctx.providers.CartoDB.Positron)

ax.axis("off")
ax.set_title('Heatmap All Travels Nordic Region',)
plt.tight_layout()
plt.savefig('imgs/all_lines_heat.png', dpi=100)

# Map ISO 3166 country codes to the display names used in plot titles.
country_dict = {'FI':'Finland','DK':'Denmark','IS':'Iceland','NO':'Norway','SE':'Sweden'}

def country_heatmap(country):
    """Plot a heatmap of all travel lines to/from one country and save it.

    Selects every pre- and post-COVID line whose ``cb_move`` field mentions
    *country*, overlays them with very low alpha so density builds up a heat
    effect, and writes the figure to ``imgs/heatmaps/<country>_heatmap.png``.

    Args:
        country: two-letter country code; must be a key of ``country_dict``.
    """
    country_name = country_dict[country]
    all_pre_lines = pre_lines_gdf[pre_lines_gdf['cb_move'].str.contains(country)]
    all_post_lines = post_lines_gdf[post_lines_gdf['cb_move'].str.contains(country)]
    # pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4
    # and removed in pandas 2.0; the result is the same stacked frame.
    all_lines = pd.concat([all_pre_lines, all_post_lines])
    fig, ax = plt.subplots(ncols=1, figsize=(20, 16))
    all_lines.to_crs(epsg=3035).plot(ax=ax, color='blue', edgecolor='black', linewidth=0.2, alpha=0.01)  # 2 - Projected plot
    xlim = ([all_lines.total_bounds[0], all_lines.total_bounds[2]])
    # Cap the northern (y) extent at a fixed EPSG:3035 coordinate — presumably
    # to crop the sparse far-north area; TODO confirm the cut-off.
    ylim = ([all_lines.total_bounds[1], 5416499.996586122])
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ctx.add_basemap(ax, crs=all_lines.crs.to_string(), source=ctx.providers.CartoDB.Positron)
    ax.axis("off")
    title_string = f"Heatmap All Travels To and From {country_name}"
    ax.set_title(title_string,)
    plt.tight_layout()
    file_string = f"{country}_heatmap.png"
    plt.savefig(f'imgs/heatmaps/{file_string}', transparent=True, dpi=100)
    print(f'{country_name} heatmap created')

# Country codes to render heatmaps for.
country_heatmap_list = ['FI','DK','IS','NO','SE']

# NOTE(review): unused wrapper — the pool below maps country_heatmap directly.
def heatmap_multi(setting):
    country_heatmap(setting)

# Render the five country heatmaps in parallel worker processes.
# NOTE(review): there is no `if __name__ == "__main__":` guard; under the
# "spawn" start method (Windows, macOS on Python 3.8+) each child re-imports
# this module and re-runs the whole script — confirm this only runs on a
# fork-based platform. A pool of 15 workers also exceeds the 5 tasks.
a_pool = mp.Pool(15)
result = a_pool.map(country_heatmap, country_heatmap_list)
a_pool.close()
a_pool.join()

print(f'Script took: {datetime.now()-starttime}')
105 changes: 105 additions & 0 deletions all_points_maps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import contextily as ctx
import csv
from datetime import datetime
import db_connection as db_con
import geojson
import geopandas as gpd
import folium
import io
from io import StringIO
import json
from matplotlib import rcParams
import matplotlib as mpl
import matplotlib.pyplot as plt
import multiprocessing as mp
from multiprocessing import Pool
import numpy as np
import operator
import os
import pandas as pd
import psycopg2
import psycopg2.extras as extras
from shapely import wkt
from shapely.geometry import Point, LineString, Polygon
from sqlalchemy import create_engine, func, distinct
import sys
import tempfile
# Record wall-clock start time; the elapsed time is printed at the end of the script.
starttime = datetime.now()

def read_sql_inmem_uncompressed(query, db_engine):
    """Run *query* via PostgreSQL COPY and return the result as a DataFrame.

    Streams the query output as CSV into an in-memory buffer, which is much
    faster than row-by-row fetching for large result sets.

    Args:
        query: SQL SELECT statement to execute.
        db_engine: SQLAlchemy engine connected to the target database.

    Returns:
        pandas.DataFrame holding the full query result.
    """
    copy_sql = "COPY ({query}) TO STDOUT WITH CSV {head}".format(query=query, head="HEADER")
    conn = db_engine.raw_connection()
    try:
        cur = conn.cursor()
        try:
            store = io.StringIO()
            cur.copy_expert(copy_sql, store)
            store.seek(0)
            df = pd.read_csv(store)
        finally:
            cur.close()
    finally:
        # raw_connection() hands out a connection outside the engine's normal
        # lifecycle; it was previously never closed (connection leak).
        conn.close()
    return df
# Import credential and connection settings
# Build an SQLAlchemy engine from the credentials stored in db_connection.
db_name = db_con.db_name
db_username = db_con.db_username
db_host = db_con.db_host
db_port = db_con.db_port
db_pwd = db_con.db_pwd
engine_string = f"postgresql://{db_username}:{db_pwd}@{db_host}:{db_port}/{db_name}"
db_engine = create_engine(engine_string)


# SQL-query
query = 'SELECT id, lon,lat FROM pre_covid'
# Read data to dataframe
# NOTE(review): this calls the copy in db_connection, not the identical local
# read_sql_inmem_uncompressed defined above (which is therefore unused here).
pre_points = db_con.read_sql_inmem_uncompressed(query, db_engine)

# Plot every pre-COVID point on a CartoDB Positron basemap.
fig, ax = plt.subplots(ncols = 1, figsize=(20,16))
# Build point geometries from the lon/lat columns (WGS84), then project to EPSG:3035.
pre_points_gdf = gpd.GeoDataFrame(pre_points, geometry=gpd.points_from_xy(pre_points['lon'], pre_points['lat']))
pre_points_gdf.crs = "EPSG:4326"
pre_points_gdf = pre_points_gdf.to_crs(epsg=3035)
pre_points_gdf.to_crs(epsg=3035).plot(ax=ax, color='green', edgecolor='black', linewidth=0.3, alpha=0.5) # 2 - Projected plot

ax.axis("off")
ctx.add_basemap(ax,crs=pre_points_gdf.crs.to_string(), source=ctx.providers.CartoDB.Positron)

plt.tight_layout()
plt.savefig('imgs/nordic_all_pre_points.png', dpi=100)

# Free the plain DataFrame; the GeoDataFrame is kept for the comparison plot below.
del pre_points

# Post-COVID points: same pipeline as above.
query = 'SELECT id, lon,lat FROM post_covid'
# Read data to dataframe
post_points = db_con.read_sql_inmem_uncompressed(query, db_engine)

fig, ax = plt.subplots(ncols = 1, figsize=(20,16))
# Build point geometries from the lon/lat columns (WGS84), then project to EPSG:3035.
post_points_gdf = gpd.GeoDataFrame(post_points, geometry=gpd.points_from_xy(post_points['lon'], post_points['lat']))
post_points_gdf.crs = "EPSG:4326"
post_points_gdf = post_points_gdf.to_crs(epsg=3035)
post_points_gdf.to_crs(epsg=3035).plot(ax=ax, color='green', edgecolor='black', linewidth=0.3, alpha=0.5) # 2 - Projected plot

# NOTE(review): basemap uses pre_points_gdf's CRS; harmless here since both
# frames are EPSG:3035, but post_points_gdf.crs is the frame actually plotted.
ctx.add_basemap(ax,crs=pre_points_gdf.crs.to_string(), source=ctx.providers.CartoDB.Positron)

ax.axis("off")


plt.tight_layout()
plt.savefig('imgs/nordic_all_post_points.png', dpi=100)

del post_points

# Side-by-side comparison: pre- vs post-COVID point distributions.
fig, axes = plt.subplots(ncols = 2, figsize=(20,16))

ax11 = axes[0]
ax12 = axes[1]

pre_points_gdf.to_crs(epsg=3035).plot(ax=ax11, color='green', edgecolor='black', linewidth=0.3, alpha=0.5) # 2 - Projected plot
post_points_gdf.to_crs(epsg=3035).plot(ax=ax12, color='green', edgecolor='black', linewidth=0.3, alpha=0.5) # 2 - Projected plot

# Both frames are EPSG:3035, so reusing pre_points_gdf's CRS for both basemaps is safe.
ctx.add_basemap(ax11,crs=pre_points_gdf.crs.to_string(), source=ctx.providers.CartoDB.Positron)
ctx.add_basemap(ax12,crs=pre_points_gdf.crs.to_string(), source=ctx.providers.CartoDB.Positron)

ax11.axis("off")
ax12.axis("off")
ax11.set_title('Pre-COVID-19',)
ax12.set_title('Post-COVID-19',)
plt.tight_layout()
plt.savefig('imgs/all_points.png', dpi=100)
print(f'Script took: {datetime.now()-starttime}')
5 changes: 5 additions & 0 deletions credentials_template.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Set up database credentials
# Template: copy to credentials.py (untracked) and fill in before running the scripts.
# NOTE(review): db_connection.py hardcodes db_port = 5432, so no port entry is needed here.
db_username = ''
db_pwd = ''
db_name = ''
db_host = ''
35 changes: 35 additions & 0 deletions db_connection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import credentials
import io
from io import StringIO
import pandas as pd
import psycopg2
import psycopg2.extras as extras
from sqlalchemy import create_engine, func, distinct


# Connection settings read from the local (untracked) credentials module.
db_username = credentials.db_username
db_pwd = credentials.db_pwd

db_name = credentials.db_name
db_host = credentials.db_host
# Default PostgreSQL port; not configurable via credentials.py.
db_port = 5432

# Raw psycopg2 connection, opened at import time for modules that use it directly.
psyco_con = psycopg2.connect(database=db_name, user=db_username, password=db_pwd,
                             host=db_host)


# SQLAlchemy engine for pandas/GeoPandas reads.
engine_string = f"postgresql://{db_username}:{db_pwd}@{db_host}:{db_port}/{db_name}"

db_engine = create_engine(engine_string)

def read_sql_inmem_uncompressed(query, db_engine):
    """Run *query* via PostgreSQL COPY and return the result as a DataFrame.

    Streams the query output as CSV into an in-memory buffer, which is much
    faster than row-by-row fetching for large result sets.

    Args:
        query: SQL SELECT statement to execute.
        db_engine: SQLAlchemy engine connected to the target database.

    Returns:
        pandas.DataFrame holding the full query result.
    """
    copy_sql = "COPY ({query}) TO STDOUT WITH CSV {head}".format(query=query, head="HEADER")
    conn = db_engine.raw_connection()
    try:
        cur = conn.cursor()
        try:
            store = io.StringIO()
            cur.copy_expert(copy_sql, store)
            store.seek(0)
            df = pd.read_csv(store)
        finally:
            cur.close()
    finally:
        # raw_connection() hands out a connection outside the engine's normal
        # lifecycle; it was previously never closed (connection leak).
        conn.close()
    return df

Loading

0 comments on commit 105aac8

Please sign in to comment.