Skip to content

Commit

Permalink
working environment for phenotypes
Browse files Browse the repository at this point in the history
  • Loading branch information
markwilkinson committed Aug 7, 2024
1 parent 9a6bdfe commit 0313cb5
Showing 1 changed file with 82 additions and 31 deletions.
113 changes: 82 additions & 31 deletions content/FLAIR-GG/phenotypefrequency.ipynb
Original file line number Diff line number Diff line change
@@ -1,20 +1,13 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "039f7a57-a580-4d29-b126-25b51f67140e",
"metadata": {},
"source": [
"# Results explorer for the EJP Counting Case\n",
"<img src = \"https://vp.bgv.cbgp.upm.es/images/flair-gg-logo.png\" width = 700/>"
]
},
{
"cell_type": "markdown",
"id": "ed7ed56c-0088-4335-8d0c-7b9fdf2def85",
"metadata": {},
"source": [
"## Welcome to the FLAIR-GG Counting Case Analytics Notebook\n",
"## Welcome to the FLAIR-GG Phenotype Frequency Analytics Notebook\n",
"\n",
"The Phenotype Frequency Data Service counts how many patients in a registry have each phenotype (a phenotype is counted at most once per patient).\n",
"\n",
"Please run the first cell to set up the analytics environment\n",
"\n",
Expand Down Expand Up @@ -71,25 +64,25 @@
"# type,frequency\n",
"# http://purl.obolibrary.org/obo/NCIT_C131922,31\n",
"# http://purl.obolibrary.org/obo/NCIT_C136154,22\n",
"# http://www.orpha.net/ORDO/Orphanet_98896,32\n",
"# http://purl.obolibrary.org/obo/NCIT_C20197,76\n",
"# http://purl.obolibrary.org/obo/NCIT_C68615,87\n",
"# http://purl.obolibrary.org/obo/NCIT_C83164,87\n",
"# http://purl.obolibrary.org/obo/NCIT_C13306,24\n",
"\n",
"response = requests.get(url)\n",
"response = json.loads(response.content)\n",
"# print(response)\n",
"site = [\"sample1\", \"sample2\"]\n",
"count = [100, 3]\n",
"#print(response)\n",
"\n",
"data = {}\n",
"for provider in response.keys():\n",
" print(\"Provider: {}\".format(provider))\n",
" data = response[provider]\n",
" alllines = response[provider]\n",
" data[provider] = []\n",
"\n",
" data = data.splitlines().pop()\n",
" lines = alllines.splitlines() # this is the CSV\n",
" lines.pop(0) # get rid of header\n",
" # print(data)\n",
" # Parse each remaining CSV row (\"<phenotype IRI>,<frequency>\") into [phenotype, count].\n",
" for line in lines:\n",
"     if not line.strip():\n",
"         continue  # skip blank/trailing lines instead of failing the unpack below\n",
"     # Split on the last comma only: the final field is always the count.\n",
"     pheno, freq = line.rsplit(\",\", 1)\n",
"     # Store the count as an int so downstream 'Frequency:Q' encodings get real numbers;\n",
"     # a non-numeric count raises ValueError rather than being plotted as text.\n",
"     data[provider].append([pheno.strip(), int(freq)])\n",
"\n",
" site.append(provider)\n",
" count.append(int(data))\n",
"# print(data)\n",
"print(\"DONE\")"
]
},
Expand All @@ -100,15 +93,73 @@
"metadata": {},
"outputs": [],
"source": [
"source = pd.DataFrame({\n",
" 'site': site,\n",
" 'count': count\n",
"})\n",
"\n",
"alt.Chart(source).mark_bar().encode(\n",
" x='count',\n",
" y='site'\n",
")"
"# Flatten {provider: [[phenotype, frequency], ...]} into one record per (provider, phenotype).\n",
"data_list = [\n",
"    {\"Provider\": provider, \"Phenotype\": phenotype, \"Frequency\": frequency}\n",
"    for provider, phenotypes in data.items()\n",
"    for phenotype, frequency in phenotypes\n",
"]\n",
"\n",
"# Create a DataFrame. Explicit columns keep it well-formed even when no provider returned rows.\n",
"df = pd.DataFrame(data_list, columns=[\"Provider\", \"Phenotype\", \"Frequency\"])\n",
"# Frequency originates from CSV text; make sure it is numeric for the ':Q' chart encodings.\n",
"df[\"Frequency\"] = pd.to_numeric(df[\"Frequency\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ac295f7c",
"metadata": {},
"outputs": [],
"source": [
"# Simple bar chart: one facet column per provider, one bar per phenotype.\n",
"bar_chart = alt.Chart(\n",
"    df,\n",
"    title='Phenotype Frequencies by Provider'\n",
").mark_bar().encode(\n",
"    alt.X('Phenotype:N'),\n",
"    alt.Y('Frequency:Q'),\n",
"    alt.Color('Provider:N'),\n",
"    alt.Column('Provider:N')\n",
")\n",
"\n",
"bar_chart"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "86bee1fb",
"metadata": {},
"outputs": [],
"source": [
"# Grouped bar chart: same data as the simple chart, with explicit axis/legend titles.\n",
"# NOTE(review): faceting by column yields the same layout as the simple bar chart;\n",
"# true side-by-side grouping would need an x-offset channel - confirm intent.\n",
"grouped_bar_chart = alt.Chart(\n",
"    df,\n",
"    title='Grouped Phenotype Frequencies by Provider'\n",
").mark_bar().encode(\n",
"    alt.X(field='Phenotype', type='nominal', title='Phenotype'),\n",
"    alt.Y(field='Frequency', type='quantitative', title='Frequency'),\n",
"    alt.Color(field='Provider', type='nominal', title='Provider'),\n",
"    alt.Column(field='Provider', type='nominal', title='Provider')\n",
")\n",
"\n",
"grouped_bar_chart"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "54916d07",
"metadata": {},
"outputs": [],
"source": [
"# Heatmap: providers on the rows, phenotypes on the columns, colour = frequency.\n",
"heatmap = alt.Chart(\n",
"    df,\n",
"    title='Phenotype Frequencies Heatmap'\n",
").mark_rect().encode(\n",
"    alt.X('Phenotype:N'),\n",
"    alt.Y('Provider:N'),\n",
"    alt.Color('Frequency:Q', scale=alt.Scale(scheme='viridis')),\n",
"    tooltip=['Provider', 'Phenotype', 'Frequency']\n",
")\n",
"\n",
"heatmap"
]
},
{
Expand Down

0 comments on commit 0313cb5

Please sign in to comment.