Skip to content

Commit

Permalink
Support data input as DataFrame in Clustergram. (#478)
Browse files Browse the repository at this point in the history
* Write test for dataframe input of Clustergram

* Support data input as DataFrame in Clustergram

* Add integration test for Clustergram reading in dataframes

* Lint and standardize imports

* Log enhancement for upcoming release

* Update version number in package.

* Update autogenerated files.

* Update CHANGELOG with release date.

Co-authored-by: Shammamah Hossain  <[email protected]>
  • Loading branch information
mkcor and Shammamah Hossain authored Feb 21, 2020
1 parent 431f745 commit 9523a2c
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 24 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

## [0.4.7] - 2020-02-21
### Added
* [#478](https://github.com/plotly/dash-bio/pull/478) Added support for
passing a pandas DataFrame as Clustergram input.

## [0.4.6] - 2020-01-07
### Fixed
* [#458](https://github.com/plotly/dash-bio/pull/458) Fixed reordering bug of
Expand Down
2 changes: 1 addition & 1 deletion dash_bio/bundle.js

Large diffs are not rendered by default.

11 changes: 8 additions & 3 deletions dash_bio/component_factory/_clustergram.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from random import shuffle

import numpy as np
import pandas as pd
import scipy
import scipy.cluster.hierarchy as sch
import scipy.spatial as scs
Expand All @@ -13,7 +14,7 @@

# pylint: disable=assignment-from-no-return, no-self-use
def Clustergram(
data=None,
data,
generate_curves_dict=False,
return_computed_traces=False,
computed_traces=None,
Expand Down Expand Up @@ -49,7 +50,8 @@ def Clustergram(
Keyword arguments:
- data (ndarray; required): Matrix of observations as array of arrays
- data (2D array-like; required): Matrix or table of observations. If a
DataFrame is passed, columns of non-numeric dtype are dropped.
- generate_curves_dict (bool; default False): Whether or not to return a
dictionary containing information about the cluster number
associated with each curve number in the graph. (May be useful
Expand Down Expand Up @@ -200,7 +202,7 @@ class _Clustergram:

def __init__(
self,
data=None,
data,
row_labels=None,
column_labels=None,
hidden_labels=None,
Expand Down Expand Up @@ -234,6 +236,9 @@ def __init__(
See docstring of the `Clustergram` function, where the same keyword arguments (and a couple
of other ones) are documented.
"""
if isinstance(data, pd.DataFrame):
data = data.select_dtypes('number')
data = data.values
if hidden_labels is None:
hidden_labels = []
if color_threshold is None:
Expand Down
2 changes: 1 addition & 1 deletion dash_bio/package-info.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"name": "dash_bio", "version": "0.4.6", "author": "The Plotly Team <[email protected]>"}
{"name": "dash_bio", "version": "0.4.7", "author": "The Plotly Team <[email protected]>"}
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "dash-bio",
"version": "0.4.6",
"version": "0.4.7",
"description": "Dash components for bioinformatics",
"repository": {
"type": "git",
Expand Down
33 changes: 30 additions & 3 deletions tests/integration/test_clustergram.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
import pandas
import json
import pandas as pd

import dash
import dash_bio
import dash_html_components as html
import dash_bio

from common_features import nested_component_layout, \
nested_component_app_callback

_data = None

_mtcars_data = pandas.read_csv(
_mtcars_data = pd.read_csv(
'tests/dashbio_demos/dash-clustergram/data/mtcars.tsv',
delimiter='\t',
skiprows=4
Expand Down Expand Up @@ -185,3 +185,30 @@ def test_dbcl005_row_annotations(dash_duo):
# the annotation is the correct color
dash_duo.wait_for_style_to_equal(
'g.subplot.x6y6 g.plot g.lines > path', 'stroke', 'rgb(248, 62, 199)')


def test_dbcl006_df_input_row_cluster(dash_duo):
# Integration test: build a Clustergram layout directly from the mtcars
# DataFrame, then set the 'cluster' prop to 'row' and inspect the
# rendered subplots.

app = dash.Dash(__name__)

# run the same test as dbcl002 (row clustering) where table of
# observations (data argument) is left as a DataFrame
assert isinstance(_mtcars_data, pd.DataFrame)
app.layout = html.Div(nested_component_layout(
dash_bio.Clustergram(
data=_mtcars_data
)
))

# NOTE(review): the callback helper is given `_data`, not `_mtcars_data`;
# confirm that this still exercises the DataFrame input path.
nested_component_app_callback(
app,
dash_duo,
component=dash_bio.Clustergram,
component_data=_data,
test_prop_name='cluster',
test_prop_value='row',
prop_value_type='string'
)

# Expect no 'g.subplot.x2y2' element and exactly one 'g.subplot.x4y4'
# element; presumably these are the column and row dendrogram subplots --
# TODO confirm against dbcl002.
assert len(dash_duo.find_elements('g.subplot.x2y2')) == 0
assert len(dash_duo.find_elements('g.subplot.x4y4')) == 1
48 changes: 33 additions & 15 deletions tests/unit/test_clustergram.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,51 @@
import numpy as np
import pandas as pd

from dash_bio import Clustergram

# Shared input fixture: six rows of four values, alternating between rows of
# 1's and rows of 3's.
DATA = np.array(
[[1, 1, 1, 1],
[3, 3, 3, 3],
[1, 1, 1, 1],
[3, 3, 3, 3],
[1, 1, 1, 1],
[3, 3, 3, 3]]
)
# Expected heatmap 'z' values for DATA once rows are clustered: the three rows
# of 1's followed by the three rows of 3's.
CLUSTERED_DATA = np.array(
[[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1],
[3, 3, 3, 3],
[3, 3, 3, 3],
[3, 3, 3, 3]]
)


def test_cluster_rows():
"""Test that rows of 1's and 3's are properly clustered."""
data = np.array(
[[1, 1, 1, 1],
[3, 3, 3, 3],
[1, 1, 1, 1],
[3, 3, 3, 3],
[1, 1, 1, 1],
[3, 3, 3, 3]]
)

data = DATA
_, _, curves_dict = Clustergram(
data,
generate_curves_dict=True,
return_computed_traces=True,
center_values=False
)
clustered_data = CLUSTERED_DATA

assert np.array_equal(curves_dict['heatmap']['z'], clustered_data)


def test_read_dataframe():
"""Test that input data can be in a dataframe."""

clustered_data = np.array(
[[1, 1, 1, 1],
[1, 1, 1, 1],
[1, 1, 1, 1],
[3, 3, 3, 3],
[3, 3, 3, 3],
[3, 3, 3, 3]]
data = pd.DataFrame(DATA)
_, _, curves_dict = Clustergram(
data,
generate_curves_dict=True,
return_computed_traces=True,
center_values=False
)
clustered_data = CLUSTERED_DATA

assert np.array_equal(curves_dict['heatmap']['z'], clustered_data)

0 comments on commit 9523a2c

Please sign in to comment.