Skip to content

Commit

Permalink
Adding at the root level for now; could consider moving to utilities/
Browse files Browse the repository at this point in the history
  • Loading branch information
misi9170 committed Jul 25, 2024
1 parent 0c68ac1 commit 3f9a2db
Showing 1 changed file with 141 additions and 0 deletions.
141 changes: 141 additions & 0 deletions flasc/flasc_dataframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
from pandas import DataFrame

# Create a new DataFrame subclass
class FlascDataFrame(DataFrame):
    """
    DataFrame subclass that stores data the way FLASC expects it.

    The frame is held with FLASC column names and in wide format. The optional
    ``name_map`` records how the user's column names translate to FLASC column
    names, so the frame can be renamed to the user's naming for export
    (``convert_to_user_format``) and back again (``convert_to_flasc_format``).
    Conversions between the wide layout and the long / semiwide layouts are
    declared below but not implemented yet.

    Args:
        *args: Positional arguments forwarded unchanged to ``DataFrame``.
        name_map: Optional ``dict`` mapping user column names (keys) to FLASC
            column names (values); both must be strings. When given, the
            columns are renamed to the FLASC names during construction.
        **kwargs: Keyword arguments forwarded unchanged to ``DataFrame``.

    Raises:
        ValueError: If ``name_map`` is not a dictionary, or if any of its keys
            or values is not a string.
    """

    # Attribute names listed here are carried over by pandas onto the frames
    # returned by its operations (e.g. ``drop``). Each one is given a starting
    # value in ``__init__`` so that it can always be read.
    _metadata = ["new_property", "name_map", "newnew_property"]

    def __init__(self, *args, name_map=None, **kwargs):
        super().__init__(*args, **kwargs)

        self._flasc = True
        # Prototype attribute; it is listed in _metadata above
        self.new_property = 23
        # Start as None so that reading the attribute before flasc_method()
        # has run returns None instead of raising AttributeError
        self.newnew_property = None

        self._user_format = "wide"  # or "long" or "semiwide"

        # Check that the name_map dictionary is valid
        if name_map is not None:
            if not isinstance(name_map, dict):
                raise ValueError("name_map must be a dictionary")
            if not all(isinstance(k, str) and isinstance(v, str) for k, v in name_map.items()):
                raise ValueError("name_map must be a dictionary of strings")
        self.name_map = name_map
        # Apply the name_map (does nothing when name_map is None)
        self.convert_to_flasc_format(inplace=True)

    def flasc_method(self):
        """Prototype method: print a message and set ``newnew_property`` to 20."""
        print("This is a method of the FlascDataFrame class")
        self.newnew_property = 20

    @property
    def _constructor(self):
        """Class pandas uses to build results, so they stay FlascDataFrames."""
        return FlascDataFrame

    def __str__(self):
        """Return the DataFrame string representation behind a FLASC banner line."""
        return "This is a FlascDataFrame!\n"+super().__str__()

    def convert_to_user_format(self, inplace=False):
        """
        Convert the DataFrame to the format that the user expects, given the name_map.

        Columns are renamed with the inverse of ``name_map`` (FLASC name ->
        user name).

        Args:
            inplace: If True, rename this frame's columns and return None;
                otherwise leave this frame untouched and return a renamed copy.

        Returns:
            The renamed copy, or None when ``inplace`` is True. Without a
            ``name_map`` the copy is returned with its columns unchanged.
        """
        if self.name_map is None:
            return None if inplace else self.copy()
        flasc_to_user = {v: k for k, v in self.name_map.items()}
        return self.rename(columns=flasc_to_user, inplace=inplace)

    def convert_to_flasc_format(self, inplace=False):
        """
        Convert the DataFrame to the format that FLASC expects.

        Columns are renamed with ``name_map`` (user name -> FLASC name).

        Args:
            inplace: If True, rename this frame's columns and return None;
                otherwise leave this frame untouched and return a renamed copy.

        Returns:
            The renamed copy, or None when ``inplace`` is True. Without a
            ``name_map`` the copy is returned with its columns unchanged.
        """
        if self.name_map is None:
            return None if inplace else self.copy()
        return self.rename(columns=self.name_map, inplace=inplace)

    def _convert_long_to_wide(self):
        """
        Convert a long format DataFrame to a wide format DataFrame.

        Not implemented yet; currently does nothing and returns None.
        """
        pass

    def _convert_semiwide_to_wide(self):
        """
        Convert a semiwide format DataFrame to a wide format DataFrame.

        Not implemented yet; currently does nothing and returns None.
        """
        pass

    def _convert_wide_to_long(self):
        """
        Convert a wide format DataFrame to a long format DataFrame.

        Not implemented yet; currently does nothing and returns None.
        """
        pass

    def _convert_wide_to_semiwide(self):
        """
        Convert a wide format DataFrame to a semiwide format DataFrame.

        Not implemented yet; currently does nothing and returns None.
        """
        pass

# Prototype checks. These will probably move into the test suite later, but
# keeping them next to the class is handy while it is being sketched out.
if __name__ == "__main__":
    frame = FlascDataFrame(
        {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]},
        name_map={"a":"AA"},
    )
    print(frame)
    print(frame.new_property)
    frame.flasc_method()  # sets newnew_property
    print(frame._flasc)
    print(frame._metadata)

    # A pandas operation on the frame should still hand back a FlascDataFrame
    print(type(frame))
    frame.new_property = 42
    frame.name_map = 6
    frame = frame.drop(columns="c")  # drop returns a copy of the frame
    print(frame)
    print(frame.new_property)
    print(frame.name_map)
    # Kept across the copy only because the name is listed in _metadata
    print(frame.newnew_property)

    # Exercise the rename helpers in both directions
    user_columns = {"AA": [1, 2, 3], "BB": [4, 5, 6], "CC": [7, 8, 9]}
    user_to_flasc = {"AA": "a", "BB": "b", "CC": "c"}
    frame = FlascDataFrame(user_columns, name_map=user_to_flasc)
    print(frame)
    user_copy = frame.convert_to_user_format()
    print(user_copy)
    frame.convert_to_user_format(inplace=True)
    print(frame)

    # Remove a column while in user naming, then switch back to FLASC naming
    frame = frame.drop(columns="CC")
    frame.convert_to_flasc_format(inplace=True)
    print(frame)

    # Still to do: the long format conversion, which is more involved

"""
Two possible types of data we should try to handle:
1. Semiwide:
- One column for time stamp
- One column for turbine id
- Many data channel columns
2. Long:
- One column for time stamp
- One column for variable name
- One column for value
FLASC format is wide, i.e.
- One column for time stamp
- One column for each channel for each turbine
Converting between semiwide and wide should be relatively straightforward.
Actually, neither of these should be too bad
"""

0 comments on commit 3f9a2db

Please sign in to comment.