Adding at the root level for now; could consider moving to utilities/

misi9170 · Jul 25, 2024 · 3f9a2db · 3f9a2db
1 parent 0c68ac1
commit 3f9a2db
Showing 1 changed file with 141 additions and 0 deletions.
diff --git a/flasc/flasc_dataframe.py b/flasc/flasc_dataframe.py
@@ -0,0 +1,141 @@
+from pandas import DataFrame
+
+# Create a new DataFrame subclass
+class FlascDataFrame(DataFrame):
+    """Prototype pandas DataFrame subclass that stores data the way FLASC expects.
+
+    The data are held with FLASC column names, in wide format. An optional
+    ``name_map`` (user column name -> FLASC column name) is applied once at
+    construction, and the ``convert_to_*`` methods rename the columns in either
+    direction so the user can work on the data under their own names and then
+    bring it back to the FLASC naming.
+
+    Conversions between the wide, semiwide and long layouts are not implemented
+    yet; the ``_convert_*`` methods are placeholders that do nothing.
+
+    Args:
+        *args: Positional arguments forwarded unchanged to ``DataFrame``.
+        name_map: Optional dict mapping user column names (str) to FLASC
+            column names (str). ``None`` (the default) means no renaming.
+        **kwargs: Keyword arguments forwarded unchanged to ``DataFrame``.
+
+    Raises:
+        ValueError: If ``name_map`` is given but is not a dict, or if any of its
+            keys or values is not a string.
+    """
+
+    # Attributes named here are carried over by pandas to the frames that
+    # DataFrame operations derive from this one. Every entry is given an
+    # initial value in __init__.
+    _metadata = ["new_property", "name_map", "newnew_property"]
+
+    def __init__(self, *args, name_map=None, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self._flasc = True
+        # Placeholder metadata attributes used to prototype propagation.
+        self.new_property = 23
+        # Initialised so that reading it on a fresh instance gives None instead
+        # of raising AttributeError before flasc_method() has been called.
+        self.newnew_property = None
+
+        self._user_format = "wide"  # or "long" or "semiwide"
+
+        # Check that the name_map dictionary is valid
+        if name_map is not None:
+            if not isinstance(name_map, dict):
+                raise ValueError("name_map must be a dictionary")
+            if not all(isinstance(k, str) and isinstance(v, str) for k, v in name_map.items()):
+                raise ValueError("name_map must be a dictionary of strings")
+        self.name_map = name_map
+        # Apply the name_map so the stored columns carry the FLASC names
+        self.convert_to_flasc_format(inplace=True)
+
+    def flasc_method(self):
+        """Demo method: print a message and set ``newnew_property`` to 20."""
+        print("This is a method of the FlascDataFrame class")
+        self.newnew_property = 20
+
+    @property
+    def _constructor(self):
+        """Return the class pandas should use to build derived frames."""
+        return FlascDataFrame
+
+    def __str__(self):
+        """Return the DataFrame's string form prefixed with a marker line."""
+        return "This is a FlascDataFrame!\n" + super().__str__()
+
+    def convert_to_user_format(self, inplace=False):
+        """Rename columns from FLASC names back to the user's names.
+
+        The inverse of ``name_map`` is used. If two user names map to the same
+        FLASC name, the inverse can keep only one of them.
+
+        Args:
+            inplace: If True, rename the columns of this object and return None.
+                If False (default), leave this object untouched and return a
+                renamed copy.
+
+        Returns:
+            A new frame with user column names, or None when ``inplace`` is True.
+        """
+        if self.name_map is None:
+            # Nothing to rename: mirror rename()'s return convention.
+            return None if inplace else self.copy()
+        flasc_to_user = {flasc: user for user, flasc in self.name_map.items()}
+        return self.rename(columns=flasc_to_user, inplace=inplace)
+
+    def convert_to_flasc_format(self, inplace=False):
+        """Rename columns from the user's names to the FLASC names.
+
+        Args:
+            inplace: If True, rename the columns of this object and return None.
+                If False (default), leave this object untouched and return a
+                renamed copy.
+
+        Returns:
+            A new frame with FLASC column names, or None when ``inplace`` is True.
+        """
+        if self.name_map is None:
+            # Nothing to rename: mirror rename()'s return convention.
+            return None if inplace else self.copy()
+        return self.rename(columns=self.name_map, inplace=inplace)
+
+    def _convert_long_to_wide(self):
+        """Convert a long format DataFrame to wide format (not implemented yet)."""
+        pass
+
+    def _convert_semiwide_to_wide(self):
+        """Convert a semiwide format DataFrame to wide format (not implemented yet)."""
+        pass
+
+    def _convert_wide_to_long(self):
+        """Convert a wide format DataFrame to long format (not implemented yet)."""
+        pass
+
+    def _convert_wide_to_semiwide(self):
+        """Convert a wide format DataFrame to semiwide format (not implemented yet)."""
+        pass
+
+# Likely this will be used for testing later, but it's convenient for prototyping here
+if __name__ == "__main__":
+    # Build a frame whose column "a" is renamed to "AA" through name_map
+    df = FlascDataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, name_map={"a":"AA"})
+    print(df)
+    print(df.new_property)
+    df.flasc_method() # Assigns newnew_property
+    print(df._flasc)
+    print(df._metadata)
+
+    # Check that modifying df still returns a FlascDataFrame
+    print(type(df))
+    df.new_property = 42
+    df.name_map = 6  # arbitrary value, only used to watch the attribute propagate
+    df = df.drop(columns="c") # Modifies the dataframe, returns a copy
+    print(df)
+    print(df.new_property)
+    print(df.name_map)
+    print(df.newnew_property) # Not retained with copy, unless in _metadata. If in _metadata, retained!
+
+
+    # Try out the convert methods (seem good)
+    data = {"AA": [1, 2, 3], "BB": [4, 5, 6], "CC": [7, 8, 9]}
+    df = FlascDataFrame(data, name_map={"AA": "a", "BB": "b", "CC": "c"})
+    print(df)
+    df2 = df.convert_to_user_format()
+    print(df2)
+    df.convert_to_user_format(inplace=True)
+    print(df)
+
+    # Drop a column, convert back
+    df = df.drop(columns="CC")
+    df.convert_to_flasc_format(inplace=True)
+    print(df)
+    # Works great!
+
+    # Next, the long format conversion... more complicated
+
+    # Two possible types of data we should try to handle:
+    # 1. Semiwide:
+    #    - One column for time stamp
+    #    - One column for turbine id
+    #    - Many data channel columns
+    # 2. Long:
+    #    - One column for time stamp
+    #    - One column for variable name
+    #    - One column for value
+    #
+    # FLASC format is wide, i.e.
+    #    - One column for time stamp
+    #    - One column for each channel for each turbine
+    #
+    # Converting between semiwide and wide should be relatively straightforward.
+    # Actually, neither of these should be too bad.
+