diff --git a/tsfresh/convenience/bindings.py b/tsfresh/convenience/bindings.py
index 6ed5e301..ca7847d3 100644
--- a/tsfresh/convenience/bindings.py
+++ b/tsfresh/convenience/bindings.py
@@ -14,6 +14,7 @@ def _feature_extraction_on_chunk_helper(
     column_value,
     default_fc_parameters,
     kind_to_fc_parameters,
+    show_warnings=True,
 ):
     """
     Helper function wrapped around _do_extraction_on_chunk to use the correct format
@@ -28,18 +29,23 @@ def _feature_extraction_on_chunk_helper(
         default_fc_parameters = {}
 
     if column_sort is not None:
-        df = df.sort_values(column_sort)
-    chunk = df[column_id].iloc[0], df[column_kind].iloc[0], df[column_value]
+        data = df[[column_sort, column_value]].set_index(column_sort)
+        data = data.sort_index(level=column_sort)
+    else:
+        data = df[column_value]
+
+    chunk = df[column_id].iloc[0], df[column_kind].iloc[0], data
     features = _do_extraction_on_chunk(
         chunk,
         default_fc_parameters=default_fc_parameters,
         kind_to_fc_parameters=kind_to_fc_parameters,
+        show_warnings=show_warnings,
     )
     features = pd.DataFrame(features, columns=[column_id, "variable", "value"])
     features["value"] = features["value"].astype("double")
 
-    return features[[column_id, "variable", "value"]]
+    return features
 
 
 def dask_feature_extraction_on_chunk(
@@ -50,6 +56,8 @@
     column_id,
     column_kind,
     column_value,
     column_sort=None,
     default_fc_parameters=None,
     kind_to_fc_parameters=None,
+    taste_of_pandas_df=False,
+    show_warnings=True,
 ):
     """
     Extract features on a grouped dask dataframe given the column names and the extraction settings.
@@ -125,6 +133,14 @@
     :param column_value: The name for the column keeping the value itself.
     :type column_value: str
 
+    :param taste_of_pandas_df: A pandas DataFrame that went through the same preprocessing steps as the Dask DataFrame (e.g. df.groupby()), used to infer the output meta.
+    :type taste_of_pandas_df: pandas.DataFrame
+
+    :param show_warnings: Whether to show warnings in tsfresh.
+    :type show_warnings: bool
+
+
     :return: A dask dataframe with the columns ``column_id``, "variable" and "value". The index is taken from the grouped dataframe.
     :rtype: dask.dataframe.DataFrame (id int64, variable object, value float64)
@@ -138,11 +154,14 @@
         column_value=column_value,
         default_fc_parameters=default_fc_parameters,
         kind_to_fc_parameters=kind_to_fc_parameters,
+        show_warnings=show_warnings,
     )
 
-    return df.apply(
-        feature_extraction,
-        meta=[(column_id, "int64"), ("variable", "object"), ("value", "float64")],
-    )
+    if taste_of_pandas_df:
+        meta = taste_of_pandas_df.apply(feature_extraction)
+    else:
+        meta = [(column_id, "int64"), ("variable", "object"), ("value", "float64")]
+
+    return df.apply(feature_extraction, meta=meta)
 
 
 def spark_feature_extraction_on_chunk(
@@ -154,6 +173,7 @@
     df,
     column_id,
     column_kind,
     column_value,
     column_sort=None,
     default_fc_parameters=None,
     kind_to_fc_parameters=None,
 ):
+
     """
     Extract features on a grouped spark dataframe given the column names and the extraction settings.
     This wrapper function should only be used if you have a spark dataframe as input.
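A minimal usage sketch of the two new keyword arguments introduced above. This is illustrative only: the toy data and column names are made up, the sketch assumes the patch is applied, and passing a grouped pandas frame as taste_of_pandas_df follows the added docstring ("same preprocessing steps as the Dask Dataframe, e.g. df.groupby()") rather than any confirmed API documentation.

# Usage sketch (assumes this patch is applied; data and names are hypothetical).
import dask.dataframe as dd
import pandas as pd

from tsfresh.convenience.bindings import dask_feature_extraction_on_chunk

# Long ("molten") input: one row per (id, kind, sort, value) observation.
pdf = pd.DataFrame({
    "id":    [1, 1, 1, 2, 2, 2],
    "kind":  ["a"] * 6,
    "sort":  [0, 1, 2, 0, 1, 2],
    "value": [0.1, 0.2, 0.3, 0.3, 0.2, 0.1],
})
ddf = dd.from_pandas(pdf, npartitions=2)

features = dask_feature_extraction_on_chunk(
    ddf.groupby(["id", "kind"]),
    column_id="id",
    column_kind="kind",
    column_sort="sort",
    column_value="value",
    # Small pandas frame with the same grouping; per the patch, the output meta
    # is then inferred by running the extraction once on it instead of using
    # the hard-coded (id, variable, value) dtypes.
    taste_of_pandas_df=pdf.groupby(["id", "kind"]),
    show_warnings=False,
)
print(features.compute().head())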
diff --git a/tsfresh/feature_extraction/extraction.py b/tsfresh/feature_extraction/extraction.py
index 7a87eadf..951f97bb 100644
--- a/tsfresh/feature_extraction/extraction.py
+++ b/tsfresh/feature_extraction/extraction.py
@@ -358,7 +358,7 @@ def _f():
                     continue
                 x = data
             else:
-                x = data.values
+                x = data.values.flatten()
 
             if getattr(func, "fctype", None) == "combiner":
                 result = func(x, param=parameter_list)
diff --git a/tsfresh/feature_extraction/feature_calculators.py b/tsfresh/feature_extraction/feature_calculators.py
index 84473d9a..dba3fc5c 100644
--- a/tsfresh/feature_extraction/feature_calculators.py
+++ b/tsfresh/feature_extraction/feature_calculators.py
@@ -2282,7 +2282,7 @@ def linear_trend_timewise(x, param):
     times_seconds = (ix - ix[0]).total_seconds()
     times_hours = np.asarray(times_seconds / float(3600))
 
-    linReg = linregress(times_hours, x.values)
+    linReg = linregress(times_hours, x.values.flatten())
 
     return [
         ('attr_"{}"'.format(config["attr"]), getattr(linReg, config["attr"]))
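A hedged illustration of why the two .flatten() calls appear (my reading of the patch, not something stated in it): with the bindings change above, when column_sort is given the value data handed to the calculators is a one-column DataFrame indexed by the sort column, so .values comes back 2-D; flattening restores the 1-D array that linregress and the other calculators work on.

# Shape issue the .flatten() calls address (toy data, for illustration only).
import pandas as pd

df = pd.DataFrame({"sort": [2, 0, 1], "value": [0.3, 0.1, 0.2]})

# Mirrors the new helper logic: a one-column frame indexed (and sorted) by "sort".
data = df[["sort", "value"]].set_index("sort").sort_index()

print(data.values.shape)            # (3, 1) -- 2-D; trips up calculators expecting a 1-D series
print(data.values.flatten().shape)  # (3,)   -- the 1-D array linregress() and friends expect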