Merge branch 'develop' of github.com:lolongcovas/freqtrade into feature/training_data_slice_normalization
This commit is contained in:
@@ -18,6 +18,7 @@ from sklearn.model_selection import train_test_split
|
||||
from sklearn.neighbors import NearestNeighbors
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import Config
|
||||
from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.exchange import timeframe_to_seconds
|
||||
from freqtrade.strategy.interface import IStrategy
|
||||
@@ -57,7 +58,7 @@ class FreqaiDataKitchen:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
config: Dict[str, Any],
|
||||
config: Config,
|
||||
live: bool = False,
|
||||
pair: str = "",
|
||||
):
|
||||
@@ -465,27 +466,6 @@ class FreqaiDataKitchen:
|
||||
|
||||
return df
|
||||
|
||||
def remove_training_from_backtesting(
|
||||
self
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Function which takes the backtesting time range and
|
||||
remove training data from dataframe, keeping only the
|
||||
startup_candle_count candles
|
||||
"""
|
||||
startup_candle_count = self.config.get('startup_candle_count', 0)
|
||||
tf = self.config['timeframe']
|
||||
tr = self.config["timerange"]
|
||||
|
||||
backtesting_timerange = TimeRange.parse_timerange(tr)
|
||||
if startup_candle_count > 0 and backtesting_timerange:
|
||||
backtesting_timerange.subtract_start(timeframe_to_seconds(tf) * startup_candle_count)
|
||||
|
||||
start = datetime.fromtimestamp(backtesting_timerange.startts, tz=timezone.utc)
|
||||
df = self.return_dataframe
|
||||
df = df.loc[df["date"] >= start, :]
|
||||
return df
|
||||
|
||||
def principal_component_analysis(self) -> None:
|
||||
"""
|
||||
Performs Principal Component Analysis on the data for dimensionality reduction
|
||||
@@ -774,12 +754,22 @@ class FreqaiDataKitchen:
|
||||
|
||||
def compute_inlier_metric(self, set_='train') -> None:
|
||||
"""
|
||||
|
||||
Compute inlier metric from backwards distance distributions.
|
||||
This metric defines how well features from a timepoint fit
|
||||
into previous timepoints.
|
||||
"""
|
||||
|
||||
def normalise(dataframe: DataFrame, key: str) -> DataFrame:
|
||||
if set_ == 'train':
|
||||
min_value = dataframe.min()
|
||||
max_value = dataframe.max()
|
||||
self.data[f'{key}_min'] = min_value
|
||||
self.data[f'{key}_max'] = max_value
|
||||
else:
|
||||
min_value = self.data[f'{key}_min']
|
||||
max_value = self.data[f'{key}_max']
|
||||
return (dataframe - min_value) / (max_value - min_value)
|
||||
|
||||
no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"]
|
||||
|
||||
if set_ == 'train':
|
||||
@@ -824,7 +814,12 @@ class FreqaiDataKitchen:
|
||||
inliers = pd.DataFrame(index=distances.index)
|
||||
for key in distances.keys():
|
||||
current_distances = distances[key].dropna()
|
||||
fit_params = stats.weibull_min.fit(current_distances)
|
||||
current_distances = normalise(current_distances, key)
|
||||
if set_ == 'train':
|
||||
fit_params = stats.weibull_min.fit(current_distances)
|
||||
self.data[f'{key}_fit_params'] = fit_params
|
||||
else:
|
||||
fit_params = self.data[f'{key}_fit_params']
|
||||
quantiles = stats.weibull_min.cdf(current_distances, *fit_params)
|
||||
|
||||
df_inlier = pd.DataFrame(
|
||||
@@ -836,7 +831,7 @@ class FreqaiDataKitchen:
|
||||
|
||||
inlier_metric = pd.DataFrame(
|
||||
data=inliers.sum(axis=1) / no_prev_pts,
|
||||
columns=['inlier_metric'],
|
||||
columns=['%-inlier_metric'],
|
||||
index=compute_df.index
|
||||
)
|
||||
|
||||
@@ -900,11 +895,14 @@ class FreqaiDataKitchen:
|
||||
"""
|
||||
column_names = dataframe.columns
|
||||
features = [c for c in column_names if "%" in c]
|
||||
labels = [c for c in column_names if "&" in c]
|
||||
if not features:
|
||||
raise OperationalException("Could not find any features!")
|
||||
|
||||
self.training_features_list = features
|
||||
|
||||
def find_labels(self, dataframe: DataFrame) -> None:
|
||||
column_names = dataframe.columns
|
||||
labels = [c for c in column_names if "&" in c]
|
||||
self.label_list = labels
|
||||
|
||||
def check_if_pred_in_training_spaces(self) -> None:
|
||||
@@ -992,8 +990,6 @@ class FreqaiDataKitchen:
|
||||
|
||||
to_keep = [col for col in dataframe.columns if not col.startswith("&")]
|
||||
self.return_dataframe = pd.concat([dataframe[to_keep], self.full_df], axis=1)
|
||||
|
||||
self.return_dataframe = self.remove_training_from_backtesting()
|
||||
self.full_df = DataFrame()
|
||||
|
||||
return
|
||||
@@ -1227,7 +1223,8 @@ class FreqaiDataKitchen:
|
||||
|
||||
def get_unique_classes_from_labels(self, dataframe: DataFrame) -> None:
|
||||
|
||||
self.find_features(dataframe)
|
||||
# self.find_features(dataframe)
|
||||
self.find_labels(dataframe)
|
||||
|
||||
for key in self.label_list:
|
||||
if dataframe[key].dtype == object:
|
||||
|
||||
Reference in New Issue
Block a user