Merge branch 'feat/freqai' of https://github.com/lolongcovas/freqtrade into feat/freqai

This commit is contained in:
longyu
2022-07-11 22:53:37 +02:00
14 changed files with 489 additions and 510 deletions

View File

@@ -174,9 +174,10 @@ def _validate_freqai(conf: Dict[str, Any]) -> None:
for param in constants.SCHEMA_FREQAI_REQUIRED:
if param not in conf.get('freqai', {}):
raise OperationalException(
f'{param} not found in Freqai config'
)
if param not in conf.get('freqai', {}).get('feature_parameters', {}):
raise OperationalException(
f'{param} not found in Freqai config'
)
def _validate_whitelist(conf: Dict[str, Any]) -> None:

View File

@@ -477,16 +477,16 @@ CONF_SCHEMA = {
"freqai": {
"type": "object",
"properties": {
"timeframes": {"type": "list"},
"train_period": {"type": "integer", "default": 0},
"backtest_period": {"type": "float", "default": 7},
"train_period_days": {"type": "integer", "default": 0},
"backtest_period_days": {"type": "float", "default": 7},
"identifier": {"type": "str", "default": "example"},
"corr_pairlist": {"type": "list"},
"feature_parameters": {
"type": "object",
"properties": {
"period": {"type": "integer"},
"shift": {"type": "integer", "default": 0},
"include_corr_pairlist": {"type": "list"},
"include_timeframes": {"type": "list"},
"label_period_candles": {"type": "integer"},
"include_shifted_candles": {"type": "integer", "default": 0},
"DI_threshold": {"type": "float", "default": 0},
"weight_factor": {"type": "number", "default": 0},
"principal_component_analysis": {"type": "boolean", "default": False},
@@ -555,11 +555,11 @@ SCHEMA_MINIMAL_REQUIRED = [
]
SCHEMA_FREQAI_REQUIRED = [
'timeframes',
'train_period',
'backtest_period',
'include_timeframes',
'train_period_days',
'backtest_period_days',
'identifier',
'corr_pairlist',
'include_corr_pairlist',
'feature_parameters',
'data_split_parameters',
'model_training_parameters'

View File

@@ -38,12 +38,14 @@ class FreqaiDataDrawer:
self.model_return_values: Dict[str, Any] = {}
self.pair_data_dict: Dict[str, Any] = {}
self.historic_data: Dict[str, Any] = {}
self.historic_predictions: Dict[str, Any] = {}
self.follower_dict: Dict[str, Any] = {}
self.full_path = full_path
self.follow_mode = follow_mode
if follow_mode:
self.create_follower_dict()
self.load_drawer_from_disk()
self.load_historic_predictions_from_disk()
self.training_queue: Dict[str, int] = {}
self.history_lock = threading.Lock()
@@ -68,6 +70,29 @@ class FreqaiDataDrawer:
return exists
def load_historic_predictions_from_disk(self):
"""
Locate and load a previously saved historic predictions.
:returns:
exists: bool = whether or not the drawer was located
"""
exists = Path(self.full_path / str("historic_predictions.json")).resolve().exists()
if exists:
with open(self.full_path / str("historic_predictions.json"), "r") as fp:
self.pair_dict = json.load(fp)
logger.info(f"Found existing historic predictions at {self.full_path}, but beware of "
"that statistics may be inaccurate if the bot has been offline for "
"an extended period of time.")
elif not self.follow_mode:
logger.info("Could not find existing historic_predictions, starting from scratch")
else:
logger.warning(
f"Follower could not find historic predictions at {self.full_path} "
"sending null values back to strategy"
)
return exists
def save_drawer_to_disk(self):
"""
Save data drawer full of all pair model metadata in present model folder.
@@ -75,6 +100,13 @@ class FreqaiDataDrawer:
with open(self.full_path / str("pair_dictionary.json"), "w") as fp:
json.dump(self.pair_dict, fp, default=self.np_encoder)
def save_historic_predictions_to_disk(self):
"""
Save data drawer full of all pair model metadata in present model folder.
"""
with open(self.full_path / str("historic_predictions.json"), "w") as fp:
json.dump(self.historic_predictions, fp, default=self.np_encoder)
def save_follower_dict_to_disk(self):
"""
Save follower dictionary to disk (used by strategy for persistent prediction targets)
@@ -176,16 +208,18 @@ class FreqaiDataDrawer:
historical candles, and also stores historical predictions despite retrainings (so stored
predictions are true predictions, not just inferencing on trained data)
"""
self.model_return_values[pair] = pd.DataFrame()
# dynamic df returned to strategy and plotted in frequi
mrv_df = self.model_return_values[pair] = pd.DataFrame()
for label in dk.label_list:
self.model_return_values[pair][label] = pred_df[label]
self.model_return_values[pair][f"{label}_mean"] = dk.data["labels_mean"][label]
self.model_return_values[pair][f"{label}_std"] = dk.data["labels_std"][label]
mrv_df[label] = pred_df[label]
mrv_df[f"{label}_mean"] = dk.data["labels_mean"][label]
mrv_df[f"{label}_std"] = dk.data["labels_std"][label]
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
self.model_return_values[pair]["DI_values"] = dk.DI_values
mrv_df["DI_values"] = dk.DI_values
self.model_return_values[pair]["do_predict"] = do_preds
mrv_df["do_predict"] = do_preds
def append_model_predictions(self, pair: str, predictions, do_preds, dk, len_df) -> None:
@@ -201,6 +235,13 @@ class FreqaiDataDrawer:
i = length_difference + 1
df = self.model_return_values[pair] = self.model_return_values[pair].shift(-i)
hp_df = self.historic_predictions[pair]
# here are some pandas hula hoops to accommodate the possibility of a series
# or dataframe depending number of labels requested by user
nan_df = pd.DataFrame(np.nan, index=hp_df.index[-2:] + 2, columns=hp_df.columns)
hp_df = pd.concat([hp_df, nan_df], ignore_index=True, axis=0)
hp_df = pd.concat([hp_df, nan_df[-2:-1]], axis=0)
for label in dk.label_list:
df[label].iloc[-1] = predictions[label].iloc[-1]
@@ -212,6 +253,9 @@ class FreqaiDataDrawer:
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
df["DI_values"].iloc[-1] = dk.DI_values[-1]
# append the new predictions to persistent storage
hp_df.iloc[-1] = df[label].iloc[-1]
if length_difference < 0:
prepend_df = pd.DataFrame(
np.zeros((abs(length_difference) - 1, len(df.columns))), columns=df.columns

View File

@@ -26,6 +26,7 @@ from freqtrade.strategy.interface import IStrategy
SECONDS_IN_DAY = 86400
SECONDS_IN_HOUR = 3600
logger = logging.getLogger(__name__)
@@ -59,13 +60,13 @@ class FreqaiDataKitchen:
self.set_all_pairs()
if not self.live:
self.full_timerange = self.create_fulltimerange(
self.config["timerange"], self.freqai_config.get("train_period")
self.config["timerange"], self.freqai_config.get("train_period_days")
)
(self.training_timeranges, self.backtesting_timeranges) = self.split_timerange(
self.full_timerange,
config["freqai"]["train_period"],
config["freqai"]["backtest_period"],
config["freqai"]["train_period_days"],
config["freqai"]["backtest_period_days"],
)
# self.strat_dataframe: DataFrame = strat_dataframe
self.dd = data_drawer
@@ -137,19 +138,6 @@ class FreqaiDataKitchen:
self.dd.pair_dict[coin]["data_path"] = str(self.data_path)
self.dd.save_drawer_to_disk()
# TODO add a helper function to let user save/load any data they are custom adding. We
# do not want them having to edit the default save/load methods here. Below is an example
# of what we do NOT want.
# if self.freqai_config.get('feature_parameters','determine_statistical_distributions'):
# self.data_dictionary["upper_quantiles"].to_pickle(
# save_path / str(self.model_filename + "_upper_quantiles.pkl")
# )
# self.data_dictionary["lower_quantiles"].to_pickle(
# save_path / str(self.model_filename + "_lower_quantiles.pkl")
# )
return
def load_data(self, coin: str = "", keras_model=False) -> Any:
@@ -183,22 +171,6 @@ class FreqaiDataKitchen:
self.data_path / str(self.model_filename + "_trained_df.pkl")
)
# TODO add a helper function to let user save/load any data they are custom adding. We
# do not want them having to edit the default save/load methods here. Below is an example
# of what we do NOT want.
# if self.freqai_config.get('feature_parameters','determine_statistical_distributions'):
# self.data_dictionary["upper_quantiles"] = pd.read_pickle(
# self.data_path / str(self.model_filename + "_upper_quantiles.pkl")
# )
# self.data_dictionary["lower_quantiles"] = pd.read_pickle(
# self.data_path / str(self.model_filename + "_lower_quantiles.pkl")
# )
# self.data_path = Path(self.data["data_path"])
# self.model_filename = self.data["model_filename"]
# try to access model in memory instead of loading object from disk to save time
if self.live and self.model_filename in self.dd.model_dictionary:
model = self.dd.model_dictionary[self.model_filename]
@@ -206,7 +178,6 @@ class FreqaiDataKitchen:
model = load(self.data_path / str(self.model_filename + "_model.joblib"))
else:
from tensorflow import keras
model = keras.models.load_model(self.data_path / str(self.model_filename + "_model.h5"))
if Path(self.data_path / str(self.model_filename + "_svm_model.joblib")).resolve().exists():
@@ -234,17 +205,18 @@ class FreqaiDataKitchen:
:filtered_dataframe: cleaned dataframe ready to be split.
:labels: cleaned labels ready to be split.
"""
feat_dict = self.freqai_config.get("feature_parameters", {})
weights: npt.ArrayLike
if self.freqai_config["feature_parameters"].get("weight_factor", 0) > 0:
if feat_dict.get("weight_factor", 0) > 0:
weights = self.set_weights_higher_recent(len(filtered_dataframe))
else:
weights = np.ones(len(filtered_dataframe))
if self.freqai_config["feature_parameters"].get("stratify", 0) > 0:
if feat_dict.get("stratify_training_data", 0) > 0:
stratification = np.zeros(len(filtered_dataframe))
for i in range(1, len(stratification)):
if i % self.freqai_config.get("feature_parameters", {}).get("stratify", 0) == 0:
if i % feat_dict.get("stratify_training_data", 0) == 0:
stratification[i] = 1
else:
stratification = None
@@ -261,7 +233,6 @@ class FreqaiDataKitchen:
labels,
weights,
stratify=stratification,
# shuffle=False,
**self.config["freqai"]["data_split_parameters"],
)
@@ -274,7 +245,6 @@ class FreqaiDataKitchen:
unfiltered_dataframe: DataFrame,
training_feature_list: List,
label_list: List = list(),
# labels: DataFrame = pd.DataFrame(),
training_filter: bool = True,
) -> Tuple[DataFrame, DataFrame]:
"""
@@ -439,7 +409,7 @@ class FreqaiDataKitchen:
bt_split: the backtesting length (dats). Specified in user configuration file
"""
train_period = train_split * SECONDS_IN_DAY
train_period_days = train_split * SECONDS_IN_DAY
bt_period = bt_split * SECONDS_IN_DAY
full_timerange = TimeRange.parse_timerange(tr)
@@ -460,7 +430,7 @@ class FreqaiDataKitchen:
while True:
if not first:
timerange_train.startts = timerange_train.startts + bt_period
timerange_train.stopts = timerange_train.startts + train_period
timerange_train.stopts = timerange_train.startts + train_period_days
first = False
start = datetime.datetime.utcfromtimestamp(timerange_train.startts)
@@ -763,7 +733,7 @@ class FreqaiDataKitchen:
return
def create_fulltimerange(self, backtest_tr: str, backtest_period: int) -> str:
def create_fulltimerange(self, backtest_tr: str, backtest_period_days: int) -> str:
backtest_timerange = TimeRange.parse_timerange(backtest_tr)
if backtest_timerange.stopts == 0:
@@ -771,7 +741,8 @@ class FreqaiDataKitchen:
datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
)
backtest_timerange.startts = backtest_timerange.startts - backtest_period * SECONDS_IN_DAY
backtest_timerange.startts = (backtest_timerange.startts
- backtest_period_days * SECONDS_IN_DAY)
start = datetime.datetime.utcfromtimestamp(backtest_timerange.startts)
stop = datetime.datetime.utcfromtimestamp(backtest_timerange.stopts)
full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
@@ -817,7 +788,8 @@ class FreqaiDataKitchen:
data_load_timerange = TimeRange()
# find the max indicator length required
max_timeframe_chars = self.freqai_config.get("timeframes")[-1]
max_timeframe_chars = self.freqai_config.get(
"feature_parameters", {}).get("include_timeframes")[-1]
max_period = self.freqai_config.get("feature_parameters", {}).get(
"indicator_max_period", 50
)
@@ -840,11 +812,11 @@ class FreqaiDataKitchen:
# logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY:.2f} days')
if trained_timestamp != 0:
elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY
retrain = elapsed_time > self.freqai_config.get("backtest_period")
elapsed_time = (time - trained_timestamp) / SECONDS_IN_HOUR
retrain = elapsed_time > self.freqai_config.get("live_retrain_hours", 0)
if retrain:
trained_timerange.startts = int(
time - self.freqai_config.get("train_period", 0) * SECONDS_IN_DAY
time - self.freqai_config.get("train_period_days", 0) * SECONDS_IN_DAY
)
trained_timerange.stopts = int(time)
# we want to load/populate indicators on more data than we plan to train on so
@@ -852,19 +824,19 @@ class FreqaiDataKitchen:
# unless they have data further back in time before the start of the train period
data_load_timerange.startts = int(
time
- self.freqai_config.get("train_period", 0) * SECONDS_IN_DAY
- self.freqai_config.get("train_period_days", 0) * SECONDS_IN_DAY
- additional_seconds
)
data_load_timerange.stopts = int(time)
else: # user passed no live_trained_timerange in config
trained_timerange.startts = int(
time - self.freqai_config.get("train_period") * SECONDS_IN_DAY
time - self.freqai_config.get("train_period_days") * SECONDS_IN_DAY
)
trained_timerange.stopts = int(time)
data_load_timerange.startts = int(
time
- self.freqai_config.get("train_period", 0) * SECONDS_IN_DAY
- self.freqai_config.get("train_period_days", 0) * SECONDS_IN_DAY
- additional_seconds
)
data_load_timerange.stopts = int(time)
@@ -930,7 +902,7 @@ class FreqaiDataKitchen:
refresh_backtest_ohlcv_data(
exchange,
pairs=self.all_pairs,
timeframes=self.freqai_config.get("timeframes"),
timeframes=self.freqai_config.get("feature_parameters", {}).get("include_timeframes"),
datadir=self.config["datadir"],
timerange=timerange,
new_pairs_days=new_pairs_days,
@@ -948,12 +920,12 @@ class FreqaiDataKitchen:
:params:
dataframe: DataFrame = strategy provided dataframe
"""
feat_params = self.freqai_config.get("feature_parameters", {})
with self.dd.history_lock:
history_data = self.dd.historic_data
for pair in self.all_pairs:
for tf in self.freqai_config.get("timeframes"):
for tf in feat_params.get("include_timeframes"):
# check if newest candle is already appended
df_dp = strategy.dp.get_pair_dataframe(pair, tf)
@@ -992,7 +964,8 @@ class FreqaiDataKitchen:
def set_all_pairs(self) -> None:
self.all_pairs = copy.deepcopy(self.freqai_config.get("corr_pairlist", []))
self.all_pairs = copy.deepcopy(self.freqai_config.get(
'feature_parameters', {}).get('include_corr_pairlist', []))
for pair in self.config.get("exchange", "").get("pair_whitelist"):
if pair not in self.all_pairs:
self.all_pairs.append(pair)
@@ -1003,14 +976,14 @@ class FreqaiDataKitchen:
Only called once upon startup of bot.
:params:
timerange: TimeRange = full timerange required to populate all indicators
for training according to user defined train_period
for training according to user defined train_period_days
"""
history_data = self.dd.historic_data
for pair in self.all_pairs:
if pair not in history_data:
history_data[pair] = {}
for tf in self.freqai_config.get("timeframes"):
for tf in self.freqai_config.get("feature_parameters", {}).get("include_timeframes"):
history_data[pair][tf] = load_pair_history(
datadir=self.config["datadir"],
timeframe=tf,
@@ -1028,7 +1001,7 @@ class FreqaiDataKitchen:
to the present pair.
:params:
timerange: TimeRange = full timerange required to populate all indicators
for training according to user defined train_period
for training according to user defined train_period_days
metadata: dict = strategy furnished pair metadata
"""
@@ -1036,9 +1009,10 @@ class FreqaiDataKitchen:
corr_dataframes: Dict[Any, Any] = {}
base_dataframes: Dict[Any, Any] = {}
historic_data = self.dd.historic_data
pairs = self.freqai_config.get("corr_pairlist", [])
pairs = self.freqai_config.get('feature_parameters', {}).get(
'include_corr_pairlist', [])
for tf in self.freqai_config.get("timeframes"):
for tf in self.freqai_config.get("feature_parameters", {}).get("include_timeframes"):
base_dataframes[tf] = self.slice_dataframe(timerange, historic_data[pair][tf])
if pairs:
for p in pairs:
@@ -1057,7 +1031,7 @@ class FreqaiDataKitchen:
# DataFrame]:
# corr_dataframes: Dict[Any, Any] = {}
# base_dataframes: Dict[Any, Any] = {}
# pairs = self.freqai_config.get('corr_pairlist', []) # + [metadata['pair']]
# pairs = self.freqai_config.get('include_corr_pairlist', []) # + [metadata['pair']]
# # timerange = TimeRange.parse_timerange(new_timerange)
# for tf in self.freqai_config.get('timeframes'):
@@ -1101,9 +1075,9 @@ class FreqaiDataKitchen:
dataframe: DataFrame = dataframe containing populated indicators
"""
dataframe = base_dataframes[self.config["timeframe"]].copy()
pairs = self.freqai_config.get("corr_pairlist", [])
pairs = self.freqai_config.get('feature_parameters', {}).get('include_corr_pairlist', [])
sgi = True
for tf in self.freqai_config.get("timeframes"):
for tf in self.freqai_config.get("feature_parameters", {}).get("include_timeframes"):
dataframe = strategy.populate_any_indicators(
pair,
pair,
@@ -1129,6 +1103,19 @@ class FreqaiDataKitchen:
return dataframe
def fit_live_predictions(self) -> None:
"""
Fit the labels with a gaussian distribution
"""
import scipy as spy
num_candles = self.freqai_config.get('fit_live_predictions_candles', 100)
self.data["labels_mean"], self.data["labels_std"] = {}, {}
for label in self.label_list:
f = spy.stats.norm.fit(self.dd.historic_predictions[self.pair][label].tail(num_candles))
self.data["labels_mean"][label], self.data["labels_std"][label] = f[0], f[1]
return
def fit_labels(self) -> None:
"""
Fit the labels with a gaussian distribution

View File

@@ -1,4 +1,5 @@
# import contextlib
import copy
import datetime
import gc
import logging
@@ -95,7 +96,7 @@ class IFreqaiModel(ABC):
dk = self.start_live(dataframe, metadata, strategy, self.dk)
# For backtesting, each pair enters and then gets trained for each window along the
# sliding window defined by "train_period" (training window) and "backtest_period"
# sliding window defined by "train_period_days" (training window) and "live_retrain_hours"
# (backtest window, i.e. window immediately following the training window).
# FreqAI slides the window and sequentially builds the backtesting results before returning
# the concatenated results for the full backtesting period back to the strategy.
@@ -143,11 +144,11 @@ class IFreqaiModel(ABC):
) -> FreqaiDataKitchen:
"""
The main broad execution for backtesting. For backtesting, each pair enters and then gets
trained for each window along the sliding window defined by "train_period" (training window)
and "backtest_period" (backtest window, i.e. window immediately following the
training window). FreqAI slides the window and sequentially builds the backtesting results
before returning the concatenated results for the full backtesting period back to the
strategy.
trained for each window along the sliding window defined by "train_period_days"
(training window) and "backtest_period_days" (backtest window, i.e. window immediately
following the training window). FreqAI slides the window and sequentially builds
the backtesting results before returning the concatenated results for the full
backtesting period back to the strategy.
:params:
dataframe: DataFrame = strategy passed dataframe
metadata: Dict = pair metadata
@@ -484,6 +485,20 @@ class IFreqaiModel(ABC):
self.dd.purge_old_models()
# self.retrain = False
def set_initial_historic_predictions(self, df: DataFrame, model: Any,
dk: FreqaiDataKitchen, pair: str) -> None:
trained_predictions = model.predict(df)
pred_df = DataFrame(trained_predictions, columns=dk.label_list)
for label in dk.label_list:
pred_df[label] = (
(pred_df[label] + 1)
* (dk.data["labels_max"][label] - dk.data["labels_min"][label])
/ 2
) + dk.data["labels_min"][label]
self.dd.historic_predictions[pair] = pd.DataFrame()
self.dd.historic_predictions[pair] = copy.deepcopy(pred_df)
# Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModlel.py for an example.

View File

@@ -0,0 +1,112 @@
import logging
from typing import Tuple
from pandas import DataFrame
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.freqai_interface import IFreqaiModel
logger = logging.getLogger(__name__)
class BaseRegressionModel(IFreqaiModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
"""
User uses this function to add any additional return values to the dataframe.
e.g.
dataframe['volatility'] = dk.volatility_values
"""
return dataframe
def train(
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
) -> Tuple[DataFrame, DataFrame]:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
:params:
:unfiltered_dataframe: Full dataframe for the current training period
:metadata: pair metadata from strategy.
:returns:
:model: Trained model which can be used to inference (self.predict)
"""
logger.info("--------------------Starting training " f"{pair} --------------------")
# filter the features requested by user in the configuration file and elegantly handle NaNs
features_filtered, labels_filtered = dk.filter_features(
unfiltered_dataframe,
dk.training_features_list,
dk.label_list,
training_filter=True,
)
# split data into train/test data.
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
if not self.freqai_info.get('fit_live_predictions', 0):
dk.fit_labels()
# normalize all data based on train_dataset only
data_dictionary = dk.normalize_data(data_dictionary)
# optional additional data cleaning/analysis
self.data_cleaning_train(dk)
logger.info(
f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
)
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
model = self.fit(data_dictionary)
if pair not in self.dd.historic_predictions:
self.set_initial_historic_predictions(
data_dictionary['train_features'], model, dk, pair)
elif self.freqai_info.get('fit_live_predictions_candles', 0):
dk.fit_live_predictions()
self.dd.save_historic_predictions_to_disk()
logger.info(f"--------------------done training {pair}--------------------")
return model
def predict(
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
) -> Tuple[DataFrame, DataFrame]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
data (NaNs) or felt uncertain about data (PCA and DI index)
"""
dk.find_features(unfiltered_dataframe)
filtered_dataframe, _ = dk.filter_features(
unfiltered_dataframe, dk.training_features_list, training_filter=False
)
filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
dk.data_dictionary["prediction_features"] = filtered_dataframe
# optional additional data cleaning/analysis
self.data_cleaning_predict(dk, filtered_dataframe)
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
pred_df = DataFrame(predictions, columns=dk.label_list)
for label in dk.label_list:
pred_df[label] = (
(pred_df[label] + 1)
* (dk.data["labels_max"][label] - dk.data["labels_min"][label])
/ 2
) + dk.data["labels_min"][label]
return (pred_df, dk.do_predict)

View File

@@ -1,94 +1,21 @@
import logging
from typing import Any, Dict, Tuple
from typing import Any, Dict
from catboost import CatBoostRegressor, Pool
from pandas import DataFrame
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.freqai_interface import IFreqaiModel
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
logger = logging.getLogger(__name__)
class CatboostPredictionModel(IFreqaiModel):
class CatboostPredictionModel(BaseRegressionModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
"""
User uses this function to add any additional return values to the dataframe.
e.g.
dataframe['volatility'] = dk.volatility_values
"""
return dataframe
def make_labels(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
"""
User defines the labels here (target values).
:params:
:dataframe: the full dataframe for the present training period
"""
dataframe["s"] = (
dataframe["close"]
.shift(-self.feature_parameters["period"])
.rolling(self.feature_parameters["period"])
.mean()
/ dataframe["close"]
- 1
)
return dataframe["s"]
def train(
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
) -> Tuple[DataFrame, DataFrame]:
"""
Filter the training data and train a model to it. Train makes heavy use of the datahkitchen
for storing, saving, loading, and analyzing the data.
:params:
:unfiltered_dataframe: Full dataframe for the current training period
:metadata: pair metadata from strategy.
:returns:
:model: Trained model which can be used to inference (self.predict)
"""
logger.info("--------------------Starting training " f"{pair} --------------------")
# unfiltered_labels = self.make_labels(unfiltered_dataframe, dk)
# filter the features requested by user in the configuration file and elegantly handle NaNs
features_filtered, labels_filtered = dk.filter_features(
unfiltered_dataframe,
dk.training_features_list,
dk.label_list,
training_filter=True,
)
# split data into train/test data.
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
dk.fit_labels() # fit labels to a cauchy distribution so we know what to expect in strategy
# normalize all data based on train_dataset only
data_dictionary = dk.normalize_data(data_dictionary)
# optional additional data cleaning/analysis
self.data_cleaning_train(dk)
logger.info(
f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
)
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
model = self.fit(data_dictionary)
logger.info(f"--------------------done training {pair}--------------------")
return model
def fit(self, data_dictionary: Dict) -> Any:
"""
User sets up the training and test data to fit their desired model here
@@ -118,37 +45,3 @@ class CatboostPredictionModel(IFreqaiModel):
model.fit(X=train_data, eval_set=test_data)
return model
def predict(
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
) -> Tuple[DataFrame, DataFrame]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
data (NaNs) or felt uncertain about data (PCA and DI index)
"""
dk.find_features(unfiltered_dataframe)
filtered_dataframe, _ = dk.filter_features(
unfiltered_dataframe, dk.training_features_list, training_filter=False
)
filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
dk.data_dictionary["prediction_features"] = filtered_dataframe
# optional additional data cleaning/analysis
self.data_cleaning_predict(dk, filtered_dataframe)
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
pred_df = DataFrame(predictions, columns=dk.label_list)
for label in dk.label_list:
pred_df[label] = (
(pred_df[label] + 1)
* (dk.data["labels_max"][label] - dk.data["labels_min"][label])
/ 2
) + dk.data["labels_min"][label]
return (pred_df, dk.do_predict)

View File

@@ -1,77 +1,22 @@
import logging
from typing import Any, Dict, Tuple
from typing import Any, Dict
from catboost import CatBoostRegressor # , Pool
from pandas import DataFrame
from sklearn.multioutput import MultiOutputRegressor
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.freqai_interface import IFreqaiModel
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
logger = logging.getLogger(__name__)
class CatboostPredictionMultiModel(IFreqaiModel):
class CatboostPredictionMultiModel(BaseRegressionModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
"""
User uses this function to add any additional return values to the dataframe.
e.g.
dataframe['volatility'] = dk.volatility_values
"""
return dataframe
def train(
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
) -> Tuple[DataFrame, DataFrame]:
"""
Filter the training data and train a model to it. Train makes heavy use of the datahkitchen
for storing, saving, loading, and analyzing the data.
:params:
:unfiltered_dataframe: Full dataframe for the current training period
:metadata: pair metadata from strategy.
:returns:
:model: Trained model which can be used to inference (self.predict)
"""
logger.info("--------------------Starting training " f"{pair} --------------------")
# unfiltered_labels = self.make_labels(unfiltered_dataframe, dk)
# filter the features requested by user in the configuration file and elegantly handle NaNs
features_filtered, labels_filtered = dk.filter_features(
unfiltered_dataframe,
dk.training_features_list,
dk.label_list,
training_filter=True,
)
# split data into train/test data.
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
dk.fit_labels() # fit labels to a cauchy distribution so we know what to expect in strategy
# normalize all data based on train_dataset only
data_dictionary = dk.normalize_data(data_dictionary)
# optional additional data cleaning/analysis
self.data_cleaning_train(dk)
logger.info(
f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
)
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
model = self.fit(data_dictionary)
logger.info(f"--------------------done training {pair}--------------------")
return model
def fit(self, data_dictionary: Dict) -> Any:
"""
User sets up the training and test data to fit their desired model here
@@ -99,37 +44,3 @@ class CatboostPredictionMultiModel(IFreqaiModel):
test_score = model.score(*eval_set)
logger.info(f"Train score {train_score}, Test score {test_score}")
return model
def predict(
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
) -> Tuple[DataFrame, DataFrame]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
data (NaNs) or felt uncertain about data (PCA and DI index)
"""
dk.find_features(unfiltered_dataframe)
filtered_dataframe, _ = dk.filter_features(
unfiltered_dataframe, dk.training_features_list, training_filter=False
)
filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
dk.data_dictionary["prediction_features"] = filtered_dataframe
# optional additional data cleaning/analysis
self.data_cleaning_predict(dk, filtered_dataframe)
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
pred_df = DataFrame(predictions, columns=dk.label_list)
for label in dk.label_list:
pred_df[label] = (
(pred_df[label] + 1)
* (dk.data["labels_max"][label] - dk.data["labels_min"][label])
/ 2
) + dk.data["labels_min"][label]
return (pred_df, dk.do_predict)

View File

@@ -1,76 +1,21 @@
import logging
from typing import Any, Dict, Tuple
from typing import Any, Dict
from lightgbm import LGBMRegressor
from pandas import DataFrame
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.freqai_interface import IFreqaiModel
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
logger = logging.getLogger(__name__)
class LightGBMPredictionModel(IFreqaiModel):
class LightGBMPredictionModel(BaseRegressionModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
"""
User uses this function to add any additional return values to the dataframe.
e.g.
dataframe['volatility'] = dk.volatility_values
"""
return dataframe
def train(
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
) -> Tuple[DataFrame, DataFrame]:
"""
Filter the training data and train a model to it. Train makes heavy use of the datahkitchen
for storing, saving, loading, and analyzing the data.
:params:
:unfiltered_dataframe: Full dataframe for the current training period
:metadata: pair metadata from strategy.
:returns:
:model: Trained model which can be used to inference (self.predict)
"""
logger.info("--------------------Starting training " f"{pair} --------------------")
# unfiltered_labels = self.make_labels(unfiltered_dataframe, dk)
# filter the features requested by user in the configuration file and elegantly handle NaNs
features_filtered, labels_filtered = dk.filter_features(
unfiltered_dataframe,
dk.training_features_list,
dk.label_list,
training_filter=True,
)
# split data into train/test data.
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
dk.fit_labels() # fit labels to a cauchy distribution so we know what to expect in strategy
# normalize all data based on train_dataset only
data_dictionary = dk.normalize_data(data_dictionary)
# optional additional data cleaning/analysis
self.data_cleaning_train(dk)
logger.info(
f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
)
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
model = self.fit(data_dictionary)
logger.info(f"--------------------done training {pair}--------------------")
return model
def fit(self, data_dictionary: Dict) -> Any:
"""
Most regressors use the same function names and arguments e.g. user
@@ -89,39 +34,3 @@ class LightGBMPredictionModel(IFreqaiModel):
model.fit(X=X, y=y, eval_set=eval_set)
return model
def predict(
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen
) -> Tuple[DataFrame, DataFrame]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
:return:
:predictions: np.array of predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
data (NaNs) or felt uncertain about data (PCA and DI index)
"""
# logger.info("--------------------Starting prediction--------------------")
dk.find_features(unfiltered_dataframe)
filtered_dataframe, _ = dk.filter_features(
unfiltered_dataframe, dk.training_features_list, training_filter=False
)
filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
dk.data_dictionary["prediction_features"] = filtered_dataframe
# optional additional data cleaning/analysis
self.data_cleaning_predict(dk, filtered_dataframe)
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
pred_df = DataFrame(predictions, columns=dk.label_list)
for label in dk.label_list:
pred_df[label] = (
(pred_df[label] + 1)
* (dk.data["labels_max"][label] - dk.data["labels_min"][label])
/ 2
) + dk.data["labels_min"][label]
return (pred_df, dk.do_predict)

View File

@@ -44,7 +44,8 @@ def expand_pairlist(wildcardpl: List[str], available_pairs: List[str],
def dynamic_expand_pairlist(config: dict, markets: list) -> List[str]:
if config.get('freqai', {}):
full_pairs = config['pairs'] + [pair for pair in config['freqai']['corr_pairlist']
corr_pairlist = config['freqai']['feature_parameters']['include_corr_pairlist']
full_pairs = config['pairs'] + [pair for pair in corr_pairlist
if pair not in config['pairs']]
expanded_pairs = expand_pairlist(full_pairs, markets)
else:

View File

@@ -56,9 +56,9 @@ class FreqaiExampleStrategy(IStrategy):
def informative_pairs(self):
whitelist_pairs = self.dp.current_whitelist()
corr_pairs = self.config["freqai"]["corr_pairlist"]
corr_pairs = self.config["freqai"]["feature_parameters"]["include_corr_pairlist"]
informative_pairs = []
for tf in self.config["freqai"]["timeframes"]:
for tf in self.config["freqai"]["feature_parameters"]["include_timeframes"]:
for pair in whitelist_pairs:
informative_pairs.append((pair, tf))
for pair in corr_pairs:
@@ -93,7 +93,7 @@ class FreqaiExampleStrategy(IStrategy):
informative = self.dp.get_pair_dataframe(pair, tf)
# first loop is automatically duplicating indicators for time periods
for t in self.freqai_info["feature_parameters"]["indicator_periods"]:
for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]:
t = int(t)
informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t)
@@ -123,8 +123,6 @@ class FreqaiExampleStrategy(IStrategy):
)
informative[f"%-{coin}roc-period_{t}"] = ta.ROC(informative, timeperiod=t)
macd = ta.MACD(informative, timeperiod=t)
informative[f"%-{coin}macd-period_{t}"] = macd["macd"]
informative[f"%-{coin}relative_volume-period_{t}"] = (
informative["volume"] / informative["volume"].rolling(t).mean()
@@ -136,7 +134,7 @@ class FreqaiExampleStrategy(IStrategy):
indicators = [col for col in informative if col.startswith("%")]
# This loop duplicates and shifts all indicators to add a sense of recency to data
for n in range(self.freqai_info["feature_parameters"]["shift"] + 1):
for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1):
if n == 0:
continue
informative_shift = informative[indicators].shift(n)
@@ -161,8 +159,8 @@ class FreqaiExampleStrategy(IStrategy):
# needs to be used such as templates/CatboostPredictionMultiModel.py
df["&-s_close"] = (
df["close"]
.shift(-self.freqai_info["feature_parameters"]["period"])
.rolling(self.freqai_info["feature_parameters"]["period"])
.shift(-self.freqai_info["feature_parameters"]["label_period_candles"])
.rolling(self.freqai_info["feature_parameters"]["label_period_candles"])
.mean()
/ df["close"]
- 1
@@ -179,7 +177,7 @@ class FreqaiExampleStrategy(IStrategy):
# indicated by the user in the configuration file.
# All indicators must be populated by populate_any_indicators() for live functionality
# to work correctly.
for tf in self.freqai_info["timeframes"]:
for tf in self.freqai_info["feature_parameters"]["include_timeframes"]:
dataframe = self.populate_any_indicators(
metadata,
self.pair,
@@ -189,7 +187,7 @@ class FreqaiExampleStrategy(IStrategy):
set_generalized_indicators=sgi,
)
sgi = False
for pair in self.freqai_info["corr_pairlist"]:
for pair in self.freqai_info["feature_parameters"]["include_corr_pairlist"]:
if metadata["pair"] in pair:
continue # do not include whitelisted pair twice if it is in corr_pairlist
dataframe = self.populate_any_indicators(