Start collecting an indefinite history of predictions. Allow the user to generate statistics on these predictions. Direct FreqAI to save them to disk and reload them if available.
parent 3fc92b1b21
commit 8ce6b18318
@@ -562,6 +562,28 @@ a certain number of hours in age by setting the `expiration_hours` in the config

In the present example, the user will only allow predictions on models that are less than 1/2 hour old.

## Choosing the calculation of the `target_roi`

As shown in `templates/FreqaiExampleStrategy.py`, the `target_roi` is based on two metrics computed
by FreqAI: `label_mean` and `label_std`. These are the statistics associated with the labels used
*during the most recent training*. This gives the model a sense of the magnitude of target to
expect, since the values stem directly from the training data. By default, FreqAI computes these
statistics from the training data and assumes the labels are Gaussian distributed. These are big
assumptions that the user should consider when creating their labels. If the user would rather
build the dynamic target from the population of *historical predictions* instead of the trained
labels, the user can do so by setting `fit_live_predictions_candles` to the number of historical
prediction candles to use when generating the target statistics.

```json
    "freqai": {
        "fit_live_predictions_candles": 300,
    }
```

If the user sets this value, FreqAI will initially use the predictions from the training data set
and then subsequently begin introducing real prediction data as it is generated. FreqAI will save
this historical data to be reloaded if the user stops and restarts with the same `identifier`.
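
For illustration, a strategy can turn the `label_mean` and `label_std` columns that FreqAI returns
into a dynamic target. The sketch below is an assumption-laden example rather than part of FreqAI
itself: the single label name `&-s_close`, the helper name, and the 1.25 standard-deviation
multiplier are all arbitrary choices for demonstration.

```python
# Sketch only: assumes FreqAI has merged "&-s_close_mean" and "&-s_close_std"
# columns into the dataframe handed back to the strategy.
def set_dynamic_targets(dataframe):
    # aim for a move at least 1.25 standard deviations above/below the mean
    # of the label (or historical prediction) distribution
    dataframe["target_roi"] = dataframe["&-s_close_mean"] + dataframe["&-s_close_std"] * 1.25
    dataframe["sell_roi"] = dataframe["&-s_close_mean"] - dataframe["&-s_close_std"] * 1.25
    return dataframe
```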

<!-- ## Dynamic target expectation

The labels used for model training have a unique statistical distribution for each separate model training.
@@ -38,12 +38,14 @@ class FreqaiDataDrawer:
        self.model_return_values: Dict[str, Any] = {}
        self.pair_data_dict: Dict[str, Any] = {}
        self.historic_data: Dict[str, Any] = {}
        self.historic_predictions: Dict[str, Any] = {}
        self.follower_dict: Dict[str, Any] = {}
        self.full_path = full_path
        self.follow_mode = follow_mode
        if follow_mode:
            self.create_follower_dict()
        self.load_drawer_from_disk()
        self.load_historic_predictions_from_disk()
        self.training_queue: Dict[str, int] = {}
        self.history_lock = threading.Lock()
@@ -68,6 +70,29 @@ class FreqaiDataDrawer:

        return exists

    def load_historic_predictions_from_disk(self):
        """
        Locate and load previously saved historic predictions.
        :returns:
        exists: bool = whether or not the historic predictions were located
        """
        exists = Path(self.full_path / str("historic_predictions.json")).resolve().exists()
        if exists:
            with open(self.full_path / str("historic_predictions.json"), "r") as fp:
                self.historic_predictions = json.load(fp)
            logger.info(f"Found existing historic predictions at {self.full_path}, but beware "
                        "that statistics may be inaccurate if the bot has been offline for "
                        "an extended period of time.")
        elif not self.follow_mode:
            logger.info("Could not find existing historic_predictions, starting from scratch")
        else:
            logger.warning(
                f"Follower could not find historic predictions at {self.full_path} "
                "sending null values back to strategy"
            )

        return exists

    def save_drawer_to_disk(self):
        """
        Save data drawer full of all pair model metadata in present model folder.
@@ -75,6 +100,13 @@ class FreqaiDataDrawer:
        with open(self.full_path / str("pair_dictionary.json"), "w") as fp:
            json.dump(self.pair_dict, fp, default=self.np_encoder)

    def save_historic_predictions_to_disk(self):
        """
        Save the historic predictions of all pairs to the present model folder.
        """
        with open(self.full_path / str("historic_predictions.json"), "w") as fp:
            json.dump(self.historic_predictions, fp, default=self.np_encoder)

    def save_follower_dict_to_disk(self):
        """
        Save follower dictionary to disk (used by strategy for persistent prediction targets)
@@ -176,16 +208,18 @@ class FreqaiDataDrawer:
        historical candles, and also stores historical predictions despite retrainings (so stored
        predictions are true predictions, not just inferencing on trained data)
        """
        self.model_return_values[pair] = pd.DataFrame()
        # dynamic df returned to strategy and plotted in frequi
        mrv_df = self.model_return_values[pair] = pd.DataFrame()

        for label in dk.label_list:
            self.model_return_values[pair][label] = pred_df[label]
            self.model_return_values[pair][f"{label}_mean"] = dk.data["labels_mean"][label]
            self.model_return_values[pair][f"{label}_std"] = dk.data["labels_std"][label]
            mrv_df[label] = pred_df[label]
            mrv_df[f"{label}_mean"] = dk.data["labels_mean"][label]
            mrv_df[f"{label}_std"] = dk.data["labels_std"][label]

        if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
            self.model_return_values[pair]["DI_values"] = dk.DI_values
            mrv_df["DI_values"] = dk.DI_values

        self.model_return_values[pair]["do_predict"] = do_preds
        mrv_df["do_predict"] = do_preds

    def append_model_predictions(self, pair: str, predictions, do_preds, dk, len_df) -> None:
@@ -201,6 +235,13 @@ class FreqaiDataDrawer:
            i = length_difference + 1

        df = self.model_return_values[pair] = self.model_return_values[pair].shift(-i)
        hp_df = self.historic_predictions[pair]

        # here are some pandas hula hoops to accommodate the possibility of a series
        # or dataframe depending on the number of labels requested by the user
        nan_df = pd.DataFrame(np.nan, index=hp_df.index[-2:] + 2, columns=hp_df.columns)
        hp_df = pd.concat([hp_df, nan_df], ignore_index=True, axis=0)
        hp_df = pd.concat([hp_df, nan_df[-2:-1]], axis=0)

        for label in dk.label_list:
            df[label].iloc[-1] = predictions[label].iloc[-1]
@@ -212,6 +253,9 @@
        if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
            df["DI_values"].iloc[-1] = dk.DI_values[-1]

        # append the new predictions to persistent storage
        hp_df.iloc[-1] = df[label].iloc[-1]

        if length_difference < 0:
            prepend_df = pd.DataFrame(
                np.zeros((abs(length_difference) - 1, len(df.columns))), columns=df.columns
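
The intent of the block above (grow the persistent `historic_predictions` frame by one row and
fill it with the newest prediction) can be shown with a simpler standalone sketch; the column name
and values below are hypothetical, not taken from this commit:

```python
import numpy as np
import pandas as pd

hp_df = pd.DataFrame({"&-s_close": [0.001, 0.003]})   # existing historic predictions
latest_prediction = 0.002                             # newest model output for this label

# append one empty (NaN) row, then overwrite it with the latest prediction
nan_row = pd.DataFrame(np.nan, index=[0], columns=hp_df.columns)
hp_df = pd.concat([hp_df, nan_row], ignore_index=True)
hp_df.iloc[-1, hp_df.columns.get_loc("&-s_close")] = latest_prediction
print(hp_df)
```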
@@ -138,19 +138,6 @@ class FreqaiDataKitchen:
        self.dd.pair_dict[coin]["data_path"] = str(self.data_path)
        self.dd.save_drawer_to_disk()

        # TODO add a helper function to let user save/load any data they are custom adding. We
        # do not want them having to edit the default save/load methods here. Below is an example
        # of what we do NOT want.

        # if self.freqai_config.get('feature_parameters','determine_statistical_distributions'):
        #     self.data_dictionary["upper_quantiles"].to_pickle(
        #         save_path / str(self.model_filename + "_upper_quantiles.pkl")
        #     )

        #     self.data_dictionary["lower_quantiles"].to_pickle(
        #         save_path / str(self.model_filename + "_lower_quantiles.pkl")
        #     )

        return

    def load_data(self, coin: str = "", keras_model=False) -> Any:
@@ -184,22 +171,6 @@ class FreqaiDataKitchen:
            self.data_path / str(self.model_filename + "_trained_df.pkl")
        )

        # TODO add a helper function to let user save/load any data they are custom adding. We
        # do not want them having to edit the default save/load methods here. Below is an example
        # of what we do NOT want.

        # if self.freqai_config.get('feature_parameters','determine_statistical_distributions'):
        #     self.data_dictionary["upper_quantiles"] = pd.read_pickle(
        #         self.data_path / str(self.model_filename + "_upper_quantiles.pkl")
        #     )

        #     self.data_dictionary["lower_quantiles"] = pd.read_pickle(
        #         self.data_path / str(self.model_filename + "_lower_quantiles.pkl")
        #     )

        # self.data_path = Path(self.data["data_path"])
        # self.model_filename = self.data["model_filename"]

        # try to access model in memory instead of loading object from disk to save time
        if self.live and self.model_filename in self.dd.model_dictionary:
            model = self.dd.model_dictionary[self.model_filename]
@@ -207,7 +178,6 @@ class FreqaiDataKitchen:
            model = load(self.data_path / str(self.model_filename + "_model.joblib"))
        else:
            from tensorflow import keras

            model = keras.models.load_model(self.data_path / str(self.model_filename + "_model.h5"))

        if Path(self.data_path / str(self.model_filename + "_svm_model.joblib")).resolve().exists():
@@ -263,7 +233,6 @@ class FreqaiDataKitchen:
            labels,
            weights,
            stratify=stratification,
            # shuffle=False,
            **self.config["freqai"]["data_split_parameters"],
        )

@@ -276,7 +245,6 @@ class FreqaiDataKitchen:
        unfiltered_dataframe: DataFrame,
        training_feature_list: List,
        label_list: List = list(),
        # labels: DataFrame = pd.DataFrame(),
        training_filter: bool = True,
    ) -> Tuple[DataFrame, DataFrame]:
        """
@@ -1135,6 +1103,19 @@ class FreqaiDataKitchen:

        return dataframe

    def fit_live_predictions(self) -> None:
        """
        Fit the historic predictions to a gaussian distribution
        """
        import scipy as spy

        num_candles = self.freqai_config.get('fit_live_predictions_candles', 100)
        self.data["labels_mean"], self.data["labels_std"] = {}, {}
        for label in self.label_list:
            f = spy.stats.norm.fit(self.dd.historic_predictions[self.pair][label].tail(num_candles))
            self.data["labels_mean"][label], self.data["labels_std"][label] = f[0], f[1]

        return

    def fit_labels(self) -> None:
        """
        Fit the labels with a gaussian distribution
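
As context for `fit_live_predictions` above: `scipy.stats.norm.fit` returns the maximum-likelihood
`(mean, std)` estimates for the samples it is given, which is what populates `labels_mean` and
`labels_std`. A minimal standalone sketch with synthetic data (illustrative only, not part of this
commit):

```python
import numpy as np
from scipy.stats import norm

# stand-in for the last `fit_live_predictions_candles` predictions of one label
recent_predictions = np.random.normal(loc=0.002, scale=0.01, size=300)

label_mean, label_std = norm.fit(recent_predictions)
print(label_mean, label_std)  # roughly 0.002 and 0.01
```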
@@ -1,4 +1,5 @@
# import contextlib
import copy
import datetime
import gc
import logging
@@ -484,6 +485,20 @@ class IFreqaiModel(ABC):
            self.dd.purge_old_models()
            # self.retrain = False

    def set_initial_historic_predictions(self, df: DataFrame, model: Any,
                                         dk: FreqaiDataKitchen, pair: str) -> None:
        trained_predictions = model.predict(df)
        pred_df = DataFrame(trained_predictions, columns=dk.label_list)
        for label in dk.label_list:
            pred_df[label] = (
                (pred_df[label] + 1)
                * (dk.data["labels_max"][label] - dk.data["labels_min"][label])
                / 2
            ) + dk.data["labels_min"][label]

        self.dd.historic_predictions[pair] = pd.DataFrame()
        self.dd.historic_predictions[pair] = copy.deepcopy(pred_df)

    # Following methods are overridden by user-made prediction models.
    # See freqai/prediction_models/CatboostPredictionModel.py for an example.
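
The loop in `set_initial_historic_predictions` appears to invert a min-max scaling of the labels
to the [-1, 1] range before storing the training-set predictions as the initial historic
predictions. A quick self-contained check of that mapping, under that assumption and with
hypothetical numbers:

```python
# assumed forward scaling: x_scaled = 2 * (x - x_min) / (x_max - x_min) - 1
x_min, x_max = -0.05, 0.05   # hypothetical labels_min / labels_max values
x = 0.02                     # hypothetical raw label value

x_scaled = 2 * (x - x_min) / (x_max - x_min) - 1
# inverse used above: ((x_scaled + 1) * (x_max - x_min) / 2) + x_min
x_back = (x_scaled + 1) * (x_max - x_min) / 2 + x_min
assert abs(x_back - x) < 1e-12
```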
@@ -51,7 +51,8 @@ class BaseRegressionModel(IFreqaiModel):

        # split data into train/test data.
        data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
        dk.fit_labels()  # fit labels to a cauchy distribution so we know what to expect in strategy
        if not self.freqai_info.get('fit_live_predictions', 0):
            dk.fit_labels()
        # normalize all data based on train_dataset only
        data_dictionary = dk.normalize_data(data_dictionary)

@@ -65,6 +66,13 @@ class BaseRegressionModel(IFreqaiModel):

        model = self.fit(data_dictionary)

        if pair not in self.dd.historic_predictions:
            self.set_initial_historic_predictions(
                data_dictionary['train_features'], model, dk, pair)
        elif self.freqai_info.get('fit_live_predictions_candles', 0):
            dk.fit_live_predictions()

        self.dd.save_historic_predictions_to_disk()

        logger.info(f"--------------------done training {pair}--------------------")

        return model