diff --git a/freqtrade/freqai/base_models/BaseTensorFlowModel.py b/freqtrade/freqai/base_models/BaseTensorFlowModel.py
index b41ee0175..a12a6a9ef 100644
--- a/freqtrade/freqai/base_models/BaseTensorFlowModel.py
+++ b/freqtrade/freqai/base_models/BaseTensorFlowModel.py
@@ -3,10 +3,10 @@ from time import time
 from typing import Any
 
 from pandas import DataFrame
-
+import numpy as np
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
 from freqtrade.freqai.freqai_interface import IFreqaiModel
-
+import tensorflow as tf
 
 logger = logging.getLogger(__name__)
 
@@ -17,6 +17,13 @@ class BaseTensorFlowModel(IFreqaiModel):
     User *must* inherit from this class and set fit() and predict().
     """
 
+    def __init__(self, **kwargs):
+        super().__init__(config=kwargs['config'])
+        self.keras = True
+        if self.ft_params.get("DI_threshold", 0):
+            self.ft_params["DI_threshold"] = 0
+            logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
+
     def train(
         self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
     ) -> Any:
@@ -41,13 +48,9 @@ class BaseTensorFlowModel(IFreqaiModel):
             training_filter=True,
         )
 
-        start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
-        end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
-        logger.info(f"-------------------- Training on data from {start_date} to "
-                    f"{end_date} --------------------")
         # split data into train/test data.
         data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
         if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
             dk.fit_labels()
         # normalize all data based on train_dataset only
         data_dictionary = dk.normalize_data(data_dictionary)
@@ -68,3 +71,80 @@ class BaseTensorFlowModel(IFreqaiModel):
                     f"({end_time - start_time:.2f} secs) --------------------")
 
         return model
+
+
+class WindowGenerator:
+    """
+    Build sliding-window `tf.data` datasets from feature (and optional label) frames.
+    Each window spans `input_width + shift` consecutive rows.
+    """
+    def __init__(
+        self,
+        input_width,
+        label_width,
+        shift,
+        train_df=None,
+        val_df=None,
+        test_df=None,
+        train_labels=None,
+        val_labels=None,
+        test_labels=None,
+        batch_size=None,
+    ):
+        # Store the raw data.
+        self.train_df = train_df
+        self.val_df = val_df
+        self.test_df = test_df
+        self.train_labels = train_labels
+        self.val_labels = val_labels
+        self.test_labels = test_labels
+        self.batch_size = batch_size
+        self.input_width = input_width
+        self.label_width = label_width
+        self.shift = shift
+        self.total_window_size = input_width + shift
+        self.input_slice = slice(0, input_width)
+        self.input_indices = np.arange(self.total_window_size)[self.input_slice]
+
+    def make_dataset(self, data, labels=None):
+        data = np.array(data, dtype=np.float32)
+        if labels is not None:
+            labels = np.array(labels, dtype=np.float32)
+        ds = tf.keras.preprocessing.timeseries_dataset_from_array(
+            data=data,
+            targets=labels,
+            sequence_length=self.total_window_size,
+            sequence_stride=1,
+            sampling_rate=1,
+            shuffle=False,
+            batch_size=self.batch_size,
+        )
+
+        return ds
+
+    @property
+    def train(self):
+        return self.make_dataset(self.train_df, self.train_labels)
+
+    @property
+    def val(self):
+        return self.make_dataset(self.val_df, self.val_labels)
+
+    @property
+    def test(self):
+        return self.make_dataset(self.test_df, self.test_labels)
+
+    @property
+    def inference(self):
+        return self.make_dataset(self.test_df)
+
+    @property
+    def example(self):
+        """Get and cache an example batch of `inputs, labels` for plotting."""
+        result = getattr(self, "_example", None)
+        if result is None:
+            # No example batch was found, so get one from the `.train` dataset
+            result = next(iter(self.train))
+            # And cache it for next time
+            self._example = result
+        return result
diff --git a/freqtrade/freqai/prediction_models/CNNPredictionModel.py b/freqtrade/freqai/prediction_models/CNNPredictionModel.py
new file mode 100644
index 000000000..4129a27cd
--- /dev/null
+++ b/freqtrade/freqai/prediction_models/CNNPredictionModel.py
@@ -0,0 +1,145 @@
+import logging
+from typing import Any, Dict, Tuple
+
+from pandas import DataFrame
+from freqtrade.exceptions import OperationalException
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+import tensorflow as tf
+from freqtrade.freqai.base_models.BaseTensorFlowModel import BaseTensorFlowModel, WindowGenerator
+from tensorflow.keras.layers import Input, Conv1D, Dense
+from tensorflow.keras.models import Model
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+# tf.config.run_functions_eagerly(True)
+# tf.data.experimental.enable_debug_mode()
+
+MAX_EPOCHS = 10
+
+
+class CNNPredictionModel(BaseTensorFlowModel):
+    """
+    User created prediction model. The class needs to override two necessary
+    functions, fit() and predict().
+    """
+
+    def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen) -> Any:
+        """
+        User sets up the training and test data to fit their desired model here
+        :params:
+        :data_dictionary: the dictionary constructed by DataHandler to hold
+        all the training and test data/labels.
+        """
+        train_df = data_dictionary["train_features"]
+        train_labels = data_dictionary["train_labels"]
+        test_df = data_dictionary["test_features"]
+        test_labels = data_dictionary["test_labels"]
+        n_labels = len(train_labels.columns)
+
+        if n_labels > 1:
+            raise OperationalException(
+                "Neural Net not yet configured for multi-targets. Please "
+                "reduce number of targets to 1 in strategy."
+            )
+
+        n_features = len(data_dictionary["train_features"].columns)
+        BATCH_SIZE = self.freqai_info.get("batch_size", 64)
+        input_dims = [BATCH_SIZE, self.CONV_WIDTH, n_features]
+
+        w1 = WindowGenerator(
+            input_width=self.CONV_WIDTH,
+            label_width=1,
+            shift=1,
+            train_df=train_df,
+            val_df=test_df,
+            train_labels=train_labels,
+            val_labels=test_labels,
+            batch_size=BATCH_SIZE,
+        )
+
+        model = self.create_model(input_dims, n_labels)
+
+        steps_per_epoch = np.ceil(len(train_df) / BATCH_SIZE)
+        lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
+            0.001, decay_steps=steps_per_epoch * 1000, decay_rate=1, staircase=False
+        )
+
+        early_stopping = tf.keras.callbacks.EarlyStopping(
+            monitor="loss", patience=3, mode="min", min_delta=0.0001
+        )
+
+        model.compile(
+            loss=tf.losses.MeanSquaredError(),
+            optimizer=tf.optimizers.Adam(lr_schedule),
+            metrics=[tf.metrics.MeanAbsoluteError()],
+        )
+
+        model.fit(
+            w1.train,
+            epochs=MAX_EPOCHS,
+            shuffle=False,
+            validation_data=w1.val,
+            callbacks=[early_stopping],
+            verbose=1,
+        )
+
+        return model
+
+    def predict(
+        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first=True
+    ) -> Tuple[DataFrame, DataFrame]:
+        """
+        Filter the prediction features data and predict with it.
+        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
+        :return:
+        :predictions: np.array of predictions
+        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
+        data (NaNs) or felt uncertain about data (PCA and DI index)
+        """
+
+        dk.find_features(unfiltered_dataframe)
+        filtered_dataframe, _ = dk.filter_features(
+            unfiltered_dataframe, dk.training_features_list, training_filter=False
+        )
+        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
+        dk.data_dictionary["prediction_features"] = filtered_dataframe
+
+        # optional additional data cleaning/analysis
+        self.data_cleaning_predict(dk, filtered_dataframe)
+
+        if first:
+            full_df = dk.data_dictionary["prediction_features"]
+
+            w1 = WindowGenerator(
+                input_width=self.CONV_WIDTH,
+                label_width=1,
+                shift=1,
+                test_df=full_df,
+                batch_size=len(full_df),
+            )
+
+            predictions = self.model.predict(w1.inference)
+            len_diff = len(dk.do_predict) - len(predictions)
+            if len_diff > 0:
+                dk.do_predict = dk.do_predict[len_diff:]
+
+        else:
+            data = dk.data_dictionary["prediction_features"]
+            data = tf.expand_dims(data, axis=0)
+            predictions = self.model(data, training=False)
+
+        predictions = predictions[:, 0, 0]
+        pred_df = DataFrame(predictions, columns=dk.label_list)
+
+        pred_df = dk.denormalize_labels_from_metadata(pred_df)
+
+        return (pred_df, np.ones(len(pred_df)))
+
+    def create_model(self, input_dims, n_labels) -> Any:
+        """Build a Conv1D layer followed by two Dense layers using the Keras functional API."""
+        input_layer = Input(shape=(input_dims[1], input_dims[2]))
+        Layer_1 = Conv1D(filters=32, kernel_size=(self.CONV_WIDTH,), activation="relu")(input_layer)
+        Layer_3 = Dense(units=32, activation="relu")(Layer_1)
+        output_layer = Dense(units=n_labels)(Layer_3)
+        return Model(inputs=input_layer, outputs=output_layer)
diff --git a/requirements-freqai.txt b/requirements-freqai.txt
index 66730e29f..f83e65d8e 100644
--- a/requirements-freqai.txt
+++ b/requirements-freqai.txt
@@ -9,3 +9,4 @@ catboost==1.1.1; platform_machine != 'aarch64'
 lightgbm==3.3.3
 xgboost==1.7.1
 tensorboard==2.11.0
+tensorflow==2.11.0