add CNN prediction model

2022-10-11 19:55:28 +02:00
parent 8d3ed03184 f5870a7540
commit 85df7faa98
5 changed files with 236 additions and 10 deletions
--- a/freqtrade/freqai/base_models/BaseTensorFlowModel.py
+++ b/freqtrade/freqai/base_models/BaseTensorFlowModel.py
@@ -3,10 +3,10 @@ from time import time
 from typing import Any
 from pandas import DataFrame
-
+import numpy as np
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
 from freqtrade.freqai.freqai_interface import IFreqaiModel
-
+import tensorflow as tf
 logger = logging.getLogger(__name__)
@@ -17,6 +17,13 @@ class BaseTensorFlowModel(IFreqaiModel):
    User *must* inherit from this class and set fit() and predict().
    """
    def __init__(self, **kwargs):
        super().__init__(config=kwargs['config'])
        self.keras = True
        if self.ft_params.get("DI_threshold", 0):
            self.ft_params["DI_threshold"] = 0
            logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
    def train(
        self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
    ) -> Any:
@@ -68,3 +75,76 @@ class BaseTensorFlowModel(IFreqaiModel):
                    f"({end_time - start_time:.2f} secs) --------------------")
        return model
 class WindowGenerator:
    def __init__(
        self,
        input_width,
        label_width,
        shift,
        train_df=None,
        val_df=None,
        test_df=None,
        train_labels=None,
        val_labels=None,
        test_labels=None,
        batch_size=None,
    ):
        # Store the raw data.
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df
        self.train_labels = train_labels
        self.val_labels = val_labels
        self.test_labels = test_labels
        self.batch_size = batch_size
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift
        self.total_window_size = input_width + shift
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]
    def make_dataset(self, data, labels=None):
        data = np.array(data, dtype=np.float32)
        if labels is not None:
            labels = np.array(labels, dtype=np.float32)
        ds = tf.keras.preprocessing.timeseries_dataset_from_array(
            data=data,
            targets=labels,
            sequence_length=self.total_window_size,
            sequence_stride=1,
            sampling_rate=1,
            shuffle=False,
            batch_size=self.batch_size,
        )
        return ds
    @property
    def train(self):
        return self.make_dataset(self.train_df, self.train_labels)
    @property
    def val(self):
        return self.make_dataset(self.val_df, self.val_labels)
    @property
    def test(self):
        return self.make_dataset(self.test_df, self.test_labels)
    @property
    def inference(self):
        return self.make_dataset(self.test_df)
    @property
    def example(self):
        """Get and cache an example batch of `inputs, labels` for plotting."""
        result = getattr(self, "_example", None)
        if result is None:
            # No example batch was found, so get one from the `.train` dataset
            result = next(iter(self.train))
            # And cache it for next time
            self._example = result
        return result
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -77,9 +77,10 @@ class FreqaiDataKitchen:
        self.backtest_predictions_folder: str = "backtesting_predictions"
        self.live = live
        self.pair = pair
        self.model_save_type = self.freqai_config.get('model_save_type', 'joblib')
        self.svm_model: linear_model.SGDOneClassSVM = None
-        self.keras: bool = self.freqai_config.get("keras", False)
+        # self.model_save_type: bool = self.freqai_config.get("keras", False)
        self.set_all_pairs()
        if not self.live:
            if not self.config["timerange"]:
@@ -569,7 +570,7 @@ class FreqaiDataKitchen:
        predict: bool = If true, inference an existing SVM model, else construct one
        """
-        if self.keras:
+        if self.model_save_type == 'keras':
            logger.warning(
                "SVM outlier removal not currently supported for Keras based models. "
                "Skipping user requested function."
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -73,10 +73,10 @@ class IFreqaiModel(ABC):
        self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
        self.scanning = False
        self.ft_params = self.freqai_info["feature_parameters"]
-        self.keras: bool = self.freqai_info.get("keras", False)
+        # self.keras: bool = self.freqai_info.get("keras", False)
-        if self.keras and self.ft_params.get("DI_threshold", 0):
+        # if self.keras and self.ft_params.get("DI_threshold", 0):
-            self.ft_params["DI_threshold"] = 0
+        #     self.ft_params["DI_threshold"] = 0
-            logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
+        #     logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
        self.CONV_WIDTH = self.freqai_info.get("conv_width", 2)
        if self.ft_params.get("inlier_metric_window", 0):
            self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2
@@ -645,7 +645,8 @@ class IFreqaiModel(ABC):
        # # for keras type models, the conv_window needs to be prepended so
        # # viewing is correct in frequi
-        if self.freqai_info.get('keras', False) or self.ft_params.get('inlier_metric_window', 0):
+        if (not self.freqai_info.get('model_save_type', 'joblib') or
                self.ft_params.get('inlier_metric_window', 0)):
            n_lost_points = self.freqai_info.get('conv_width', 2)
            zeros_df = DataFrame(np.zeros((n_lost_points, len(hist_preds_df.columns))),
                                 columns=hist_preds_df.columns)
--- a/freqtrade/freqai/prediction_models/CNNPredictionModel.py
+++ b/freqtrade/freqai/prediction_models/CNNPredictionModel.py
@@ -0,0 +1,144 @@
 import logging
 from typing import Any, Dict, Tuple
 from pandas import DataFrame
 from freqtrade.exceptions import OperationalException
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
 import tensorflow as tf
 from freqtrade.freqai.base_models.BaseTensorFlowModel import BaseTensorFlowModel, WindowGenerator
 from tensorflow.keras.layers import Input, Conv1D, Dense
 from tensorflow.keras.models import Model
 import numpy as np
 logger = logging.getLogger(__name__)
 # tf.config.run_functions_eagerly(True)
 # tf.data.experimental.enable_debug_mode()
 MAX_EPOCHS = 10
 class CNNPredictionModel(BaseTensorFlowModel):
    """
    User created prediction model. The class needs to override three necessary
    functions, predict(), fit().
    """
    def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen) -> Any:
        """
        User sets up the training and test data to fit their desired model here
        :params:
        :data_dictionary: the dictionary constructed by DataHandler to hold
        all the training and test data/labels.
        """
        train_df = data_dictionary["train_features"]
        train_labels = data_dictionary["train_labels"]
        test_df = data_dictionary["test_features"]
        test_labels = data_dictionary["test_labels"]
        n_labels = len(train_labels.columns)
        if n_labels > 1:
            raise OperationalException(
                "Neural Net not yet configured for multi-targets. Please "
                " reduce number of targets to 1 in strategy."
            )
        n_features = len(data_dictionary["train_features"].columns)
        BATCH_SIZE = self.freqai_info.get("batch_size", 64)
        input_dims = [BATCH_SIZE, self.CONV_WIDTH, n_features]
        w1 = WindowGenerator(
            input_width=self.CONV_WIDTH,
            label_width=1,
            shift=1,
            train_df=train_df,
            val_df=test_df,
            train_labels=train_labels,
            val_labels=test_labels,
            batch_size=BATCH_SIZE,
        )
        model = self.create_model(input_dims, n_labels)
        steps_per_epoch = np.ceil(len(test_df) / BATCH_SIZE)
        lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
            0.001, decay_steps=steps_per_epoch * 1000, decay_rate=1, staircase=False
        )
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor="loss", patience=3, mode="min", min_delta=0.0001
        )
        model.compile(
            loss=tf.losses.MeanSquaredError(),
            optimizer=tf.optimizers.Adam(lr_schedule),
            metrics=[tf.metrics.MeanAbsoluteError()],
        )
        model.fit(
            w1.train,
            epochs=MAX_EPOCHS,
            shuffle=False,
            validation_data=w1.val,
            callbacks=[early_stopping],
            verbose=1,
        )
        return model
    def predict(
        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first=True
    ) -> Tuple[DataFrame, DataFrame]:
        """
        Filter the prediction features data and predict with it.
        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
        :return:
        :predictions: np.array of predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        """
        dk.find_features(unfiltered_dataframe)
        filtered_dataframe, _ = dk.filter_features(
            unfiltered_dataframe, dk.training_features_list, training_filter=False
        )
        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
        dk.data_dictionary["prediction_features"] = filtered_dataframe
        # optional additional data cleaning/analysis
        self.data_cleaning_predict(dk, filtered_dataframe)
        if first:
            full_df = dk.data_dictionary["prediction_features"]
            w1 = WindowGenerator(
                input_width=self.CONV_WIDTH,
                label_width=1,
                shift=1,
                test_df=full_df,
                batch_size=len(full_df),
            )
            predictions = self.model.predict(w1.inference)
            len_diff = len(dk.do_predict) - len(predictions)
            if len_diff > 0:
                dk.do_predict = dk.do_predict[len_diff:]
        else:
            data = dk.data_dictionary["prediction_features"]
            data = tf.expand_dims(data, axis=0)
            predictions = self.model(data, training=False)
        predictions = predictions[:, 0, 0]
        pred_df = DataFrame(predictions, columns=dk.label_list)
        pred_df = dk.denormalize_labels_from_metadata(pred_df)
        return (pred_df, np.ones(len(pred_df)))
    def create_model(self, input_dims, n_labels) -> Any:
        input_layer = Input(shape=(input_dims[1], input_dims[2]))
        Layer_1 = Conv1D(filters=32, kernel_size=(self.CONV_WIDTH,), activation="relu")(input_layer)
        Layer_3 = Dense(units=32, activation="relu")(Layer_1)
        output_layer = Dense(units=n_labels)(Layer_3)
        return Model(inputs=input_layer, outputs=output_layer)