flake8 passing; use pathlib in lieu of os.path to accommodate Windows/macOS

2022-05-04 17:42:34 +02:00
parent 93cf4cc262
commit 2a3347bc12
7 changed files with 593 additions and 439 deletions
--- a/freqtrade/templates/ExamplePredictionModel.py
+++ b/freqtrade/templates/ExamplePredictionModel.py
@@ -1,15 +1,17 @@
-import numpy as np
+from typing import Any, Dict, Tuple
+
 import pandas as pd
 from catboost import CatBoostRegressor, Pool
 from pandas import DataFrame
-from typing import Any, Dict, Tuple
+
 from freqtrade.freqai.freqai_interface import IFreqaiModel

+
 class ExamplePredictionModel(IFreqaiModel):
    """
    User created prediction model. The class needs to override three necessary
    functions, predict(), train(), fit(). The class inherits ModelHandler which
-    has its own DataHandler where data is held, saved, loaded, and managed. 
+    has its own DataHandler where data is held, saved, loaded, and managed.
    """

    def make_labels(self, dataframe: DataFrame) -> DataFrame:
@@ -19,15 +21,20 @@ class ExamplePredictionModel(IFreqaiModel):
        :dataframe: the full dataframe for the present training period
        """

-        dataframe['s'] = (dataframe['close'].shift(-self.feature_parameters['period']).rolling(
-            self.feature_parameters['period']).max() / dataframe['close'] - 1)
-        self.dh.data['s_mean'] = dataframe['s'].mean()
-        self.dh.data['s_std'] = dataframe['s'].std()
+        dataframe["s"] = (
+            dataframe["close"]
+            .shift(-self.feature_parameters["period"])
+            .rolling(self.feature_parameters["period"])
+            .max()
+            / dataframe["close"]
+            - 1
+        )
+        self.dh.data["s_mean"] = dataframe["s"].mean()
+        self.dh.data["s_std"] = dataframe["s"].std()

-        print('label mean',self.dh.data['s_mean'],'label std',self.dh.data['s_std'])
-
-        return dataframe['s']
+        print("label mean", self.dh.data["s_mean"], "label std", self.dh.data["s_std"])

+        return dataframe["s"]

    def train(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Tuple[DataFrame, DataFrame]:
        """
@@ -35,7 +42,7 @@ class ExamplePredictionModel(IFreqaiModel):
        for storing, saving, loading, and managed.
        :params:
        :unfiltered_dataframe: Full dataframe for the current training period
-        :metadata: pair metadata from strategy. 
+        :metadata: pair metadata from strategy.
        :returns:
        :model: Trained model which can be used to inference (self.predict)
        """
@@ -46,8 +53,12 @@ class ExamplePredictionModel(IFreqaiModel):
        unfiltered_labels = self.make_labels(unfiltered_dataframe)

        # filter the features requested by user in the configuration file and elegantly handle NaNs
-        features_filtered, labels_filtered = self.dh.filter_features(unfiltered_dataframe, 
-                            self.dh.training_features_list, unfiltered_labels, training_filter=True)
+        features_filtered, labels_filtered = self.dh.filter_features(
+            unfiltered_dataframe,
+            self.dh.training_features_list,
+            unfiltered_labels,
+            training_filter=True,
+        )

        # split data into train/test data.
        data_dictionary = self.dh.make_train_test_datasets(features_filtered, labels_filtered)
@@ -55,46 +66,47 @@ class ExamplePredictionModel(IFreqaiModel):
        data_dictionary = self.dh.standardize_data(data_dictionary)

        # optional additional data cleaning
-        if self.feature_parameters['principal_component_analysis']:
+        if self.feature_parameters["principal_component_analysis"]:
            self.dh.principal_component_analysis()
        if self.feature_parameters["remove_outliers"]:
            self.dh.remove_outliers(predict=False)
-        if self.feature_parameters['DI_threshold']:
-            self.dh.data['avg_mean_dist'] = self.dh.compute_distances()
+        if self.feature_parameters["DI_threshold"]:
+            self.dh.data["avg_mean_dist"] = self.dh.compute_distances()

-        print("length of train data", len(data_dictionary['train_features']))
+        print("length of train data", len(data_dictionary["train_features"]))

        model = self.fit(data_dictionary)

-        print('Finished training')
+        print("Finished training")
        print(f'--------------------done training {metadata["pair"]}--------------------')

        return model

    def fit(self, data_dictionary: Dict) -> Any:
        """
-        Most regressors use the same function names and arguments e.g. user 
+        Most regressors use the same function names and arguments e.g. user
        can drop in LGBMRegressor in place of CatBoostRegressor and all data
        management will be properly handled by Freqai.
        :params:
-        :data_dictionary: the dictionary constructed by DataHandler to hold 
+        :data_dictionary: the dictionary constructed by DataHandler to hold
        all the training and test data/labels.
        """

        train_data = Pool(
-            data=data_dictionary['train_features'],
-            label=data_dictionary['train_labels'],
-            weight=data_dictionary['train_weights']
+            data=data_dictionary["train_features"],
+            label=data_dictionary["train_labels"],
+            weight=data_dictionary["train_weights"],
        )

        test_data = Pool(
-            data=data_dictionary['test_features'],
-            label=data_dictionary['test_labels'],
-            weight=data_dictionary['test_weights']
+            data=data_dictionary["test_features"],
+            label=data_dictionary["test_labels"],
+            weight=data_dictionary["test_weights"],
        )

-        model = CatBoostRegressor(verbose=100, early_stopping_rounds=400,
-                            **self.model_training_parameters)
+        model = CatBoostRegressor(
+            verbose=100, early_stopping_rounds=400, **self.model_training_parameters
+        )
        model.fit(X=train_data, eval_set=test_data)

        return model
@@ -103,7 +115,7 @@ class ExamplePredictionModel(IFreqaiModel):
        """
        Filter the prediction features data and predict with it.
        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
-        :return: 
+        :return:
        :predictions: np.array of predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
@@ -112,27 +124,31 @@ class ExamplePredictionModel(IFreqaiModel):
        print("--------------------Starting prediction--------------------")

        original_feature_list = self.dh.build_feature_list(self.config)
-        filtered_dataframe, _ = self.dh.filter_features(unfiltered_dataframe, original_feature_list, training_filter=False)
+        filtered_dataframe, _ = self.dh.filter_features(
+            unfiltered_dataframe, original_feature_list, training_filter=False
+        )
        filtered_dataframe = self.dh.standardize_data_from_metadata(filtered_dataframe)
-        self.dh.data_dictionary['prediction_features'] = filtered_dataframe
+        self.dh.data_dictionary["prediction_features"] = filtered_dataframe

-        # optional additional data cleaning 
-        if self.feature_parameters['principal_component_analysis']:
+        # optional additional data cleaning
+        if self.feature_parameters["principal_component_analysis"]:
            pca_components = self.dh.pca.transform(filtered_dataframe)
-            self.dh.data_dictionary['prediction_features'] = pd.DataFrame(data=pca_components,
-                              columns = ['PC'+str(i) for i in range(0,self.dh.data['n_kept_components'])],
-                              index = filtered_dataframe.index)
-        
+            self.dh.data_dictionary["prediction_features"] = pd.DataFrame(
+                data=pca_components,
+                columns=["PC" + str(i) for i in range(0, self.dh.data["n_kept_components"])],
+                index=filtered_dataframe.index,
+            )
+
        if self.feature_parameters["remove_outliers"]:
-            self.dh.remove_outliers(predict=True) # creates dropped index
+            self.dh.remove_outliers(predict=True)  # creates dropped index

-        if self.feature_parameters['DI_threshold']:
-            self.dh.check_if_pred_in_training_spaces() # sets do_predict
+        if self.feature_parameters["DI_threshold"]:
+            self.dh.check_if_pred_in_training_spaces()  # sets do_predict

-        predictions = self.model.predict(self.dh.data_dictionary['prediction_features'])
+        predictions = self.model.predict(self.dh.data_dictionary["prediction_features"])

        # compute the non-standardized predictions
-        predictions = predictions * self.dh.data['labels_std'] + self.dh.data['labels_mean']
+        predictions = predictions * self.dh.data["labels_std"] + self.dh.data["labels_mean"]

        print("--------------------Finished prediction--------------------")