merge develop into feat/freqai-rl-dev

2022-10-30 10:13:03 +01:00
parent 8d7adfabe9 5c14aeddc6
commit d1a0874683
129 changed files with 2648 additions and 1004 deletions
@@ -1,4 +1,6 @@
 import logging
+import sys
+from pathlib import Path
 from typing import Any, Dict

 from catboost import CatBoostClassifier, Pool
@@ -20,9 +22,8 @@ class CatboostClassifier(BaseClassifierModel):
    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
-        :params:
-        :data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
+        :param data_dictionary: the dictionary constructed by DataHandler to hold
+                                all the training and test data/labels.
        """

        train_data = Pool(
@@ -30,15 +31,25 @@ class CatboostClassifier(BaseClassifierModel):
            label=data_dictionary["train_labels"],
            weight=data_dictionary["train_weights"],
        )
+        if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
+            test_data = None
+        else:
+            test_data = Pool(
+                data=data_dictionary["test_features"],
+                label=data_dictionary["test_labels"],
+                weight=data_dictionary["test_weights"],
+            )

        cbr = CatBoostClassifier(
-            allow_writing_files=False,
+            allow_writing_files=True,
            loss_function='MultiClass',
+            train_dir=Path(dk.data_path),
            **self.model_training_parameters,
        )

        init_model = self.get_init_model(dk.pair)

-        cbr.fit(train_data, init_model=init_model)
+        cbr.fit(X=train_data, eval_set=test_data, init_model=init_model,
+                log_cout=sys.stdout, log_cerr=sys.stderr)

        return cbr
@@ -1,4 +1,6 @@
 import logging
+import sys
+from pathlib import Path
 from typing import Any, Dict

 from catboost import CatBoostRegressor, Pool
@@ -41,10 +43,12 @@ class CatboostRegressor(BaseRegressionModel):
        init_model = self.get_init_model(dk.pair)

        model = CatBoostRegressor(
-            allow_writing_files=False,
+            allow_writing_files=True,
+            train_dir=Path(dk.data_path),
            **self.model_training_parameters,
        )

-        model.fit(X=train_data, eval_set=test_data, init_model=init_model)
+        model.fit(X=train_data, eval_set=test_data, init_model=init_model,
+                  log_cout=sys.stdout, log_cerr=sys.stderr)

        return model
@@ -1,4 +1,6 @@
 import logging
+import sys
+from pathlib import Path
 from typing import Any, Dict

 from catboost import CatBoostRegressor, Pool
@@ -26,7 +28,8 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
        """

        cbr = CatBoostRegressor(
-            allow_writing_files=False,
+            allow_writing_files=True,
+            train_dir=Path(dk.data_path),
            **self.model_training_parameters,
        )

@@ -56,8 +59,10 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):

        fit_params = []
        for i in range(len(eval_sets)):
-            fit_params.append(
-                {'eval_set': eval_sets[i],  'init_model': init_models[i]})
+            fit_params.append({
+                    'eval_set': eval_sets[i],  'init_model': init_models[i],
+                    'log_cout': sys.stdout, 'log_cerr': sys.stderr,
+                 })

        model = FreqaiMultiOutputRegressor(estimator=cbr)
        thread_training = self.freqai_info.get('multitarget_parallel_training', False)
@@ -20,9 +20,8 @@ class LightGBMClassifier(BaseClassifierModel):
    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
-        :params:
-        :data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
+        :param data_dictionary: the dictionary constructed by DataHandler to hold
+                                all the training and test data/labels.
        """

        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
@@ -26,9 +26,8 @@ class XGBoostClassifier(BaseClassifierModel):
    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
-        :params:
-        :data_dictionary: the dictionary constructed by DataHandler to hold
-        all the training and test data/labels.
+        :param data_dictionary: the dictionary constructed by DataHandler to hold
+                                all the training and test data/labels.
        """

        X = data_dictionary["train_features"].to_numpy()
@@ -65,7 +64,7 @@ class XGBoostClassifier(BaseClassifierModel):
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
-        :param: unfiltered_df: Full dataframe for the current backtest period.
+        :param unfiltered_df: Full dataframe for the current backtest period.
        :return:
        :pred_df: dataframe containing the predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
@@ -0,0 +1,84 @@
+import logging
+from typing import Any, Dict, Tuple
+
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+from pandas import DataFrame
+from pandas.api.types import is_integer_dtype
+from sklearn.preprocessing import LabelEncoder
+from xgboost import XGBRFClassifier
+
+from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+
+
+logger = logging.getLogger(__name__)
+
+
+class XGBoostRFClassifier(BaseClassifierModel):
+    """
+    User created prediction model built on xgboost's XGBRFClassifier. The class
+    inherits BaseClassifierModel and defines two functions: fit(), which trains the
+    model, and predict(), which maps encoded predictions back to the original labels.
+    """
+
+    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
+        """
+        User sets up the training and test data to fit their desired model here
+        :param data_dictionary: the dictionary constructed by DataHandler to hold
+                                all the training and test data/labels.
+        """
+
+        X = data_dictionary["train_features"].to_numpy()
+        y = data_dictionary["train_labels"].to_numpy()[:, 0]
+
+        le = LabelEncoder()
+        if not is_integer_dtype(y):
+            y = pd.Series(le.fit_transform(y), dtype="int64")
+
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
+            eval_set = None
+        else:
+            test_features = data_dictionary["test_features"].to_numpy()
+            test_labels = data_dictionary["test_labels"].to_numpy()[:, 0]
+
+            if not is_integer_dtype(test_labels):
+                test_labels = pd.Series(le.transform(test_labels), dtype="int64")
+
+            eval_set = [(test_features, test_labels)]
+
+        train_weights = data_dictionary["train_weights"]
+
+        init_model = self.get_init_model(dk.pair)
+
+        model = XGBRFClassifier(**self.model_training_parameters)
+
+        model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
+                  xgb_model=init_model)
+
+        return model
+
+    def predict(
+        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
+    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
+        """
+        Filter the prediction features data and predict with it.
+        :param unfiltered_df: Full dataframe for the current backtest period.
+        :return: tuple of (pred_df, do_predict), where
+            pred_df is the dataframe containing the predictions and
+            do_predict is an np.array of 1s and 0s to indicate places where freqai needed
+            to remove data (NaNs) or felt uncertain about data (PCA and DI index)
+        """
+
+        (pred_df, dk.do_predict) = super().predict(unfiltered_df, dk, **kwargs)
+
+        le = LabelEncoder()
+        label = dk.label_list[0]
+        labels_before = list(dk.data['labels_std'].keys())
+        labels_after = le.fit_transform(labels_before).tolist()
+        pred_df[label] = le.inverse_transform(pred_df[label])
+        pred_df = pred_df.rename(
+            columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))})
+
+        return (pred_df, dk.do_predict)
@@ -0,0 +1,46 @@
+import logging
+from typing import Any, Dict
+
+from xgboost import XGBRFRegressor
+
+from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+
+
+logger = logging.getLogger(__name__)
+
+
+class XGBoostRFRegressor(BaseRegressionModel):
+    """
+    User created prediction model built on xgboost's XGBRFRegressor. The class
+    inherits BaseRegressionModel and only defines fit(), which trains the model
+    on the training (and optional test) data held in data_dictionary.
+    """
+
+    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
+        """
+        User sets up the training and test data to fit their desired model here
+        :param data_dictionary: the dictionary constructed by DataHandler to hold
+                                all the training and test data/labels.
+        """
+
+        X = data_dictionary["train_features"]
+        y = data_dictionary["train_labels"]
+
+        if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
+            eval_set = None
+            eval_weights = None
+        else:
+            eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
+            eval_weights = [data_dictionary['test_weights']]
+
+        sample_weight = data_dictionary["train_weights"]
+
+        xgb_model = self.get_init_model(dk.pair)
+
+        model = XGBRFRegressor(**self.model_training_parameters)
+
+        model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
+                  sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
+
+        return model
@@ -29,6 +29,7 @@ class XGBoostRegressor(BaseRegressionModel):

        if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
            eval_set = None
+            eval_weights = None
        else:
            eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
            eval_weights = [data_dictionary['test_weights']]