From 40f00196ebe4abc91b9987bf4365ea43f48c0eee Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 22 Jul 2022 17:37:51 +0200 Subject: [PATCH 01/16] use cloudpickle in place of pickle. define Paths once in data_drawer. --- freqtrade/freqai/data_drawer.py | 49 +++++++++++++++++--------------- freqtrade/freqai/data_kitchen.py | 36 ++++------------------- 2 files changed, 31 insertions(+), 54 deletions(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index c89394c09..b0493e766 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -1,7 +1,6 @@ import collections import json import logging -import pickle import re import shutil import threading @@ -10,6 +9,7 @@ from typing import Any, Dict, Tuple import numpy as np import pandas as pd +from joblib.externals import cloudpickle from pandas import DataFrame @@ -41,6 +41,12 @@ class FreqaiDataDrawer: self.historic_predictions: Dict[str, Any] = {} self.follower_dict: Dict[str, Any] = {} self.full_path = full_path + self.follower_name = self.config.get("bot_name", "follower1") + self.follower_dict_path = Path( + self.full_path / f"follower_dictionary-{self.follower_name}.json" + ) + self.historic_predictions_path = Path(self.full_path / "historic_predictions.pkl") + self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json") self.follow_mode = follow_mode if follow_mode: self.create_follower_dict() @@ -56,9 +62,9 @@ class FreqaiDataDrawer: :returns: exists: bool = whether or not the drawer was located """ - exists = Path(self.full_path / str("pair_dictionary.json")).resolve().exists() + exists = self.pair_dictionary_path.is_file() # resolve().exists() if exists: - with open(self.full_path / str("pair_dictionary.json"), "r") as fp: + with open(self.pair_dictionary_path, "r") as fp: self.pair_dict = json.load(fp) elif not self.follow_mode: logger.info("Could not find existing datadrawer, starting from scratch") @@ -76,13 +82,15 @@ class 
FreqaiDataDrawer: :returns: exists: bool = whether or not the drawer was located """ - exists = Path(self.full_path / str("historic_predictions.pkl")).resolve().exists() + exists = self.historic_predictions_path.is_file() # resolve().exists() if exists: - with open(self.full_path / str("historic_predictions.pkl"), "rb") as fp: - self.historic_predictions = pickle.load(fp) - logger.info(f"Found existing historic predictions at {self.full_path}, but beware " - "that statistics may be inaccurate if the bot has been offline for " - "an extended period of time.") + with open(self.historic_predictions_path, "rb") as fp: + self.historic_predictions = cloudpickle.load(fp) + logger.info( + f"Found existing historic predictions at {self.full_path}, but beware " + "that statistics may be inaccurate if the bot has been offline for " + "an extended period of time." + ) elif not self.follow_mode: logger.info("Could not find existing historic_predictions, starting from scratch") else: @@ -97,37 +105,34 @@ class FreqaiDataDrawer: """ Save data drawer full of all pair model metadata in present model folder. """ - with open(self.full_path / str("historic_predictions.pkl"), "wb") as fp: - pickle.dump(self.historic_predictions, fp, protocol=pickle.HIGHEST_PROTOCOL) + with open(self.historic_predictions_path, "wb") as fp: + cloudpickle.dump(self.historic_predictions, fp, protocol=cloudpickle.DEFAULT_PROTOCOL) def save_drawer_to_disk(self): """ Save data drawer full of all pair model metadata in present model folder. 
""" - with open(self.full_path / str("pair_dictionary.json"), "w") as fp: + with open(self.pair_dictionary_path, "w") as fp: json.dump(self.pair_dict, fp, default=self.np_encoder) def save_follower_dict_to_disk(self): """ Save follower dictionary to disk (used by strategy for persistent prediction targets) """ - follower_name = self.config.get("bot_name", "follower1") - with open( - self.full_path / str("follower_dictionary-" + follower_name + ".json"), "w" - ) as fp: + with open(self.follower_dict_path, "w") as fp: json.dump(self.follower_dict, fp, default=self.np_encoder) def create_follower_dict(self): """ Create or dictionary for each follower to maintain unique persistent prediction targets """ - follower_name = self.config.get("bot_name", "follower1") + whitelist_pairs = self.config.get("exchange", {}).get("pair_whitelist") exists = ( - Path(self.full_path / str("follower_dictionary-" + follower_name + ".json")) - .resolve() - .exists() + self.follower_dict_path.is_file() + # .resolve() + # .exists() ) if exists: @@ -136,9 +141,7 @@ class FreqaiDataDrawer: for pair in whitelist_pairs: self.follower_dict[pair] = {} - with open( - self.full_path / str("follower_dictionary-" + follower_name + ".json"), "w" - ) as fp: + with open(self.follower_dict_path, "w") as fp: json.dump(self.follower_dict, fp, default=self.np_encoder) def np_encoder(self, object): diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index cfa0d3818..4ba6badf9 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -2,7 +2,6 @@ import copy import datetime import json import logging -import pickle as pk import shutil from pathlib import Path from typing import Any, Dict, List, Tuple @@ -11,6 +10,7 @@ import numpy as np import numpy.typing as npt import pandas as pd from joblib import dump, load # , Parallel, delayed # used for auto distribution assignment +from joblib.externals import cloudpickle from pandas import DataFrame from sklearn 
import linear_model from sklearn.metrics.pairwise import pairwise_distances @@ -130,7 +130,7 @@ class FreqaiDataKitchen: ) if self.freqai_config.get("feature_parameters", {}).get("principal_component_analysis"): - pk.dump( + cloudpickle.dump( self.pca, open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "wb") ) @@ -192,7 +192,7 @@ class FreqaiDataKitchen: ) if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]: - self.pca = pk.load( + self.pca = cloudpickle.load( open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "rb") ) @@ -433,7 +433,7 @@ class FreqaiDataKitchen: tr_training_list_timerange = [] tr_backtesting_list_timerange = [] first = True - # within_config_timerange = True + while True: if not first: timerange_train.startts = timerange_train.startts + bt_period @@ -475,7 +475,7 @@ class FreqaiDataKitchen: :df: Dataframe containing all candles to run the entire backtest. Here it is sliced down to just the present training period. """ - # timerange = TimeRange.parse_timerange(tr) + start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc) stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc) df = df.loc[df["date"] >= start, :] @@ -1132,32 +1132,6 @@ class FreqaiDataKitchen: # Functions containing useful data manpulation examples. but not actively in use. - # def build_feature_list(self, config: dict, metadata: dict) -> list: - # """ - # SUPERCEDED BY self.find_features() - # Build the list of features that will be used to filter - # the full dataframe. Feature list is construced from the - # user configuration file. - # :params: - # :config: Canonical freqtrade config file containing all - # user defined input in config['freqai] dictionary. 
- # """ - # features = [] - # for tf in config["freqai"]["timeframes"]: - # for ft in config["freqai"]["base_features"]: - # for n in range(config["freqai"]["feature_parameters"]["shift"] + 1): - # shift = "" - # if n > 0: - # shift = "_shift-" + str(n) - # features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf) - # for p in config["freqai"]["corr_pairlist"]: - # if metadata['pair'] in p: - # continue # avoid duplicate features - # features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf) - - # # logger.info("number of features %s", len(features)) - # return features - # Possibly phasing these outlier removal methods below out in favor of # use_SVM_to_remove_outliers (computationally more efficient and apparently higher performance). # But these have good data manipulation examples, so keep them commented here for now. From 5559e605b801a70a26ce134dd6d54faea74b6520 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 22 Jul 2022 17:46:14 +0200 Subject: [PATCH 02/16] small PR conversation resolutions --- freqtrade/freqai/freqai_interface.py | 2 +- freqtrade/resolvers/freqaimodel_resolver.py | 2 +- freqtrade/strategy/interface.py | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 7f2fd677c..55f5a03a0 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -511,7 +511,7 @@ class IFreqaiModel(ABC): """ @abstractmethod - def fit(self) -> Any: + def fit(self, data_dictionary: Dict[str, Any]) -> Any: """ Most regressors use the same function names and arguments e.g. 
user can drop in LGBMRegressor in place of CatBoostRegressor and all data diff --git a/freqtrade/resolvers/freqaimodel_resolver.py b/freqtrade/resolvers/freqaimodel_resolver.py index 0fcfca363..5a847bb2b 100644 --- a/freqtrade/resolvers/freqaimodel_resolver.py +++ b/freqtrade/resolvers/freqaimodel_resolver.py @@ -44,7 +44,7 @@ class FreqaiModelResolver(IResolver): ) if freqaimodel_name in disallowed_models: raise OperationalException( - f"{freqaimodel_name} is a baseclass and cannot be used directly. User must choose " + f"{freqaimodel_name} is a baseclass and cannot be used directly. Please choose " "an existing child class or inherit from this baseclass.\n" ) freqaimodel = FreqaiModelResolver.load_object( diff --git a/freqtrade/strategy/interface.py b/freqtrade/strategy/interface.py index ed458786b..df73981bd 100644 --- a/freqtrade/strategy/interface.py +++ b/freqtrade/strategy/interface.py @@ -555,7 +555,8 @@ class IStrategy(ABC, HyperStrategyMixin): Function designed to automatically generate, name and merge features from user indicated timeframes in the configuration file. User can add additional features here, but must follow the naming convention. - Defined in IStrategy because Freqai needs to know it exists. + This method is *only* used in FreqaiDataKitchen class and therefore + it is only called if FreqAI is active. 
:params: :pair: pair to be used as informative :df: strategy dataframe which will receive merges from informatives From 36dc9be7aad861651553baf49b1ed0c43ee823ee Mon Sep 17 00:00:00 2001 From: Matthias Date: Fri, 22 Jul 2022 20:27:25 +0200 Subject: [PATCH 03/16] Update some docs wording --- docs/freqai.md | 186 +++++++++++++++++++++++++------------------------ 1 file changed, 94 insertions(+), 92 deletions(-) diff --git a/docs/freqai.md b/docs/freqai.md index dbe5d1893..1e597696a 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -11,18 +11,20 @@ Among the the features included: * Sweep model training and backtesting to simulate consistent model retraining through time. * Remove outliers automatically from training and prediction sets using a Dissimilarity Index and Support Vector Machines. * Reduce the dimensionality of the data with Principal Component Analysis. -* Store models to disk to make reloading from a crash fast and easy (and purge obsolete files automatically for sustained dry/live runs.) +* Store models to disk to make reloading from a crash fast and easy (and purge obsolete files automatically for sustained dry/live runs). * Normalize the data automatically in a smart and statistically safe way. * Automated data download and data handling. -* Clean the incoming data and of NaNs in a safe way and before training and prediction. +* Clean the incoming data of NaNs in a safe way before training and prediction. * Retrain live automatically so that the model self-adapts to the market in an unsupervised manner. ## General approach The user provides FreqAI with a set of custom indicators (created inside the strategy the same way -a typical Freqtrade strategy is created) as well as a target value (typically some price change into the future). FreqAI trains a model to predict the target value based on the input of custom indicators. -FreqAI will train and save a new model for each pair in the config whitelist. 
-Users employ FreqAI to backtest a strategy (emulate reality with retraining a model as new data is introduced) and run the model live to generate buy and sell signals. In dry/live, FreqAI works in a background thread to keep all models as updated as possible with consistent retraining. +a typical Freqtrade strategy is created) as well as a target value (typically some price change into the future). +FreqAI trains a model to predict the target value based on the input of custom indicators. +FreqAI will train and save a new model for each pair in the config whitelist. +Users employ FreqAI to backtest a strategy (emulate reality with retraining a model as new data is introduced) and run the model live to generate entry and exit signals. +In dry/live, FreqAI works in a background thread to keep all models as updated as possible with consistent retraining. ## Background and vocabulary @@ -32,16 +34,14 @@ builds the features from anything they can construct in the strategy. **Labels** are the target values with which the weights inside a model are trained toward. Each set of features is associated with a single label, which is also -defined within the strategy by the user. These labels look forward into the +defined within the strategy by the user. These labels intentionally look into the future, and are not available to the model during dryrun/live/backtesting. **Training** refers to the process of feeding individual feature sets into the -model with associated labels with the goal of matching input feature sets to -associated labels. +model with associated labels with the goal of matching input feature sets to associated labels. **Train data** is a subset of the historic data which is fed to the model during -training to adjust weights. This data directly influences weight connections -in the model. +training to adjust weights. This data directly influences weight connections in the model. 
**Test data** is a subset of the historic data which is used to evaluate the intermediate performance of the model during training. This data does not @@ -51,15 +51,17 @@ directly influence nodal weights within the model. Use `pip` to install the prerequisites with: -`pip install -r requirements-freqai.txt` +``` bash +pip install -r requirements-freqai.txt +``` ## Running from the example files -An example strategy, an example prediction model, and example config can all be found in -`freqtrade/templates/FreqaiExampleStrategy.py`, -`freqtrade/freqai/prediction_models/LightGBMPredictionModel.py`, -`config_examples/config_freqai_futures.example.json`, respectively. Assuming the user has downloaded -the necessary data, Freqai can be executed from these templates with: +An example strategy, an example prediction model, and example config can all be found in +`freqtrade/templates/FreqaiExampleStrategy.py`, `freqtrade/freqai/prediction_models/LightGBMPredictionModel.py`, +`config_examples/config_freqai_futures.example.json`, respectively. + +Assuming the user has downloaded the necessary data, Freqai can be executed from these templates with: ```bash freqtrade backtesting --config config_examples/config_freqai.example.json --strategy FreqaiExampleStrategy --freqaimodel LightGBMPredictionModel --strategy-path freqtrade/templates --timerange 20220101-20220201 @@ -68,6 +70,7 @@ freqtrade backtesting --config config_examples/config_freqai.example.json --stra ## Configuring the bot ### Parameter table + The table below will list all configuration parameters available for `FreqAI`. Mandatory parameters are marked as **Required**, which means that they are required to be set in one of the possible ways. @@ -75,14 +78,14 @@ Mandatory parameters are marked as **Required**, which means that they are requi | Parameter | Description | |------------|-------------| | `freqai` | **Required.** The dictionary containing all the parameters for controlling FreqAI.
**Datatype:** dictionary. -| `identifier` | **Required.** A unique name for the current model. This can be reused to reload pretrained models/data.
**Datatype:** string. +| `identifier` | **Required.** A unique name for the current model. This can be reused to reload pre-trained models/data.
**Datatype:** string. | `train_period_days` | **Required.** Number of days to use for the training data (width of the sliding window).
**Datatype:** positive integer. | `backtest_period_days` | **Required.** Number of days to inference into the trained model before sliding the window and retraining. This can be fractional days, but beware that the user provided `timerange` will be divided by this number to yield the number of trainings necessary to complete the backtest.
**Datatype:** Float. | `live_retrain_hours` | Frequency of retraining during dry/live runs. Default set to 0, which means it will retrain as often as possible. **Datatype:** Float > 0. -| `follow_mode` | If true, this instance of FreqAI will look for models associated with `identifier` and load those for inferencing. A `follower` will **not** train new models. False by default.
**Datatype:** boolean. +| `follow_mode` | If true, this instance of FreqAI will look for models associated with `identifier` and load those for inferencing. A `follower` will **not** train new models. `False` by default.
**Datatype:** boolean. | `live_trained_timestamp` | Useful if user wants to start from models trained during a *backtest*. The timestamp can be located in the `user_data/models` backtesting folder. This is not a commonly used parameter, leave undefined for most applications.
**Datatype:** positive integer. | `fit_live_predictions_candles` | Computes target (label) statistics from prediction data, instead of from the training data set. Number of candles is the number of historical candles it uses to generate the statistics.
**Datatype:** positive integer. -| `purge_old_models` | Tell FreqAI to delete obsolete models. Otherwise, all historic models will remain on disk. Defaults to False.
**Datatype:** boolean. +| `purge_old_models` | Tell FreqAI to delete obsolete models. Otherwise, all historic models will remain on disk. Defaults to `False`.
**Datatype:** boolean. | `expiration_hours` | Ask FreqAI to avoid making predictions if a model is more than `expiration_hours` old. Defaults to 0 which means models never expire.
**Datatype:** positive integer. | | **Feature Parameters** | `feature_parameters` | A dictionary containing the parameters used to engineer the feature set. Details and examples shown [here](#building-the-feature-set)
**Datatype:** dictionary. @@ -98,24 +101,25 @@ Mandatory parameters are marked as **Required**, which means that they are requi | `stratify_training_data` | This value is used to indicate the stratification of the data. e.g. 2 would set every 2nd data point into a separate dataset to be pulled from during training/testing.
**Datatype:** positive integer. | `indicator_max_period_candles` | The maximum *period* used in `populate_any_indicators()` for indicator creation. FreqAI uses this information in combination with the maximum timeframe to calculate how many data points it should download so that the first data point does not have a NaN
**Datatype:** positive integer. | `indicator_periods_candles` | A list of integers used to duplicate all indicators according to a set of periods and add them to the feature set.
**Datatype:** list of positive integers. -| | **Data split parameters** -| `data_split_parameters` | include any additional parameters available from Scikit-learn `test_train_split()`, which are shown [here](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html)
**Datatype:** dictionary. +| | **Data split parameters** +| `data_split_parameters` | Include any additional parameters available from Scikit-learn `train_test_split()`, which are shown [here](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html)
**Datatype:** dictionary. | `test_size` | Fraction of data that should be used for testing instead of training.
**Datatype:** positive float below 1. | `shuffle` | Shuffle the training data points during training. Typically for time-series forecasting, this is set to False. **Datatype:** boolean. -| | **Model training parameters** +| | **Model training parameters** | `model_training_parameters` | A flexible dictionary that includes all parameters available by the user selected library. For example, if the user uses `LightGBMPredictionModel`, then this dictionary can contain any parameter available by the `LightGBMRegressor` [here](https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMRegressor.html). If the user selects a different model, then this dictionary can contain any parameter from that different model.
**Datatype:** dictionary. | `n_estimators` | A common parameter among regressors which sets the number of boosted trees to fit
**Datatype:** integer. | `learning_rate` | A common parameter among regressors which sets the boosting learning rate.
**Datatype:** float. | `n_jobs`, `thread_count`, `task_type` | Different libraries use different parameter names to control the number of threads used for parallel processing or whether or not it is a `task_type` of `gpu` or `cpu`.
**Datatype:** float. ### Return values for use in strategy + Here are the values you can expect to receive inside the dataframe returned by FreqAI: | Parameter | Description | |------------|-------------| | `&-s*` | user defined labels in the user made strategy. Anything prepended with `&` is treated as a training target inside FreqAI. These same dataframe columns names are fed back to the user as the predictions. For example, the user wishes to predict the price change in the next 40 candles (similar to `templates/FreqaiExampleStrategy.py`) by setting `&-s_close`. FreqAI makes the predictions and gives them back to the user under the same key (`&-s_close`) to be used in `populate_entry/exit_trend()`.
**Datatype:** depends on the output of the model. -| `&-s*_std/mean` | The standard deviation and mean values of the user defined labels during training (or live tracking with `fit_live_predictions_candles`). Commonly used to understand rarity of prediction (use the z-score as shown in `templates/FreqaiExampleStrategy.py` to evaluate how often a particular prediction was observed during training (or historically with `fit_live_predictions_candles`)
**Datatype:** floats. -| `do_predict` | An indication of an outlier, this return value is integer between -1 and 2 which lets the user understand if the prediction is trustworthy or not. `do_predict==1` means the prediction is trustworthy. If the [Dissimilartiy Index](#removing-outliers-with-the-dissimilarity-index) is above the user defined treshold, it will subtract 1 from `do_predict`. If `use_SVM_to_remove_outliers()` is active, then the Support Vector Machine (SVM) may also detect outliers in training and prediction data. In this case, the SVM will also subtract one from `do_predict`. A particular case is when `do_predict == 2`, it means that the model has expired due to `expired_hours`.
**Datatype:** integer between -1 and 2. +| `&-s*_std/mean` | The standard deviation and mean values of the user defined labels during training (or live tracking with `fit_live_predictions_candles`). Commonly used to understand rarity of prediction (use the z-score as shown in `templates/FreqaiExampleStrategy.py` to evaluate how often a particular prediction was observed during training (or historically with `fit_live_predictions_candles`)).
**Datatype:** float. +| `do_predict` | An indication of an outlier, this return value is an integer between -1 and 2 which lets the user understand if the prediction is trustworthy or not. `do_predict==1` means the prediction is trustworthy. If the [Dissimilarity Index](#removing-outliers-with-the-dissimilarity-index) is above the user defined threshold, it will subtract 1 from `do_predict`. If `use_SVM_to_remove_outliers()` is active, then the Support Vector Machine (SVM) may also detect outliers in training and prediction data. In this case, the SVM will also subtract one from `do_predict`. A particular case is `do_predict == 2`, which means that the model has expired due to `expiration_hours`.
**Datatype:** integer between -1 and 2. | `DI_values` | The raw Dissimilarity Index values to give the user a sense of confidence in the prediction. Lower DI means the data point is closer to the trained parameter space.
**Datatype:** float. ### Example config file @@ -159,13 +163,13 @@ config setup includes: ### Building the feature set Features are added by the user inside the `populate_any_indicators()` method of the strategy -by prepending indicators with `%` and labels are added by prependng `&`. There are some important -components/structures that the user *must* include when building their feature set. As shown below, -`with self.model.bridge.lock:` must be used to ensure thread safety - especially when using third -party libraries for indicator construction such as TA-lib. Another structure to consider is the -location of the labels at the bottom of the example function (below `if set_generalized_indicators:`). +by prepending indicators with `%` and labels are added by prepending `&`. +There are some important components/structures that the user *must* include when building their feature set. +As shown below, `with self.model.bridge.lock:` must be used to ensure thread safety - especially when using third +party libraries for indicator construction such as TA-lib. +Another structure to consider is the location of the labels at the bottom of the example function (below `if set_generalized_indicators:`). This is where the user will add single features and labels to their feature set to avoid duplication from -various configuration paramters which multiply the feature set such as `include_timeframes`. +various configuration parameters which multiply the feature set such as `include_timeframes`. ```python def populate_any_indicators( @@ -252,12 +256,13 @@ various configuration paramters which multiply the feature set such as `include_ return df ``` -The user of the present example does not want to pass the `bb_lowerband` as a feature to the model, + +The user of the present example does not want to pass the `bb_lowerband` as a feature to the model, and has therefore not prepended it with `%`. 
The user does, however, wish to pass `bb_width` to the -model for training/prediction and has therfore prepended it with `%`._ +model for training/prediction and has therefore prepended it with `%`. Note: features **must** be defined in `populate_any_indicators()`. Making features in `populate_indicators()` -will fail in live/dry. If the user wishes to add generalized features that are not associated with +will fail in live/dry mode. If the user wishes to add generalized features that are not associated with a specific pair or timeframe, they should use the following structure inside `populate_any_indicators()` (as exemplified in `freqtrade/templates/FreqaiExampleStrategy.py`: @@ -299,35 +304,34 @@ set will include all the features from `populate_any_indicators` on all the `inc `ETH/USD`, `LINK/USD`, and `BNB/USD`. `include_shifted_candles` is another user controlled parameter which indicates the number of previous -candles to include in the present feature set. In other words, `innclude_shifted_candles: 2`, tells -Freqai to include the the past 2 candles for each of the features included -in the dataset. +candles to include in the present feature set. In other words, `include_shifted_candles: 2` tells +Freqai to include the past 2 candles for each of the features included in the dataset. -In total, the number of features the present user has created is:_ +In total, the number of features the present user has created is: -legnth of `include_timeframes` * no. features in `populate_any_indicators()` * legnth of `include_corr_pairlist` * no. `include_shifted_candles` * length of `indicator_periods_candles`_ -3 * 3 * 3 * 2 * 2 = 108._ +length of `include_timeframes` * no. features in `populate_any_indicators()` * length of `include_corr_pairlist` * no. `include_shifted_candles` * length of `indicator_periods_candles` +_3 * 3 * 3 * 2 * 2 = 108_.
### Deciding the sliding training window and backtesting duration Users define the backtesting timerange with the typical `--timerange` parameter in the user configuration file. `train_period_days` is the duration of the sliding training window, while -`backtest_period_days` is the sliding backtesting window, both in number of days (backtest_period_days can be +`backtest_period_days` is the sliding backtesting window, both in number of days (`backtest_period_days` can be a float to indicate sub daily retraining in live/dry mode). In the present example, the user is asking Freqai to use a training period of 30 days and backtest the subsequent 7 days. -This means that if the user sets `--timerange 20210501-20210701`, +This means that if the user sets `--timerange 20210501-20210701`, Freqai will train 8 separate models (because the full range comprises 8 weeks), and then backtest the subsequent week associated with each of the 8 training data set timerange months. Users can think of this as a "sliding window" which emulates Freqai retraining itself once per week in live using the previous -month of data._ +month of data. In live, the required training data is automatically computed and downloaded. However, in backtesting the user must manually enter the required number of `startup_candles` in the config. This value -is used to increase the available data to FreqAI and should be sufficient to enable all indicators -to be NaN free at the beginning of the first training timerange. This boils down to identifying the +is used to increase the available data to FreqAI and should be sufficient to enable all indicators +to be NaN free at the beginning of the first training timerange. This boils down to identifying the highest timeframe (`4h` in present example) and the longest indicator period (25 in present example) -and adding this to the `train_period_days`. The units need to be in the base candle time frame:_ +and adding this to the `train_period_days`. 
The units need to be in the base candle time frame: `startup_candles` = ( 4 hours * 25 max period * 60 minutes/hour + 30 day train_period_days * 1440 minutes per day ) / 5 min (base time frame) = 1488. @@ -344,16 +348,16 @@ The freqai training/backtesting module can be executed with the following comman freqtrade backtesting --strategy FreqaiExampleStrategy --config config_freqai_futures.example.json --freqaimodel LightGBMPredictionModel --timerange 20210501-20210701 ``` -If this command has never been executed with the existing config file, then it will train a new model -for each pair, for each backtesting window within the bigger `--timerange`._ +If this command has never been executed with the existing config file, then it will train a new model +for each pair, for each backtesting window within the bigger `--timerange`. --- -**NOTE** -Once the training is completed, the user can execute this again with the same config file and -FreqAI will find the trained models and load them instead of spending time training. This is useful -if the user wants to tweak (or even hyperopt) buy and sell criteria inside the strategy. IF the user -*wants* to retrain a new model with the same config file, then he/she should simply change the `identifier`. -This way, the user can return to using any model they wish by simply changing the `identifier`. +!!! Note "Model reuse" + Once the training is completed, the user can execute this again with the same config file and + FreqAI will find the trained models and load them instead of spending time training. This is useful + if the user wants to tweak (or even hyperopt) buy and sell criteria inside the strategy. IF the user + *wants* to retrain a new model with the same config file, then he/she should simply change the `identifier`. + This way, the user can return to using any model they wish by simply changing the `identifier`. 
--- @@ -402,8 +406,9 @@ the feature set with a proper naming convention for the IFreqaiModel to use late ### Building an IFreqaiModel -FreqAI has multiple example prediction model based libraries such as `Catboost` regression (`freqai/prediction_models/CatboostPredictionModel.py`) and `LightGBM` regression. However, users can customize and create -their own prediction models using the `IFreqaiModel` class. Users are encouraged to inherit `train()` and `predict()` to let them customize various aspects of their training procedures. +FreqAI has multiple example prediction model based libraries such as `Catboost` regression (`freqai/prediction_models/CatboostPredictionModel.py`) and `LightGBM` regression. +However, users can customize and create their own prediction models using the `IFreqaiModel` class. +Users are encouraged to inherit `train()` and `predict()` to let them customize various aspects of their training procedures. ### Running the model live @@ -431,28 +436,28 @@ the same `identifier` parameter } ``` -In this case, although Freqai will initiate with a +In this case, although Freqai will initiate with a pre-trained model, it will still check to see how much time has elapsed since the model was trained, and if a full `live_retrain_hours` has elapsed since the end of the loaded model, FreqAI will self retrain. -It is common to want constant retraining, in whichcase, user should set `live_retrain_hours` to 0. +It is common to want constant retraining, in which case, the user should set `live_retrain_hours` to 0. -## Data anylsis techniques +## Data analysis techniques ### Controlling the model learning process -Model training parameters are unqiue to the library employed by the user. FreqAI allows users to set any parameter for any library using the `model_training_parameters` dictionary in the user configuration file. 
The example configuration files show some of the example parameters associated with `Catboost` and `LightGBM`, but users can add any parameters available in those libraries. +Model training parameters are unique to the ML library used by the user. FreqAI allows users to set any parameter for any library using the `model_training_parameters` dictionary in the user configuration file. The example configuration files show some of the example parameters associated with `Catboost` and `LightGBM`, but users can add any parameters available in those libraries. Data split parameters are defined in `data_split_parameters` which can be any parameters associated with `Sklearn`'s `train_test_split()` function. Meanwhile, FreqAI includes some additional parameters such `weight_factor` which allows the user to weight more recent data more strongly than past data via an exponential function: $$ W_i = \exp(\frac{-i}{\alpha*n}) $$ -where $W_i$ is the weight of data point $i$ in a total set of $n$ data points._ +where $W_i$ is the weight of data point $i$ in a total set of $n$ data points. ![weight-factor](assets/weights_factor.png) -`train_test_split()` has a parameters called `shuffle`, which users also have access to in FreqAI, that allows them to keep the data unshuffled. This is particularly useful to avoid biasing training with temporally autocorrelated data. - +`train_test_split()` has a parameters called `shuffle`, which users also have access to in FreqAI, that allows them to keep the data unshuffled. This is particularly useful to avoid biasing training with temporally auto-correlated data. + Finally, `label_period_candles` defines the offset used for the `labels`. In the present example, the user is asking for `labels` that are 24 candles in the future. @@ -489,7 +494,7 @@ to low levels of certainty. 
Activating the Dissimilarity Index can be achieved w ```json "freqai": { "feature_parameters" : { - "DI_threshold": 1 + "DI_threshold": 1 } } ``` @@ -504,7 +509,7 @@ Users can reduce the dimensionality of their features by activating the `princip ```json "freqai": { "feature_parameters" : { - "principal_component_analysis": true + "principal_component_analysis": true } } ``` @@ -525,8 +530,7 @@ The user can tell Freqai to remove outlier data points from the training/test da ``` Freqai will train an SVM on the training data (or components if the user activated -`principal_component_analysis`) and remove any data point that it deems to be sit beyond the -feature space. +`principal_component_analysis`) and remove any data point that it deems to be sitting beyond the feature space. ### Stratifying the data @@ -541,10 +545,10 @@ The user can stratify the training/testing data using: ``` which will split the data chronologically so that every Xth data points is a testing data point. In the -present example, the user is asking for every third data point in the dataframe to be used for -testing, the other points are used for training. +present example, the user is asking for every third data point in the dataframe to be used for +testing, the other points are used for training. -### Setting up a follower +## Setting up a follower The user can define: @@ -555,15 +559,15 @@ The user can define: } ``` -to indicate to the bot that it should not train models, but instead should look for models trained -by a leader with the same `identifier`. In this example, the user has a leader bot with the -`identifier: "example"` already running or launching simultaneously as the present follower. +to indicate to the bot that it should not train models, but instead should look for models trained +by a leader with the same `identifier`. In this example, the user has a leader bot with the +`identifier: "example"` already running or launching simultaneously as the present follower. 
The follower will load models created by the leader and inference them to obtain predictions. -### Purging old model data +## Purging old model data -FreqAI stores new model files each time it retrains. These files become obsolete as new models -are trained and FreqAI adapts to the new market conditions. Users planning to leave FreqAI running +FreqAI stores new model files each time it retrains. These files become obsolete as new models +are trained and FreqAI adapts to the new market conditions. Users planning to leave FreqAI running for extended periods of time with high frequency retraining should set `purge_old_models` in their config: @@ -591,17 +595,17 @@ a certain number of hours in age by setting the `expiration_hours` in the config ``` In the present example, the user will only allow predictions on models that are less than 1/2 hours -old. +old. ## Choosing the calculation of the `target_roi` As shown in `templates/FreqaiExampleStrategy.py`, the `target_roi` is based on two metrics computed -by FreqAI: `label_mean` and `label_std`. These are the statistics associated with the labels used -*during the most recent training*. This allows the model to know what magnitude of a target to be -expecting since it is directly stemming from the training data. By default, FreqAI computes this based -on trainig data and it assumes the labels are Gaussian distributed. These are big assumptions -that the user should consider when creating their labels. If the user wants to consider the population -of *historical predictions* for creating the dynamic target instead of the trained labels, the user +by FreqAI: `label_mean` and `label_std`. These are the statistics associated with the labels used +*during the most recent training*. +This allows the model to know what magnitude of a target to be expecting since it is directly stemming from the training data. +By default, FreqAI computes this based on training data and it assumes the labels are Gaussian distributed. 
+These are big assumptions that the user should consider when creating their labels. If the user wants to consider the population +of *historical predictions* for creating the dynamic target instead of the trained labels, the user can do so by setting `fit_live_prediction_candles` to the number of historical prediction candles the user wishes to use to generate target statistics. @@ -620,7 +624,7 @@ this historical data to be reloaded if the user stops and restarts with the same The labels used for model training have a unique statistical distribution for each separate model training. We can use this information to know if our current prediction is in the realm of what the model was trained on, and if so, what is the statistical probability of the current prediction. With this information, we can -make more informed prediction._ +make more informed prediction. FreqAI builds this label distribution and provides a quantile to the strategy, which can be optionally used as a dynamic threshold. The `target_quantile: X` means that X% of the labels are below this value. So setting: @@ -663,25 +667,23 @@ below this value. An example usage in the strategy may look something like: ``` --> - - ## Additional information ### Common pitfalls FreqAI cannot be combined with `VolumePairlists` (or any pairlist filter that adds and removes pairs dynamically). -This is for performance reasons - FreqAI relies on making quick predictions/retrains. To do this effectively, +This is for performance reasons - FreqAI relies on making quick predictions/retrains. To do this effectively, it needs to download all the training data at the beginning of a dry/live instance. FreqAI stores and appends -new candles automatically for future retrains. But this means that if new pairs arrive later in the dry run due +new candles automatically for future retrains. But this means that if new pairs arrive later in the dry run due to a volume pairlist, it will not have the data ready. 
FreqAI does work, however, with the `ShufflePairlist`. ### Feature normalization -The feature set created by the user is automatically normalized to the training -data only. This includes all test data and unseen prediction data (dry/live/backtest). +The feature set created by the user is automatically normalized to the training data only. +This includes all test data and unseen prediction data (dry/live/backtest). ### File structure -`user_data_dir/models/` contains all the data associated with the trainings and -backtests. This file structure is heavily controlled and read by the `FreqaiDataKitchen()` -and should thus not be modified. +`user_data_dir/models/` contains all the data associated with the trainings and backtests. +This file structure is heavily controlled and read by the `FreqaiDataKitchen()` +and should therefore not be modified. From 3acc86957016f9727c52d9a5ac5460204c43840b Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 23 Jul 2022 12:42:24 +0200 Subject: [PATCH 04/16] improve the dataframe key description, update outdated parts of doc --- docs/freqai.md | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/docs/freqai.md b/docs/freqai.md index 1e597696a..04f442778 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -111,16 +111,17 @@ Mandatory parameters are marked as **Required**, which means that they are requi | `learning_rate` | A common parameter among regressors which sets the boosting learning rate.
**Datatype:** float. | `n_jobs`, `thread_count`, `task_type` | Different libraries use different parameter names to control the number of threads used for parallel processing or whether or not it is a `task_type` of `gpu` or `cpu`.
**Datatype:** float. -### Return values for use in strategy -Here are the values you can expect to receive inside the dataframe returned by FreqAI: +### Important FreqAI dataframe key patterns +Here are the values the user can expect to include/use inside the typical strategy dataframe (`df[]`): -| Parameter | Description | +| DataFrame Key | Description | |------------|-------------| -| `&-s*` | user defined labels in the user made strategy. Anything prepended with `&` is treated as a training target inside FreqAI. These same dataframe columns names are fed back to the user as the predictions. For example, the user wishes to predict the price change in the next 40 candles (similar to `templates/FreqaiExampleStrategy.py`) by setting `&-s_close`. FreqAI makes the predictions and gives them back to the user under the same key (`&-s_close`) to be used in `populate_entry/exit_trend()`.
**Datatype:** depends on the output of the model. -| `&-s*_std/mean` | The standard deviation and mean values of the user defined labels during training (or live tracking with `fit_live_predictions_candles`). Commonly used to understand rarity of prediction (use the z-score as shown in `templates/FreqaiExampleStrategy.py` to evaluate how often a particular prediction was observed during training (or historically with `fit_live_predictions_candles`)
**Datatype:** float. -| `do_predict` | An indication of an outlier, this return value is integer between -1 and 2 which lets the user understand if the prediction is trustworthy or not. `do_predict==1` means the prediction is trustworthy. If the [Dissimilarity Index](#removing-outliers-with-the-dissimilarity-index) is above the user defined threshold, it will subtract 1 from `do_predict`. If `use_SVM_to_remove_outliers()` is active, then the Support Vector Machine (SVM) may also detect outliers in training and prediction data. In this case, the SVM will also subtract one from `do_predict`. A particular case is when `do_predict == 2`, it means that the model has expired due to `expired_hours`.
**Datatype:** integer between -1 and 2. -| `DI_values` | The raw Dissimilarity Index values to give the user a sense of confidence in the prediction. Lower DI means the data point is closer to the trained parameter space.
**Datatype:** float. +| `df['&*']` | Any dataframe column prepended with `&` in `populate_any_indicators()` is treated as a training target inside FreqAI (typically following the naming convention `&-s*`). These same dataframe column names are fed back to the user as the predictions. For example, the user can predict the price change in the next 40 candles (similar to `templates/FreqaiExampleStrategy.py`) by setting `df['&-s_close']`. FreqAI makes the predictions and gives them back to the user under the same key (`df['&-s_close']`) to be used in `populate_entry/exit_trend()`.
**Datatype:** depends on the output of the model. +| `df['&*_std/mean']` | The standard deviation and mean values of the user defined labels during training (or live tracking with `fit_live_predictions_candles`). Commonly used to understand rarity of prediction (use the z-score as shown in `templates/FreqaiExampleStrategy.py` to evaluate how often a particular prediction was observed during training (or historically with `fit_live_predictions_candles`)).
**Datatype:** float. +| `df['do_predict']` | An indication of an outlier: this return value is an integer between -1 and 2 which lets the user understand if the prediction is trustworthy or not. `do_predict==1` means the prediction is trustworthy. If the [Dissimilarity Index](#removing-outliers-with-the-dissimilarity-index) is above the user defined threshold, it will subtract 1 from `do_predict`. If `use_SVM_to_remove_outliers()` is active, then the Support Vector Machine (SVM) may also detect outliers in training and prediction data. In this case, the SVM will also subtract one from `do_predict`. A particular case is `do_predict == 2`, which means that the model has expired due to `expiration_hours`.
**Datatype:** integer between -1 and 2. +| `df['DI_values']` | The raw Dissimilarity Index values to give the user a sense of confidence in the prediction. Lower DI means the data point is closer to the trained parameter space.
**Datatype:** float. +| `df['%*']` | Any dataframe column prepended with `%` in `populate_any_indicators()` is treated as a training feature inside FreqAI. For example, the user can include the rsi in the training feature set (similar to `templates/FreqaiExampleStrategy.py`) by setting `df['%-rsi']`. See more details on how this is done [here](#building-the-feature-set). Note: since the number of features prepended with `%` can multiply very quickly (tens of thousands of features are easily engineered using the multiplicative functionality described in the `feature_parameters` table), these features are removed from the dataframe upon return from FreqAI. If the user wishes to keep a particular type of feature for plotting purposes, they can prepend it with `%%`.
**Datatype:** depends on the output of the model. ### Example config file @@ -338,6 +339,9 @@ and adding this to the `train_period_days`. The units need to be in the base can !!! Note In dry/live, this is all precomputed and handled automatically. Thus, `startup_candle` has no influence on dry/live. +!!! Note + Although fractional `backtest_period_days` is allowed, the user should be aware that the `--timerange` is divided by this value to determine the number of models that FreqAI will need to train in order to backtest the full range. For example, if the user wants to set a `--timerange` of 10 days, and asks for a `backtest_period_days` of 0.1, FreqAI will need to train 100 models per pair to complete the full backtest. This is why it is physically impossible to truly backtest FreqAI adaptive training. The best way to fully test a model is to run it dry and let it constantly train. In this case, backtesting would take the exact same amount of time as a dry run. + ## Running Freqai ### Training and backtesting @@ -419,12 +423,7 @@ freqtrade trade --strategy FreqaiExampleStrategy --config config_freqai.example. ``` By default, Freqai will not find find any existing models and will start by training a new one -given the user configuration settings. Following training, it will use that model to predict for the -duration of `backtest_period_days`. After a full `backtest_period_days` has elapsed, Freqai will auto retrain -a new model, and begin making predictions with the updated model. FreqAI backtesting and live both -permit the user to use fractional days (i.e. 0.1) in the `backtest_period_days`, which enables more frequent -retraining. But the user should be careful that using a fractional `backtest_period_days` with a large -`--timerange` in backtesting will result in a huge amount of required trainings/models. +given the user configuration settings. Following training, it will use that model to make predictions on incoming candles until a new model is available.
New models are typically generated as often as possible, with FreqAI managing an internal queue of the pairs to try and keep all models equally "young." FreqAI will always use the newest trained model to make predictions on incoming live data. If users do not want FreqAI to retrain new models as often as possible, they can set `live_retrain_hours` to tell FreqAI to wait at least that number of hours before retraining a new model. Additionally, users can set `expiration_hours` to tell FreqAI to avoid making predictions on models aged over this number of hours. If the user wishes to start dry/live from a backtested saved model, the user only needs to reuse the same `identifier` parameter @@ -439,7 +438,6 @@ the same `identifier` parameter In this case, although Freqai will initiate with a pre-trained model, it will still check to see how much time has elapsed since the model was trained, and if a full `live_retrain_hours` has elapsed since the end of the loaded model, FreqAI will self retrain. -It is common to want constant retraining, in which case, the user should set `live_retrain_hours` to 0. ## Data analysis techniques From c2d6a0e891744791a9ad27db216bd141fd8e45d3 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 23 Jul 2022 13:04:06 +0200 Subject: [PATCH 05/16] add record of contribution to doc and source --- docs/freqai.md | 17 +++++++++++++++++ freqtrade/freqai/data_drawer.py | 20 +++++++++++++++++--- freqtrade/freqai/freqai_interface.py | 18 +++++++++++++++++- 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/docs/freqai.md b/docs/freqai.md index 04f442778..5072d3721 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -685,3 +685,20 @@ This includes all test data and unseen prediction data (dry/live/backtest). `user_data_dir/models/` contains all the data associated with the trainings and backtests. This file structure is heavily controlled and read by the `FreqaiDataKitchen()` and should therefore not be modified.
+ +## Credits +FreqAI was developed by a group of individuals who all contributed specific skillsets to the +project. + +Conception and software development: +Robert Caulk @robcaulk + +Theoretical brainstorming: +Elin Törnquist @thorntwig + +Code review, software architecture brainstorming: +@xmatthias + +Beta testing and bug reporting: +@bloodhunter4rc, Salah Lamkadem @ikonx, @ken11o2, @longyu, @paranoidandy, @smidelis, @smarm +Juha Nykänen @suikula, Wagner Costa @wagnercosta diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index b0493e766..baaa55c7e 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -13,9 +13,6 @@ from joblib.externals import cloudpickle from pandas import DataFrame -# from freqtrade.freqai.data_kitchen import FreqaiDataKitchen - - logger = logging.getLogger(__name__) @@ -25,6 +22,23 @@ class FreqaiDataDrawer: /loading to/from disk. This object remains persistent throughout live/dry, unlike FreqaiDataKitchen, which is reinstantiated for each coin. + + Record of contribution: + FreqAI was developed by a group of individuals who all contributed specific skillsets to the + project. + + Conception and software development: + Robert Caulk @robcaulk + + Theoretical brainstorming: + Elin Törnquist @thorntwig + + Code review, software architecture brainstorming: + @xmatthias + + Beta testing and bug reporting: + @bloodhunter4rc, Salah Lamkadem @ikonx, @ken11o2, @longyu, @paranoidandy, @smidelis, @smarm + Juha Nykänen @suikula, Wagner Costa @wagnercosta """ def __init__(self, full_path: Path, config: dict, follow_mode: bool = False): diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 55f5a03a0..5fa15ebf8 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -38,7 +38,23 @@ class IFreqaiModel(ABC): """ Class containing all tools for training and prediction in the strategy. 
Base*PredictionModels inherit from this class. - Author: Robert Caulk, rob.caulk@gmail.com + + Record of contribution: + FreqAI was developed by a group of individuals who all contributed specific skillsets to the + project. + + Conception and software development: + Robert Caulk @robcaulk + + Theoretical brainstorming: + Elin Törnquist @thorntwig + + Code review, software architecture brainstorming: + @xmatthias + + Beta testing and bug reporting: + @bloodhunter4rc, Salah Lamkadem @ikonx, @ken11o2, @longyu, @paranoidandy, @smidelis, @smarm + Juha Nykänen @suikula, Wagner Costa @wagnercosta """ def __init__(self, config: Dict[str, Any]) -> None: From a1cff377ec7b32046fbe254a29e5ef115c01e4a8 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 23 Jul 2022 13:32:04 +0200 Subject: [PATCH 06/16] add record of contribution to data_kitchen.py --- freqtrade/freqai/data_kitchen.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 4ba6badf9..9662f4f3a 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -35,7 +35,23 @@ class FreqaiDataKitchen: """ Class designed to analyze data for a single pair. Employed by the IFreqaiModel class. Functionalities include holding, saving, loading, and analyzing the data. - author: Robert Caulk, rob.caulk@gmail.com + + Record of contribution: + FreqAI was developed by a group of individuals who all contributed specific skillsets to the + project. 
+ + Conception and software development: + Robert Caulk @robcaulk + + Theoretical brainstorming: + Elin Törnquist @thorntwig + + Code review, software architecture brainstorming: + @xmatthias + + Beta testing and bug reporting: + @bloodhunter4rc, Salah Lamkadem @ikonx, @ken11o2, @longyu, @paranoidandy, @smidelis, @smarm + Juha Nykänen @suikula, Wagner Costa @wagnercosta """ def __init__( From 50d630a1555a26251e162aab50102d491d5946d3 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 23 Jul 2022 13:35:44 +0200 Subject: [PATCH 07/16] remove unnecessary comments from data_drawer.py --- freqtrade/freqai/data_drawer.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index baaa55c7e..f9736a498 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -76,7 +76,7 @@ class FreqaiDataDrawer: :returns: exists: bool = whether or not the drawer was located """ - exists = self.pair_dictionary_path.is_file() # resolve().exists() + exists = self.pair_dictionary_path.is_file() if exists: with open(self.pair_dictionary_path, "r") as fp: self.pair_dict = json.load(fp) @@ -96,7 +96,7 @@ class FreqaiDataDrawer: :returns: exists: bool = whether or not the drawer was located """ - exists = self.historic_predictions_path.is_file() # resolve().exists() + exists = self.historic_predictions_path.is_file() if exists: with open(self.historic_predictions_path, "rb") as fp: self.historic_predictions = cloudpickle.load(fp) @@ -143,11 +143,7 @@ class FreqaiDataDrawer: whitelist_pairs = self.config.get("exchange", {}).get("pair_whitelist") - exists = ( - self.follower_dict_path.is_file() - # .resolve() - # .exists() - ) + exists = self.follower_dict_path.is_file() if exists: logger.info("Found an existing follower dictionary") From c91e23dc504e97e9d55b0e2f0636099cce378be2 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 23 Jul 2022 16:14:13 +0200 Subject: [PATCH 08/16] let 
user avoid normalizing labels --- freqtrade/freqai/data_kitchen.py | 37 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 9662f4f3a..1c54a6375 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -356,7 +356,7 @@ class FreqaiDataKitchen: return self.data_dictionary - def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]: + def normalize_data(self, data_dictionary: Dict, do_labels: bool = True) -> Dict[Any, Any]: """ Normalize all data in the data_dictionary according to the training dataset :params: @@ -374,27 +374,28 @@ class FreqaiDataKitchen: 2 * (data_dictionary["test_features"] - train_min) / (train_max - train_min) - 1 ) - train_labels_max = data_dictionary["train_labels"].max() - train_labels_min = data_dictionary["train_labels"].min() - data_dictionary["train_labels"] = ( - 2 - * (data_dictionary["train_labels"] - train_labels_min) - / (train_labels_max - train_labels_min) - - 1 - ) - data_dictionary["test_labels"] = ( - 2 - * (data_dictionary["test_labels"] - train_labels_min) - / (train_labels_max - train_labels_min) - - 1 - ) - for item in train_max.keys(): self.data[item + "_max"] = train_max[item] self.data[item + "_min"] = train_min[item] - self.data["labels_max"] = train_labels_max.to_dict() - self.data["labels_min"] = train_labels_min.to_dict() + if do_labels: + train_labels_max = data_dictionary["train_labels"].max() + train_labels_min = data_dictionary["train_labels"].min() + data_dictionary["train_labels"] = ( + 2 + * (data_dictionary["train_labels"] - train_labels_min) + / (train_labels_max - train_labels_min) + - 1 + ) + data_dictionary["test_labels"] = ( + 2 + * (data_dictionary["test_labels"] - train_labels_min) + / (train_labels_max - train_labels_min) + - 1 + ) + + self.data["labels_max"] = train_labels_max.to_dict() + self.data["labels_min"] = train_labels_min.to_dict() 
return data_dictionary From 8fa6e8b4ba9edc8db557bba04adf9b0c6aa87741 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 23 Jul 2022 15:58:31 +0200 Subject: [PATCH 09/16] Remove freqAI model bridge in favor of `self.freqai` --- freqtrade/freqai/strategy_bridge.py | 12 -------- freqtrade/strategy/interface.py | 9 ++++++ freqtrade/templates/FreqaiExampleStrategy.py | 30 +++++++++----------- tests/strategy/strats/freqai_test_strat.py | 10 ++----- 4 files changed, 25 insertions(+), 36 deletions(-) delete mode 100644 freqtrade/freqai/strategy_bridge.py diff --git a/freqtrade/freqai/strategy_bridge.py b/freqtrade/freqai/strategy_bridge.py deleted file mode 100644 index bb43084a0..000000000 --- a/freqtrade/freqai/strategy_bridge.py +++ /dev/null @@ -1,12 +0,0 @@ -from freqtrade.resolvers.freqaimodel_resolver import FreqaiModelResolver - - -class CustomModel: - """ - A bridge between the user defined IFreqaiModel class - and the strategy. - """ - - def __init__(self, config): - - self.bridge = FreqaiModelResolver.load_freqaimodel(config) diff --git a/freqtrade/strategy/interface.py b/freqtrade/strategy/interface.py index df73981bd..83d16b6f6 100644 --- a/freqtrade/strategy/interface.py +++ b/freqtrade/strategy/interface.py @@ -145,11 +145,20 @@ class IStrategy(ABC, HyperStrategyMixin): informative_data.candle_type = config['candle_type_def'] self._ft_informative.append((informative_data, cls_method)) + def load_freqAI_model(self) -> None: + if self.config.get('freqai', None): + # Import here to avoid importing this if freqAI is disabled + from freqtrade.resolvers.freqaimodel_resolver import FreqaiModelResolver + + self.freqai = FreqaiModelResolver.load_freqaimodel(self.config) + def ft_bot_start(self, **kwargs) -> None: """ Strategy init - runs after dataprovider has been added. 
Must call bot_start() """ + self.load_freqAI_model() + strategy_safe_wrapper(self.bot_start)() self.ft_load_hyper_params(self.config.get('runmode') == RunMode.HYPEROPT) diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py index 402aa9d1c..86f141567 100644 --- a/freqtrade/templates/FreqaiExampleStrategy.py +++ b/freqtrade/templates/FreqaiExampleStrategy.py @@ -7,7 +7,6 @@ from pandas import DataFrame from technical import qtpylib from freqtrade.exchange import timeframe_to_prev_date -from freqtrade.freqai.strategy_bridge import CustomModel from freqtrade.persistence import Trade from freqtrade.strategy import DecimalParameter, IntParameter, merge_informative_pair from freqtrade.strategy.interface import IStrategy @@ -21,7 +20,7 @@ class FreqaiExampleStrategy(IStrategy): Example strategy showing how the user connects their own IFreqaiModel to the strategy. Namely, the user uses: self.model = CustomModel(self.config) - self.model.bridge.start(dataframe, metadata) + self.freqai.start(dataframe, metadata) to make predictions on their data. populate_any_indicators() automatically generates the variety of features indicated by the user in the @@ -67,9 +66,6 @@ class FreqaiExampleStrategy(IStrategy): informative_pairs.append((pair, tf)) return informative_pairs - def bot_start(self): - self.model = CustomModel(self.config) - def populate_any_indicators( self, metadata, pair, df, tf, informative=None, coin="", set_generalized_indicators=False ): @@ -88,7 +84,7 @@ class FreqaiExampleStrategy(IStrategy): :coin: the name of the coin which will modify the feature names. """ - with self.model.bridge.lock: + with self.freqai.lock: if informative is None: informative = self.dp.get_pair_dataframe(pair, tf) @@ -180,7 +176,7 @@ class FreqaiExampleStrategy(IStrategy): # the target mean/std values for each of the labels created by user in # `populate_any_indicators()` for each training period. 
- dataframe = self.model.bridge.start(dataframe, metadata, self) + dataframe = self.freqai.start(dataframe, metadata, self) dataframe["target_roi"] = dataframe["&-s_close_mean"] + dataframe["&-s_close_std"] * 1.25 dataframe["sell_roi"] = dataframe["&-s_close_mean"] - dataframe["&-s_close_std"] * 1.25 @@ -234,9 +230,9 @@ class FreqaiExampleStrategy(IStrategy): follow_mode = self.config.get("freqai", {}).get("follow_mode", False) if not follow_mode: - pair_dict = self.model.bridge.dd.pair_dict + pair_dict = self.freqai.dd.pair_dict else: - pair_dict = self.model.bridge.dd.follower_dict + pair_dict = self.freqai.dd.follower_dict entry_tag = trade.enter_tag @@ -244,12 +240,12 @@ class FreqaiExampleStrategy(IStrategy): "prediction" + entry_tag not in pair_dict[pair] or pair_dict[pair]["prediction" + entry_tag] > 0 ): - with self.model.bridge.lock: + with self.freqai.lock: pair_dict[pair]["prediction" + entry_tag] = abs(trade_candle["&-s_close"]) if not follow_mode: - self.model.bridge.dd.save_drawer_to_disk() + self.freqai.dd.save_drawer_to_disk() else: - self.model.bridge.dd.save_follower_dict_to_disk() + self.freqai.dd.save_follower_dict_to_disk() roi_price = pair_dict[pair]["prediction" + entry_tag] roi_time = self.max_roi_time_long.value @@ -284,16 +280,16 @@ class FreqaiExampleStrategy(IStrategy): entry_tag = trade.enter_tag follow_mode = self.config.get("freqai", {}).get("follow_mode", False) if not follow_mode: - pair_dict = self.model.bridge.dd.pair_dict + pair_dict = self.freqai.dd.pair_dict else: - pair_dict = self.model.bridge.dd.follower_dict + pair_dict = self.freqai.dd.follower_dict - with self.model.bridge.lock: + with self.freqai.lock: pair_dict[pair]["prediction" + entry_tag] = 0 if not follow_mode: - self.model.bridge.dd.save_drawer_to_disk() + self.freqai.dd.save_drawer_to_disk() else: - self.model.bridge.dd.save_follower_dict_to_disk() + self.freqai.dd.save_follower_dict_to_disk() return True diff --git a/tests/strategy/strats/freqai_test_strat.py 
b/tests/strategy/strats/freqai_test_strat.py index 28e3dce54..221942bd3 100644 --- a/tests/strategy/strats/freqai_test_strat.py +++ b/tests/strategy/strats/freqai_test_strat.py @@ -5,7 +5,6 @@ import pandas as pd import talib.abstract as ta from pandas import DataFrame -from freqtrade.freqai.strategy_bridge import CustomModel from freqtrade.strategy import DecimalParameter, IntParameter, merge_informative_pair from freqtrade.strategy.interface import IStrategy @@ -18,7 +17,7 @@ class freqai_test_strat(IStrategy): Example strategy showing how the user connects their own IFreqaiModel to the strategy. Namely, the user uses: self.model = CustomModel(self.config) - self.model.bridge.start(dataframe, metadata) + self.freqai.start(dataframe, metadata) to make predictions on their data. populate_any_indicators() automatically generates the variety of features indicated by the user in the @@ -64,9 +63,6 @@ class freqai_test_strat(IStrategy): informative_pairs.append((pair, tf)) return informative_pairs - def bot_start(self): - self.model = CustomModel(self.config) - def populate_any_indicators( self, metadata, pair, df, tf, informative=None, coin="", set_generalized_indicators=False ): @@ -85,7 +81,7 @@ class freqai_test_strat(IStrategy): :coin: the name of the coin which will modify the feature names. """ - with self.model.bridge.lock: + with self.freqai.lock: if informative is None: informative = self.dp.get_pair_dataframe(pair, tf) @@ -146,7 +142,7 @@ class freqai_test_strat(IStrategy): # the model will return 4 values, its prediction, an indication of whether or not the # prediction should be accepted, the target mean/std values from the labels used during # each training period. 
- dataframe = self.model.bridge.start(dataframe, metadata, self) + dataframe = self.freqai.start(dataframe, metadata, self) dataframe["target_roi"] = dataframe["&-s_close_mean"] + dataframe["&-s_close_std"] * 1.25 dataframe["sell_roi"] = dataframe["&-s_close_mean"] - dataframe["&-s_close_std"] * 1.25 From 62f7606d2c025e793aa24bd041a9d50a07bd0748 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 23 Jul 2022 16:05:25 +0200 Subject: [PATCH 10/16] Update tests to new variant --- tests/freqai/conftest.py | 2 +- tests/freqai/test_freqai_datakitchen.py | 8 ++++---- tests/freqai/test_freqai_interface.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/freqai/conftest.py b/tests/freqai/conftest.py index 549ba2663..1d98fd863 100644 --- a/tests/freqai/conftest.py +++ b/tests/freqai/conftest.py @@ -61,7 +61,7 @@ def get_patched_data_kitchen(mocker, freqaiconf): def get_patched_freqai_strategy(mocker, freqaiconf): strategy = StrategyResolver.load_strategy(freqaiconf) - strategy.bot_start() + strategy.ft_bot_start() return strategy diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index 1964d1423..d0d82b489 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -69,7 +69,7 @@ def test_update_historic_data(mocker, default_conf): strategy = get_patched_freqai_strategy(mocker, freqaiconf) exchange = get_patched_exchange(mocker, freqaiconf) strategy.dp = DataProvider(freqaiconf, exchange) - freqai = strategy.model.bridge + freqai = strategy.freqai freqai.live = True freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180114") @@ -105,7 +105,7 @@ def test_load_all_pairs_histories(mocker, default_conf): strategy = get_patched_freqai_strategy(mocker, freqaiconf) exchange = get_patched_exchange(mocker, freqaiconf) strategy.dp = DataProvider(freqaiconf, exchange) - freqai = strategy.model.bridge + freqai = 
strategy.freqai freqai.live = True freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180114") @@ -125,7 +125,7 @@ def test_get_base_and_corr_dataframes(mocker, default_conf): strategy = get_patched_freqai_strategy(mocker, freqaiconf) exchange = get_patched_exchange(mocker, freqaiconf) strategy.dp = DataProvider(freqaiconf, exchange) - freqai = strategy.model.bridge + freqai = strategy.freqai freqai.live = True freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180114") @@ -153,7 +153,7 @@ def test_use_strategy_to_populate_indicators(mocker, default_conf): exchange = get_patched_exchange(mocker, freqaiconf) strategy.dp = DataProvider(freqaiconf, exchange) strategy.freqai_info = freqaiconf.get("freqai", {}) - freqai = strategy.model.bridge + freqai = strategy.freqai freqai.live = True freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180114") diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 9219baee3..e812dd47e 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -21,7 +21,7 @@ def test_train_model_in_series_LightGBM(mocker, default_conf): exchange = get_patched_exchange(mocker, freqaiconf) strategy.dp = DataProvider(freqaiconf, exchange) strategy.freqai_info = freqaiconf.get("freqai", {}) - freqai = strategy.model.bridge + freqai = strategy.freqai freqai.live = True freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180130") @@ -113,7 +113,7 @@ def test_start_backtesting(mocker, default_conf): exchange = get_patched_exchange(mocker, freqaiconf) strategy.dp = DataProvider(freqaiconf, exchange) strategy.freqai_info = freqaiconf.get("freqai", {}) - freqai = strategy.model.bridge + freqai = strategy.freqai freqai.live = False freqai.dk = FreqaiDataKitchen(freqaiconf, 
freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180130") @@ -139,7 +139,7 @@ def test_start_backtesting_from_existing_folder(mocker, default_conf, caplog): exchange = get_patched_exchange(mocker, freqaiconf) strategy.dp = DataProvider(freqaiconf, exchange) strategy.freqai_info = freqaiconf.get("freqai", {}) - freqai = strategy.model.bridge + freqai = strategy.freqai freqai.live = False freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180130") @@ -162,7 +162,7 @@ def test_start_backtesting_from_existing_folder(mocker, default_conf, caplog): exchange = get_patched_exchange(mocker, freqaiconf) strategy.dp = DataProvider(freqaiconf, exchange) strategy.freqai_info = freqaiconf.get("freqai", {}) - freqai = strategy.model.bridge + freqai = strategy.freqai freqai.live = False freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180130") From 8a3cffcd1b80882d296c877d6b05198d3efacac5 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 23 Jul 2022 16:06:46 +0200 Subject: [PATCH 11/16] Remove remaining CustomModel references --- docs/freqai.md | 12 ++++-------- freqtrade/templates/FreqaiExampleStrategy.py | 1 - tests/freqai/conftest.py | 6 +++--- tests/strategy/strats/freqai_test_strat.py | 1 - 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/docs/freqai.md b/docs/freqai.md index 5072d3721..48b8968a3 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -166,7 +166,7 @@ config setup includes: Features are added by the user inside the `populate_any_indicators()` method of the strategy by prepending indicators with `%` and labels are added by prepending `&`. There are some important components/structures that the user *must* include when building their feature set. 
-As shown below, `with self.model.bridge.lock:` must be used to ensure thread safety - especially when using third +As shown below, `with self.freqai.lock:` must be used to ensure thread safety - especially when using third party libraries for indicator construction such as TA-lib. Another structure to consider is the location of the labels at the bottom of the example function (below `if set_generalized_indicators:`). This is where the user will add single features and labels to their feature set to avoid duplication from @@ -191,7 +191,7 @@ various configuration parameters which multiply the feature set such as `include :coin: the name of the coin which will modify the feature names. """ - with self.model.bridge.lock: + with self.freqai.lock: if informative is None: informative = self.dp.get_pair_dataframe(pair, tf) @@ -370,7 +370,6 @@ for each pair, for each backtesting window within the bigger `--timerange`. The Freqai strategy requires the user to include the following lines of code in the strategy: ```python - from freqtrade.freqai.strategy_bridge import CustomModel def informative_pairs(self): whitelist_pairs = self.dp.current_whitelist() @@ -385,9 +384,6 @@ The Freqai strategy requires the user to include the following lines of code in informative_pairs.append((pair, tf)) return informative_pairs - def bot_start(self): - self.model = CustomModel(self.config) - def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: self.freqai_info = self.config["freqai"] @@ -400,7 +396,7 @@ The Freqai strategy requires the user to include the following lines of code in # the target mean/std values for each of the labels created by user in # `populate_any_indicators()` for each training period. - dataframe = self.model.bridge.start(dataframe, metadata, self) + dataframe = self.freqai.start(dataframe, metadata, self) return dataframe ``` @@ -648,7 +644,7 @@ below this value. 
An example usage in the strategy may look something like: dataframe["do_predict"], dataframe["target_upper_quantile"], dataframe["target_lower_quantile"], - ) = self.model.bridge.start(dataframe, metadata, self) + ) = self.freqai.start(dataframe, metadata, self) return dataframe diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py index 86f141567..b3c2cc8d7 100644 --- a/freqtrade/templates/FreqaiExampleStrategy.py +++ b/freqtrade/templates/FreqaiExampleStrategy.py @@ -19,7 +19,6 @@ class FreqaiExampleStrategy(IStrategy): """ Example strategy showing how the user connects their own IFreqaiModel to the strategy. Namely, the user uses: - self.model = CustomModel(self.config) self.freqai.start(dataframe, metadata) to make predictions on their data. populate_any_indicators() automatically diff --git a/tests/freqai/conftest.py b/tests/freqai/conftest.py index 1d98fd863..1d0ea0e54 100644 --- a/tests/freqai/conftest.py +++ b/tests/freqai/conftest.py @@ -76,7 +76,7 @@ def get_freqai_live_analyzed_dataframe(mocker, freqaiconf): strategy = get_patched_freqai_strategy(mocker, freqaiconf) exchange = get_patched_exchange(mocker, freqaiconf) strategy.dp = DataProvider(freqaiconf, exchange) - freqai = strategy.model.bridge + freqai = strategy.freqai freqai.live = True freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180114") @@ -91,7 +91,7 @@ def get_freqai_analyzed_dataframe(mocker, freqaiconf): exchange = get_patched_exchange(mocker, freqaiconf) strategy.dp = DataProvider(freqaiconf, exchange) strategy.freqai_info = freqaiconf.get("freqai", {}) - freqai = strategy.model.bridge + freqai = strategy.freqai freqai.live = True freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180114") @@ -107,7 +107,7 @@ def get_ready_to_train(mocker, freqaiconf): exchange = get_patched_exchange(mocker, freqaiconf) strategy.dp = 
DataProvider(freqaiconf, exchange) strategy.freqai_info = freqaiconf.get("freqai", {}) - freqai = strategy.model.bridge + freqai = strategy.freqai freqai.live = True freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180114") diff --git a/tests/strategy/strats/freqai_test_strat.py b/tests/strategy/strats/freqai_test_strat.py index 221942bd3..49f00d6e2 100644 --- a/tests/strategy/strats/freqai_test_strat.py +++ b/tests/strategy/strats/freqai_test_strat.py @@ -16,7 +16,6 @@ class freqai_test_strat(IStrategy): """ Example strategy showing how the user connects their own IFreqaiModel to the strategy. Namely, the user uses: - self.model = CustomModel(self.config) self.freqai.start(dataframe, metadata) to make predictions on their data. populate_any_indicators() automatically From 81c1aa3c13cdc674cb6e34f16287a2c83481a94b Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 23 Jul 2022 16:14:11 +0200 Subject: [PATCH 12/16] Update imports in freqAI sample strategies --- freqtrade/templates/FreqaiExampleStrategy.py | 3 +-- tests/strategy/strats/freqai_test_strat.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py index b3c2cc8d7..7008008a3 100644 --- a/freqtrade/templates/FreqaiExampleStrategy.py +++ b/freqtrade/templates/FreqaiExampleStrategy.py @@ -8,8 +8,7 @@ from technical import qtpylib from freqtrade.exchange import timeframe_to_prev_date from freqtrade.persistence import Trade -from freqtrade.strategy import DecimalParameter, IntParameter, merge_informative_pair -from freqtrade.strategy.interface import IStrategy +from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy, merge_informative_pair logger = logging.getLogger(__name__) diff --git a/tests/strategy/strats/freqai_test_strat.py b/tests/strategy/strats/freqai_test_strat.py index 49f00d6e2..8679d4d74 100644 --- 
a/tests/strategy/strats/freqai_test_strat.py +++ b/tests/strategy/strats/freqai_test_strat.py @@ -5,8 +5,7 @@ import pandas as pd import talib.abstract as ta from pandas import DataFrame -from freqtrade.strategy import DecimalParameter, IntParameter, merge_informative_pair -from freqtrade.strategy.interface import IStrategy +from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy, merge_informative_pair logger = logging.getLogger(__name__) From f3d46613ee501888defb30926cd1f1b3a610e93b Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 23 Jul 2022 17:14:11 +0200 Subject: [PATCH 13/16] move prediction denormalization into datakitchen. remove duplicate associated code. avoid normalization/denormalization for string dtypes. --- freqtrade/freqai/data_kitchen.py | 37 +++++++++++++++---- freqtrade/freqai/freqai_interface.py | 8 +--- .../prediction_models/BaseRegressionModel.py | 7 +--- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 1c54a6375..3899c82df 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -356,7 +356,7 @@ class FreqaiDataKitchen: return self.data_dictionary - def normalize_data(self, data_dictionary: Dict, do_labels: bool = True) -> Dict[Any, Any]: + def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]: """ Normalize all data in the data_dictionary according to the training dataset :params: @@ -378,24 +378,26 @@ class FreqaiDataKitchen: self.data[item + "_max"] = train_max[item] self.data[item + "_min"] = train_min[item] - if do_labels: - train_labels_max = data_dictionary["train_labels"].max() - train_labels_min = data_dictionary["train_labels"].min() + for item in data_dictionary["train_labels"].keys(): + if data_dictionary["train_labels"][item].dtype == str: + continue + train_labels_max = data_dictionary["train_labels"][item].max() + train_labels_min = data_dictionary["train_labels"][item].min() 
data_dictionary["train_labels"] = ( 2 - * (data_dictionary["train_labels"] - train_labels_min) + * (data_dictionary["train_labels"][item] - train_labels_min) / (train_labels_max - train_labels_min) - 1 ) - data_dictionary["test_labels"] = ( + data_dictionary["test_labels"][item] = ( 2 * (data_dictionary["test_labels"] - train_labels_min) / (train_labels_max - train_labels_min) - 1 ) - self.data["labels_max"] = train_labels_max.to_dict() - self.data["labels_min"] = train_labels_min.to_dict() + self.data[f"{item}_max"] = train_labels_max # .to_dict() + self.data[f"{item}_min"] = train_labels_min # .to_dict() return data_dictionary @@ -417,6 +419,25 @@ class FreqaiDataKitchen: return df + def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame: + """ + Denormalize a set of predictions using the label min and max values from + the associated training data. + :params: + :df: Dataframe of predictions to be denormalized + """ + + for label in self.label_list: + if df[label].dtype == str: + continue + df[label] = ( + (df[label] + 1) + * (self.data[f"{label}_max"] - self.data[f"{label}_min"]) + / 2 + ) + self.data[f"{label}_min"] + + return df + def split_timerange( self, tr: str, train_split: int = 28, bt_split: int = 7 ) -> Tuple[list, list]: diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 5fa15ebf8..ac8cf6e60 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -501,12 +501,8 @@ class IFreqaiModel(ABC): ) -> None: trained_predictions = model.predict(df) pred_df = DataFrame(trained_predictions, columns=dk.label_list) - for label in dk.label_list: - pred_df[label] = ( - (pred_df[label] + 1) - * (dk.data["labels_max"][label] - dk.data["labels_min"][label]) - / 2 - ) + dk.data["labels_min"][label] + + pred_df = dk.denormalize_labels_from_metadata(pred_df) self.dd.historic_predictions[pair] = pd.DataFrame() self.dd.historic_predictions[pair] = copy.deepcopy(pred_df) diff --git
a/freqtrade/freqai/prediction_models/BaseRegressionModel.py b/freqtrade/freqai/prediction_models/BaseRegressionModel.py index ffe30ef2a..2654b3726 100644 --- a/freqtrade/freqai/prediction_models/BaseRegressionModel.py +++ b/freqtrade/freqai/prediction_models/BaseRegressionModel.py @@ -107,11 +107,6 @@ class BaseRegressionModel(IFreqaiModel): predictions = self.model.predict(dk.data_dictionary["prediction_features"]) pred_df = DataFrame(predictions, columns=dk.label_list) - for label in dk.label_list: - pred_df[label] = ( - (pred_df[label] + 1) - * (dk.data["labels_max"][label] - dk.data["labels_min"][label]) - / 2 - ) + dk.data["labels_min"][label] + pred_df = dk.denormalize_labels_from_metadata(pred_df) return (pred_df, dk.do_predict) From 95f5218ceb49540f2f3110c907db1335d5085f52 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 24 Jul 2022 07:32:13 +0200 Subject: [PATCH 14/16] Reenable Catboost test (#7118) * Reenable Catboost test * Simplify freqAI tests, ensure they use a tempdir for modelstorage --- tests/freqai/conftest.py | 7 +- tests/freqai/test_freqai_datakitchen.py | 79 +++++++------- tests/freqai/test_freqai_interface.py | 130 ++++++++++-------------- 3 files changed, 96 insertions(+), 120 deletions(-) diff --git a/tests/freqai/conftest.py b/tests/freqai/conftest.py index 1d0ea0e54..ede991240 100644 --- a/tests/freqai/conftest.py +++ b/tests/freqai/conftest.py @@ -2,6 +2,8 @@ from copy import deepcopy from pathlib import Path from unittest.mock import MagicMock +import pytest + from freqtrade.configuration import TimeRange from freqtrade.data.dataprovider import DataProvider from freqtrade.freqai.data_kitchen import FreqaiDataKitchen @@ -10,13 +12,14 @@ from freqtrade.resolvers.freqaimodel_resolver import FreqaiModelResolver from tests.conftest import get_patched_exchange -# @pytest.fixture(scope="function") -def freqai_conf(default_conf): +@pytest.fixture(scope="function") +def freqai_conf(default_conf, tmpdir): freqaiconf = deepcopy(default_conf) 
freqaiconf.update( { "datadir": Path(default_conf["datadir"]), "strategy": "freqai_test_strat", + "user_data_dir": Path(tmpdir), "strategy-path": "freqtrade/tests/strategy/strats", "freqaimodel": "LightGBMPredictionModel", "freqaimodel_path": "freqai/prediction_models", diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index d0d82b489..ee1d83982 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -1,6 +1,3 @@ -# from unittest.mock import MagicMock -# from freqtrade.commands.optimize_commands import setup_optimize_configuration, start_edge -import copy import datetime import shutil from pathlib import Path @@ -13,7 +10,7 @@ from freqtrade.data.dataprovider import DataProvider from freqtrade.exceptions import OperationalException from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from tests.conftest import get_patched_exchange -from tests.freqai.conftest import freqai_conf, get_patched_data_kitchen, get_patched_freqai_strategy +from tests.freqai.conftest import get_patched_data_kitchen, get_patched_freqai_strategy @pytest.mark.parametrize( @@ -24,15 +21,15 @@ from tests.freqai.conftest import freqai_conf, get_patched_data_kitchen, get_pat ], ) def test_create_fulltimerange( - timerange, train_period_days, expected_result, default_conf, mocker, caplog + timerange, train_period_days, expected_result, freqai_conf, mocker, caplog ): - dk = get_patched_data_kitchen(mocker, freqai_conf(copy.deepcopy(default_conf))) + dk = get_patched_data_kitchen(mocker, freqai_conf) assert dk.create_fulltimerange(timerange, train_period_days) == expected_result shutil.rmtree(Path(dk.full_path)) -def test_create_fulltimerange_incorrect_backtest_period(mocker, default_conf): - dk = get_patched_data_kitchen(mocker, freqai_conf(copy.deepcopy(default_conf))) +def test_create_fulltimerange_incorrect_backtest_period(mocker, freqai_conf): + dk = get_patched_data_kitchen(mocker, freqai_conf) 
with pytest.raises(OperationalException, match=r"backtest_period_days must be an integer"): dk.create_fulltimerange("20220101-20220201", 0.5) with pytest.raises(OperationalException, match=r"backtest_period_days must be positive"): @@ -49,11 +46,10 @@ def test_create_fulltimerange_incorrect_backtest_period(mocker, default_conf): ], ) def test_split_timerange( - mocker, default_conf, timerange, train_period_days, backtest_period_days, expected_result + mocker, freqai_conf, timerange, train_period_days, backtest_period_days, expected_result ): - freqaiconf = freqai_conf(copy.deepcopy(default_conf)) - freqaiconf.update({"timerange": "20220101-20220401"}) - dk = get_patched_data_kitchen(mocker, freqaiconf) + freqai_conf.update({"timerange": "20220101-20220401"}) + dk = get_patched_data_kitchen(mocker, freqai_conf) tr_list, bt_list = dk.split_timerange(timerange, train_period_days, backtest_period_days) assert len(tr_list) == len(bt_list) == expected_result @@ -64,14 +60,13 @@ def test_split_timerange( shutil.rmtree(Path(dk.full_path)) -def test_update_historic_data(mocker, default_conf): - freqaiconf = freqai_conf(copy.deepcopy(default_conf)) - strategy = get_patched_freqai_strategy(mocker, freqaiconf) - exchange = get_patched_exchange(mocker, freqaiconf) - strategy.dp = DataProvider(freqaiconf, exchange) +def test_update_historic_data(mocker, freqai_conf): + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) freqai = strategy.freqai freqai.live = True - freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) + freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180114") freqai.dk.load_all_pair_histories(timerange) @@ -93,69 +88,65 @@ def test_update_historic_data(mocker, default_conf): (datetime.datetime.now(tz=datetime.timezone.utc).timestamp(), False), ], ) -def test_check_if_model_expired(mocker, 
default_conf, timestamp, expected): - freqaiconf = freqai_conf(copy.deepcopy(default_conf)) - dk = get_patched_data_kitchen(mocker, freqaiconf) +def test_check_if_model_expired(mocker, freqai_conf, timestamp, expected): + dk = get_patched_data_kitchen(mocker, freqai_conf) assert dk.check_if_model_expired(timestamp) == expected shutil.rmtree(Path(dk.full_path)) -def test_load_all_pairs_histories(mocker, default_conf): - freqaiconf = freqai_conf(copy.deepcopy(default_conf)) - strategy = get_patched_freqai_strategy(mocker, freqaiconf) - exchange = get_patched_exchange(mocker, freqaiconf) - strategy.dp = DataProvider(freqaiconf, exchange) +def test_load_all_pairs_histories(mocker, freqai_conf): + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) freqai = strategy.freqai freqai.live = True - freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) + freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180114") freqai.dk.load_all_pair_histories(timerange) assert len(freqai.dd.historic_data.keys()) == len( - freqaiconf.get("exchange", {}).get("pair_whitelist") + freqai_conf.get("exchange", {}).get("pair_whitelist") ) assert len(freqai.dd.historic_data["ADA/BTC"]) == len( - freqaiconf.get("freqai", {}).get("feature_parameters", {}).get("include_timeframes") + freqai_conf.get("freqai", {}).get("feature_parameters", {}).get("include_timeframes") ) shutil.rmtree(Path(freqai.dk.full_path)) -def test_get_base_and_corr_dataframes(mocker, default_conf): - freqaiconf = freqai_conf(copy.deepcopy(default_conf)) - strategy = get_patched_freqai_strategy(mocker, freqaiconf) - exchange = get_patched_exchange(mocker, freqaiconf) - strategy.dp = DataProvider(freqaiconf, exchange) +def test_get_base_and_corr_dataframes(mocker, freqai_conf): + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = 
get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) freqai = strategy.freqai freqai.live = True - freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) + freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180114") freqai.dk.load_all_pair_histories(timerange) sub_timerange = TimeRange.parse_timerange("20180111-20180114") corr_df, base_df = freqai.dk.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC") num_tfs = len( - freqaiconf.get("freqai", {}).get("feature_parameters", {}).get("include_timeframes") + freqai_conf.get("freqai", {}).get("feature_parameters", {}).get("include_timeframes") ) assert len(base_df.keys()) == num_tfs assert len(corr_df.keys()) == len( - freqaiconf.get("freqai", {}).get("feature_parameters", {}).get("include_corr_pairlist") + freqai_conf.get("freqai", {}).get("feature_parameters", {}).get("include_corr_pairlist") ) assert len(corr_df["ADA/BTC"].keys()) == num_tfs shutil.rmtree(Path(freqai.dk.full_path)) -def test_use_strategy_to_populate_indicators(mocker, default_conf): - freqaiconf = freqai_conf(copy.deepcopy(default_conf)) - strategy = get_patched_freqai_strategy(mocker, freqaiconf) - exchange = get_patched_exchange(mocker, freqaiconf) - strategy.dp = DataProvider(freqaiconf, exchange) - strategy.freqai_info = freqaiconf.get("freqai", {}) +def test_use_strategy_to_populate_indicators(mocker, freqai_conf): + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) freqai = strategy.freqai freqai.live = True - freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) + freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180114") freqai.dk.load_all_pair_histories(timerange) sub_timerange = TimeRange.parse_timerange("20180111-20180114") 
diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index e812dd47e..0bb2dac79 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -1,29 +1,29 @@ # from unittest.mock import MagicMock # from freqtrade.commands.optimize_commands import setup_optimize_configuration, start_edge -import copy -# import platform +import platform import shutil from pathlib import Path from unittest.mock import MagicMock +import pytest + from freqtrade.configuration import TimeRange from freqtrade.data.dataprovider import DataProvider from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from tests.conftest import get_patched_exchange, log_has_re -from tests.freqai.conftest import freqai_conf, get_patched_freqai_strategy +from tests.freqai.conftest import get_patched_freqai_strategy -def test_train_model_in_series_LightGBM(mocker, default_conf): - freqaiconf = freqai_conf(copy.deepcopy(default_conf)) - freqaiconf.update({"timerange": "20180110-20180130"}) +def test_train_model_in_series_LightGBM(mocker, freqai_conf): + freqai_conf.update({"timerange": "20180110-20180130"}) - strategy = get_patched_freqai_strategy(mocker, freqaiconf) - exchange = get_patched_exchange(mocker, freqaiconf) - strategy.dp = DataProvider(freqaiconf, exchange) - strategy.freqai_info = freqaiconf.get("freqai", {}) + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) freqai = strategy.freqai freqai.live = True - freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) + freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180130") freqai.dk.load_all_pair_histories(timerange) @@ -58,64 +58,47 @@ def test_train_model_in_series_LightGBM(mocker, default_conf): shutil.rmtree(Path(freqai.dk.full_path)) -# FIXME: hits 
segfault -# @pytest.mark.skipif("arm" in platform.uname()[-1], reason="no ARM..") -# def test_train_model_in_series_Catboost(mocker, default_conf): -# freqaiconf = freqai_conf(copy.deepcopy(default_conf)) -# freqaiconf.update({"timerange": "20180110-20180130"}) -# freqaiconf.update({"freqaimodel": "CatboostPredictionModel"}) -# strategy = get_patched_freqai_strategy(mocker, freqaiconf) -# exchange = get_patched_exchange(mocker, freqaiconf) -# strategy.dp = DataProvider(freqaiconf, exchange) -# strategy.freqai_info = freqaiconf.get("freqai", {}) -# freqai = strategy.model.bridge -# freqai.live = True -# freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) -# timerange = TimeRange.parse_timerange("20180110-20180130") -# freqai.dk.load_all_pair_histories(timerange) +@pytest.mark.skipif("arm" in platform.uname()[-1], reason="no ARM for Catboost ...") +def test_train_model_in_series_Catboost(mocker, freqai_conf): + freqai_conf.update({"timerange": "20180110-20180130"}) + freqai_conf.update({"freqaimodel": "CatboostPredictionModel"}) + del freqai_conf['freqai']['model_training_parameters']['verbosity'] + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) -# freqai.dd.pair_dict = MagicMock() + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd) + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dk.load_all_pair_histories(timerange) -# data_load_timerange = TimeRange.parse_timerange("20180110-20180130") -# new_timerange = TimeRange.parse_timerange("20180120-20180130") + freqai.dd.pair_dict = MagicMock() -# freqai.train_model_in_series(new_timerange, "ADA/BTC", -# strategy, freqai.dk, data_load_timerange) + data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + new_timerange = TimeRange.parse_timerange("20180120-20180130") 
-# assert ( -# Path(freqai.dk.data_path / str(freqai.dk.model_filename + "_model.joblib")) -# .resolve() -# .exists() -# ) -# assert ( -# Path(freqai.dk.data_path / str(freqai.dk.model_filename + "_metadata.json")) -# .resolve() -# .exists() -# ) -# assert ( -# Path(freqai.dk.data_path / str(freqai.dk.model_filename + "_trained_df.pkl")) -# .resolve() -# .exists() -# ) -# assert ( -# Path(freqai.dk.data_path / str(freqai.dk.model_filename + "_svm_model.joblib")) -# .resolve() -# .exists() -# ) + freqai.train_model_in_series(new_timerange, "ADA/BTC", + strategy, freqai.dk, data_load_timerange) -# shutil.rmtree(Path(freqai.dk.full_path)) + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").exists() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").exists() + + shutil.rmtree(Path(freqai.dk.full_path)) -def test_start_backtesting(mocker, default_conf): - freqaiconf = freqai_conf(copy.deepcopy(default_conf)) - freqaiconf.update({"timerange": "20180120-20180130"}) - strategy = get_patched_freqai_strategy(mocker, freqaiconf) - exchange = get_patched_exchange(mocker, freqaiconf) - strategy.dp = DataProvider(freqaiconf, exchange) - strategy.freqai_info = freqaiconf.get("freqai", {}) +def test_start_backtesting(mocker, freqai_conf): + freqai_conf.update({"timerange": "20180120-20180130"}) + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) freqai = strategy.freqai freqai.live = False - freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) + freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180130") 
freqai.dk.load_all_pair_histories(timerange) sub_timerange = TimeRange.parse_timerange("20180110-20180130") @@ -132,16 +115,15 @@ def test_start_backtesting(mocker, default_conf): shutil.rmtree(Path(freqai.dk.full_path)) -def test_start_backtesting_from_existing_folder(mocker, default_conf, caplog): - freqaiconf = freqai_conf(copy.deepcopy(default_conf)) - freqaiconf.update({"timerange": "20180120-20180130"}) - strategy = get_patched_freqai_strategy(mocker, freqaiconf) - exchange = get_patched_exchange(mocker, freqaiconf) - strategy.dp = DataProvider(freqaiconf, exchange) - strategy.freqai_info = freqaiconf.get("freqai", {}) +def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog): + freqai_conf.update({"timerange": "20180120-20180130"}) + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) freqai = strategy.freqai freqai.live = False - freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) + freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180130") freqai.dk.load_all_pair_histories(timerange) sub_timerange = TimeRange.parse_timerange("20180110-20180130") @@ -157,14 +139,14 @@ def test_start_backtesting_from_existing_folder(mocker, default_conf, caplog): # without deleting the exiting folder structure, re-run - freqaiconf.update({"timerange": "20180120-20180130"}) - strategy = get_patched_freqai_strategy(mocker, freqaiconf) - exchange = get_patched_exchange(mocker, freqaiconf) - strategy.dp = DataProvider(freqaiconf, exchange) - strategy.freqai_info = freqaiconf.get("freqai", {}) + freqai_conf.update({"timerange": "20180120-20180130"}) + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = 
freqai_conf.get("freqai", {}) freqai = strategy.freqai freqai.live = False - freqai.dk = FreqaiDataKitchen(freqaiconf, freqai.dd) + freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd) timerange = TimeRange.parse_timerange("20180110-20180130") freqai.dk.load_all_pair_histories(timerange) sub_timerange = TimeRange.parse_timerange("20180110-20180130") From fff39eff9edf8a33a094e78a4f0f6b68259b5ec8 Mon Sep 17 00:00:00 2001 From: Robert Caulk Date: Sun, 24 Jul 2022 08:42:50 +0200 Subject: [PATCH 15/16] fix multitarget bug --- freqtrade/freqai/data_kitchen.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 3899c82df..569cc0c22 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -383,7 +383,7 @@ class FreqaiDataKitchen: continue train_labels_max = data_dictionary["train_labels"][item].max() train_labels_min = data_dictionary["train_labels"][item].min() - data_dictionary["train_labels"] = ( + data_dictionary["train_labels"][item] = ( 2 * (data_dictionary["train_labels"][item] - train_labels_min) / (train_labels_max - train_labels_min) @@ -391,14 +391,13 @@ class FreqaiDataKitchen: ) data_dictionary["test_labels"][item] = ( 2 - * (data_dictionary["test_labels"] - train_labels_min) + * (data_dictionary["test_labels"][item] - train_labels_min) / (train_labels_max - train_labels_min) - 1 ) self.data[f"{item}_max"] = train_labels_max # .to_dict() self.data[f"{item}_min"] = train_labels_min # .to_dict() - return data_dictionary def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame: @@ -412,8 +411,8 @@ class FreqaiDataKitchen: for item in df.keys(): df[item] = ( 2 - * (df[item] - self.data[item + "_min"]) - / (self.data[item + "_max"] - self.data[item + "_min"]) + * (df[item] - self.data[f"{item}_min"]) + / (self.data[f"{item}_max"] - self.data[f"{item}_min"]) - 1 ) From 88e10f73065c53f847aa17a932c98d9239e5f505 Mon Sep 17 00:00:00 
2001 From: Robert Caulk Date: Sun, 24 Jul 2022 09:01:23 +0200 Subject: [PATCH 16/16] add exception for not passing timerange. Remove hard coded arguments for CatboostPredictionModels. Update docs --- docs/freqai.md | 2 +- freqtrade/freqai/data_kitchen.py | 3 +++ freqtrade/freqai/prediction_models/CatboostPredictionModel.py | 2 -- .../freqai/prediction_models/CatboostPredictionMultiModel.py | 3 --- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/freqai.md b/docs/freqai.md index 48b8968a3..b2ee2407a 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -83,7 +83,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi | `backtest_period_days` | **Required.** Number of days to inference into the trained model before sliding the window and retraining. This can be fractional days, but beware that the user provided `timerange` will be divided by this number to yield the number of trainings necessary to complete the backtest.
**Datatype:** Float. | `live_retrain_hours` | Frequency of retraining during dry/live runs. Default set to 0, which means it will retrain as often as possible. **Datatype:** Float > 0. | `follow_mode` | If true, this instance of FreqAI will look for models associated with `identifier` and load those for inferencing. A `follower` will **not** train new models. `False` by default.
**Datatype:** boolean. -| `live_trained_timestamp` | Useful if user wants to start from models trained during a *backtest*. The timestamp can be located in the `user_data/models` backtesting folder. This is not a commonly used parameter, leave undefined for most applications.
**Datatype:** positive integer. +| `startup_candles` | Number of candles needed for *backtesting only* to ensure that no indicator contains NaN values at the start of the first training period.
**Datatype:** positive integer. | `fit_live_predictions_candles` | Computes target (label) statistics from prediction data, instead of from the training data set. Number of candles is the number of historical candles it uses to generate the statistics.
**Datatype:** positive integer. | `purge_old_models` | Tell FreqAI to delete obsolete models. Otherwise, all historic models will remain on disk. Defaults to `False`.
**Datatype:** boolean. | `expiration_hours` | Ask FreqAI to avoid making predictions if a model is more than `expiration_hours` old. Defaults to 0 which means models never expire.
**Datatype:** positive integer. diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 569cc0c22..e732649ff 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -76,6 +76,9 @@ class FreqaiDataKitchen: self.keras = self.freqai_config.get("keras", False) self.set_all_pairs() if not self.live: + if not self.config["timerange"]: + raise OperationalException( + 'Please pass --timerange if you intend to use FreqAI for backtesting.') self.full_timerange = self.create_fulltimerange( self.config["timerange"], self.freqai_config.get("train_period_days") ) diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py index fafb12abe..c69602025 100644 --- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py +++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py @@ -38,8 +38,6 @@ class CatboostPredictionModel(BaseRegressionModel): model = CatBoostRegressor( allow_writing_files=False, - verbose=100, - early_stopping_rounds=400, **self.model_training_parameters, ) model.fit(X=train_data, eval_set=test_data) diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py index becfb43eb..1b91fe0c6 100644 --- a/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py +++ b/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py @@ -27,9 +27,6 @@ class CatboostPredictionMultiModel(BaseRegressionModel): cbr = CatBoostRegressor( allow_writing_files=False, - gpu_ram_part=0.5, - verbose=100, - early_stopping_rounds=400, **self.model_training_parameters, )