Merge branch 'freqtrade:feat/freqai' into feat/freqai

This commit is contained in:
lolong 2022-07-26 22:43:30 +02:00 committed by GitHub
commit cb1c3315da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 729 additions and 448 deletions

View File

@ -8,10 +8,18 @@ from pathlib import Path
from typing import Any, Dict, Tuple
import numpy as np
import numpy.typing as npt
import pandas as pd
from joblib import dump, load
from joblib.externals import cloudpickle
from pandas import DataFrame
from freqtrade.configuration import TimeRange
from freqtrade.data.history import load_pair_history
from freqtrade.exceptions import OperationalException
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.strategy.interface import IStrategy
logger = logging.getLogger(__name__)
@ -55,7 +63,7 @@ class FreqaiDataDrawer:
self.historic_predictions: Dict[str, Any] = {}
self.follower_dict: Dict[str, Any] = {}
self.full_path = full_path
self.follower_name = self.config.get("bot_name", "follower1")
self.follower_name: str = self.config.get("bot_name", "follower1")
self.follower_dict_path = Path(
self.full_path / f"follower_dictionary-{self.follower_name}.json"
)
@ -151,14 +159,13 @@ class FreqaiDataDrawer:
for pair in whitelist_pairs:
self.follower_dict[pair] = {}
with open(self.follower_dict_path, "w") as fp:
json.dump(self.follower_dict, fp, default=self.np_encoder)
self.save_follower_dict_to_disk()
def np_encoder(self, object):
if isinstance(object, np.generic):
return object.item()
def get_pair_dict_info(self, pair: str) -> Tuple[str, int, bool, bool]:
def get_pair_dict_info(self, pair: str) -> Tuple[str, int, bool]:
"""
Locate and load existing model metadata from persistent storage. If not located,
create a new one and append the current pair to it and prepare it for its first
@ -167,23 +174,20 @@ class FreqaiDataDrawer:
:return:
model_filename: str = unique filename used for loading persistent objects from disk
trained_timestamp: int = the last time the coin was trained
coin_first: bool = If the coin is fresh without metadata
return_null_array: bool = Follower could not find pair metadata
"""
pair_in_dict = self.pair_dict.get(pair)
pair_dict = self.pair_dict.get(pair)
data_path_set = self.pair_dict.get(pair, {}).get("data_path", None)
return_null_array = False
if pair_in_dict:
model_filename = self.pair_dict[pair]["model_filename"]
trained_timestamp = self.pair_dict[pair]["trained_timestamp"]
coin_first = self.pair_dict[pair]["first"]
if pair_dict:
model_filename = pair_dict["model_filename"]
trained_timestamp = pair_dict["trained_timestamp"]
elif not self.follow_mode:
self.pair_dict[pair] = {}
model_filename = self.pair_dict[pair]["model_filename"] = ""
coin_first = self.pair_dict[pair]["first"] = True
trained_timestamp = self.pair_dict[pair]["trained_timestamp"] = 0
self.pair_dict[pair]["priority"] = len(self.pair_dict)
pair_dict = self.pair_dict[pair] = {}
model_filename = pair_dict["model_filename"] = ""
trained_timestamp = pair_dict["trained_timestamp"] = 0
pair_dict["priority"] = len(self.pair_dict)
if not data_path_set and self.follow_mode:
logger.warning(
@ -191,9 +195,11 @@ class FreqaiDataDrawer:
f"pair_dictionary at path {self.full_path}, sending null values "
"back to strategy."
)
trained_timestamp = 0
model_filename = ''
return_null_array = True
return model_filename, trained_timestamp, coin_first, return_null_array
return model_filename, trained_timestamp, return_null_array
def set_pair_dict_info(self, metadata: dict) -> None:
pair_in_dict = self.pair_dict.get(metadata["pair"])
@ -214,7 +220,8 @@ class FreqaiDataDrawer:
# send pair to end of queue
self.pair_dict[pair]["priority"] = len(self.pair_dict)
def set_initial_return_values(self, pair: str, dk, pred_df, do_preds) -> None:
def set_initial_return_values(self, pair: str, dk: FreqaiDataKitchen,
pred_df: DataFrame, do_preds: npt.ArrayLike) -> None:
"""
Set the initial return values to a persistent dataframe. This avoids needing to repredict on
historical candles, and also stores historical predictions despite retrainings (so stored
@ -351,6 +358,217 @@ class FreqaiDataDrawer:
if self.config.get("freqai", {}).get("purge_old_models", False):
self.purge_old_models()
# Functions pulled back from FreqaiDataKitchen because they relied on DataDrawer
def save_data(self, model: Any, coin: str, dk: FreqaiDataKitchen) -> None:
    """
    Saves all data associated with a model for a single sub-train time range
    and registers the model in the drawer.
    :params:
    :model: User trained model which can be reused for inferencing to generate
    predictions
    :coin: key of the self.pair_dict entry that is updated with the new
    model filename and data path
    :dk: data kitchen holding the data path, model filename, metadata and
    training features that are written to disk
    """
    save_path = Path(dk.data_path)
    # mkdir(exist_ok=True) is already a no-op for an existing directory, so no
    # separate is_dir() check is needed
    save_path.mkdir(parents=True, exist_ok=True)

    # Save the trained model
    if not dk.keras:
        dump(model, save_path / f"{dk.model_filename}_model.joblib")
    else:
        model.save(save_path / f"{dk.model_filename}_model.h5")

    if dk.svm_model is not None:
        dump(dk.svm_model, save_path / f"{dk.model_filename}_svm_model.joblib")

    dk.data["data_path"] = str(dk.data_path)
    dk.data["model_filename"] = str(dk.model_filename)
    dk.data["training_features_list"] = list(dk.data_dictionary["train_features"].columns)
    dk.data["label_list"] = dk.label_list

    # store the metadata
    with open(save_path / f"{dk.model_filename}_metadata.json", "w") as fp:
        json.dump(dk.data, fp, default=dk.np_encoder)

    # save the train data to file so we can check preds for area of applicability later
    dk.data_dictionary["train_features"].to_pickle(
        save_path / f"{dk.model_filename}_trained_df.pkl"
    )

    if self.freqai_info.get("feature_parameters", {}).get("principal_component_analysis"):
        # context manager guarantees the pickle is flushed and the handle closed
        with open(save_path / f"{dk.model_filename}_pca_object.pkl", "wb") as pca_fp:
            cloudpickle.dump(dk.pca, pca_fp)

    # keep the model in memory and record where it lives, then persist the drawer
    self.model_dictionary[dk.model_filename] = model
    self.pair_dict[coin]["model_filename"] = dk.model_filename
    self.pair_dict[coin]["data_path"] = str(dk.data_path)
    self.save_drawer_to_disk()
def load_data(self, coin: str, dk: FreqaiDataKitchen) -> Any:
    """
    loads all data required to make a prediction on a sub-train time range
    :params:
    :coin: key of the self.pair_dict entry describing the stored model
    :dk: data kitchen that receives the loaded metadata, training features,
    and (when present on disk) the svm model and pca object
    :returns:
    :model: User trained model which can be inferenced for new predictions,
    or None when no model filename is recorded for the coin
    :raises:
    OperationalException if the loaded model object is falsy
    """
    if not self.pair_dict[coin]["model_filename"]:
        return None

    if dk.live:
        dk.model_filename = self.pair_dict[coin]["model_filename"]
        dk.data_path = Path(self.pair_dict[coin]["data_path"])
        if self.freqai_info.get("follow_mode", False):
            # follower can be on a different system which is rsynced to the leader:
            dk.data_path = Path(
                self.config["user_data_dir"]
                / "models"
                / dk.data_path.parts[-2]
                / dk.data_path.parts[-1]
            )

    with open(dk.data_path / f"{dk.model_filename}_metadata.json", "r") as fp:
        dk.data = json.load(fp)
        dk.training_features_list = dk.data["training_features_list"]
        dk.label_list = dk.data["label_list"]

    # NOTE(review): read_pickle / joblib.load / cloudpickle.load execute pickled
    # code; in follow mode these files come from another machine - confirm that
    # source is trusted.
    dk.data_dictionary["train_features"] = pd.read_pickle(
        dk.data_path / f"{dk.model_filename}_trained_df.pkl"
    )

    # try to access model in memory instead of loading object from disk to save time
    if dk.live and dk.model_filename in self.model_dictionary:
        model = self.model_dictionary[dk.model_filename]
    elif not dk.keras:
        model = load(dk.data_path / f"{dk.model_filename}_model.joblib")
    else:
        from tensorflow import keras

        model = keras.models.load_model(dk.data_path / f"{dk.model_filename}_model.h5")

    svm_path = dk.data_path / f"{dk.model_filename}_svm_model.joblib"
    if svm_path.resolve().exists():
        dk.svm_model = load(svm_path)

    if not model:
        raise OperationalException(
            f"Unable to load model, ensure model exists at {dk.data_path} "
        )

    # read the flag tolerantly (same style as the purge_old_models lookup) so a
    # config without the key skips PCA instead of raising KeyError
    feature_parameters = self.config.get("freqai", {}).get("feature_parameters", {})
    if feature_parameters.get("principal_component_analysis"):
        # context manager closes the handle once the object is unpickled
        with open(dk.data_path / f"{dk.model_filename}_pca_object.pkl", "rb") as pca_fp:
            dk.pca = cloudpickle.load(pca_fp)

    return model
def update_historic_data(self, strategy: IStrategy, dk: FreqaiDataKitchen) -> None:
    """
    Append new candles to our stored historic data (in memory) so that
    we do not need to load candle history from disk and do not need to
    ping the exchange multiple times for the same candle.
    :params:
    strategy: IStrategy = strategy whose dataprovider (strategy.dp) supplies
    the newest candles for each pair/timeframe
    dk: FreqaiDataKitchen = data kitchen whose all_pairs lists the pairs to update
    """
    feat_params = self.freqai_info.get("feature_parameters", {})
    with self.history_lock:
        history_data = self.historic_data

        for pair in dk.all_pairs:
            for tf in feat_params.get("include_timeframes"):
                df_dp = strategy.dp.get_pair_dataframe(pair, tf)
                if len(df_dp.index) == 0:
                    continue

                # check if newest candle is already appended
                last_stored_date = history_data[pair][tf].iloc[-1]["date"]
                if str(last_stored_date) == str(df_dp["date"].iloc[-1]):
                    continue

                try:
                    # first row after the last candle we already hold
                    # NOTE(review): the index label is used positionally with
                    # iloc below, which presumes a default 0..n-1 index - confirm
                    index = df_dp.loc[df_dp["date"] == last_stored_date].index[0] + 1
                except IndexError:
                    logger.warning(
                        f"Unable to update pair history for {pair}. "
                        "If this does not resolve itself after 1 additional candle, "
                        "please report the error to #freqai discord channel"
                    )
                    # NOTE(review): this aborts the update for all remaining
                    # pairs/timeframes, not only the failing pair.
                    return

                # slice the same frame the index was computed on rather than
                # fetching the dataframe a second time
                history_data[pair][tf] = pd.concat(
                    [history_data[pair][tf], df_dp.iloc[index:]],
                    ignore_index=True,
                    axis=0,
                )
def load_all_pair_histories(self, timerange: TimeRange, dk: FreqaiDataKitchen) -> None:
    """
    Fill self.historic_data with the candle history of every pair in
    dk.all_pairs, for each configured timeframe, read via load_pair_history.
    :params:
    timerange: TimeRange = full timerange required to populate all indicators
    for training according to user defined train_period_days
    dk: FreqaiDataKitchen = data kitchen providing the list of pairs (all_pairs)
    """
    store = self.historic_data
    for pair in dk.all_pairs:
        # reuse the per-pair mapping when present, otherwise create it
        per_timeframe = store.setdefault(pair, {})

        timeframes = self.freqai_info.get("feature_parameters", {}).get("include_timeframes")
        for tf in timeframes:
            per_timeframe[tf] = load_pair_history(
                datadir=self.config["datadir"],
                timeframe=tf,
                pair=pair,
                timerange=timerange,
                data_format=self.config.get("dataformat_ohlcv", "json"),
                candle_type=self.config.get("trading_mode", "spot"),
            )
def get_base_and_corr_dataframes(
    self, timerange: TimeRange, pair: str, dk: FreqaiDataKitchen
) -> Tuple[Dict[Any, Any], Dict[Any, Any]]:
    """
    Look up the in-memory historic_data and slice out the dataframes relevant
    to the present pair.
    :params:
    timerange: TimeRange = full timerange required to populate all indicators
    for training according to user defined train_period_days
    pair: str = pair whose base dataframes are requested
    dk: FreqaiDataKitchen = data kitchen used to slice each dataframe to timerange
    :returns:
    corr_dataframes: dict of {corr_pair: {timeframe: dataframe}}
    base_dataframes: dict of {timeframe: dataframe} for the present pair
    """
    with self.history_lock:
        corr_dataframes: Dict[Any, Any] = {}
        base_dataframes: Dict[Any, Any] = {}
        candles = self.historic_data
        feature_parameters = self.freqai_info.get("feature_parameters", {})
        corr_pairs = feature_parameters.get("include_corr_pairlist", [])

        for tf in feature_parameters.get("include_timeframes"):
            base_dataframes[tf] = dk.slice_dataframe(timerange, candles[pair][tf])
            for p in corr_pairs or ():
                # dont repeat anything from whitelist
                # NOTE(review): for strings this is a substring test, not
                # equality - confirm that is intended
                if pair in p:
                    continue
                corr_dataframes.setdefault(p, {})[tf] = dk.slice_dataframe(
                    timerange, candles[p][tf]
                )

        return corr_dataframes, base_dataframes
# to be used if we want to send predictions directly to the follower instead of forcing
# follower to load models and inference
# def save_model_return_values_to_disk(self) -> None:

View File

@ -1,6 +1,5 @@
import copy
import datetime
import json
import logging
import shutil
from pathlib import Path
@ -9,18 +8,14 @@ from typing import Any, Dict, List, Tuple
import numpy as np
import numpy.typing as npt
import pandas as pd
from joblib import dump, load # , Parallel, delayed # used for auto distribution assignment
from joblib.externals import cloudpickle
from pandas import DataFrame
from sklearn import linear_model
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.model_selection import train_test_split
from freqtrade.configuration import TimeRange
from freqtrade.data.history import load_pair_history
from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data
from freqtrade.exceptions import OperationalException
from freqtrade.freqai.data_drawer import FreqaiDataDrawer
from freqtrade.resolvers import ExchangeResolver
from freqtrade.strategy.interface import IStrategy
@ -57,7 +52,6 @@ class FreqaiDataKitchen:
def __init__(
self,
config: Dict[str, Any],
data_drawer: FreqaiDataDrawer,
live: bool = False,
pair: str = "",
):
@ -69,6 +63,7 @@ class FreqaiDataKitchen:
self.append_df: DataFrame = DataFrame()
self.data_path = Path()
self.label_list: List = []
self.training_features_list: List = []
self.model_filename: str = ""
self.live = live
self.pair = pair
@ -89,8 +84,6 @@ class FreqaiDataKitchen:
config["freqai"]["backtest_period_days"],
)
self.dd = data_drawer
def set_paths(
self,
pair: str,
@ -113,110 +106,6 @@ class FreqaiDataKitchen:
return
def save_data(self, model: Any, coin: str = "", label=None) -> None:
"""
Saves all data associated with a model for a single sub-train time range
:params:
:model: User trained model which can be reused for inferencing to generate
predictions
"""
if not self.data_path.is_dir():
self.data_path.mkdir(parents=True, exist_ok=True)
save_path = Path(self.data_path)
# Save the trained model
if not self.keras:
dump(model, save_path / f"{self.model_filename}_model.joblib")
else:
model.save(save_path / f"{self.model_filename}_model.h5")
if self.svm_model is not None:
dump(self.svm_model, save_path / str(self.model_filename + "_svm_model.joblib"))
self.data["data_path"] = str(self.data_path)
self.data["model_filename"] = str(self.model_filename)
self.data["training_features_list"] = list(self.data_dictionary["train_features"].columns)
self.data["label_list"] = self.label_list
# store the metadata
with open(save_path / str(self.model_filename + "_metadata.json"), "w") as fp:
json.dump(self.data, fp, default=self.np_encoder)
# save the train data to file so we can check preds for area of applicability later
self.data_dictionary["train_features"].to_pickle(
save_path / str(self.model_filename + "_trained_df.pkl")
)
if self.freqai_config.get("feature_parameters", {}).get("principal_component_analysis"):
cloudpickle.dump(
self.pca, open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "wb")
)
# if self.live:
self.dd.model_dictionary[self.model_filename] = model
self.dd.pair_dict[coin]["model_filename"] = self.model_filename
self.dd.pair_dict[coin]["data_path"] = str(self.data_path)
self.dd.save_drawer_to_disk()
return
def load_data(self, coin: str = "") -> Any:
"""
loads all data required to make a prediction on a sub-train time range
:returns:
:model: User trained model which can be inferenced for new predictions
"""
if not self.dd.pair_dict[coin]["model_filename"]:
return None
if self.live:
self.model_filename = self.dd.pair_dict[coin]["model_filename"]
self.data_path = Path(self.dd.pair_dict[coin]["data_path"])
if self.freqai_config.get("follow_mode", False):
# follower can be on a different system which is rsynced to the leader:
self.data_path = Path(
self.config["user_data_dir"]
/ "models"
/ self.data_path.parts[-2]
/ self.data_path.parts[-1]
)
with open(self.data_path / str(self.model_filename + "_metadata.json"), "r") as fp:
self.data = json.load(fp)
self.training_features_list = self.data["training_features_list"]
self.label_list = self.data["label_list"]
self.data_dictionary["train_features"] = pd.read_pickle(
self.data_path / str(self.model_filename + "_trained_df.pkl")
)
# try to access model in memory instead of loading object from disk to save time
if self.live and self.model_filename in self.dd.model_dictionary:
model = self.dd.model_dictionary[self.model_filename]
elif not self.keras:
model = load(self.data_path / str(self.model_filename + "_model.joblib"))
else:
from tensorflow import keras
model = keras.models.load_model(self.data_path / str(self.model_filename + "_model.h5"))
if Path(self.data_path / str(self.model_filename + "_svm_model.joblib")).resolve().exists():
self.svm_model = load(self.data_path / str(self.model_filename + "_svm_model.joblib"))
if not model:
raise OperationalException(
f"Unable to load model, ensure model exists at " f"{self.data_path} "
)
if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]:
self.pca = cloudpickle.load(
open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "rb")
)
return model
def make_train_test_datasets(
self, filtered_dataframe: DataFrame, labels: DataFrame
) -> Dict[Any, Any]:
@ -243,20 +132,28 @@ class FreqaiDataKitchen:
else:
stratification = None
(
train_features,
test_features,
train_labels,
test_labels,
train_weights,
test_weights,
) = train_test_split(
filtered_dataframe[: filtered_dataframe.shape[0]],
labels,
weights,
stratify=stratification,
**self.config["freqai"]["data_split_parameters"],
)
if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
(
train_features,
test_features,
train_labels,
test_labels,
train_weights,
test_weights,
) = train_test_split(
filtered_dataframe[: filtered_dataframe.shape[0]],
labels,
weights,
stratify=stratification,
**self.config["freqai"]["data_split_parameters"],
)
else:
test_labels = np.zeros(2)
test_features = pd.DataFrame()
test_weights = np.zeros(2)
train_features = filtered_dataframe
train_labels = labels
train_weights = weights
return self.build_data_dictionary(
train_features, test_features, train_labels, test_labels, train_weights, test_weights
@ -392,12 +289,13 @@ class FreqaiDataKitchen:
/ (train_labels_max - train_labels_min)
- 1
)
data_dictionary["test_labels"][item] = (
2
* (data_dictionary["test_labels"][item] - train_labels_min)
/ (train_labels_max - train_labels_min)
- 1
)
if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
data_dictionary["test_labels"][item] = (
2
* (data_dictionary["test_labels"][item] - train_labels_min)
/ (train_labels_max - train_labels_min)
- 1
)
self.data[f"{item}_max"] = train_labels_max # .to_dict()
self.data[f"{item}_min"] = train_labels_min # .to_dict()
@ -555,11 +453,12 @@ class FreqaiDataKitchen:
self.data["training_features_list_raw"] = copy.deepcopy(self.training_features_list)
self.training_features_list = self.data_dictionary["train_features"].columns
self.data_dictionary["test_features"] = pd.DataFrame(
data=test_components,
columns=["PC" + str(i) for i in range(0, n_keep_components)],
index=self.data_dictionary["test_features"].index,
)
if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
self.data_dictionary["test_features"] = pd.DataFrame(
data=test_components,
columns=["PC" + str(i) for i in range(0, n_keep_components)],
index=self.data_dictionary["test_features"].index,
)
self.data["n_kept_components"] = n_keep_components
self.pca = pca2
@ -652,15 +551,17 @@ class FreqaiDataKitchen:
)
# same for test data
y_pred = self.svm_model.predict(self.data_dictionary["test_features"])
dropped_points = np.where(y_pred == -1, 0, y_pred)
self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
(y_pred == 1)
]
self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][(y_pred == 1)]
self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][
(y_pred == 1)
]
if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
y_pred = self.svm_model.predict(self.data_dictionary["test_features"])
dropped_points = np.where(y_pred == -1, 0, y_pred)
self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
(y_pred == 1)
]
self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][(
y_pred == 1)]
self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][
(y_pred == 1)
]
logger.info(
f"svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}"
@ -909,10 +810,10 @@ class FreqaiDataKitchen:
coin, _ = pair.split("/")
self.data_path = Path(
self.full_path
/ str("sub-train" + "-" + pair.split("/")[0] + "_" + str(int(trained_timerange.stopts)))
/ f"sub-train-{pair.split('/')[0]}_{int(trained_timerange.stopts)}"
)
self.model_filename = "cb_" + coin.lower() + "_" + str(int(trained_timerange.stopts))
self.model_filename = f"cb_{coin.lower()}_{int(trained_timerange.stopts)}"
def download_all_data_for_training(self, timerange: TimeRange) -> None:
"""
@ -941,56 +842,6 @@ class FreqaiDataKitchen:
prepend=self.config.get("prepend_data", False),
)
def update_historic_data(self, strategy: IStrategy) -> None:
"""
Append new candles to our stores historic data (in memory) so that
we do not need to load candle history from disk and we dont need to
pinging exchange multiple times for the same candle.
:params:
dataframe: DataFrame = strategy provided dataframe
"""
feat_params = self.freqai_config.get("feature_parameters", {})
with self.dd.history_lock:
history_data = self.dd.historic_data
for pair in self.all_pairs:
for tf in feat_params.get("include_timeframes"):
# check if newest candle is already appended
df_dp = strategy.dp.get_pair_dataframe(pair, tf)
if len(df_dp.index) == 0:
continue
if str(history_data[pair][tf].iloc[-1]["date"]) == str(
df_dp.iloc[-1:]["date"].iloc[-1]
):
continue
try:
index = (
df_dp.loc[
df_dp["date"] == history_data[pair][tf].iloc[-1]["date"]
].index[0]
+ 1
)
except IndexError:
logger.warning(
f"Unable to update pair history for {pair}. "
"If this does not resolve itself after 1 additional candle, "
"please report the error to #freqai discord channel"
)
return
history_data[pair][tf] = pd.concat(
[
history_data[pair][tf],
strategy.dp.get_pair_dataframe(pair, tf).iloc[index:],
],
ignore_index=True,
axis=0,
)
# logger.info(f'Length of history data {len(history_data[pair][tf])}')
def set_all_pairs(self) -> None:
self.all_pairs = copy.deepcopy(
@ -1000,63 +851,6 @@ class FreqaiDataKitchen:
if pair not in self.all_pairs:
self.all_pairs.append(pair)
def load_all_pair_histories(self, timerange: TimeRange) -> None:
"""
Load pair histories for all whitelist and corr_pairlist pairs.
Only called once upon startup of bot.
:params:
timerange: TimeRange = full timerange required to populate all indicators
for training according to user defined train_period_days
"""
history_data = self.dd.historic_data
for pair in self.all_pairs:
if pair not in history_data:
history_data[pair] = {}
for tf in self.freqai_config.get("feature_parameters", {}).get("include_timeframes"):
history_data[pair][tf] = load_pair_history(
datadir=self.config["datadir"],
timeframe=tf,
pair=pair,
timerange=timerange,
data_format=self.config.get("dataformat_ohlcv", "json"),
candle_type=self.config.get("trading_mode", "spot"),
)
def get_base_and_corr_dataframes(
self, timerange: TimeRange, pair: str
) -> Tuple[Dict[Any, Any], Dict[Any, Any]]:
"""
Searches through our historic_data in memory and returns the dataframes relevant
to the present pair.
:params:
timerange: TimeRange = full timerange required to populate all indicators
for training according to user defined train_period_days
metadata: dict = strategy furnished pair metadata
"""
with self.dd.history_lock:
corr_dataframes: Dict[Any, Any] = {}
base_dataframes: Dict[Any, Any] = {}
historic_data = self.dd.historic_data
pairs = self.freqai_config.get("feature_parameters", {}).get(
"include_corr_pairlist", []
)
for tf in self.freqai_config.get("feature_parameters", {}).get("include_timeframes"):
base_dataframes[tf] = self.slice_dataframe(timerange, historic_data[pair][tf])
if pairs:
for p in pairs:
if pair in p:
continue # dont repeat anything from whitelist
if p not in corr_dataframes:
corr_dataframes[p] = {}
corr_dataframes[p][tf] = self.slice_dataframe(
timerange, historic_data[p][tf]
)
return corr_dataframes, base_dataframes
def use_strategy_to_populate_indicators(
self,
strategy: IStrategy,
@ -1122,20 +916,6 @@ class FreqaiDataKitchen:
return dataframe
def fit_live_predictions(self) -> None:
"""
Fit the labels with a gaussian distribution
"""
import scipy as spy
num_candles = self.freqai_config.get("fit_live_predictions_candles", 100)
self.data["labels_mean"], self.data["labels_std"] = {}, {}
for label in self.label_list:
f = spy.stats.norm.fit(self.dd.historic_predictions[self.pair][label].tail(num_candles))
self.data["labels_mean"][label], self.data["labels_std"][label] = f[0], f[1]
return
def fit_labels(self) -> None:
"""
Fit the labels with a gaussian distribution

View File

@ -102,7 +102,7 @@ class IFreqaiModel(ABC):
self.dd.set_pair_dict_info(metadata)
if self.live:
self.dk = FreqaiDataKitchen(self.config, self.dd, self.live, metadata["pair"])
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
dk = self.start_live(dataframe, metadata, strategy, self.dk)
# For backtesting, each pair enters and then gets trained for each window along the
@ -111,7 +111,7 @@ class IFreqaiModel(ABC):
# FreqAI slides the window and sequentially builds the backtesting results before returning
# the concatenated results for the full backtesting period back to the strategy.
elif not self.follow_mode:
self.dk = FreqaiDataKitchen(self.config, self.dd, self.live, metadata["pair"])
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
dataframe = self.dk.use_strategy_to_populate_indicators(
@ -120,7 +120,8 @@ class IFreqaiModel(ABC):
dk = self.start_backtesting(dataframe, metadata, self.dk)
dataframe = dk.remove_features_from_df(dk.return_dataframe)
return self.return_values(dataframe, dk)
del dk
return self.return_values(dataframe)
@threaded
def start_scanning(self, strategy: IStrategy) -> None:
@ -134,11 +135,11 @@ class IFreqaiModel(ABC):
time.sleep(1)
for pair in self.config.get("exchange", {}).get("pair_whitelist"):
(_, trained_timestamp, _, _) = self.dd.get_pair_dict_info(pair)
(_, trained_timestamp, _) = self.dd.get_pair_dict_info(pair)
if self.dd.pair_dict[pair]["priority"] != 1:
continue
dk = FreqaiDataKitchen(self.config, self.dd, self.live, pair)
dk = FreqaiDataKitchen(self.config, self.live, pair)
dk.set_paths(pair, trained_timestamp)
(
retrain,
@ -177,7 +178,7 @@ class IFreqaiModel(ABC):
# following tr_train. Both of these windows slide through the
# entire backtest
for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges):
(_, _, _, _) = self.dd.get_pair_dict_info(metadata["pair"])
(_, _, _) = self.dd.get_pair_dict_info(metadata["pair"])
train_it += 1
total_trains = len(dk.backtesting_timeranges)
gc.collect()
@ -210,15 +211,16 @@ class IFreqaiModel(ABC):
)
)
if not self.model_exists(
metadata["pair"], dk, trained_timestamp=trained_timestamp.stopts
metadata["pair"], dk, trained_timestamp=int(trained_timestamp.stopts)
):
dk.find_features(dataframe_train)
self.model = self.train(dataframe_train, metadata["pair"], dk)
self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = trained_timestamp.stopts
self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
trained_timestamp.stopts)
dk.set_new_model_names(metadata["pair"], trained_timestamp)
dk.save_data(self.model, metadata["pair"])
self.dd.save_data(self.model, metadata["pair"], dk)
else:
self.model = dk.load_data(metadata["pair"])
self.model = self.dd.load_data(metadata["pair"], dk)
self.check_if_feature_list_matches_strategy(dataframe_train, dk)
@ -249,7 +251,7 @@ class IFreqaiModel(ABC):
self.dd.update_follower_metadata()
# get the model metadata associated with the current pair
(_, trained_timestamp, _, return_null_array) = self.dd.get_pair_dict_info(metadata["pair"])
(_, trained_timestamp, return_null_array) = self.dd.get_pair_dict_info(metadata["pair"])
# if the metadata doesnt exist, the follower returns null arrays to strategy
if self.follow_mode and return_null_array:
@ -259,7 +261,7 @@ class IFreqaiModel(ABC):
# append the historic data once per round
if self.dd.historic_data:
dk.update_historic_data(strategy)
self.dd.update_historic_data(strategy, dk)
logger.debug(f'Updating historic data on pair {metadata["pair"]}')
if not self.follow_mode:
@ -277,7 +279,7 @@ class IFreqaiModel(ABC):
"data saved"
)
dk.download_all_data_for_training(data_load_timerange)
dk.load_all_pair_histories(data_load_timerange)
self.dd.load_all_pair_histories(data_load_timerange, dk)
if not self.scanning:
self.scanning = True
@ -291,7 +293,7 @@ class IFreqaiModel(ABC):
)
# load the model and associated data into the data kitchen
self.model = dk.load_data(coin=metadata["pair"])
self.model = self.dd.load_data(metadata["pair"], dk)
dataframe = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
@ -467,8 +469,8 @@ class IFreqaiModel(ABC):
new_trained_timerange does not contain any NaNs)
"""
corr_dataframes, base_dataframes = dk.get_base_and_corr_dataframes(
data_load_timerange, pair
corr_dataframes, base_dataframes = self.dd.get_base_and_corr_dataframes(
data_load_timerange, pair, dk
)
unfiltered_dataframe = dk.use_strategy_to_populate_indicators(
@ -488,7 +490,7 @@ class IFreqaiModel(ABC):
if self.dd.pair_dict[pair]["priority"] == 1 and self.scanning:
with self.lock:
self.dd.pair_to_end_of_training_queue(pair)
dk.save_data(model, coin=pair)
self.dd.save_data(model, pair, dk)
if self.freqai_info.get("purge_old_models", False):
self.dd.purge_old_models()
@ -504,6 +506,20 @@ class IFreqaiModel(ABC):
self.dd.historic_predictions[pair] = pd.DataFrame()
self.dd.historic_predictions[pair] = copy.deepcopy(pred_df)
def fit_live_predictions(self, dk: FreqaiDataKitchen) -> None:
    """
    Fit a gaussian distribution to the most recent stored predictions of each
    label and store the fitted mean/std in dk.data["labels_mean"] and
    dk.data["labels_std"].
    :params:
    dk: FreqaiDataKitchen = data kitchen for the present pair; provides
    label_list and pair, and receives the fitted values in its data dict
    """
    # import the submodule explicitly: a bare `import scipy` does not guarantee
    # that scipy.stats has been loaded
    from scipy import stats

    # number of trailing predictions used for the fit
    num_candles = self.freqai_info.get("fit_live_predictions_candles", 100)
    dk.data["labels_mean"], dk.data["labels_std"] = {}, {}
    for label in dk.label_list:
        recent_predictions = self.dd.historic_predictions[dk.pair][label].tail(num_candles)
        mean, std = stats.norm.fit(recent_predictions)
        dk.data["labels_mean"][label] = mean
        dk.data["labels_std"][label] = std
# Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModel.py for an example.
@ -545,12 +561,11 @@ class IFreqaiModel(ABC):
"""
@abstractmethod
def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
def return_values(self, dataframe: DataFrame) -> DataFrame:
"""
User defines the dataframe to be returned to strategy here.
:param dataframe: DataFrame = the full dataframe for the current prediction (live)
or --timerange (backtesting)
:param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
:return: dataframe: DataFrame = dataframe filled with user defined data
"""

View File

@ -18,7 +18,7 @@ class BaseRegressionModel(IFreqaiModel):
such as prediction_models/CatboostPredictionModel.py for guidance.
"""
def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
def return_values(self, dataframe: DataFrame) -> DataFrame:
"""
User uses this function to add any additional return values to the dataframe.
e.g.
@ -55,8 +55,6 @@ class BaseRegressionModel(IFreqaiModel):
f"{end_date}--------------------")
# split data into train/test data.
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
if not self.freqai_info.get('fit_live_predictions', 0):
dk.fit_labels()
# normalize all data based on train_dataset only
data_dictionary = dk.normalize_data(data_dictionary)
@ -73,8 +71,11 @@ class BaseRegressionModel(IFreqaiModel):
if pair not in self.dd.historic_predictions:
self.set_initial_historic_predictions(
data_dictionary['train_features'], model, dk, pair)
elif self.freqai_info.get('fit_live_predictions_candles', 0):
dk.fit_live_predictions()
if self.freqai_info.get('fit_live_predictions_candles', 0) and self.live:
self.fit_live_predictions(dk)
else:
dk.fit_labels()
self.dd.save_historic_predictions_to_disk()

View File

@ -1,5 +1,5 @@
import logging
from typing import Tuple
from typing import Any
from pandas import DataFrame
@ -16,7 +16,7 @@ class BaseTensorFlowModel(IFreqaiModel):
User *must* inherit from this class and set fit() and predict().
"""
def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
def return_values(self, dataframe: DataFrame) -> DataFrame:
"""
User uses this function to add any additional return values to the dataframe.
e.g.
@ -27,7 +27,7 @@ class BaseTensorFlowModel(IFreqaiModel):
def train(
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
) -> Tuple[DataFrame, DataFrame]:
) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
@ -49,8 +49,7 @@ class BaseTensorFlowModel(IFreqaiModel):
# split data into train/test data.
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
if not self.freqai_info.get('fit_live_predictions', 0):
dk.fit_labels()
# normalize all data based on train_dataset only
data_dictionary = dk.normalize_data(data_dictionary)
@ -67,8 +66,11 @@ class BaseTensorFlowModel(IFreqaiModel):
if pair not in self.dd.historic_predictions:
self.set_initial_historic_predictions(
data_dictionary['train_features'], model, dk, pair)
elif self.freqai_info.get('fit_live_predictions_candles', 0):
dk.fit_live_predictions()
if self.freqai_info.get('fit_live_predictions_candles', 0) and self.live:
self.fit_live_predictions(dk)
else:
dk.fit_labels()
self.dd.save_historic_predictions_to_disk()

View File

@ -1,6 +1,6 @@
import logging
from typing import Any, Dict
import gc
from catboost import CatBoostRegressor, Pool
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
@ -28,17 +28,25 @@ class CatboostPredictionModel(BaseRegressionModel):
label=data_dictionary["train_labels"],
weight=data_dictionary["train_weights"],
)
test_data = Pool(
data=data_dictionary["test_features"],
label=data_dictionary["test_labels"],
weight=data_dictionary["test_weights"],
)
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
test_data = None
else:
test_data = Pool(
data=data_dictionary["test_features"],
label=data_dictionary["test_labels"],
weight=data_dictionary["test_weights"],
)
model = CatBoostRegressor(
allow_writing_files=False,
**self.model_training_parameters,
)
model.fit(X=train_data, eval_set=test_data)
# some evidence that catboost pools have memory leaks:
# https://github.com/catboost/catboost/issues/1835
del train_data, test_data
gc.collect()
return model

View File

@ -36,7 +36,9 @@ class CatboostPredictionMultiModel(BaseRegressionModel):
model = MultiOutputRegressor(estimator=cbr)
model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set)
train_score = model.score(X, y)
test_score = model.score(*eval_set)
logger.info(f"Train score {train_score}, Test score {test_score}")
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
train_score = model.score(X, y)
test_score = model.score(*eval_set)
logger.info(f"Train score {train_score}, Test score {test_score}")
return model

View File

@ -25,11 +25,15 @@ class LightGBMPredictionModel(BaseRegressionModel):
all the training and test data/labels.
"""
eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
eval_set = None
else:
eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
X = data_dictionary["train_features"]
y = data_dictionary["train_labels"]
model = LGBMRegressor(**self.model_training_parameters)
model.fit(X=X, y=y, eval_set=eval_set)
return model

View File

@ -1,11 +1,11 @@
from copy import deepcopy
from pathlib import Path
from unittest.mock import MagicMock
import pytest
from freqtrade.configuration import TimeRange
from freqtrade.data.dataprovider import DataProvider
from freqtrade.freqai.data_drawer import FreqaiDataDrawer
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.resolvers import StrategyResolver
from freqtrade.resolvers.freqaimodel_resolver import FreqaiModelResolver
@ -57,11 +57,17 @@ def freqai_conf(default_conf, tmpdir):
def get_patched_data_kitchen(mocker, freqaiconf):
dd = mocker.patch('freqtrade.freqai.data_drawer', MagicMock())
dk = FreqaiDataKitchen(freqaiconf, dd)
# dd = mocker.patch('freqtrade.freqai.data_drawer', MagicMock())
dk = FreqaiDataKitchen(freqaiconf)
return dk
def get_patched_data_drawer(mocker, freqaiconf):
    """
    Build a FreqaiDataDrawer for tests from the given configuration.

    :param mocker: pytest-mock fixture; accepted for signature parity with the other
        ``get_patched_*`` helpers but not used here.
    :param freqaiconf: configuration object handed straight to FreqaiDataDrawer.
    :return: the newly constructed FreqaiDataDrawer.
    """
    # NOTE(review): FreqaiDataDrawer stores a ``full_path`` attribute taken from its
    # constructor arguments - confirm that passing only the config matches its signature.
    return FreqaiDataDrawer(freqaiconf)
def get_patched_freqai_strategy(mocker, freqaiconf):
strategy = StrategyResolver.load_strategy(freqaiconf)
strategy.ft_bot_start()

View File

@ -0,0 +1,94 @@
import shutil
from pathlib import Path
from freqtrade.configuration import TimeRange
from freqtrade.data.dataprovider import DataProvider
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from tests.conftest import get_patched_exchange
from tests.freqai.conftest import get_patched_freqai_strategy
def test_update_historic_data(mocker, freqai_conf):
    """
    After update_historic_data(), the stored ADA/BTC 5m history must have grown by
    exactly the number of candles the dataprovider had beyond the initial load.
    """
    pair, timeframe = "ADA/BTC", "5m"

    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, get_patched_exchange(mocker, freqai_conf))
    freqai = strategy.freqai
    freqai.live = True
    freqai.dk = FreqaiDataKitchen(freqai_conf)

    # Seed the drawer with a fixed window of history.
    load_range = TimeRange.parse_timerange("20180110-20180114")
    freqai.dd.load_all_pair_histories(load_range, freqai.dk)

    candles_before = len(freqai.dd.historic_data[pair][timeframe])
    candles_in_dp = len(strategy.dp.get_pair_dataframe(pair, timeframe))
    expected_growth = candles_in_dp - candles_before

    freqai.dd.update_historic_data(strategy, freqai.dk)
    candles_after = len(freqai.dd.historic_data[pair][timeframe])

    assert candles_after - candles_before == expected_growth

    # Remove the files written under the data kitchen's working directory.
    shutil.rmtree(Path(freqai.dk.full_path))
def test_load_all_pairs_histories(mocker, freqai_conf):
    """
    load_all_pair_histories() must create one historic_data entry per whitelisted pair,
    and one sub-entry per configured timeframe for ADA/BTC.
    """
    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, get_patched_exchange(mocker, freqai_conf))
    freqai = strategy.freqai
    freqai.live = True
    freqai.dk = FreqaiDataKitchen(freqai_conf)

    load_range = TimeRange.parse_timerange("20180110-20180114")
    freqai.dd.load_all_pair_histories(load_range, freqai.dk)

    # One top-level key per pair in the exchange whitelist.
    loaded_pairs = len(freqai.dd.historic_data.keys())
    whitelist = freqai_conf.get("exchange", {}).get("pair_whitelist")
    assert loaded_pairs == len(whitelist)

    # One nested key per timeframe listed in the freqai feature parameters.
    loaded_timeframes = len(freqai.dd.historic_data["ADA/BTC"])
    timeframes = (
        freqai_conf.get("freqai", {}).get("feature_parameters", {}).get("include_timeframes")
    )
    assert loaded_timeframes == len(timeframes)

    # Remove the files written under the data kitchen's working directory.
    shutil.rmtree(Path(freqai.dk.full_path))
def test_get_base_and_corr_dataframes(mocker, freqai_conf):
    """
    get_base_and_corr_dataframes() must return a base mapping keyed per timeframe and a
    correlation mapping keyed per correlated pair, each holding one entry per timeframe.
    """
    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, get_patched_exchange(mocker, freqai_conf))
    freqai = strategy.freqai
    freqai.live = True
    freqai.dk = FreqaiDataKitchen(freqai_conf)

    load_range = TimeRange.parse_timerange("20180110-20180114")
    freqai.dd.load_all_pair_histories(load_range, freqai.dk)

    # Request a window that lies inside the loaded history.
    slice_range = TimeRange.parse_timerange("20180111-20180114")
    corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(slice_range, "LTC/BTC", freqai.dk)

    num_tfs = len(
        freqai_conf.get("freqai", {}).get("feature_parameters", {}).get("include_timeframes")
    )
    assert len(base_df.keys()) == num_tfs

    num_corr_entries = len(corr_df.keys())
    corr_pairlist = (
        freqai_conf.get("freqai", {}).get("feature_parameters", {}).get("include_corr_pairlist")
    )
    assert num_corr_entries == len(corr_pairlist)

    assert len(corr_df["ADA/BTC"].keys()) == num_tfs

    # Remove the files written under the data kitchen's working directory.
    shutil.rmtree(Path(freqai.dk.full_path))
def test_use_strategy_to_populate_indicators(mocker, freqai_conf):
    """
    Populating indicators for LTC/BTC through the strategy must produce a dataframe
    with exactly 45 columns for the test configuration.
    """
    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, get_patched_exchange(mocker, freqai_conf))
    strategy.freqai_info = freqai_conf.get("freqai", {})
    freqai = strategy.freqai
    freqai.live = True
    freqai.dk = FreqaiDataKitchen(freqai_conf)

    load_range = TimeRange.parse_timerange("20180110-20180114")
    freqai.dd.load_all_pair_histories(load_range, freqai.dk)

    slice_range = TimeRange.parse_timerange("20180111-20180114")
    corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(slice_range, "LTC/BTC", freqai.dk)

    populated = freqai.dk.use_strategy_to_populate_indicators(
        strategy, corr_df, base_df, 'LTC/BTC'
    )

    assert len(populated.columns) == 45

    # Remove the files written under the data kitchen's working directory.
    shutil.rmtree(Path(freqai.dk.full_path))

View File

@ -4,13 +4,8 @@ from pathlib import Path
import pytest
from freqtrade.configuration import TimeRange
from freqtrade.data.dataprovider import DataProvider
# from freqtrade.freqai.data_drawer import FreqaiDataDrawer
from freqtrade.exceptions import OperationalException
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from tests.conftest import get_patched_exchange
from tests.freqai.conftest import get_patched_data_kitchen, get_patched_freqai_strategy
from tests.freqai.conftest import get_patched_data_kitchen
@pytest.mark.parametrize(
@ -60,27 +55,6 @@ def test_split_timerange(
shutil.rmtree(Path(dk.full_path))
def test_update_historic_data(mocker, freqai_conf):
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd)
timerange = TimeRange.parse_timerange("20180110-20180114")
freqai.dk.load_all_pair_histories(timerange)
historic_candles = len(freqai.dd.historic_data["ADA/BTC"]["5m"])
dp_candles = len(strategy.dp.get_pair_dataframe("ADA/BTC", "5m"))
candle_difference = dp_candles - historic_candles
freqai.dk.update_historic_data(strategy)
updated_historic_candles = len(freqai.dd.historic_data["ADA/BTC"]["5m"])
assert updated_historic_candles - historic_candles == candle_difference
shutil.rmtree(Path(freqai.dk.full_path))
@pytest.mark.parametrize(
"timestamp, expected",
[
@ -92,67 +66,3 @@ def test_check_if_model_expired(mocker, freqai_conf, timestamp, expected):
dk = get_patched_data_kitchen(mocker, freqai_conf)
assert dk.check_if_model_expired(timestamp) == expected
shutil.rmtree(Path(dk.full_path))
def test_load_all_pairs_histories(mocker, freqai_conf):
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd)
timerange = TimeRange.parse_timerange("20180110-20180114")
freqai.dk.load_all_pair_histories(timerange)
assert len(freqai.dd.historic_data.keys()) == len(
freqai_conf.get("exchange", {}).get("pair_whitelist")
)
assert len(freqai.dd.historic_data["ADA/BTC"]) == len(
freqai_conf.get("freqai", {}).get("feature_parameters", {}).get("include_timeframes")
)
shutil.rmtree(Path(freqai.dk.full_path))
def test_get_base_and_corr_dataframes(mocker, freqai_conf):
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd)
timerange = TimeRange.parse_timerange("20180110-20180114")
freqai.dk.load_all_pair_histories(timerange)
sub_timerange = TimeRange.parse_timerange("20180111-20180114")
corr_df, base_df = freqai.dk.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC")
num_tfs = len(
freqai_conf.get("freqai", {}).get("feature_parameters", {}).get("include_timeframes")
)
assert len(base_df.keys()) == num_tfs
assert len(corr_df.keys()) == len(
freqai_conf.get("freqai", {}).get("feature_parameters", {}).get("include_corr_pairlist")
)
assert len(corr_df["ADA/BTC"].keys()) == num_tfs
shutil.rmtree(Path(freqai.dk.full_path))
def test_use_strategy_to_populate_indicators(mocker, freqai_conf):
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd)
timerange = TimeRange.parse_timerange("20180110-20180114")
freqai.dk.load_all_pair_histories(timerange)
sub_timerange = TimeRange.parse_timerange("20180111-20180114")
corr_df, base_df = freqai.dk.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC")
df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, 'LTC/BTC')
assert len(df.columns) == 45
shutil.rmtree(Path(freqai.dk.full_path))

View File

@ -1,5 +1,3 @@
# from unittest.mock import MagicMock
# from freqtrade.commands.optimize_commands import setup_optimize_configuration, start_edge
import platform
import shutil
from pathlib import Path
@ -23,9 +21,9 @@ def test_train_model_in_series_LightGBM(mocker, freqai_conf):
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd)
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180110-20180130")
freqai.dk.load_all_pair_histories(timerange)
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
freqai.dd.pair_dict = MagicMock()
@ -42,6 +40,36 @@ def test_train_model_in_series_LightGBM(mocker, freqai_conf):
shutil.rmtree(Path(freqai.dk.full_path))
def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf):
    """
    Train the LightGBM multi-output model for ADA/BTC with the two-label test strategy,
    then check that two labels were registered and all model artifacts exist on disk.
    """
    freqai_conf.update(
        {
            "timerange": "20180110-20180130",
            "strategy": "freqai_test_multimodel_strat",
            "freqaimodel": "LightGBMPredictionMultiModel",
        }
    )

    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, get_patched_exchange(mocker, freqai_conf))
    strategy.freqai_info = freqai_conf.get("freqai", {})
    freqai = strategy.freqai
    freqai.live = True
    freqai.dk = FreqaiDataKitchen(freqai_conf)

    history_range = TimeRange.parse_timerange("20180110-20180130")
    freqai.dd.load_all_pair_histories(history_range, freqai.dk)

    # Replace the pair dictionary with a mock before training.
    freqai.dd.pair_dict = MagicMock()

    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
    new_timerange = TimeRange.parse_timerange("20180120-20180130")

    freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)

    assert len(freqai.dk.label_list) == 2

    # Every artifact written by training must be present in the kitchen's data path.
    expected_artifacts = (
        f"{freqai.dk.model_filename}_model.joblib",
        f"{freqai.dk.model_filename}_metadata.json",
        f"{freqai.dk.model_filename}_trained_df.pkl",
        f"{freqai.dk.model_filename}_svm_model.joblib",
    )
    for artifact in expected_artifacts:
        assert Path(freqai.dk.data_path / artifact).is_file()

    # Remove the files written under the data kitchen's working directory.
    shutil.rmtree(Path(freqai.dk.full_path))
@pytest.mark.skipif("arm" in platform.uname()[-1], reason="no ARM for Catboost ...")
def test_train_model_in_series_Catboost(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"})
@ -54,9 +82,9 @@ def test_train_model_in_series_Catboost(mocker, freqai_conf):
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd)
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180110-20180130")
freqai.dk.load_all_pair_histories(timerange)
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
freqai.dd.pair_dict = MagicMock()
@ -82,15 +110,15 @@ def test_start_backtesting(mocker, freqai_conf):
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = False
freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd)
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180110-20180130")
freqai.dk.load_all_pair_histories(timerange)
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
sub_timerange = TimeRange.parse_timerange("20180110-20180130")
corr_df, base_df = freqai.dk.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC")
corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk)
df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")
metadata = {"pair": "ADA/BTC"}
metadata = {"pair": "LTC/BTC"}
freqai.start_backtesting(df, metadata, freqai.dk)
model_folders = [x for x in freqai.dd.full_path.iterdir() if x.is_dir()]
@ -99,6 +127,31 @@ def test_start_backtesting(mocker, freqai_conf):
shutil.rmtree(Path(freqai.dk.full_path))
def test_start_backtesting_subdaily_backtest_period(mocker, freqai_conf):
    """
    Backtest the 20180120-20180124 timerange with ``backtest_period_days`` set to 0.5
    and check that exactly 8 model folders are created under the drawer's full path.
    """
    freqai_conf["timerange"] = "20180120-20180124"
    freqai_conf.get("freqai", {}).update({"backtest_period_days": 0.5})

    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, get_patched_exchange(mocker, freqai_conf))
    strategy.freqai_info = freqai_conf.get("freqai", {})
    freqai = strategy.freqai
    freqai.live = False
    freqai.dk = FreqaiDataKitchen(freqai_conf)

    history_range = TimeRange.parse_timerange("20180110-20180130")
    freqai.dd.load_all_pair_histories(history_range, freqai.dk)

    slice_range = TimeRange.parse_timerange("20180110-20180130")
    corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(slice_range, "LTC/BTC", freqai.dk)

    populated = freqai.dk.use_strategy_to_populate_indicators(
        strategy, corr_df, base_df, "LTC/BTC"
    )

    metadata = {"pair": "LTC/BTC"}
    freqai.start_backtesting(populated, metadata, freqai.dk)

    # Count the directories found directly under the drawer's full path.
    folder_count = sum(1 for entry in freqai.dd.full_path.iterdir() if entry.is_dir())
    assert folder_count == 8

    # Remove the files written under the data kitchen's working directory.
    shutil.rmtree(Path(freqai.dk.full_path))
def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
freqai_conf.update({"timerange": "20180120-20180130"})
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
@ -107,11 +160,11 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = False
freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd)
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180110-20180130")
freqai.dk.load_all_pair_histories(timerange)
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
sub_timerange = TimeRange.parse_timerange("20180110-20180130")
corr_df, base_df = freqai.dk.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC")
corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk)
df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")
@ -130,11 +183,11 @@ def test_start_backtesting_from_existing_folder(mocker, freqai_conf, caplog):
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = False
freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd)
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180110-20180130")
freqai.dk.load_all_pair_histories(timerange)
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
sub_timerange = TimeRange.parse_timerange("20180110-20180130")
corr_df, base_df = freqai.dk.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC")
corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(sub_timerange, "LTC/BTC", freqai.dk)
df = freqai.dk.use_strategy_to_populate_indicators(strategy, corr_df, base_df, "LTC/BTC")
freqai.start_backtesting(df, metadata, freqai.dk)
@ -156,13 +209,12 @@ def test_follow_mode(mocker, freqai_conf):
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd)
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180110-20180130")
freqai.dk.load_all_pair_histories(timerange)
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
metadata = {"pair": "ADA/BTC"}
freqai.dd.set_pair_dict_info(metadata)
# freqai.dd.pair_dict = MagicMock()
data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130")
@ -184,9 +236,9 @@ def test_follow_mode(mocker, freqai_conf):
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd, freqai.live)
freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.live)
timerange = TimeRange.parse_timerange("20180110-20180130")
freqai.dk.load_all_pair_histories(timerange)
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
df = strategy.dp.get_pair_dataframe('ADA/BTC', '5m')
freqai.start_live(df, metadata, strategy, freqai.dk)
@ -207,9 +259,9 @@ def test_principal_component_analysis(mocker, freqai_conf):
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf, freqai.dd)
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180110-20180130")
freqai.dk.load_all_pair_histories(timerange)
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
freqai.dd.pair_dict = MagicMock()

View File

@ -1404,6 +1404,7 @@ def test_api_strategies(botclient):
'StrategyTestV3',
'StrategyTestV3Analysis',
'StrategyTestV3Futures',
'freqai_test_multimodel_strat',
'freqai_test_strat'
]}

View File

@ -0,0 +1,188 @@
import logging
from functools import reduce
import pandas as pd
import talib.abstract as ta
from pandas import DataFrame
from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy, merge_informative_pair
logger = logging.getLogger(__name__)
class freqai_test_multimodel_strat(IStrategy):
    """
    Test strategy showing how a user connects their own IFreqaiModel to a strategy
    when more than one prediction target is defined. Namely, the user uses:

        self.freqai.start(dataframe, metadata, self)

    to make predictions on their data. populate_any_indicators() automatically
    generates the variety of features indicated by the user in the
    canonical freqtrade configuration file under config['freqai'], and defines
    two labels ("&-s_close" and "&-s_range").
    """

    minimal_roi = {"0": 0.1, "240": -1}

    plot_config = {
        "main_plot": {},
        "subplots": {
            "prediction": {"prediction": {"color": "blue"}},
            "target_roi": {
                "target_roi": {"color": "brown"},
            },
            "do_predict": {
                "do_predict": {"color": "brown"},
            },
        },
    }

    process_only_new_candles = True
    stoploss = -0.05
    use_exit_signal = True
    startup_candle_count: int = 300
    can_short = False

    linear_roi_offset = DecimalParameter(
        0.00, 0.02, default=0.005, space="sell", optimize=False, load=True
    )
    max_roi_time_long = IntParameter(0, 800, default=400, space="sell", optimize=False, load=True)

    def informative_pairs(self):
        """
        List every (pair, timeframe) combination needed to build the features.

        :return: list of (pair, timeframe) tuples covering, for each configured
            timeframe, all whitelisted pairs followed by the correlated pairs that
            are not already whitelisted.
        """
        whitelist_pairs = self.dp.current_whitelist()
        corr_pairs = self.config["freqai"]["feature_parameters"]["include_corr_pairlist"]
        informative_pairs = []
        for tf in self.config["freqai"]["feature_parameters"]["include_timeframes"]:
            for pair in whitelist_pairs:
                informative_pairs.append((pair, tf))
            for pair in corr_pairs:
                if pair in whitelist_pairs:
                    continue  # avoid duplication
                informative_pairs.append((pair, tf))
        return informative_pairs

    def populate_any_indicators(
        self, metadata, pair, df, tf, informative=None, coin="", set_generalized_indicators=False
    ):
        """
        Function designed to automatically generate, name and merge features
        from user indicated timeframes in the configuration file. User controls the indicators
        passed to the training/prediction by prepending indicators with `'%-' + coin `
        (see convention below). I.e. user should not prepend any supporting metrics
        (e.g. bb_lowerband below) with % unless they explicitly want to pass that metric to the
        model.
        :params:
        :metadata: strategy metadata; not read by this implementation
        :pair: pair to be used as informative
        :df: strategy dataframe which will receive merges from informatives
        :tf: timeframe of the dataframe which will modify the feature names
        :informative: the dataframe associated with the informative pair; fetched from the
            dataprovider when None
        :coin: the name of the coin which will modify the feature names.
        :set_generalized_indicators: when True, also add the date based features and the
            "&-" prefixed labels to df
        :return: df with the informative features (and optionally labels) merged in
        """

        with self.freqai.lock:
            if informative is None:
                informative = self.dp.get_pair_dataframe(pair, tf)

            # first loop is automatically duplicating indicators for time periods
            for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]:

                t = int(t)
                informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t)
                informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t)
                # ADX takes its period through `timeperiod`, same as RSI/MFI above, so
                # that the feature really reflects the period named in its column.
                informative[f"%-{coin}adx-period_{t}"] = ta.ADX(informative, timeperiod=t)

            informative[f"%-{coin}pct-change"] = informative["close"].pct_change()
            informative[f"%-{coin}raw_volume"] = informative["volume"]
            informative[f"%-{coin}raw_price"] = informative["close"]

            indicators = [col for col in informative if col.startswith("%")]
            # This loop duplicates and shifts all indicators to add a sense of recency to data
            # (a shift of 0 would only duplicate the unshifted columns, so start at 1)
            shifted_candles = self.freqai_info["feature_parameters"]["include_shifted_candles"]
            for n in range(1, shifted_candles + 1):
                informative_shift = informative[indicators].shift(n)
                informative_shift = informative_shift.add_suffix("_shift-" + str(n))
                informative = pd.concat((informative, informative_shift), axis=1)

            df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True)
            # drop the raw candle columns of the informative timeframe after the merge
            skip_columns = [
                (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"]
            ]
            df = df.drop(columns=skip_columns)

            # Add generalized indicators here (because in live, it will call this
            # function to populate indicators during training). Notice how we ensure not to
            # add them multiple times
            if set_generalized_indicators:
                df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7
                df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25

                # user adds targets here by prepending them with &- (see convention below)
                # If user wishes to use multiple targets, a multioutput prediction model
                # needs to be used such as templates/CatboostPredictionMultiModel.py
                df["&-s_close"] = (
                    df["close"]
                    .shift(-self.freqai_info["feature_parameters"]["label_period_candles"])
                    .rolling(self.freqai_info["feature_parameters"]["label_period_candles"])
                    .mean()
                    / df["close"]
                    - 1
                )

                df["&-s_range"] = (
                    df["close"]
                    .shift(-self.freqai_info["feature_parameters"]["label_period_candles"])
                    .rolling(self.freqai_info["feature_parameters"]["label_period_candles"])
                    .max()
                    -
                    df["close"]
                    .shift(-self.freqai_info["feature_parameters"]["label_period_candles"])
                    .rolling(self.freqai_info["feature_parameters"]["label_period_candles"])
                    .min()
                )

        return df

    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        """
        Run FreqAI on the dataframe and derive the ROI thresholds used by the signals.

        :param dataframe: strategy dataframe for the current pair
        :param metadata: strategy metadata passed through to self.freqai.start()
        :return: dataframe returned by FreqAI with "target_roi" and "sell_roi" added
        """

        self.freqai_info = self.config["freqai"]

        # All indicators must be populated by populate_any_indicators() for live functionality
        # to work correctly.

        # the model will return 4 values, its prediction, an indication of whether or not the
        # prediction should be accepted, the target mean/std values from the labels used during
        # each training period.
        dataframe = self.freqai.start(dataframe, metadata, self)

        # thresholds sit 1.25 standard deviations above/below the "&-s_close" label mean
        dataframe["target_roi"] = dataframe["&-s_close_mean"] + dataframe["&-s_close_std"] * 1.25
        dataframe["sell_roi"] = dataframe["&-s_close_mean"] - dataframe["&-s_close_std"] * 1.25
        return dataframe

    def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
        """
        Flag long entries where the prediction exceeds "target_roi" and short entries
        where it falls below "sell_roi", in both cases only on rows with do_predict == 1.

        :param df: dataframe produced by populate_indicators()
        :param metadata: strategy metadata; not read by this implementation
        :return: df with "enter_long"/"enter_short" and "enter_tag" set on matching rows
        """

        enter_long_conditions = [df["do_predict"] == 1, df["&-s_close"] > df["target_roi"]]

        if enter_long_conditions:
            df.loc[
                reduce(lambda x, y: x & y, enter_long_conditions), ["enter_long", "enter_tag"]
            ] = (1, "long")

        enter_short_conditions = [df["do_predict"] == 1, df["&-s_close"] < df["sell_roi"]]

        if enter_short_conditions:
            df.loc[
                reduce(lambda x, y: x & y, enter_short_conditions), ["enter_short", "enter_tag"]
            ] = (1, "short")

        return df

    def populate_exit_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
        """
        Flag long exits where the prediction drops below a quarter of "sell_roi" and
        short exits where it rises above a quarter of "target_roi", in both cases only
        on rows with do_predict == 1.

        :param df: dataframe produced by populate_indicators()
        :param metadata: strategy metadata; not read by this implementation
        :return: df with "exit_long"/"exit_short" set to 1 on matching rows
        """
        exit_long_conditions = [df["do_predict"] == 1, df["&-s_close"] < df["sell_roi"] * 0.25]
        if exit_long_conditions:
            df.loc[reduce(lambda x, y: x & y, exit_long_conditions), "exit_long"] = 1

        exit_short_conditions = [df["do_predict"] == 1, df["&-s_close"] > df["target_roi"] * 0.25]
        if exit_short_conditions:
            df.loc[reduce(lambda x, y: x & y, exit_short_conditions), "exit_short"] = 1

        return df

View File

@ -34,7 +34,7 @@ def test_search_all_strategies_no_failed():
directory = Path(__file__).parent / "strats"
strategies = StrategyResolver.search_all_objects(directory, enum_failed=False)
assert isinstance(strategies, list)
assert len(strategies) == 8
assert len(strategies) == 9
assert isinstance(strategies[0], dict)
@ -42,10 +42,10 @@ def test_search_all_strategies_with_failed():
directory = Path(__file__).parent / "strats"
strategies = StrategyResolver.search_all_objects(directory, enum_failed=True)
assert isinstance(strategies, list)
assert len(strategies) == 9
assert len(strategies) == 10
# with enum_failed=True search_all_objects() shall find 2 good strategies
# and 1 which fails to load
assert len([x for x in strategies if x['class'] is not None]) == 8
assert len([x for x in strategies if x['class'] is not None]) == 9
assert len([x for x in strategies if x['class'] is None]) == 1