add config asserts, use .get method with default values for optional functionality, move data_cleaning_* to freqai_interface (away from user custom pred model) since it is controlled by config params.
This commit is contained in:
parent
dede128648
commit
e1c068ca66
@ -43,6 +43,7 @@ class FreqaiDataKitchen:
|
|||||||
self.data: Dict[Any, Any] = {}
|
self.data: Dict[Any, Any] = {}
|
||||||
self.data_dictionary: Dict[Any, Any] = {}
|
self.data_dictionary: Dict[Any, Any] = {}
|
||||||
self.config = config
|
self.config = config
|
||||||
|
self.assert_config(self.config, live)
|
||||||
self.freqai_config = config["freqai"]
|
self.freqai_config = config["freqai"]
|
||||||
self.predictions: npt.ArrayLike = np.array([])
|
self.predictions: npt.ArrayLike = np.array([])
|
||||||
self.do_predict: npt.ArrayLike = np.array([])
|
self.do_predict: npt.ArrayLike = np.array([])
|
||||||
@ -59,7 +60,7 @@ class FreqaiDataKitchen:
|
|||||||
self.svm_model: linear_model.SGDOneClassSVM = None
|
self.svm_model: linear_model.SGDOneClassSVM = None
|
||||||
if not self.live:
|
if not self.live:
|
||||||
self.full_timerange = self.create_fulltimerange(self.config["timerange"],
|
self.full_timerange = self.create_fulltimerange(self.config["timerange"],
|
||||||
self.freqai_config["train_period"]
|
self.freqai_config.get("train_period")
|
||||||
)
|
)
|
||||||
|
|
||||||
(self.training_timeranges, self.backtesting_timeranges) = self.split_timerange(
|
(self.training_timeranges, self.backtesting_timeranges) = self.split_timerange(
|
||||||
@ -68,14 +69,33 @@ class FreqaiDataKitchen:
|
|||||||
config["freqai"]["backtest_period"],
|
config["freqai"]["backtest_period"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def assert_config(self, config: Dict[str, Any], live: bool) -> None:
    """
    Verify that the mandatory FreqAI settings exist in the user configuration.

    The checks are raised explicitly (instead of using ``assert`` statements) so
    that they are still enforced when Python runs with ``-O``, which strips
    ``assert`` statements. ``AssertionError`` is kept as the exception type so
    existing callers observe the same failure type as before.

    :params:
    :config: full configuration dictionary; must contain a non-empty 'freqai' entry.
    :live: True when running live/dry. Only in that mode a fractional
    'backtest_period' is accepted.
    :raises AssertionError: when a required entry is missing/empty, or when a
    period that must be a whole number of days is not an int.
    """
    freqai = config.get('freqai')
    if not freqai:
        raise AssertionError("No Freqai parameters found in config file.")

    train_period = freqai.get('train_period')
    if not train_period:
        raise AssertionError("No Freqai train_period found in config file.")
    # Exact type comparison on purpose: floats and bools must both be rejected.
    if type(train_period) is not int:
        raise AssertionError("Can only train on full day period. "
                             "No fractional days permitted.")

    backtest_period = freqai.get('backtest_period')
    if not backtest_period:
        raise AssertionError("No Freqai backtest_period found in config file.")
    # NOTE(review): only the backtest_period type check is taken to be guarded by
    # `not live`; the checks below are assumed to apply in every mode - confirm.
    if not live and type(backtest_period) is not int:
        raise AssertionError("Can only backtest on full day backtest_period. "
                             "Only live/dry mode allows fractions of days.")

    if not freqai.get('identifier'):
        raise AssertionError("No Freqai identifier found in config file.")
    if not freqai.get('feature_parameters'):
        raise AssertionError("No Freqai feature_parameters found in config file.")
|
||||||
|
|
||||||
def set_paths(self) -> None:
|
def set_paths(self) -> None:
|
||||||
self.full_path = Path(self.config['user_data_dir'] /
|
self.full_path = Path(self.config['user_data_dir'] /
|
||||||
"models" /
|
"models" /
|
||||||
str(self.freqai_config['live_full_backtestrange'] +
|
str(self.freqai_config.get('live_full_backtestrange') +
|
||||||
self.freqai_config['identifier']))
|
self.freqai_config.get('identifier')))
|
||||||
|
|
||||||
self.model_path = Path(self.full_path / str("sub-train" + "-" +
|
self.model_path = Path(self.full_path / str("sub-train" + "-" +
|
||||||
str(self.freqai_config['live_trained_timerange'])))
|
str(self.freqai_config.get('live_trained_timerange'))))
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -117,7 +137,7 @@ class FreqaiDataKitchen:
|
|||||||
# do not want them having to edit the default save/load methods here. Below is an example
|
# do not want them having to edit the default save/load methods here. Below is an example
|
||||||
# of what we do NOT want.
|
# of what we do NOT want.
|
||||||
|
|
||||||
# if self.freqai_config['feature_parameters']['determine_statistical_distributions']:
|
# if self.freqai_config.get('feature_parameters', {}).get('determine_statistical_distributions'):
|
||||||
# self.data_dictionary["upper_quantiles"].to_pickle(
|
# self.data_dictionary["upper_quantiles"].to_pickle(
|
||||||
# save_path / str(self.model_filename + "_upper_quantiles.pkl")
|
# save_path / str(self.model_filename + "_upper_quantiles.pkl")
|
||||||
# )
|
# )
|
||||||
@ -147,7 +167,7 @@ class FreqaiDataKitchen:
|
|||||||
# do not want them having to edit the default save/load methods here. Below is an example
|
# do not want them having to edit the default save/load methods here. Below is an example
|
||||||
# of what we do NOT want.
|
# of what we do NOT want.
|
||||||
|
|
||||||
# if self.freqai_config['feature_parameters']['determine_statistical_distributions']:
|
# if self.freqai_config.get('feature_parameters', {}).get('determine_statistical_distributions'):
|
||||||
# self.data_dictionary["upper_quantiles"] = pd.read_pickle(
|
# self.data_dictionary["upper_quantiles"] = pd.read_pickle(
|
||||||
# self.model_path / str(self.model_filename + "_upper_quantiles.pkl")
|
# self.model_path / str(self.model_filename + "_upper_quantiles.pkl")
|
||||||
# )
|
# )
|
||||||
@ -193,15 +213,15 @@ class FreqaiDataKitchen:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
weights: npt.ArrayLike
|
weights: npt.ArrayLike
|
||||||
if self.config["freqai"]["feature_parameters"]["weight_factor"] > 0:
|
if self.freqai_config["feature_parameters"].get("weight_factor", 0) > 0:
|
||||||
weights = self.set_weights_higher_recent(len(filtered_dataframe))
|
weights = self.set_weights_higher_recent(len(filtered_dataframe))
|
||||||
else:
|
else:
|
||||||
weights = np.ones(len(filtered_dataframe))
|
weights = np.ones(len(filtered_dataframe))
|
||||||
|
|
||||||
if self.config["freqai"]["feature_parameters"]["stratify"] > 0:
|
if self.freqai_config["feature_parameters"].get("stratify", 0) > 0:
|
||||||
stratification = np.zeros(len(filtered_dataframe))
|
stratification = np.zeros(len(filtered_dataframe))
|
||||||
for i in range(1, len(stratification)):
|
for i in range(1, len(stratification)):
|
||||||
if i % self.config["freqai"]["feature_parameters"]["stratify"] == 0:
|
if i % self.freqai_config.get("feature_parameters", {}).get("stratify", 0) == 0:
|
||||||
stratification[i] = 1
|
stratification[i] = 1
|
||||||
|
|
||||||
(
|
(
|
||||||
@ -525,6 +545,14 @@ class FreqaiDataKitchen:
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def pca_transform(self, filtered_dataframe: DataFrame) -> None:
    """
    Run the stored PCA object on the supplied features and keep the result as
    the prediction features.

    :params:
    :filtered_dataframe: features handed to self.pca.transform(); its index is
    reused for the resulting component dataframe.

    The outcome is written to self.data_dictionary["prediction_features"] with one
    column per kept component, named "PC0", "PC1", ... up to
    self.data["n_kept_components"] - 1.
    """
    transformed = self.pca.transform(filtered_dataframe)
    component_names = [f"PC{idx}" for idx in range(self.data["n_kept_components"])]
    self.data_dictionary["prediction_features"] = pd.DataFrame(
        data=transformed,
        columns=component_names,
        index=filtered_dataframe.index,
    )
|
||||||
|
|
||||||
def compute_distances(self) -> float:
|
def compute_distances(self) -> float:
|
||||||
logger.info("computing average mean distance for all training points")
|
logger.info("computing average mean distance for all training points")
|
||||||
pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=-1)
|
pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=-1)
|
||||||
@ -675,7 +703,7 @@ class FreqaiDataKitchen:
|
|||||||
self.full_path = Path(
|
self.full_path = Path(
|
||||||
self.config["user_data_dir"]
|
self.config["user_data_dir"]
|
||||||
/ "models"
|
/ "models"
|
||||||
/ str(full_timerange + self.freqai_config["identifier"])
|
/ str(full_timerange + self.freqai_config.get("identifier"))
|
||||||
)
|
)
|
||||||
|
|
||||||
config_path = Path(self.config["config_files"][0])
|
config_path = Path(self.config["config_files"][0])
|
||||||
@ -696,13 +724,15 @@ class FreqaiDataKitchen:
|
|||||||
|
|
||||||
if trained_timerange.startts != 0:
|
if trained_timerange.startts != 0:
|
||||||
elapsed_time = (time - trained_timerange.stopts) / SECONDS_IN_DAY
|
elapsed_time = (time - trained_timerange.stopts) / SECONDS_IN_DAY
|
||||||
retrain = elapsed_time > self.freqai_config['backtest_period']
|
retrain = elapsed_time > self.freqai_config.get('backtest_period')
|
||||||
if retrain:
|
if retrain:
|
||||||
trained_timerange.startts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY
|
trained_timerange.startts += self.freqai_config.get(
|
||||||
trained_timerange.stopts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY
|
'backtest_period', 0) * SECONDS_IN_DAY
|
||||||
|
trained_timerange.stopts += self.freqai_config.get(
|
||||||
|
'backtest_period', 0) * SECONDS_IN_DAY
|
||||||
else: # user passed no live_trained_timerange in config
|
else: # user passed no live_trained_timerange in config
|
||||||
trained_timerange = TimeRange()
|
trained_timerange = TimeRange()
|
||||||
trained_timerange.startts = int(time - self.freqai_config['train_period'] *
|
trained_timerange.startts = int(time - self.freqai_config.get('train_period') *
|
||||||
SECONDS_IN_DAY)
|
SECONDS_IN_DAY)
|
||||||
trained_timerange.stopts = int(time)
|
trained_timerange.stopts = int(time)
|
||||||
retrain = True
|
retrain = True
|
||||||
@ -725,13 +755,13 @@ class FreqaiDataKitchen:
|
|||||||
|
|
||||||
exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
|
exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
|
||||||
self.config, validate=False)
|
self.config, validate=False)
|
||||||
pairs = self.freqai_config['corr_pairlist']
|
pairs = self.freqai_config.get('corr_pairlist', [])
|
||||||
if metadata['pair'] not in pairs:
|
if metadata['pair'] not in pairs:
|
||||||
pairs += metadata['pair'] # dont include pair twice
|
pairs += metadata['pair'] # dont include pair twice
|
||||||
# timerange = TimeRange.parse_timerange(new_timerange)
|
# timerange = TimeRange.parse_timerange(new_timerange)
|
||||||
|
|
||||||
refresh_backtest_ohlcv_data(
|
refresh_backtest_ohlcv_data(
|
||||||
exchange, pairs=pairs, timeframes=self.freqai_config['timeframes'],
|
exchange, pairs=pairs, timeframes=self.freqai_config.get('timeframes'),
|
||||||
datadir=self.config['datadir'], timerange=timerange,
|
datadir=self.config['datadir'], timerange=timerange,
|
||||||
new_pairs_days=self.config['new_pairs_days'],
|
new_pairs_days=self.config['new_pairs_days'],
|
||||||
erase=False, data_format=self.config['dataformat_ohlcv'],
|
erase=False, data_format=self.config['dataformat_ohlcv'],
|
||||||
@ -743,21 +773,22 @@ class FreqaiDataKitchen:
|
|||||||
DataFrame]:
|
DataFrame]:
|
||||||
corr_dataframes: Dict[Any, Any] = {}
|
corr_dataframes: Dict[Any, Any] = {}
|
||||||
base_dataframes: Dict[Any, Any] = {}
|
base_dataframes: Dict[Any, Any] = {}
|
||||||
pairs = self.freqai_config['corr_pairlist'] # + [metadata['pair']]
|
pairs = self.freqai_config.get('corr_pairlist', []) # + [metadata['pair']]
|
||||||
# timerange = TimeRange.parse_timerange(new_timerange)
|
# timerange = TimeRange.parse_timerange(new_timerange)
|
||||||
|
|
||||||
for tf in self.freqai_config['timeframes']:
|
for tf in self.freqai_config.get('timeframes'):
|
||||||
base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'],
|
base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'],
|
||||||
timeframe=tf,
|
timeframe=tf,
|
||||||
pair=metadata['pair'], timerange=timerange)
|
pair=metadata['pair'], timerange=timerange)
|
||||||
for p in pairs:
|
if pairs:
|
||||||
if metadata['pair'] in p:
|
for p in pairs:
|
||||||
continue # dont repeat anything from whitelist
|
if metadata['pair'] in p:
|
||||||
if p not in corr_dataframes:
|
continue # dont repeat anything from whitelist
|
||||||
corr_dataframes[p] = {}
|
if p not in corr_dataframes:
|
||||||
corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'],
|
corr_dataframes[p] = {}
|
||||||
timeframe=tf,
|
corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'],
|
||||||
pair=p, timerange=timerange)
|
timeframe=tf,
|
||||||
|
pair=p, timerange=timerange)
|
||||||
|
|
||||||
return corr_dataframes, base_dataframes
|
return corr_dataframes, base_dataframes
|
||||||
|
|
||||||
@ -767,23 +798,25 @@ class FreqaiDataKitchen:
|
|||||||
metadata: dict) -> DataFrame:
|
metadata: dict) -> DataFrame:
|
||||||
|
|
||||||
dataframe = base_dataframes[self.config['timeframe']]
|
dataframe = base_dataframes[self.config['timeframe']]
|
||||||
|
pairs = self.freqai_config.get("corr_pairlist", [])
|
||||||
|
|
||||||
for tf in self.freqai_config["timeframes"]:
|
for tf in self.freqai_config.get("timeframes"):
|
||||||
dataframe = strategy.populate_any_indicators(metadata['pair'],
|
dataframe = strategy.populate_any_indicators(metadata['pair'],
|
||||||
dataframe.copy(),
|
dataframe.copy(),
|
||||||
tf,
|
tf,
|
||||||
base_dataframes[tf],
|
base_dataframes[tf],
|
||||||
coin=metadata['pair'].split("/")[0] + "-"
|
coin=metadata['pair'].split("/")[0] + "-"
|
||||||
)
|
)
|
||||||
for i in self.freqai_config["corr_pairlist"]:
|
if pairs:
|
||||||
if metadata['pair'] in i:
|
for i in pairs:
|
||||||
continue # dont repeat anything from whitelist
|
if metadata['pair'] in i:
|
||||||
dataframe = strategy.populate_any_indicators(i,
|
continue # dont repeat anything from whitelist
|
||||||
dataframe.copy(),
|
dataframe = strategy.populate_any_indicators(i,
|
||||||
tf,
|
dataframe.copy(),
|
||||||
corr_dataframes[i][tf],
|
tf,
|
||||||
coin=i.split("/")[0] + "-"
|
corr_dataframes[i][tf],
|
||||||
)
|
coin=i.split("/")[0] + "-"
|
||||||
|
)
|
||||||
|
|
||||||
return dataframe
|
return dataframe
|
||||||
|
|
||||||
|
@ -20,7 +20,7 @@ from freqtrade.strategy.interface import IStrategy
|
|||||||
pd.options.mode.chained_assignment = None
|
pd.options.mode.chained_assignment = None
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# FIXME: suppress stdout for background training
|
# FIXME: suppress stdout for background training?
|
||||||
# class DummyFile(object):
|
# class DummyFile(object):
|
||||||
# def write(self, x): pass
|
# def write(self, x): pass
|
||||||
|
|
||||||
@ -51,6 +51,7 @@ class IFreqaiModel(ABC):
|
|||||||
def __init__(self, config: Dict[str, Any]) -> None:
|
def __init__(self, config: Dict[str, Any]) -> None:
|
||||||
|
|
||||||
self.config = config
|
self.config = config
|
||||||
|
self.assert_config(self.config)
|
||||||
self.freqai_info = config["freqai"]
|
self.freqai_info = config["freqai"]
|
||||||
self.data_split_parameters = config["freqai"]["data_split_parameters"]
|
self.data_split_parameters = config["freqai"]["data_split_parameters"]
|
||||||
self.model_training_parameters = config["freqai"]["model_training_parameters"]
|
self.model_training_parameters = config["freqai"]["model_training_parameters"]
|
||||||
@ -64,12 +65,25 @@ class IFreqaiModel(ABC):
|
|||||||
self.training_on_separate_thread = False
|
self.training_on_separate_thread = False
|
||||||
self.retrain = False
|
self.retrain = False
|
||||||
self.first = True
|
self.first = True
|
||||||
if self.freqai_info['live_trained_timerange']:
|
if self.freqai_info.get('live_trained_timerange'):
|
||||||
self.new_trained_timerange = TimeRange.parse_timerange(
|
self.new_trained_timerange = TimeRange.parse_timerange(
|
||||||
self.freqai_info['live_trained_timerange'])
|
self.freqai_info['live_trained_timerange'])
|
||||||
else:
|
else:
|
||||||
self.new_trained_timerange = TimeRange()
|
self.new_trained_timerange = TimeRange()
|
||||||
|
|
||||||
|
def assert_config(self, config: Dict[str, Any]) -> None:
    """
    Verify that the FreqAI sections needed by the model interface are configured.

    Explicit raises are used instead of ``assert`` statements so the validation
    is not removed when Python runs with ``-O``. ``AssertionError`` is kept as
    the exception type so the failure type seen by callers does not change.

    :params:
    :config: full configuration dictionary; must contain a non-empty 'freqai'
    entry holding non-empty 'data_split_parameters', 'model_training_parameters'
    and 'feature_parameters' entries.
    :raises AssertionError: when any of those entries is missing or empty.
    """
    freqai = config.get('freqai')
    if not freqai:
        raise AssertionError("No Freqai parameters found in config file.")

    # Checked in the same order as before, so the first missing section is reported.
    required_sections = (
        'data_split_parameters',
        'model_training_parameters',
        'feature_parameters',
    )
    for section in required_sections:
        if not freqai.get(section):
            raise AssertionError(f"No Freqai {section} found in config file.")
|
||||||
|
|
||||||
def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame:
|
def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame:
|
||||||
"""
|
"""
|
||||||
Entry point to the FreqaiModel, it will train a new model if
|
Entry point to the FreqaiModel, it will train a new model if
|
||||||
@ -192,55 +206,30 @@ class IFreqaiModel(ABC):
|
|||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def train(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Any:
|
|
||||||
"""
|
|
||||||
Filter the training data and train a model to it. Train makes heavy use of the datahandler
|
|
||||||
for storing, saving, loading, and analyzing the data.
|
|
||||||
:params:
|
|
||||||
:unfiltered_dataframe: Full dataframe for the current training period
|
|
||||||
:metadata: pair metadata from strategy.
|
|
||||||
:returns:
|
|
||||||
:model: Trained model which can be used to inference (self.predict)
|
|
||||||
"""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def fit(self) -> Any:
|
|
||||||
"""
|
|
||||||
Most regressors use the same function names and arguments e.g. user
|
|
||||||
can drop in LGBMRegressor in place of CatBoostRegressor and all data
|
|
||||||
management will be properly handled by Freqai.
|
|
||||||
:params:
|
|
||||||
:data_dictionary: the dictionary constructed by DataHandler to hold
|
|
||||||
all the training and test data/labels.
|
|
||||||
"""
|
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def predict(self, dataframe: DataFrame, metadata: dict) -> Tuple[npt.ArrayLike, npt.ArrayLike]:
|
|
||||||
"""
|
|
||||||
Filter the prediction features data and predict with it.
|
|
||||||
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
|
|
||||||
:return:
|
|
||||||
:predictions: np.array of predictions
|
|
||||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
|
||||||
data (NaNs) or felt uncertain about data (PCA and DI index)
|
|
||||||
"""
|
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def data_cleaning_train(self) -> None:
|
def data_cleaning_train(self) -> None:
|
||||||
"""
|
"""
|
||||||
User can add data analysis and cleaning here.
|
Base data cleaning method for train
|
||||||
Any function inside this method should drop training data points from the filtered_dataframe
|
Any function inside this method should drop training data points from the filtered_dataframe
|
||||||
based on user decided logic. See FreqaiDataKitchen::remove_outliers() for an example
|
based on user decided logic. See FreqaiDataKitchen::remove_outliers() for an example
|
||||||
of how outlier data points are dropped from the dataframe used for training.
|
of how outlier data points are dropped from the dataframe used for training.
|
||||||
"""
|
"""
|
||||||
|
if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'):
|
||||||
|
self.dh.principal_component_analysis()
|
||||||
|
|
||||||
@abstractmethod
|
# if self.feature_parameters["determine_statistical_distributions"]:
|
||||||
def data_cleaning_predict(self) -> None:
|
# self.dh.determine_statistical_distributions()
|
||||||
|
# if self.feature_parameters["remove_outliers"]:
|
||||||
|
# self.dh.remove_outliers(predict=False)
|
||||||
|
|
||||||
|
if self.freqai_info.get('feature_parameters', {}).get('use_SVM_to_remove_outliers'):
|
||||||
|
self.dh.use_SVM_to_remove_outliers(predict=False)
|
||||||
|
|
||||||
|
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold'):
|
||||||
|
self.dh.data["avg_mean_dist"] = self.dh.compute_distances()
|
||||||
|
|
||||||
|
def data_cleaning_predict(self, filtered_dataframe: DataFrame) -> None:
|
||||||
"""
|
"""
|
||||||
User can add data analysis and cleaning here.
|
Base data cleaning method for predict.
|
||||||
These functions each modify self.dh.do_predict, which is a dataframe with equal length
|
These functions each modify self.dh.do_predict, which is a dataframe with equal length
|
||||||
to the number of candles coming from and returning to the strategy. Inside do_predict,
|
to the number of candles coming from and returning to the strategy. Inside do_predict,
|
||||||
1 allows prediction and < 0 signals to the strategy that the model is not confident in
|
1 allows prediction and < 0 signals to the strategy that the model is not confident in
|
||||||
@ -249,6 +238,19 @@ class IFreqaiModel(ABC):
|
|||||||
of how the do_predict vector is modified. do_predict is ultimately passed back to strategy
|
of how the do_predict vector is modified. do_predict is ultimately passed back to strategy
|
||||||
for buy signals.
|
for buy signals.
|
||||||
"""
|
"""
|
||||||
|
if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'):
|
||||||
|
self.dh.pca_transform()
|
||||||
|
|
||||||
|
# if self.feature_parameters["determine_statistical_distributions"]:
|
||||||
|
# self.dh.determine_statistical_distributions()
|
||||||
|
# if self.feature_parameters["remove_outliers"]:
|
||||||
|
# self.dh.remove_outliers(predict=True) # creates dropped index
|
||||||
|
|
||||||
|
if self.freqai_info.get('feature_parameters', {}).get('use_SVM_to_remove_outliers'):
|
||||||
|
self.dh.use_SVM_to_remove_outliers(predict=True)
|
||||||
|
|
||||||
|
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold'):
|
||||||
|
self.dh.check_if_pred_in_training_spaces() # sets do_predict
|
||||||
|
|
||||||
def model_exists(self, pair: str, training_timerange: str) -> bool:
|
def model_exists(self, pair: str, training_timerange: str) -> bool:
|
||||||
"""
|
"""
|
||||||
@ -303,3 +305,42 @@ class IFreqaiModel(ABC):
|
|||||||
self.model = self.train(unfiltered_dataframe, metadata)
|
self.model = self.train(unfiltered_dataframe, metadata)
|
||||||
self.dh.save_data(self.model)
|
self.dh.save_data(self.model)
|
||||||
self.retrain = False
|
self.retrain = False
|
||||||
|
|
||||||
|
# Methods which are overridden by user made prediction models.
|
||||||
|
# See freqai/prediction_models/CatboostPredictionModel.py for an example.
|
||||||
|
|
||||||
|
@abstractmethod
def train(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Any:
    """
    Abstract hook: clean the training data and fit a model on it.
    Implementations are expected to lean on the datahandler for storing,
    saving, loading and analyzing the data.
    :params:
    :unfiltered_dataframe: complete dataframe of the current training period
    :metadata: pair metadata coming from the strategy.
    :returns:
    :model: the trained model, usable for inference in self.predict
    """
|
||||||
|
|
||||||
|
@abstractmethod
def fit(self) -> Any:
    """
    Abstract hook for fitting the regressor.
    Most regressors share function names and arguments, e.g. a user can swap
    CatBoostRegressor for LGBMRegressor and Freqai still takes care of all
    data management.
    :params:
    :data_dictionary: dictionary built by the DataHandler that holds all
    training and test data/labels.
    """
    return None
|
||||||
|
|
||||||
|
@abstractmethod
def predict(self, dataframe: DataFrame, metadata: dict) -> Tuple[npt.ArrayLike, npt.ArrayLike]:
    """
    Abstract hook: clean the prediction features and run the model on them.
    :params:
    :dataframe: full, unfiltered dataframe of the current backtest period.
    :metadata: pair metadata coming from the strategy.
    :return:
    :predictions: np.array holding the predictions
    :do_predict: np.array of 1s and 0s marking the places where freqai had to
    drop data (NaNs) or was uncertain about the data (PCA and DI index)
    """
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Any, Dict, Tuple
|
from typing import Any, Dict, Tuple
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
from catboost import CatBoostRegressor, Pool
|
from catboost import CatBoostRegressor, Pool
|
||||||
from pandas import DataFrame
|
from pandas import DataFrame
|
||||||
|
|
||||||
@ -149,7 +148,7 @@ class CatboostPredictionModel(IFreqaiModel):
|
|||||||
based on user decided logic. See FreqaiDataKitchen::remove_outliers() for an example
|
based on user decided logic. See FreqaiDataKitchen::remove_outliers() for an example
|
||||||
of how outlier data points are dropped from the dataframe used for training.
|
of how outlier data points are dropped from the dataframe used for training.
|
||||||
"""
|
"""
|
||||||
if self.feature_parameters["principal_component_analysis"]:
|
if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'):
|
||||||
self.dh.principal_component_analysis()
|
self.dh.principal_component_analysis()
|
||||||
|
|
||||||
# if self.feature_parameters["determine_statistical_distributions"]:
|
# if self.feature_parameters["determine_statistical_distributions"]:
|
||||||
@ -157,9 +156,10 @@ class CatboostPredictionModel(IFreqaiModel):
|
|||||||
# if self.feature_parameters["remove_outliers"]:
|
# if self.feature_parameters["remove_outliers"]:
|
||||||
# self.dh.remove_outliers(predict=False)
|
# self.dh.remove_outliers(predict=False)
|
||||||
|
|
||||||
if self.feature_parameters["use_SVM_to_remove_outliers"]:
|
if self.freqai_info.get('feature_parameters', {}).get('use_SVM_to_remove_outliers'):
|
||||||
self.dh.use_SVM_to_remove_outliers(predict=False)
|
self.dh.use_SVM_to_remove_outliers(predict=False)
|
||||||
if self.feature_parameters["DI_threshold"]:
|
|
||||||
|
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold'):
|
||||||
self.dh.data["avg_mean_dist"] = self.dh.compute_distances()
|
self.dh.data["avg_mean_dist"] = self.dh.compute_distances()
|
||||||
|
|
||||||
def data_cleaning_predict(self, filtered_dataframe: DataFrame) -> None:
|
def data_cleaning_predict(self, filtered_dataframe: DataFrame) -> None:
|
||||||
@ -173,21 +173,16 @@ class CatboostPredictionModel(IFreqaiModel):
|
|||||||
of how the do_predict vector is modified. do_predict is ultimately passed back to strategy
|
of how the do_predict vector is modified. do_predict is ultimately passed back to strategy
|
||||||
for buy signals.
|
for buy signals.
|
||||||
"""
|
"""
|
||||||
if self.feature_parameters["principal_component_analysis"]:
|
if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'):
|
||||||
pca_components = self.dh.pca.transform(filtered_dataframe)
|
self.dh.pca_transform()
|
||||||
self.dh.data_dictionary["prediction_features"] = pd.DataFrame(
|
|
||||||
data=pca_components,
|
|
||||||
columns=["PC" + str(i) for i in range(0, self.dh.data["n_kept_components"])],
|
|
||||||
index=filtered_dataframe.index,
|
|
||||||
)
|
|
||||||
|
|
||||||
# if self.feature_parameters["determine_statistical_distributions"]:
|
# if self.feature_parameters["determine_statistical_distributions"]:
|
||||||
# self.dh.determine_statistical_distributions()
|
# self.dh.determine_statistical_distributions()
|
||||||
# if self.feature_parameters["remove_outliers"]:
|
# if self.feature_parameters["remove_outliers"]:
|
||||||
# self.dh.remove_outliers(predict=True) # creates dropped index
|
# self.dh.remove_outliers(predict=True) # creates dropped index
|
||||||
|
|
||||||
if self.feature_parameters["use_SVM_to_remove_outliers"]:
|
if self.freqai_info.get('feature_parameters', {}).get('use_SVM_to_remove_outliers'):
|
||||||
self.dh.use_SVM_to_remove_outliers(predict=True)
|
self.dh.use_SVM_to_remove_outliers(predict=True)
|
||||||
|
|
||||||
if self.feature_parameters["DI_threshold"]:
|
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold'):
|
||||||
self.dh.check_if_pred_in_training_spaces() # sets do_predict
|
self.dh.check_if_pred_in_training_spaces() # sets do_predict
|
||||||
|
@ -207,7 +207,7 @@ class Backtesting:
|
|||||||
if self.config.get('freqai') is not None:
|
if self.config.get('freqai') is not None:
|
||||||
self.required_startup += int((self.config.get('freqai', {}).get('train_period') *
|
self.required_startup += int((self.config.get('freqai', {}).get('train_period') *
|
||||||
86400) / timeframe_to_seconds(self.config['timeframe']))
|
86400) / timeframe_to_seconds(self.config['timeframe']))
|
||||||
logger.info("Increasing startup_candle_count for freqai to %s", self.required_startup)
|
logger.info(f'Increasing startup_candle_count for freqai to {self.required_startup}')
|
||||||
self.config['startup_candle_count'] = self.required_startup
|
self.config['startup_candle_count'] = self.required_startup
|
||||||
|
|
||||||
data = history.load_data(
|
data = history.load_data(
|
||||||
|
Loading…
Reference in New Issue
Block a user