merge develop into add-single-prec
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
import copy
|
||||
import logging
|
||||
import shutil
|
||||
from datetime import datetime, timezone
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from math import cos, sin
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Tuple
|
||||
@@ -80,25 +80,32 @@ class FreqaiDataKitchen:
|
||||
self.svm_model: linear_model.SGDOneClassSVM = None
|
||||
self.keras: bool = self.freqai_config.get("keras", False)
|
||||
self.set_all_pairs()
|
||||
if not self.live:
|
||||
if not self.config["timerange"]:
|
||||
raise OperationalException(
|
||||
'Please pass --timerange if you intend to use FreqAI for backtesting.')
|
||||
self.full_timerange = self.create_fulltimerange(
|
||||
self.config["timerange"], self.freqai_config.get("train_period_days", 0)
|
||||
)
|
||||
self.backtest_live_models = config.get("freqai_backtest_live_models", False)
|
||||
|
||||
(self.training_timeranges, self.backtesting_timeranges) = self.split_timerange(
|
||||
self.full_timerange,
|
||||
config["freqai"]["train_period_days"],
|
||||
config["freqai"]["backtest_period_days"],
|
||||
)
|
||||
if not self.live:
|
||||
self.full_path = self.get_full_models_path(self.config)
|
||||
|
||||
if self.backtest_live_models:
|
||||
if self.pair:
|
||||
self.set_timerange_from_ready_models()
|
||||
(self.training_timeranges,
|
||||
self.backtesting_timeranges) = self.split_timerange_live_models()
|
||||
else:
|
||||
self.full_timerange = self.create_fulltimerange(
|
||||
self.config["timerange"], self.freqai_config.get("train_period_days", 0)
|
||||
)
|
||||
(self.training_timeranges, self.backtesting_timeranges) = self.split_timerange(
|
||||
self.full_timerange,
|
||||
config["freqai"]["train_period_days"],
|
||||
config["freqai"]["backtest_period_days"],
|
||||
)
|
||||
|
||||
self.data['extra_returns_per_train'] = self.freqai_config.get('extra_returns_per_train', {})
|
||||
self.thread_count = self.freqai_config.get("data_kitchen_thread_count", -1)
|
||||
self.train_dates: DataFrame = pd.DataFrame()
|
||||
self.unique_classes: Dict[str, list] = {}
|
||||
self.unique_class_list: list = []
|
||||
self.backtest_live_models_data: Dict[str, Any] = {}
|
||||
|
||||
def set_paths(
|
||||
self,
|
||||
@@ -110,10 +117,7 @@ class FreqaiDataKitchen:
|
||||
:param metadata: dict = strategy furnished pair metadata
|
||||
:param trained_timestamp: int = timestamp of most recent training
|
||||
"""
|
||||
self.full_path = Path(
|
||||
self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier"))
|
||||
)
|
||||
|
||||
self.full_path = self.get_full_models_path(self.config)
|
||||
self.data_path = Path(
|
||||
self.full_path
|
||||
/ f"sub-train-{pair.split('/')[0]}_{trained_timestamp}"
|
||||
@@ -244,7 +248,7 @@ class FreqaiDataKitchen:
|
||||
self.data["filter_drop_index_training"] = drop_index
|
||||
|
||||
else:
|
||||
if len(self.data['constant_features_list']):
|
||||
if 'constant_features_list' in self.data and len(self.data['constant_features_list']):
|
||||
filtered_df = self.check_pred_labels(filtered_df)
|
||||
# we are backtesting so we need to preserve row number to send back to strategy,
|
||||
# so now we use do_predict to avoid any prediction based on a NaN
|
||||
@@ -455,6 +459,29 @@ class FreqaiDataKitchen:
|
||||
# print(tr_training_list, tr_backtesting_list)
|
||||
return tr_training_list_timerange, tr_backtesting_list_timerange
|
||||
|
||||
def split_timerange_live_models(
|
||||
self
|
||||
) -> Tuple[list, list]:
|
||||
|
||||
tr_backtesting_list_timerange = []
|
||||
asset = self.pair.split("/")[0]
|
||||
if asset not in self.backtest_live_models_data["assets_end_dates"]:
|
||||
raise OperationalException(
|
||||
f"Model not available for pair {self.pair}. "
|
||||
"Please, try again after removing this pair from the configuration file."
|
||||
)
|
||||
asset_data = self.backtest_live_models_data["assets_end_dates"][asset]
|
||||
backtesting_timerange = self.backtest_live_models_data["backtesting_timerange"]
|
||||
model_end_dates = [x for x in asset_data]
|
||||
model_end_dates.append(backtesting_timerange.stopts)
|
||||
model_end_dates.sort()
|
||||
for index, item in enumerate(model_end_dates):
|
||||
if len(model_end_dates) > (index + 1):
|
||||
tr_to_add = TimeRange("date", "date", item, model_end_dates[index + 1])
|
||||
tr_backtesting_list_timerange.append(tr_to_add)
|
||||
|
||||
return tr_backtesting_list_timerange, tr_backtesting_list_timerange
|
||||
|
||||
def slice_dataframe(self, timerange: TimeRange, df: DataFrame) -> DataFrame:
|
||||
"""
|
||||
Given a full dataframe, extract the user desired window
|
||||
@@ -962,8 +989,10 @@ class FreqaiDataKitchen:
|
||||
append_df[label] = predictions[label]
|
||||
if append_df[label].dtype == object:
|
||||
continue
|
||||
append_df[f"{label}_mean"] = self.data["labels_mean"][label]
|
||||
append_df[f"{label}_std"] = self.data["labels_std"][label]
|
||||
if "labels_mean" in self.data:
|
||||
append_df[f"{label}_mean"] = self.data["labels_mean"][label]
|
||||
if "labels_std" in self.data:
|
||||
append_df[f"{label}_std"] = self.data["labels_std"][label]
|
||||
|
||||
for extra_col in self.data["extra_returns_per_train"]:
|
||||
append_df[f"{extra_col}"] = self.data["extra_returns_per_train"][extra_col]
|
||||
@@ -1031,11 +1060,6 @@ class FreqaiDataKitchen:
|
||||
start = datetime.fromtimestamp(backtest_timerange.startts, tz=timezone.utc)
|
||||
stop = datetime.fromtimestamp(backtest_timerange.stopts, tz=timezone.utc)
|
||||
full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
|
||||
|
||||
self.full_path = Path(
|
||||
self.config["user_data_dir"] / "models" / f"{self.freqai_config['identifier']}"
|
||||
)
|
||||
|
||||
config_path = Path(self.config["config_files"][0])
|
||||
|
||||
if not self.full_path.is_dir():
|
||||
@@ -1118,15 +1142,15 @@ class FreqaiDataKitchen:
|
||||
|
||||
return retrain, trained_timerange, data_load_timerange
|
||||
|
||||
def set_new_model_names(self, pair: str, trained_timerange: TimeRange):
|
||||
def set_new_model_names(self, pair: str, timestamp_id: int):
|
||||
|
||||
coin, _ = pair.split("/")
|
||||
self.data_path = Path(
|
||||
self.full_path
|
||||
/ f"sub-train-{pair.split('/')[0]}_{int(trained_timerange.stopts)}"
|
||||
/ f"sub-train-{pair.split('/')[0]}_{timestamp_id}"
|
||||
)
|
||||
|
||||
self.model_filename = f"cb_{coin.lower()}_{int(trained_timerange.stopts)}"
|
||||
self.model_filename = f"cb_{coin.lower()}_{timestamp_id}"
|
||||
|
||||
def set_all_pairs(self) -> None:
|
||||
|
||||
@@ -1153,11 +1177,13 @@ class FreqaiDataKitchen:
|
||||
pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
|
||||
|
||||
for pair in pairs:
|
||||
pair = pair.replace(':', '') # lightgbm doesnt like colons
|
||||
valid_strs = [f"%-{pair}", f"%{pair}", f"%_{pair}"]
|
||||
pair_cols = [col for col in dataframe.columns if
|
||||
any(substr in col for substr in valid_strs)]
|
||||
pair_cols.insert(0, 'date')
|
||||
corr_dataframes[pair] = dataframe.filter(pair_cols, axis=1)
|
||||
if pair_cols:
|
||||
pair_cols.insert(0, 'date')
|
||||
corr_dataframes[pair] = dataframe.filter(pair_cols, axis=1)
|
||||
|
||||
return corr_dataframes
|
||||
|
||||
@@ -1175,8 +1201,9 @@ class FreqaiDataKitchen:
|
||||
ready for training
|
||||
"""
|
||||
pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
|
||||
|
||||
current_pair = current_pair.replace(':', '')
|
||||
for pair in pairs:
|
||||
pair = pair.replace(':', '') # lightgbm doesnt work with colons
|
||||
if current_pair != pair:
|
||||
dataframe = dataframe.merge(corr_dataframes[pair], how='left', on='date')
|
||||
|
||||
@@ -1246,6 +1273,8 @@ class FreqaiDataKitchen:
|
||||
|
||||
self.get_unique_classes_from_labels(dataframe)
|
||||
|
||||
dataframe = self.remove_special_chars_from_feature_names(dataframe)
|
||||
|
||||
if self.freqai_config.get('convert_df_to_float32', False):
|
||||
dataframe = self.reduce_dataframe_footprint(dataframe)
|
||||
|
||||
@@ -1317,11 +1346,11 @@ class FreqaiDataKitchen:
|
||||
|
||||
def check_if_backtest_prediction_is_valid(
|
||||
self,
|
||||
length_backtesting_dataframe: int
|
||||
len_backtest_df: int
|
||||
) -> bool:
|
||||
"""
|
||||
Check if a backtesting prediction already exists and if the predictions
|
||||
to append has the same size of backtesting dataframe slice
|
||||
to append have the same size as the backtesting dataframe slice
|
||||
:param length_backtesting_dataframe: Length of backtesting dataframe slice
|
||||
:return:
|
||||
:boolean: whether the prediction file is valid.
|
||||
@@ -1335,7 +1364,7 @@ class FreqaiDataKitchen:
|
||||
|
||||
if file_exists:
|
||||
append_df = self.get_backtesting_prediction()
|
||||
if len(append_df) == length_backtesting_dataframe:
|
||||
if len(append_df) == len_backtest_df:
|
||||
logger.info(f"Found backtesting prediction file at {path_to_predictionfile}")
|
||||
return True
|
||||
else:
|
||||
@@ -1348,6 +1377,122 @@ class FreqaiDataKitchen:
|
||||
)
|
||||
return False
|
||||
|
||||
def set_timerange_from_ready_models(self):
|
||||
backtesting_timerange, \
|
||||
assets_end_dates = (
|
||||
self.get_timerange_and_assets_end_dates_from_ready_models(self.full_path))
|
||||
|
||||
self.backtest_live_models_data = {
|
||||
"backtesting_timerange": backtesting_timerange,
|
||||
"assets_end_dates": assets_end_dates
|
||||
}
|
||||
return
|
||||
|
||||
def get_full_models_path(self, config: Config) -> Path:
|
||||
"""
|
||||
Returns default FreqAI model path
|
||||
:param config: Configuration dictionary
|
||||
"""
|
||||
freqai_config: Dict[str, Any] = config["freqai"]
|
||||
return Path(
|
||||
config["user_data_dir"] / "models" / str(freqai_config.get("identifier"))
|
||||
)
|
||||
|
||||
def get_timerange_and_assets_end_dates_from_ready_models(
|
||||
self, models_path: Path) -> Tuple[TimeRange, Dict[str, Any]]:
|
||||
"""
|
||||
Returns timerange information based on a FreqAI model directory
|
||||
:param models_path: FreqAI model path
|
||||
|
||||
:return: a Tuple with (Timerange calculated from directory and
|
||||
a Dict with pair and model end training dates info)
|
||||
"""
|
||||
all_models_end_dates = []
|
||||
assets_end_dates: Dict[str, Any] = self.get_assets_timestamps_training_from_ready_models(
|
||||
models_path)
|
||||
for key in assets_end_dates:
|
||||
for model_end_date in assets_end_dates[key]:
|
||||
if model_end_date not in all_models_end_dates:
|
||||
all_models_end_dates.append(model_end_date)
|
||||
|
||||
if len(all_models_end_dates) == 0:
|
||||
raise OperationalException(
|
||||
'At least 1 saved model is required to '
|
||||
'run backtest with the freqai-backtest-live-models option'
|
||||
)
|
||||
|
||||
if len(all_models_end_dates) == 1:
|
||||
logger.warning(
|
||||
"Only 1 model was found. Backtesting will run with the "
|
||||
"timerange from the end of the training date to the current date"
|
||||
)
|
||||
|
||||
finish_timestamp = int(datetime.now(tz=timezone.utc).timestamp())
|
||||
if len(all_models_end_dates) > 1:
|
||||
# After last model end date, use the same period from previous model
|
||||
# to finish the backtest
|
||||
all_models_end_dates.sort(reverse=True)
|
||||
finish_timestamp = all_models_end_dates[0] + \
|
||||
(all_models_end_dates[0] - all_models_end_dates[1])
|
||||
|
||||
all_models_end_dates.append(finish_timestamp)
|
||||
all_models_end_dates.sort()
|
||||
start_date = (datetime(*datetime.fromtimestamp(min(all_models_end_dates),
|
||||
timezone.utc).timetuple()[:3], tzinfo=timezone.utc))
|
||||
end_date = (datetime(*datetime.fromtimestamp(max(all_models_end_dates),
|
||||
timezone.utc).timetuple()[:3], tzinfo=timezone.utc))
|
||||
|
||||
# add 1 day to string timerange to ensure BT module will load all dataframe data
|
||||
end_date = end_date + timedelta(days=1)
|
||||
backtesting_timerange = TimeRange(
|
||||
'date', 'date', int(start_date.timestamp()), int(end_date.timestamp())
|
||||
)
|
||||
return backtesting_timerange, assets_end_dates
|
||||
|
||||
def get_assets_timestamps_training_from_ready_models(
|
||||
self, models_path: Path) -> Dict[str, Any]:
|
||||
"""
|
||||
Scan the models path and returns all assets end training dates (timestamp)
|
||||
:param models_path: FreqAI model path
|
||||
|
||||
:return: a Dict with asset and model end training dates info
|
||||
"""
|
||||
assets_end_dates: Dict[str, Any] = {}
|
||||
if not models_path.is_dir():
|
||||
raise OperationalException(
|
||||
'Model folders not found. Saved models are required '
|
||||
'to run backtest with the freqai-backtest-live-models option'
|
||||
)
|
||||
for model_dir in models_path.iterdir():
|
||||
if str(model_dir.name).startswith("sub-train"):
|
||||
model_end_date = int(model_dir.name.split("_")[1])
|
||||
asset = model_dir.name.split("_")[0].replace("sub-train-", "")
|
||||
model_file_name = (
|
||||
f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}"
|
||||
"_model.joblib"
|
||||
)
|
||||
|
||||
model_path_file = Path(model_dir / model_file_name)
|
||||
if model_path_file.is_file():
|
||||
if asset not in assets_end_dates:
|
||||
assets_end_dates[asset] = []
|
||||
assets_end_dates[asset].append(model_end_date)
|
||||
|
||||
return assets_end_dates
|
||||
|
||||
def remove_special_chars_from_feature_names(self, dataframe: pd.DataFrame) -> pd.DataFrame:
|
||||
"""
|
||||
Remove all special characters from feature strings (:)
|
||||
:param dataframe: the dataframe that just finished indicator population. (unfiltered)
|
||||
:return: dataframe with cleaned featrue names
|
||||
"""
|
||||
|
||||
spec_chars = [':']
|
||||
for c in spec_chars:
|
||||
dataframe.columns = dataframe.columns.str.replace(c, "")
|
||||
|
||||
return dataframe
|
||||
|
||||
def reduce_dataframe_footprint(self, df: DataFrame) -> DataFrame:
|
||||
"""
|
||||
Ensure all values are float32
|
||||
|
||||
Reference in New Issue
Block a user