Merge branch 'develop' into spice-rack
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from time import time
|
||||
from typing import Any, Tuple
|
||||
|
||||
import numpy as np
|
||||
@@ -32,7 +33,9 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
:model: Trained model which can be used to inference (self.predict)
|
||||
"""
|
||||
|
||||
logger.info("-------------------- Starting training " f"{pair} --------------------")
|
||||
logger.info(f"-------------------- Starting training {pair} --------------------")
|
||||
|
||||
start_time = time()
|
||||
|
||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||
features_filtered, labels_filtered = dk.filter_features(
|
||||
@@ -45,10 +48,10 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
logger.info(f"-------------------- Training on data from {start_date} to "
|
||||
f"{end_date}--------------------")
|
||||
f"{end_date} --------------------")
|
||||
# split data into train/test data.
|
||||
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
|
||||
if not self.freqai_info.get('fit_live_predictions', 0) or not self.live:
|
||||
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
|
||||
dk.fit_labels()
|
||||
# normalize all data based on train_dataset only
|
||||
data_dictionary = dk.normalize_data(data_dictionary)
|
||||
@@ -57,13 +60,16 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
self.data_cleaning_train(dk)
|
||||
|
||||
logger.info(
|
||||
f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
|
||||
f"Training model on {len(dk.data_dictionary['train_features'].columns)} features"
|
||||
)
|
||||
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
|
||||
logger.info(f"Training model on {len(data_dictionary['train_features'])} data points")
|
||||
|
||||
model = self.fit(data_dictionary, dk)
|
||||
|
||||
logger.info(f"--------------------done training {pair}--------------------")
|
||||
end_time = time()
|
||||
|
||||
logger.info(f"-------------------- Done training {pair} "
|
||||
f"({end_time - start_time:.2f} secs) --------------------")
|
||||
|
||||
return model
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from time import time
|
||||
from typing import Any, Tuple
|
||||
|
||||
import numpy as np
|
||||
@@ -31,7 +32,9 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
:model: Trained model which can be used to inference (self.predict)
|
||||
"""
|
||||
|
||||
logger.info("-------------------- Starting training " f"{pair} --------------------")
|
||||
logger.info(f"-------------------- Starting training {pair} --------------------")
|
||||
|
||||
start_time = time()
|
||||
|
||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||
features_filtered, labels_filtered = dk.filter_features(
|
||||
@@ -44,10 +47,10 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
logger.info(f"-------------------- Training on data from {start_date} to "
|
||||
f"{end_date}--------------------")
|
||||
f"{end_date} --------------------")
|
||||
# split data into train/test data.
|
||||
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
|
||||
if not self.freqai_info.get('fit_live_predictions', 0) or not self.live:
|
||||
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
|
||||
dk.fit_labels()
|
||||
# normalize all data based on train_dataset only
|
||||
data_dictionary = dk.normalize_data(data_dictionary)
|
||||
@@ -56,13 +59,16 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
self.data_cleaning_train(dk)
|
||||
|
||||
logger.info(
|
||||
f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
|
||||
f"Training model on {len(dk.data_dictionary['train_features'].columns)} features"
|
||||
)
|
||||
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
|
||||
logger.info(f"Training model on {len(data_dictionary['train_features'])} data points")
|
||||
|
||||
model = self.fit(data_dictionary, dk)
|
||||
|
||||
logger.info(f"--------------------done training {pair}--------------------")
|
||||
end_time = time()
|
||||
|
||||
logger.info(f"-------------------- Done training {pair} "
|
||||
f"({end_time - start_time:.2f} secs) --------------------")
|
||||
|
||||
return model
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from time import time
|
||||
from typing import Any
|
||||
|
||||
from pandas import DataFrame
|
||||
@@ -28,7 +29,9 @@ class BaseTensorFlowModel(IFreqaiModel):
|
||||
:model: Trained model which can be used to inference (self.predict)
|
||||
"""
|
||||
|
||||
logger.info("-------------------- Starting training " f"{pair} --------------------")
|
||||
logger.info(f"-------------------- Starting training {pair} --------------------")
|
||||
|
||||
start_time = time()
|
||||
|
||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||
features_filtered, labels_filtered = dk.filter_features(
|
||||
@@ -41,10 +44,10 @@ class BaseTensorFlowModel(IFreqaiModel):
|
||||
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
logger.info(f"-------------------- Training on data from {start_date} to "
|
||||
f"{end_date}--------------------")
|
||||
f"{end_date} --------------------")
|
||||
# split data into train/test data.
|
||||
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
|
||||
if not self.freqai_info.get('fit_live_predictions', 0) or not self.live:
|
||||
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
|
||||
dk.fit_labels()
|
||||
# normalize all data based on train_dataset only
|
||||
data_dictionary = dk.normalize_data(data_dictionary)
|
||||
@@ -53,12 +56,15 @@ class BaseTensorFlowModel(IFreqaiModel):
|
||||
self.data_cleaning_train(dk)
|
||||
|
||||
logger.info(
|
||||
f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
|
||||
f"Training model on {len(dk.data_dictionary['train_features'].columns)} features"
|
||||
)
|
||||
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
|
||||
logger.info(f"Training model on {len(data_dictionary['train_features'])} data points")
|
||||
|
||||
model = self.fit(data_dictionary, dk)
|
||||
|
||||
logger.info(f"--------------------done training {pair}--------------------")
|
||||
end_time = time()
|
||||
|
||||
logger.info(f"-------------------- Done training {pair} "
|
||||
f"({end_time - start_time:.2f} secs) --------------------")
|
||||
|
||||
return model
|
||||
|
@@ -1,4 +1,3 @@
|
||||
|
||||
from joblib import Parallel
|
||||
from sklearn.multioutput import MultiOutputRegressor, _fit_estimator
|
||||
from sklearn.utils.fixes import delayed
|
||||
|
@@ -16,6 +16,7 @@ from numpy.typing import NDArray
|
||||
from pandas import DataFrame
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import Config
|
||||
from freqtrade.data.history import load_pair_history
|
||||
from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
@@ -27,9 +28,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
class pair_info(TypedDict):
|
||||
model_filename: str
|
||||
first: bool
|
||||
trained_timestamp: int
|
||||
priority: int
|
||||
data_path: str
|
||||
extras: dict
|
||||
|
||||
@@ -58,7 +57,7 @@ class FreqaiDataDrawer:
|
||||
Juha Nykänen @suikula, Wagner Costa @wagnercosta, Johan Vlugt @Jooopieeert
|
||||
"""
|
||||
|
||||
def __init__(self, full_path: Path, config: dict, follow_mode: bool = False):
|
||||
def __init__(self, full_path: Path, config: Config, follow_mode: bool = False):
|
||||
|
||||
self.config = config
|
||||
self.freqai_info = config.get("freqai", {})
|
||||
@@ -91,7 +90,7 @@ class FreqaiDataDrawer:
|
||||
self.old_DBSCAN_eps: Dict[str, float] = {}
|
||||
self.empty_pair_dict: pair_info = {
|
||||
"model_filename": "", "trained_timestamp": 0,
|
||||
"priority": 1, "first": True, "data_path": "", "extras": {}}
|
||||
"data_path": "", "extras": {}}
|
||||
|
||||
def load_drawer_from_disk(self):
|
||||
"""
|
||||
@@ -216,7 +215,6 @@ class FreqaiDataDrawer:
|
||||
self.pair_dict[pair] = self.empty_pair_dict.copy()
|
||||
model_filename = ""
|
||||
trained_timestamp = 0
|
||||
self.pair_dict[pair]["priority"] = len(self.pair_dict)
|
||||
|
||||
if not data_path_set and self.follow_mode:
|
||||
logger.warning(
|
||||
@@ -236,18 +234,9 @@ class FreqaiDataDrawer:
|
||||
return
|
||||
else:
|
||||
self.pair_dict[metadata["pair"]] = self.empty_pair_dict.copy()
|
||||
self.pair_dict[metadata["pair"]]["priority"] = len(self.pair_dict)
|
||||
|
||||
return
|
||||
|
||||
def pair_to_end_of_training_queue(self, pair: str) -> None:
    """
    Move `pair` to the back of the training queue.

    Every entry of `self.pair_dict` has its "priority" lowered by one, then
    `pair` is given the highest number (the size of `self.pair_dict`).
    The whole update runs while holding `self.pair_dict_lock`.
    :param pair: str = key of the entry in `self.pair_dict` to send to the end
    """
    with self.pair_dict_lock:
        # everybody advances one slot ...
        for entry in self.pair_dict.values():
            entry["priority"] -= 1
        # ... and the requested pair goes last
        self.pair_dict[pair]["priority"] = len(self.pair_dict)
|
||||
|
||||
def set_initial_return_values(self, pair: str, pred_df: DataFrame) -> None:
|
||||
"""
|
||||
Set the initial return values to the historical predictions dataframe. This avoids needing
|
||||
@@ -441,6 +430,16 @@ class FreqaiDataDrawer:
|
||||
|
||||
return
|
||||
|
||||
def load_metadata(self, dk: FreqaiDataKitchen) -> None:
    """
    Read `<dk.model_filename>_metadata.json` from `dk.data_path` and store it
    on the datakitchen, without loading anything else. Used to increase
    performance during presaved backtesting (prediction file loading).
    :param dk: FreqaiDataKitchen = receives `data`, `training_features_list`
               and `label_list` from the JSON file
    """
    metadata_file = dk.data_path / f"{dk.model_filename}_metadata.json"
    with open(metadata_file, "r") as fp:
        dk.data = json.load(fp)

    # expose the two entries that are read most often as attributes
    dk.training_features_list = dk.data["training_features_list"]
    dk.label_list = dk.data["label_list"]
|
||||
|
||||
def load_data(self, coin: str, dk: FreqaiDataKitchen) -> Any:
|
||||
"""
|
||||
loads all data required to make a prediction on a sub-train time range
|
||||
|
@@ -18,6 +18,7 @@ from sklearn.model_selection import train_test_split
|
||||
from sklearn.neighbors import NearestNeighbors
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import Config
|
||||
from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.exchange import timeframe_to_seconds
|
||||
from freqtrade.strategy.interface import IStrategy
|
||||
@@ -57,7 +58,7 @@ class FreqaiDataKitchen:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
config: Dict[str, Any],
|
||||
config: Config,
|
||||
live: bool = False,
|
||||
pair: str = "",
|
||||
):
|
||||
@@ -466,27 +467,6 @@ class FreqaiDataKitchen:
|
||||
|
||||
return df
|
||||
|
||||
def remove_training_from_backtesting(self) -> DataFrame:
    """
    Trim `self.return_dataframe` to the configured backtesting timerange.

    The start of the timerange is moved back by `startup_candle_count`
    candles (when that setting is positive), so only those candles are kept
    ahead of the backtest period; all earlier rows are dropped.
    :return: DataFrame = rows of `self.return_dataframe` whose "date" is at or
             after the (shifted) backtesting start
    """
    startup_candle_count = self.config.get('startup_candle_count', 0)
    timeframe = self.config['timeframe']
    backtesting_timerange = TimeRange.parse_timerange(self.config["timerange"])

    if startup_candle_count > 0 and backtesting_timerange:
        # widen the window by the warm-up period, expressed in seconds
        warmup_seconds = timeframe_to_seconds(timeframe) * startup_candle_count
        backtesting_timerange.subtract_start(warmup_seconds)

    start = datetime.fromtimestamp(backtesting_timerange.startts, tz=timezone.utc)
    full_frame = self.return_dataframe
    return full_frame.loc[full_frame["date"] >= start, :]
|
||||
|
||||
def principal_component_analysis(self) -> None:
|
||||
"""
|
||||
Performs Principal Component Analysis on the data for dimensionality reduction
|
||||
@@ -775,12 +755,22 @@ class FreqaiDataKitchen:
|
||||
|
||||
def compute_inlier_metric(self, set_='train') -> None:
|
||||
"""
|
||||
|
||||
Compute inlier metric from backwards distance distributions.
|
||||
This metric defines how well features from a timepoint fit
|
||||
into previous timepoints.
|
||||
"""
|
||||
|
||||
def normalise(dataframe: DataFrame, key: str) -> DataFrame:
|
||||
if set_ == 'train':
|
||||
min_value = dataframe.min()
|
||||
max_value = dataframe.max()
|
||||
self.data[f'{key}_min'] = min_value
|
||||
self.data[f'{key}_max'] = max_value
|
||||
else:
|
||||
min_value = self.data[f'{key}_min']
|
||||
max_value = self.data[f'{key}_max']
|
||||
return (dataframe - min_value) / (max_value - min_value)
|
||||
|
||||
no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"]
|
||||
|
||||
if set_ == 'train':
|
||||
@@ -825,7 +815,12 @@ class FreqaiDataKitchen:
|
||||
inliers = pd.DataFrame(index=distances.index)
|
||||
for key in distances.keys():
|
||||
current_distances = distances[key].dropna()
|
||||
fit_params = stats.weibull_min.fit(current_distances)
|
||||
current_distances = normalise(current_distances, key)
|
||||
if set_ == 'train':
|
||||
fit_params = stats.weibull_min.fit(current_distances)
|
||||
self.data[f'{key}_fit_params'] = fit_params
|
||||
else:
|
||||
fit_params = self.data[f'{key}_fit_params']
|
||||
quantiles = stats.weibull_min.cdf(current_distances, *fit_params)
|
||||
|
||||
df_inlier = pd.DataFrame(
|
||||
@@ -979,8 +974,6 @@ class FreqaiDataKitchen:
|
||||
|
||||
to_keep = [col for col in dataframe.columns if not col.startswith("&")]
|
||||
self.return_dataframe = pd.concat([dataframe[to_keep], self.full_df], axis=1)
|
||||
|
||||
# self.return_dataframe = self.remove_training_from_backtesting()
|
||||
self.full_df = DataFrame()
|
||||
|
||||
return
|
||||
|
@@ -3,6 +3,7 @@ import shutil
|
||||
import threading
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
from collections import deque
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from threading import Lock
|
||||
@@ -14,12 +15,13 @@ from numpy.typing import NDArray
|
||||
from pandas import DataFrame
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import DATETIME_PRINT_FORMAT
|
||||
from freqtrade.constants import DATETIME_PRINT_FORMAT, Config
|
||||
from freqtrade.enums import RunMode
|
||||
from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.exchange import timeframe_to_seconds
|
||||
from freqtrade.freqai.data_drawer import FreqaiDataDrawer
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
from freqtrade.freqai.utils import plot_feature_importance
|
||||
from freqtrade.strategy.interface import IStrategy
|
||||
|
||||
|
||||
@@ -50,7 +52,7 @@ class IFreqaiModel(ABC):
|
||||
Juha Nykänen @suikula, Wagner Costa @wagnercosta, Johan Vlugt @Jooopieeert
|
||||
"""
|
||||
|
||||
def __init__(self, config: Dict[str, Any]) -> None:
|
||||
def __init__(self, config: Config) -> None:
|
||||
|
||||
self.config = config
|
||||
self.assert_config(self.config)
|
||||
@@ -63,7 +65,7 @@ class IFreqaiModel(ABC):
|
||||
self.first = True
|
||||
self.set_full_path()
|
||||
self.follow_mode: bool = self.freqai_info.get("follow_mode", False)
|
||||
self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", False)
|
||||
self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True)
|
||||
if self.save_backtest_models:
|
||||
logger.info('Backtesting module configured to save all models.')
|
||||
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
|
||||
@@ -80,6 +82,7 @@ class IFreqaiModel(ABC):
|
||||
self.pair_it = 0
|
||||
self.pair_it_train = 0
|
||||
self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
|
||||
self.train_queue = self._set_train_queue()
|
||||
self.last_trade_database_summary: DataFrame = {}
|
||||
self.current_trade_database_summary: DataFrame = {}
|
||||
self.analysis_lock = Lock()
|
||||
@@ -99,7 +102,7 @@ class IFreqaiModel(ABC):
|
||||
"""
|
||||
return ({})
|
||||
|
||||
def assert_config(self, config: Dict[str, Any]) -> None:
|
||||
def assert_config(self, config: Config) -> None:
|
||||
|
||||
if not config.get("freqai", {}):
|
||||
raise OperationalException("No freqai parameters found in configuration file.")
|
||||
@@ -181,29 +184,40 @@ class IFreqaiModel(ABC):
|
||||
"""
|
||||
while not self._stop_event.is_set():
|
||||
time.sleep(1)
|
||||
for pair in self.config.get("exchange", {}).get("pair_whitelist"):
|
||||
pair = self.train_queue[0]
|
||||
|
||||
(_, trained_timestamp, _) = self.dd.get_pair_dict_info(pair)
|
||||
# ensure pair is available in dp
|
||||
if pair not in strategy.dp.current_whitelist():
|
||||
self.train_queue.popleft()
|
||||
logger.warning(f'{pair} not in current whitelist, removing from train queue.')
|
||||
continue
|
||||
|
||||
if self.dd.pair_dict[pair]["priority"] != 1:
|
||||
continue
|
||||
dk = FreqaiDataKitchen(self.config, self.live, pair)
|
||||
dk.set_paths(pair, trained_timestamp)
|
||||
(
|
||||
retrain,
|
||||
new_trained_timerange,
|
||||
data_load_timerange,
|
||||
) = dk.check_if_new_training_required(trained_timestamp)
|
||||
dk.set_paths(pair, new_trained_timerange.stopts)
|
||||
(_, trained_timestamp, _) = self.dd.get_pair_dict_info(pair)
|
||||
|
||||
if retrain:
|
||||
self.train_timer('start')
|
||||
dk = FreqaiDataKitchen(self.config, self.live, pair)
|
||||
dk.set_paths(pair, trained_timestamp)
|
||||
(
|
||||
retrain,
|
||||
new_trained_timerange,
|
||||
data_load_timerange,
|
||||
) = dk.check_if_new_training_required(trained_timestamp)
|
||||
dk.set_paths(pair, new_trained_timerange.stopts)
|
||||
|
||||
if retrain:
|
||||
self.train_timer('start')
|
||||
try:
|
||||
self.extract_data_and_train_model(
|
||||
new_trained_timerange, pair, strategy, dk, data_load_timerange
|
||||
)
|
||||
self.train_timer('stop')
|
||||
except Exception as msg:
|
||||
logger.warning(f'Training {pair} raised exception {msg}, skipping.')
|
||||
|
||||
self.dd.save_historic_predictions_to_disk()
|
||||
self.train_timer('stop')
|
||||
|
||||
# only rotate the queue after the first has been trained.
|
||||
self.train_queue.rotate(-1)
|
||||
|
||||
self.dd.save_historic_predictions_to_disk()
|
||||
|
||||
def start_backtesting(
|
||||
self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
|
||||
@@ -230,7 +244,8 @@ class IFreqaiModel(ABC):
|
||||
# following tr_train. Both of these windows slide through the
|
||||
# entire backtest
|
||||
for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges):
|
||||
(_, _, _) = self.dd.get_pair_dict_info(metadata["pair"])
|
||||
pair = metadata["pair"]
|
||||
(_, _, _) = self.dd.get_pair_dict_info(pair)
|
||||
train_it += 1
|
||||
total_trains = len(dk.backtesting_timeranges)
|
||||
self.training_timerange = tr_train
|
||||
@@ -245,37 +260,37 @@ class IFreqaiModel(ABC):
|
||||
tr_train.stopts,
|
||||
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
|
||||
logger.info(
|
||||
f"Training {metadata['pair']}, {self.pair_it}/{self.total_pairs} pairs"
|
||||
f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs"
|
||||
f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} "
|
||||
"trains"
|
||||
)
|
||||
|
||||
trained_timestamp_int = int(trained_timestamp.stopts)
|
||||
dk.data_path = Path(
|
||||
dk.full_path
|
||||
/
|
||||
f"sub-train-{metadata['pair'].split('/')[0]}_{trained_timestamp_int}"
|
||||
dk.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp_int}"
|
||||
)
|
||||
|
||||
dk.set_new_model_names(metadata["pair"], trained_timestamp)
|
||||
dk.set_new_model_names(pair, trained_timestamp)
|
||||
|
||||
if dk.check_if_backtest_prediction_exists():
|
||||
self.dd.load_metadata(dk)
|
||||
self.check_if_feature_list_matches_strategy(dataframe_train, dk)
|
||||
append_df = dk.get_backtesting_prediction()
|
||||
dk.append_predictions(append_df)
|
||||
else:
|
||||
if not self.model_exists(
|
||||
metadata["pair"], dk, trained_timestamp=trained_timestamp_int
|
||||
pair, dk, trained_timestamp=trained_timestamp_int
|
||||
):
|
||||
dk.find_features(dataframe_train)
|
||||
self.model = self.train(dataframe_train, metadata["pair"], dk)
|
||||
self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
|
||||
self.model = self.train(dataframe_train, pair, dk)
|
||||
self.dd.pair_dict[pair]["trained_timestamp"] = int(
|
||||
trained_timestamp.stopts)
|
||||
|
||||
if self.save_backtest_models:
|
||||
logger.info('Saving backtest model to disk.')
|
||||
self.dd.save_data(self.model, metadata["pair"], dk)
|
||||
self.dd.save_data(self.model, pair, dk)
|
||||
else:
|
||||
self.model = self.dd.load_data(metadata["pair"], dk)
|
||||
self.model = self.dd.load_data(pair, dk)
|
||||
|
||||
self.check_if_feature_list_matches_strategy(dataframe_train, dk)
|
||||
|
||||
@@ -416,14 +431,16 @@ class IFreqaiModel(ABC):
|
||||
if "training_features_list_raw" in dk.data:
|
||||
feature_list = dk.data["training_features_list_raw"]
|
||||
else:
|
||||
feature_list = dk.training_features_list
|
||||
feature_list = dk.data['training_features_list']
|
||||
if dk.training_features_list != feature_list:
|
||||
raise OperationalException(
|
||||
"Trying to access pretrained model with `identifier` "
|
||||
"but found different features furnished by current strategy."
|
||||
"Change `identifier` to train from scratch, or ensure the"
|
||||
"strategy is furnishing the same features as the pretrained"
|
||||
"model"
|
||||
"model. In case of --strategy-list, please be aware that FreqAI "
|
||||
"requires all strategies to maintain identical "
|
||||
"populate_any_indicator() functions"
|
||||
)
|
||||
|
||||
def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None:
|
||||
@@ -557,11 +574,11 @@ class IFreqaiModel(ABC):
|
||||
|
||||
self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
|
||||
dk.set_new_model_names(pair, new_trained_timerange)
|
||||
self.dd.pair_dict[pair]["first"] = False
|
||||
if self.dd.pair_dict[pair]["priority"] == 1 and self.scanning:
|
||||
self.dd.pair_to_end_of_training_queue(pair)
|
||||
self.dd.save_data(model, pair, dk)
|
||||
|
||||
if self.freqai_info["feature_parameters"].get("plot_feature_importance", False):
|
||||
plot_feature_importance(model, pair, dk)
|
||||
|
||||
if self.freqai_info.get("purge_old_models", False):
|
||||
self.dd.purge_old_models()
|
||||
|
||||
@@ -685,6 +702,32 @@ class IFreqaiModel(ABC):
|
||||
|
||||
return init_model
|
||||
|
||||
def _set_train_queue(self):
    """
    Build the initial training queue.

    With an empty `self.dd.pair_dict` the queue is simply the configured
    whitelist. Otherwise whitelisted pairs found in `self.dd.pair_dict` are
    ordered by ascending `trained_timestamp`, and whitelisted pairs missing
    from the queue are pushed to the front.
    :return: deque = pair names in training order
    """
    whitelist = self.config.get("exchange", {}).get("pair_whitelist")
    known_pairs = self.dd.pair_dict

    if not known_pairs:
        logger.info('Set fresh train queue from whitelist. '
                    f'Queue: {whitelist}')
        return deque(whitelist)

    # smallest trained_timestamp first
    by_timestamp = sorted(known_pairs,
                          key=lambda name: known_pairs[name]['trained_timestamp'])
    best_queue = deque(name for name in by_timestamp if name in whitelist)

    # whitelisted pairs not queued yet jump ahead of everything else
    for name in whitelist:
        if name not in best_queue:
            best_queue.appendleft(name)

    logger.info('Set existing queue from trained timestamps. '
                f'Best approximation queue: {best_queue}')
    return best_queue
|
||||
|
||||
def spice_rack(self, indicator: str, dataframe: DataFrame,
|
||||
metadata: dict, strategy: IStrategy) -> NDArray:
|
||||
if not self.spice_rack_open:
|
||||
|
@@ -10,11 +10,13 @@ from scipy.signal import argrelextrema
|
||||
from technical import qtpylib
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import Config
|
||||
from freqtrade.data.dataprovider import DataProvider
|
||||
from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data
|
||||
from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.exchange import Exchange, timeframe_to_seconds
|
||||
from freqtrade.exchange.exchange import market_is_active
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist
|
||||
from freqtrade.strategy import merge_informative_pair
|
||||
|
||||
@@ -22,7 +24,7 @@ from freqtrade.strategy import merge_informative_pair
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def download_all_data_for_training(dp: DataProvider, config: dict) -> None:
|
||||
def download_all_data_for_training(dp: DataProvider, config: Config) -> None:
|
||||
"""
|
||||
Called only once upon start of bot to download the necessary data for
|
||||
populating indicators and training the model.
|
||||
@@ -56,9 +58,7 @@ def download_all_data_for_training(dp: DataProvider, config: dict) -> None:
|
||||
)
|
||||
|
||||
|
||||
def get_required_data_timerange(
|
||||
config: dict
|
||||
) -> TimeRange:
|
||||
def get_required_data_timerange(config: Config) -> TimeRange:
|
||||
"""
|
||||
Used to compute the required data download time range
|
||||
for auto data-download in FreqAI
|
||||
@@ -226,7 +226,7 @@ def setup_freqai_spice_rack(config: dict, exchange: Optional[Exchange]) -> Dict[
|
||||
return config
|
||||
|
||||
# Keep below for when we wish to download heterogeneously lengthed data for FreqAI.
|
||||
# def download_all_data_for_training(dp: DataProvider, config: dict) -> None:
|
||||
# def download_all_data_for_training(dp: DataProvider, config: Config) -> None:
|
||||
# """
|
||||
# Called only once upon start of bot to download the necessary data for
|
||||
# populating indicators and training a FreqAI model.
|
||||
@@ -272,3 +272,58 @@ def setup_freqai_spice_rack(config: dict, exchange: Optional[Exchange]) -> Dict[
|
||||
# trading_mode=config.get("trading_mode", "spot"),
|
||||
# prepend=config.get("prepend_data", False),
|
||||
# )
|
||||
|
||||
|
||||
def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
                            count_max: int = 25) -> None:
    """
    Plot Best and worst features by importance for a single sub-train.

    One figure is built per label: for a `FreqaiMultiOutputRegressor` each
    entry of `model.estimators_` is paired with the matching entry of
    `dk.label_list`, otherwise `model` is used for `dk.label_list[0]`.
    Each figure is passed to `store_plot_file` together with `dk.data_path`,
    under the name `<dk.model_filename>-<label>.html`.

    If a model exposes no feature importances, a message is logged and the
    function returns without plotting the remaining labels.
    :param model: Any = A model which was `fit` using a common library
                        such as catboost or lightgbm
    :param pair: str = pair e.g. BTC/USD
    :param dk: FreqaiDataKitchen = non-persistent data container for current coin/loop
    :param count_max: int = the amount of features to be loaded per column
    """
    from freqtrade.plot.plotting import go, make_subplots, store_plot_file

    # Defined once instead of on every loop iteration.
    def add_feature_trace(fig, fi_df, col):
        return fig.add_trace(
            go.Bar(
                x=fi_df["feature_importance"],
                y=fi_df["feature_names"],
                orientation='h', showlegend=False
            ), row=1, col=col
        )

    # Map every label to the estimator that predicts it
    if 'FreqaiMultiOutputRegressor' in str(model.__class__):
        models = dict(zip(dk.label_list, model.estimators_))
    else:
        models = {dk.label_list[0]: model}

    for label, mdl in models.items():
        # Extract feature importance from model.
        # The previous check `"lightgbm.sklearn" or "xgb" in str(...)` was
        # always true, so unsupported models raised AttributeError instead of
        # reaching the fallback. Probing the attribute keeps every model that
        # worked before and makes the fallback reachable.
        if "catboost.core" in str(mdl.__class__):
            feature_importance = mdl.get_feature_importance()
        else:
            feature_importance = getattr(mdl, "feature_importances_", None)

        if feature_importance is None:
            logger.info('Model type not support for generating feature importances.')
            return

        # Data preparation
        fi_df = pd.DataFrame({
            "feature_names": np.array(dk.training_features_list),
            "feature_importance": np.array(feature_importance)
        })
        # reversed so the most extreme value ends up at the top of the bar chart
        fi_df_top = fi_df.nlargest(count_max, "feature_importance")[::-1]
        fi_df_worst = fi_df.nsmallest(count_max, "feature_importance")[::-1]

        # Plotting
        fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.5)
        fig = add_feature_trace(fig, fi_df_top, 1)
        fig = add_feature_trace(fig, fi_df_worst, 2)
        fig.update_layout(title_text=f"Best and worst features by importance {pair}")
        # escape two FreqAI specific characters
        file_label = label.replace('&', '').replace('%', '')
        store_plot_file(fig, f"{dk.model_filename}-{file_label}.html", dk.data_path)
|
||||
|
Reference in New Issue
Block a user