Give the user the ability to analyze the live trade dataframe inside a custom prediction model. Add documentation to explain the new functionality.

This commit is contained in:
robcaulk
2022-08-02 20:14:02 +02:00
parent 895ebbfd18
commit 95d3009a95
4 changed files with 147 additions and 13 deletions

View File

@@ -39,7 +39,7 @@ class FreqaiDataDrawer:
Robert Caulk @robcaulk
Theoretical brainstorming:
Elin Törnquist @thorntwig
Elin Törnquist @th0rntwig
Code review, software architecture brainstorming:
@xmatthias
@@ -238,6 +238,11 @@ class FreqaiDataDrawer:
mrv_df["do_predict"] = do_preds
if dk.data['extra_returns_per_train']:
rets = dk.data['extra_returns_per_train']
for return_str in rets:
mrv_df[return_str] = rets[return_str]
# for keras type models, the conv_window needs to be prepended so
# viewing is correct in frequi
if self.freqai_info.get('keras', False):
@@ -282,9 +287,15 @@ class FreqaiDataDrawer:
if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0:
df["DI_values"].iloc[-1] = dk.DI_values[-1]
if dk.data['extra_returns_per_train']:
rets = dk.data['extra_returns_per_train']
for return_str in rets:
df[return_str].iloc[-1] = rets[return_str]
# append the new predictions to persistent storage
if pair in self.historic_predictions:
self.historic_predictions[pair].iloc[-1] = df[label].iloc[-1]
for key in df.keys():
self.historic_predictions[pair][key].iloc[-1] = df[key].iloc[-1]
if length_difference < 0:
prepend_df = pd.DataFrame(
@@ -320,7 +331,12 @@ class FreqaiDataDrawer:
dataframe["do_predict"] = 0
if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0:
dataframe["DI_value"] = 0
dataframe["DI_values"] = 0
if dk.data['extra_returns_per_train']:
rets = dk.data['extra_returns_per_train']
for return_str in rets:
dataframe[return_str] = 0
dk.return_dataframe = dataframe

View File

@@ -2,6 +2,7 @@ import copy
import datetime
import logging
import shutil
import sqlite3
from pathlib import Path
from typing import Any, Dict, List, Tuple
@@ -39,7 +40,7 @@ class FreqaiDataKitchen:
Robert Caulk @robcaulk
Theoretical brainstorming:
Elin Törnquist @thorntwig
Elin Törnquist @th0rntwig
Code review, software architecture brainstorming:
@xmatthias
@@ -84,6 +85,12 @@ class FreqaiDataKitchen:
config["freqai"]["backtest_period_days"],
)
db_url = self.config.get('db_url', None)
self.database_path = '' if db_url == 'sqlite://' else str(db_url).split('///')[1]
self.trade_database_df: DataFrame = pd.DataFrame()
self.data['extra_returns_per_train'] = self.freqai_config.get('extra_returns_per_train', {})
def set_paths(
self,
pair: str,
@@ -101,7 +108,7 @@ class FreqaiDataKitchen:
self.data_path = Path(
self.full_path
/ str("sub-train" + "-" + pair.split("/")[0] + "_" + str(trained_timestamp))
/ f"sub-train-{pair.split('/')[0]}_{trained_timestamp}"
)
return
@@ -328,7 +335,7 @@ class FreqaiDataKitchen:
"""
for label in self.label_list:
if df[label].dtype == str:
if df[label].dtype == object:
continue
df[label] = (
(df[label] + 1)
@@ -493,7 +500,6 @@ class FreqaiDataKitchen:
tc = self.freqai_config.get("model_training_parameters", {}).get("thread_count", -1)
pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=tc)
avg_mean_dist = pairwise.mean(axis=1).mean()
logger.info(f"avg_mean_dist {avg_mean_dist:.2f}")
return avg_mean_dist
@@ -599,10 +605,11 @@ class FreqaiDataKitchen:
from the training data set.
"""
tc = self.freqai_config.get("model_training_parameters", {}).get("thread_count", -1)
distance = pairwise_distances(
self.data_dictionary["train_features"],
self.data_dictionary["prediction_features"],
n_jobs=-1,
n_jobs=tc,
)
self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"]
@@ -946,6 +953,19 @@ class FreqaiDataKitchen:
]
return dataframe[to_keep]
def get_current_trade_database(self) -> None:
    """
    Load the closed trades from the sqlite trade database into
    `self.trade_database_df`.

    Every row of the `trades` table found at `self.database_path` is read,
    and only rows whose `close_date` is not null are kept. If
    `self.database_path` is an empty string, a warning is logged and
    `self.trade_database_df` is left untouched.

    Any exception raised while querying propagates to the caller, but the
    database connection is always closed first.
    """
    if self.database_path == '':
        logger.warning('No trade databse found. Skipping analysis.')
        return

    connection = sqlite3.connect(self.database_path)
    try:
        query = connection.execute("SELECT * From trades")
        # first element of each description entry is the column name
        cols = [column[0] for column in query.description]
        df = pd.DataFrame.from_records(data=query.fetchall(), columns=cols)
    finally:
        # release the handle even if the query or the conversion raises
        connection.close()

    # list-like `subset` is accepted by every pandas version
    self.trade_database_df = df.dropna(subset=['close_date'])
def np_encoder(self, object):
if isinstance(object, np.generic):
return object.item()

View File

@@ -1,5 +1,4 @@
# import contextlib
import copy
import datetime
import logging
import shutil
@@ -46,7 +45,7 @@ class IFreqaiModel(ABC):
Robert Caulk @robcaulk
Theoretical brainstorming:
Elin Törnquist @thorntwig
Elin Törnquist @th0rntwig
Code review, software architecture brainstorming:
@xmatthias
@@ -81,6 +80,8 @@ class IFreqaiModel(ABC):
self.CONV_WIDTH = self.freqai_info.get("conv_width", 2)
self.pair_it = 0
self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
self.last_trade_database_summary: DataFrame = {}
self.current_trade_database_summary: DataFrame = {}
def assert_config(self, config: Dict[str, Any]) -> None:
@@ -479,6 +480,9 @@ class IFreqaiModel(ABC):
model = self.train(unfiltered_dataframe, pair, dk)
dk.get_current_trade_database()
self.analyze_trade_database(dk, pair)
self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
dk.set_new_model_names(pair, new_trained_timerange)
self.dd.pair_dict[pair]["first"] = False
@@ -493,13 +497,50 @@ class IFreqaiModel(ABC):
def set_initial_historic_predictions(
    self, df: DataFrame, model: Any, dk: FreqaiDataKitchen, pair: str
) -> None:
    """
    This function is called only if the datadrawer failed to load an
    existing set of historic predictions. In this case, it builds
    the structure and sets fake predictions off the first training
    data. After that, FreqAI will append new real predictions to the
    set of historic predictions.

    These values are used to generate live statistics which can be used
    in the strategy for adaptive values. E.g. &*_mean/std are quantities
    that can be computed based on live predictions from the set of historical
    predictions. Those values can be used in the user strategy to better
    assess prediction rarity, and thus wait for probabilistically favorable
    entries relative to the live historical predictions.

    If the user reuses an identifier on a subsequent instance,
    this function will not be called. In that case, "real" predictions
    will be appended to the loaded set of historic predictions.

    :param: df: DataFrame = the dataframe containing the training feature data
    :param: model: Any = A model which was `fit` using a common library such as
    catboost or lightgbm
    :param: dk: FreqaiDataKitchen = object containing methods for data analysis
    :param: pair: str = current pair
    """
    # Only the most recent candles seed the history, so the model is run
    # once on the tail rather than on the whole training dataframe.
    num_candles = self.freqai_info.get('fit_live_predictions_candles', 600)
    df_tail = df.tail(num_candles)
    trained_predictions = model.predict(df_tail)
    pred_df = DataFrame(trained_predictions, columns=dk.label_list)
    pred_df = dk.denormalize_labels_from_metadata(pred_df)

    self.dd.historic_predictions[pair] = pred_df
    hist_preds_df = self.dd.historic_predictions[pair]

    # Placeholder columns; all are initialised to 0 here.
    hist_preds_df['do_predict'] = 0

    if self.freqai_info['feature_parameters'].get('DI_threshold', 0) > 0:
        hist_preds_df['DI_values'] = 0

    for label in dk.data['labels_mean']:
        hist_preds_df[f'{label}_mean'] = 0
        hist_preds_df[f'{label}_std'] = 0

    for return_str in dk.data['extra_returns_per_train']:
        hist_preds_df[return_str] = 0
def fit_live_predictions(self, dk: FreqaiDataKitchen) -> None:
"""
@@ -565,3 +606,20 @@ class IFreqaiModel(ABC):
"""
return
def analyze_trade_database(self, dk: FreqaiDataKitchen, pair: str) -> None:
    """
    Hook for summarising the trade database after a training run.

    The current trade database is available as dk.trade_database_df. Any
    metric stored in dk.data['extra_returns_per_train'] is meant to be
    passed back to the strategy as an additional dataframe column, for use
    in reinforcement learning or as adaptive input to entry/exit signals.

    This implementation stores the sum of `close_profit_abs` over all rows
    of dk.trade_database_df under the key 'total_profit'. When the trade
    dataframe is empty, a warning is logged and nothing is stored.

    :param: dk: FreqaiDataKitchen = object holding the trade dataframe and
    the `extra_returns_per_train` dictionary
    :param: pair: str = current pair (only used in the warning message)
    """
    trades = dk.trade_database_df

    if not trades.empty:
        extra_returns = dk.data['extra_returns_per_train']
        extra_returns['total_profit'] = trades['close_profit_abs'].sum()
        return

    logger.warning(f'No trades found for {pair} to analyze DB')