remove trade database analyzer, clean up a bit
This commit is contained in:
parent 91d0c91287
commit 2cae3c42e6
@@ -663,79 +663,13 @@ The user needs to set the standard dictionary in the config so FreqAI can return
 These values will likely be overridden by the user prediction model, but in the case where the user model has yet to set them, or needs
 a default initial value - this is the value that will be returned.
 
-## Analyzing the trade live database
-
-Users can analyze the live trade database by calling `analyze_trade_database()` in their custom prediction model. FreqAI already has the
-database setup in a pandas dataframe and ready to be analyzed. Here is an example usecase:
-
-```python
-def analyze_trade_database(self, dk: FreqaiDataKitchen, pair: str) -> None:
-    """
-    User analyzes the trade database here and returns summary stats which will be passed back
-    to the strategy for reinforcement learning or for additional adaptive metrics for use
-    in entry/exit signals. Store these metrics in dk.data['extra_returns_per_train'] and
-    they will format themselves into the dataframe as an additional column in the user
-    strategy. User has access to the current trade database in dk.trade_database_df.
-    """
-    total_profit = dk.trade_database_df['close_profit_abs'].sum()
-    dk.data['extra_returns_per_train']['total_profit'] = total_profit
-
-    return
-```
-
 ## Building an IFreqaiModel
 
 FreqAI has multiple example prediction model based libraries such as `Catboost` regression (`freqai/prediction_models/CatboostRegressor.py`) and `LightGBM` regression.
 However, users can customize and create their own prediction models using the `IFreqaiModel` class.
 Users are encouraged to inherit `train()` and `predict()` to let them customize various aspects of their training procedures.
 
-<!-- ## Dynamic target expectation
-
-The labels used for model training have a unique statistical distribution for each separate model training.
-We can use this information to know if our current prediction is in the realm of what the model was trained on,
-and if so, what is the statistical probability of the current prediction. With this information, we can
-make more informed prediction.
-FreqAI builds this label distribution and provides a quantile to the strategy, which can be optionally used as a
-dynamic threshold. The `target_quantile: X` means that X% of the labels are below this value. So setting:
-
-```json
-"freqai": {
-    "feature_parameters" : {
-        "target_quantile": 0.9
-    }
-}
-```
-
-Means the user will get back in the strategy the label threshold at which 90% of the labels were
-below this value. An example usage in the strategy may look something like:
-
-```python
-def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
-
-    # ... #
-
-    (
-        dataframe["prediction"],
-        dataframe["do_predict"],
-        dataframe["target_upper_quantile"],
-        dataframe["target_lower_quantile"],
-    ) = self.freqai.start(dataframe, metadata, self)
-
-    return dataframe
-
-def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
-
-    buy_conditions = [
-        (dataframe["prediction"] > dataframe["target_upper_quantile"]) & (dataframe["do_predict"] == 1)
-    ]
-
-    if buy_conditions:
-        dataframe.loc[reduce(lambda x, y: x | y, buy_conditions), "buy"] = 1
-
-    return dataframe
-``` -->
-
 ## Additional information
 
 ### Common pitfalls
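For anyone who depended on the docs section removed above: the same summary statistic can still be computed outside FreqAI by reading the trade database directly. A minimal standalone sketch based on the deleted `get_current_trade_database()` helper; the `tradesv3.sqlite` filename is an assumption (use whatever your `db_url` points at), not something this commit prescribes:

```python
import sqlite3

import pandas as pd


def load_trade_database(db_file: str = "tradesv3.sqlite") -> pd.DataFrame:
    """Read the trades table into a DataFrame, keeping only closed trades.

    Mirrors the removed FreqaiDataKitchen.get_current_trade_database().
    """
    conn = sqlite3.connect(db_file)
    try:
        query = conn.execute("SELECT * FROM trades")
        cols = [column[0] for column in query.description]
        df = pd.DataFrame.from_records(data=query.fetchall(), columns=cols)
    finally:
        conn.close()
    # Open trades have no close_date yet; drop them before aggregating.
    return df.dropna(subset=["close_date"])


trades = load_trade_database()
total_profit = trades["close_profit_abs"].sum()  # the stat from the removed docs example
```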
@@ -2,9 +2,8 @@ import copy
 import datetime
 import logging
 import shutil
-import sqlite3
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Tuple
 
 import numpy as np
 import numpy.typing as npt
@@ -88,20 +87,6 @@ class FreqaiDataKitchen:
             config["freqai"]["backtest_period_days"],
         )
 
-        self.database_path: Optional[Path] = None
-
-        if self.live:
-            db_url = self.config.get('db_url', None)
-            self.database_path = Path(db_url)
-            if 'sqlite' not in self.database_path.parts[0]:
-                self.database_path = None
-                logger.warning('FreqAI database analyzer only available for sqlite dbs. '
-                               ' FreqAI will still run, but user cannot use database analyzer.')
-            else:
-                self.database_name = Path(*self.database_path.parts[1:])
-
-        self.trade_database_df: DataFrame = pd.DataFrame()
-
         self.data['extra_returns_per_train'] = self.freqai_config.get('extra_returns_per_train', {})
         self.thread_count = self.freqai_config.get("data_kitchen_thread_count", -1)
         self.train_dates: DataFrame = pd.DataFrame()
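Note that the `extra_returns_per_train` seed kept in `__init__` above still lets users provide default return values from the config before a model first sets them. A hedged example of such a config entry; `total_profit` reuses the key from the removed docs example, and the default of 0 is an illustrative placeholder:

```json
"freqai": {
    "extra_returns_per_train": {
        "total_profit": 0
    }
}
```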
@@ -1007,13 +992,6 @@ class FreqaiDataKitchen:
             f = spy.stats.norm.fit(self.data_dictionary["train_labels"][label])
             self.data["labels_mean"][label], self.data["labels_std"][label] = f[0], f[1]
 
-        # KEEPME incase we want to let user start to grab quantiles.
-        # upper_q = spy.stats.norm.ppf(self.freqai_config['feature_parameters'][
-        #     'target_quantile'], *f)
-        # lower_q = spy.stats.norm.ppf(1 - self.freqai_config['feature_parameters'][
-        #     'target_quantile'], *f)
-        # self.data["upper_quantile"] = upper_q
-        # self.data["lower_quantile"] = lower_q
         return
 
     def remove_features_from_df(self, dataframe: DataFrame) -> DataFrame:
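The `KEEPME` block deleted in this hunk derived label quantiles from the fitted normal. For reference, the idea in isolation as a standalone sketch; the random `labels` array stands in for `data_dictionary["train_labels"]`, and `target_quantile` is a plain variable here, not a live config key:

```python
import numpy as np
from scipy import stats

labels = np.random.normal(loc=0.01, scale=0.05, size=1000)  # stand-in for train labels

# norm.fit returns (mean, std); ppf inverts the CDF at the requested quantile.
mean, std = stats.norm.fit(labels)
target_quantile = 0.9
upper_q = stats.norm.ppf(target_quantile, mean, std)      # 90% of labels modelled below this
lower_q = stats.norm.ppf(1 - target_quantile, mean, std)  # 10% modelled below this
```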
@@ -1025,181 +1003,3 @@ class FreqaiDataKitchen:
             col for col in dataframe.columns if not col.startswith("%") or col.startswith("%%")
         ]
         return dataframe[to_keep]
-
-    def get_current_trade_database(self) -> None:
-
-        if self.database_path is None:
-            logger.warning('No trade database found. Skipping analysis.')
-            return
-
-        data = sqlite3.connect(self.database_name)
-        query = data.execute("SELECT * From trades")
-        cols = [column[0] for column in query.description]
-        df = pd.DataFrame.from_records(data=query.fetchall(), columns=cols)
-        self.trade_database_df = df.dropna(subset='close_date')
-        data.close()
-
-    def np_encoder(self, object):
-        if isinstance(object, np.generic):
-            return object.item()
-
-    # Functions containing useful data manipulation examples. but not actively in use.
-
-    # Possibly phasing these outlier removal methods below out in favor of
-    # use_SVM_to_remove_outliers (computationally more efficient and apparently higher performance).
-    # But these have good data manipulation examples, so keep them commented here for now.
-
-    # def determine_statistical_distributions(self) -> None:
-    #     from fitter import Fitter
-
-    #     logger.info('Determining best model for all features, may take some time')
-
-    #     def compute_quantiles(ft):
-    #         f = Fitter(self.data_dictionary["train_features"][ft],
-    #                    distributions=['gamma', 'cauchy', 'laplace',
-    #                                   'beta', 'uniform', 'lognorm'])
-    #         f.fit()
-    #         # f.summary()
-    #         dist = list(f.get_best().items())[0][0]
-    #         params = f.get_best()[dist]
-    #         upper_q = getattr(spy.stats, list(f.get_best().items())[0][0]).ppf(0.999, **params)
-    #         lower_q = getattr(spy.stats, list(f.get_best().items())[0][0]).ppf(0.001, **params)
-
-    #         return ft, upper_q, lower_q, dist
-
-    #     quantiles_tuple = Parallel(n_jobs=-1)(
-    #         delayed(compute_quantiles)(ft) for ft in self.data_dictionary[
-    #             'train_features'].columns)
-
-    #     df = pd.DataFrame(quantiles_tuple, columns=['features', 'upper_quantiles',
-    #                                                 'lower_quantiles', 'dist'])
-    #     self.data_dictionary['upper_quantiles'] = df['upper_quantiles']
-    #     self.data_dictionary['lower_quantiles'] = df['lower_quantiles']
-
-    #     return
-
-    # def remove_outliers(self, predict: bool) -> None:
-    #     """
-    #     Remove data that looks like an outlier based on the distribution of each
-    #     variable.
-    #     :params:
-    #     :predict: boolean which tells the function if this is prediction data or
-    #     training data coming in.
-    #     """
-
-    #     lower_quantile = self.data_dictionary["lower_quantiles"].to_numpy()
-    #     upper_quantile = self.data_dictionary["upper_quantiles"].to_numpy()
-
-    #     if predict:
-
-    #         df = self.data_dictionary["prediction_features"][
-    #             (self.data_dictionary["prediction_features"] < upper_quantile)
-    #             & (self.data_dictionary["prediction_features"] > lower_quantile)
-    #         ]
-    #         drop_index = pd.isnull(df).any(1)
-    #         self.data_dictionary["prediction_features"].fillna(0, inplace=True)
-    #         drop_index = ~drop_index
-    #         do_predict = np.array(drop_index.replace(True, 1).replace(False, 0))
-
-    #         logger.info(
-    #             "remove_outliers() tossed %s predictions",
-    #             len(do_predict) - do_predict.sum(),
-    #         )
-    #         self.do_predict += do_predict
-    #         self.do_predict -= 1
-
-    #     else:
-
-    #         filter_train_df = self.data_dictionary["train_features"][
-    #             (self.data_dictionary["train_features"] < upper_quantile)
-    #             & (self.data_dictionary["train_features"] > lower_quantile)
-    #         ]
-    #         drop_index = pd.isnull(filter_train_df).any(1)
-    #         drop_index = drop_index.replace(True, 1).replace(False, 0)
-    #         self.data_dictionary["train_features"] = self.data_dictionary["train_features"][
-    #             (drop_index == 0)
-    #         ]
-    #         self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][
-    #             (drop_index == 0)
-    #         ]
-    #         self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][
-    #             (drop_index == 0)
-    #         ]
-
-    #         logger.info(
-    #             f'remove_outliers() tossed {drop_index.sum()}'
-    #             f' training points from {len(filter_train_df)}'
-    #         )
-
-    #         # do the same for the test data
-    #         filter_test_df = self.data_dictionary["test_features"][
-    #             (self.data_dictionary["test_features"] < upper_quantile)
-    #             & (self.data_dictionary["test_features"] > lower_quantile)
-    #         ]
-    #         drop_index = pd.isnull(filter_test_df).any(1)
-    #         drop_index = drop_index.replace(True, 1).replace(False, 0)
-    #         self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][
-    #             (drop_index == 0)
-    #         ]
-    #         self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
-    #             (drop_index == 0)
-    #         ]
-    #         self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][
-    #             (drop_index == 0)
-    #         ]
-
-    #         logger.info(
-    #             f'remove_outliers() tossed {drop_index.sum()}'
-    #             f' test points from {len(filter_test_df)}'
-    #         )
-
-    #         return
-
-    # def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
-    #     """
-    #     standardize all data in the data_dictionary according to the training dataset
-    #     :params:
-    #     :data_dictionary: dictionary containing the cleaned and split training/test data/labels
-    #     :returns:
-    #     :data_dictionary: updated dictionary with standardized values.
-    #     """
-    #     # standardize the data by training stats
-    #     train_mean = data_dictionary["train_features"].mean()
-    #     train_std = data_dictionary["train_features"].std()
-    #     data_dictionary["train_features"] = (
-    #         data_dictionary["train_features"] - train_mean
-    #     ) / train_std
-    #     data_dictionary["test_features"] = (
-    #         data_dictionary["test_features"] - train_mean
-    #     ) / train_std
-
-    #     train_labels_std = data_dictionary["train_labels"].std()
-    #     train_labels_mean = data_dictionary["train_labels"].mean()
-    #     data_dictionary["train_labels"] = (
-    #         data_dictionary["train_labels"] - train_labels_mean
-    #     ) / train_labels_std
-    #     data_dictionary["test_labels"] = (
-    #         data_dictionary["test_labels"] - train_labels_mean
-    #     ) / train_labels_std
-
-    #     for item in train_std.keys():
-    #         self.data[item + "_std"] = train_std[item]
-    #         self.data[item + "_mean"] = train_mean[item]
-
-    #     self.data["labels_std"] = train_labels_std
-    #     self.data["labels_mean"] = train_labels_mean
-
-    #     return data_dictionary
-
-    # def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame:
-    #     """
-    #     Normalizes a set of data using the mean and standard deviation from
-    #     the associated training data.
-    #     :params:
-    #     :df: Dataframe to be standardized
-    #     """
-
-    #     for item in df.keys():
-    #         df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"]
-
-    #     return df
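Among the commented-out examples deleted above, `standardize_data` is the standard train-statistics z-score. The technique in isolation, as a sketch detached from `FreqaiDataKitchen` (the `data_dictionary` keys follow the deleted code):

```python
from typing import Dict

import pandas as pd


def standardize(data_dictionary: Dict[str, pd.DataFrame]) -> Dict[str, pd.DataFrame]:
    """Z-score train and test features with training-set statistics only,
    so no information from the test set leaks into the transform."""
    train_mean = data_dictionary["train_features"].mean()
    train_std = data_dictionary["train_features"].std()
    for key in ("train_features", "test_features"):
        data_dictionary[key] = (data_dictionary[key] - train_mean) / train_std
    return data_dictionary
```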
@@ -491,9 +491,6 @@ class IFreqaiModel(ABC):
 
         model = self.train(unfiltered_dataframe, pair, dk)
 
-        dk.get_current_trade_database()
-        self.analyze_trade_database(dk, pair)
-
         self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
         dk.set_new_model_names(pair, new_trained_timerange)
         self.dd.pair_dict[pair]["first"] = False
@@ -612,20 +609,3 @@ class IFreqaiModel(ABC):
         :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
         data (NaNs) or felt uncertain about data (i.e. SVM and/or DI index)
         """
-
-    def analyze_trade_database(self, dk: FreqaiDataKitchen, pair: str) -> None:
-        """
-        User analyzes the trade database here and returns summary stats which will be passed back
-        to the strategy for reinforcement learning or for additional adaptive metrics for use
-        in entry/exit signals. Store these metrics in dk.data['extra_returns_per_train'] and
-        they will format themselves into the dataframe as an additional column in the user
-        strategy. User has access to the current trade database in dk.trade_database_df.
-        """
-        # if dk.trade_database_df.empty:
-        #     logger.warning(f'No trades found for {pair} to analyze DB')
-        #     return
-
-        # total_profit = dk.trade_database_df['close_profit_abs'].sum()
-        # dk.data['extra_returns_per_train']['total_profit'] = total_profit
-
-        return