remove trade database analyzer, clean up a bit

parent 91d0c91287
commit 2cae3c42e6
@ -663,79 +663,13 @@ The user needs to set the standard dictionary in the config so FreqAI can return

These values will likely be overridden by the user prediction model, but in the case where the user model has yet to set them, or needs a default initial value, this is the value that will be returned.
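For example, a user who plans to return a `total_profit` metric (as in the example below) could seed its default in the config. This is a hypothetical snippet; the key name and default value are illustrative and assume the `extra_returns_per_train` dictionary described here:

```json
    "freqai": {
        "extra_returns_per_train": {"total_profit": 0}
    }
```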

## Analyzing the live trade database

Users can analyze the live trade database by calling `analyze_trade_database()` in their custom prediction model. FreqAI already has the database set up in a pandas dataframe, ready to be analyzed. Here is an example use case:

```python
def analyze_trade_database(self, dk: FreqaiDataKitchen, pair: str) -> None:
    """
    User analyzes the trade database here and returns summary stats which will be passed back
    to the strategy for reinforcement learning or for additional adaptive metrics for use
    in entry/exit signals. Store these metrics in dk.data['extra_returns_per_train'] and
    they will format themselves into the dataframe as an additional column in the user
    strategy. User has access to the current trade database in dk.trade_database_df.
    """
    total_profit = dk.trade_database_df['close_profit_abs'].sum()
    dk.data['extra_returns_per_train']['total_profit'] = total_profit

    return
```
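Because the metric is stored in `dk.data['extra_returns_per_train']`, it comes back to the strategy as an additional dataframe column named after the dictionary key (as the docstring above suggests). A minimal, hypothetical sketch of consuming it in an entry signal; the `total_profit` column and the threshold are illustrative:

```python
def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:

    # 'total_profit' is the extra column produced from
    # dk.data['extra_returns_per_train'] in analyze_trade_database()
    dataframe.loc[
        (dataframe["do_predict"] == 1) & (dataframe["total_profit"] > 0),
        "buy",
    ] = 1

    return dataframe
```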

## Building an IFreqaiModel

FreqAI provides multiple example prediction models based on popular libraries, such as `Catboost` regression (`freqai/prediction_models/CatboostRegressor.py`) and `LightGBM` regression. However, users can customize and create their own prediction models using the `IFreqaiModel` class. Users are encouraged to inherit `train()` and `predict()` to customize various aspects of their training procedures.

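As a hedged sketch of what such a model might look like (the class name is hypothetical; the `train()` signature follows how `IFreqaiModel` calls `self.train(unfiltered_dataframe, pair, dk)`, and the import paths are assumed from the file locations referenced above), a user could subclass a shipped example and wrap its training step:

```python
from typing import Any

from pandas import DataFrame

# assumed module paths, following the file locations mentioned above
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.prediction_models.CatboostRegressor import CatboostRegressor


class MyCatboostRegressor(CatboostRegressor):
    """
    Hypothetical user model: reuses the shipped CatboostRegressor example and
    only wraps train() to add custom bookkeeping after each training run.
    """

    def train(self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen) -> Any:
        model = super().train(unfiltered_dataframe, pair, dk)
        # custom post-training bookkeeping could go here, e.g. storing
        # metrics in dk.data['extra_returns_per_train'] as shown above
        return model
```
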
<!-- ## Dynamic target expectation

The labels used for model training have a unique statistical distribution for each separate model training. We can use this information to know whether our current prediction is in the realm of what the model was trained on, and if so, what the statistical probability of the current prediction is. With this information, we can make more informed predictions.

FreqAI builds this label distribution and provides a quantile to the strategy, which can optionally be used as a dynamic threshold. The `target_quantile: X` parameter means that X% of the labels are below this value. So setting:

```json
    "freqai": {
        "feature_parameters" : {
            "target_quantile": 0.9
        }
    }
```

This means the user will get back, in the strategy, the label threshold below which 90% of the labels fall. An example usage in the strategy may look something like:

```python

def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:

    # ... #

    (
        dataframe["prediction"],
        dataframe["do_predict"],
        dataframe["target_upper_quantile"],
        dataframe["target_lower_quantile"],
    ) = self.freqai.start(dataframe, metadata, self)

    return dataframe

def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:

    buy_conditions = [
        (dataframe["prediction"] > dataframe["target_upper_quantile"]) & (dataframe["do_predict"] == 1)
    ]

    if buy_conditions:
        dataframe.loc[reduce(lambda x, y: x | y, buy_conditions), "buy"] = 1

    return dataframe
``` -->

## Additional information

### Common pitfalls

@ -2,9 +2,8 @@ import copy
import datetime
import logging
import shutil
import sqlite3
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Tuple

import numpy as np
import numpy.typing as npt

@ -88,20 +87,6 @@ class FreqaiDataKitchen:
                config["freqai"]["backtest_period_days"],
            )

        self.database_path: Optional[Path] = None

        if self.live:
            db_url = self.config.get('db_url', None)
            self.database_path = Path(db_url)
            if 'sqlite' not in self.database_path.parts[0]:
                self.database_path = None
                logger.warning('FreqAI database analyzer only available for sqlite dbs. '
                               ' FreqAI will still run, but user cannot use database analyzer.')
            else:
                self.database_name = Path(*self.database_path.parts[1:])

        self.trade_database_df: DataFrame = pd.DataFrame()

        self.data['extra_returns_per_train'] = self.freqai_config.get('extra_returns_per_train', {})
        self.thread_count = self.freqai_config.get("data_kitchen_thread_count", -1)
        self.train_dates: DataFrame = pd.DataFrame()

@ -1007,13 +992,6 @@ class FreqaiDataKitchen:
            f = spy.stats.norm.fit(self.data_dictionary["train_labels"][label])
            self.data["labels_mean"][label], self.data["labels_std"][label] = f[0], f[1]

        # KEEPME incase we want to let user start to grab quantiles.
        # upper_q = spy.stats.norm.ppf(self.freqai_config['feature_parameters'][
        #     'target_quantile'], *f)
        # lower_q = spy.stats.norm.ppf(1 - self.freqai_config['feature_parameters'][
        #     'target_quantile'], *f)
        # self.data["upper_quantile"] = upper_q
        # self.data["lower_quantile"] = lower_q
        return

    def remove_features_from_df(self, dataframe: DataFrame) -> DataFrame:

@ -1025,181 +1003,3 @@ class FreqaiDataKitchen:
            col for col in dataframe.columns if not col.startswith("%") or col.startswith("%%")
        ]
        return dataframe[to_keep]

    def get_current_trade_database(self) -> None:

        if self.database_path is None:
            logger.warning('No trade database found. Skipping analysis.')
            return

        data = sqlite3.connect(self.database_name)
        query = data.execute("SELECT * From trades")
        cols = [column[0] for column in query.description]
        df = pd.DataFrame.from_records(data=query.fetchall(), columns=cols)
        self.trade_database_df = df.dropna(subset='close_date')
        data.close()

    def np_encoder(self, object):
        if isinstance(object, np.generic):
            return object.item()

    # Functions containing useful data manipulation examples. but not actively in use.

    # Possibly phasing these outlier removal methods below out in favor of
    # use_SVM_to_remove_outliers (computationally more efficient and apparently higher performance).
    # But these have good data manipulation examples, so keep them commented here for now.

    # def determine_statistical_distributions(self) -> None:
    #     from fitter import Fitter

    #     logger.info('Determining best model for all features, may take some time')

    #     def compute_quantiles(ft):
    #         f = Fitter(self.data_dictionary["train_features"][ft],
    #                    distributions=['gamma', 'cauchy', 'laplace',
    #                                   'beta', 'uniform', 'lognorm'])
    #         f.fit()
    #         # f.summary()
    #         dist = list(f.get_best().items())[0][0]
    #         params = f.get_best()[dist]
    #         upper_q = getattr(spy.stats, list(f.get_best().items())[0][0]).ppf(0.999, **params)
    #         lower_q = getattr(spy.stats, list(f.get_best().items())[0][0]).ppf(0.001, **params)

    #         return ft, upper_q, lower_q, dist

    #     quantiles_tuple = Parallel(n_jobs=-1)(
    #         delayed(compute_quantiles)(ft) for ft in self.data_dictionary[
    #             'train_features'].columns)

    #     df = pd.DataFrame(quantiles_tuple, columns=['features', 'upper_quantiles',
    #                                                 'lower_quantiles', 'dist'])
    #     self.data_dictionary['upper_quantiles'] = df['upper_quantiles']
    #     self.data_dictionary['lower_quantiles'] = df['lower_quantiles']

    #     return

    # def remove_outliers(self, predict: bool) -> None:
    #     """
    #     Remove data that looks like an outlier based on the distribution of each
    #     variable.
    #     :params:
    #     :predict: boolean which tells the function if this is prediction data or
    #               training data coming in.
    #     """

    #     lower_quantile = self.data_dictionary["lower_quantiles"].to_numpy()
    #     upper_quantile = self.data_dictionary["upper_quantiles"].to_numpy()

    #     if predict:

    #         df = self.data_dictionary["prediction_features"][
    #             (self.data_dictionary["prediction_features"] < upper_quantile)
    #             & (self.data_dictionary["prediction_features"] > lower_quantile)
    #         ]
    #         drop_index = pd.isnull(df).any(1)
    #         self.data_dictionary["prediction_features"].fillna(0, inplace=True)
    #         drop_index = ~drop_index
    #         do_predict = np.array(drop_index.replace(True, 1).replace(False, 0))

    #         logger.info(
    #             "remove_outliers() tossed %s predictions",
    #             len(do_predict) - do_predict.sum(),
    #         )
    #         self.do_predict += do_predict
    #         self.do_predict -= 1

    #     else:

    #         filter_train_df = self.data_dictionary["train_features"][
    #             (self.data_dictionary["train_features"] < upper_quantile)
    #             & (self.data_dictionary["train_features"] > lower_quantile)
    #         ]
    #         drop_index = pd.isnull(filter_train_df).any(1)
    #         drop_index = drop_index.replace(True, 1).replace(False, 0)
    #         self.data_dictionary["train_features"] = self.data_dictionary["train_features"][
    #             (drop_index == 0)
    #         ]
    #         self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][
    #             (drop_index == 0)
    #         ]
    #         self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][
    #             (drop_index == 0)
    #         ]

    #         logger.info(
    #             f'remove_outliers() tossed {drop_index.sum()}'
    #             f' training points from {len(filter_train_df)}'
    #         )

    #         # do the same for the test data
    #         filter_test_df = self.data_dictionary["test_features"][
    #             (self.data_dictionary["test_features"] < upper_quantile)
    #             & (self.data_dictionary["test_features"] > lower_quantile)
    #         ]
    #         drop_index = pd.isnull(filter_test_df).any(1)
    #         drop_index = drop_index.replace(True, 1).replace(False, 0)
    #         self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][
    #             (drop_index == 0)
    #         ]
    #         self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
    #             (drop_index == 0)
    #         ]
    #         self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][
    #             (drop_index == 0)
    #         ]

    #         logger.info(
    #             f'remove_outliers() tossed {drop_index.sum()}'
    #             f' test points from {len(filter_test_df)}'
    #         )

    #     return

    # def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
    #     """
    #     standardize all data in the data_dictionary according to the training dataset
    #     :params:
    #     :data_dictionary: dictionary containing the cleaned and split training/test data/labels
    #     :returns:
    #     :data_dictionary: updated dictionary with standardized values.
    #     """
    #     # standardize the data by training stats
    #     train_mean = data_dictionary["train_features"].mean()
    #     train_std = data_dictionary["train_features"].std()
    #     data_dictionary["train_features"] = (
    #         data_dictionary["train_features"] - train_mean
    #     ) / train_std
    #     data_dictionary["test_features"] = (
    #         data_dictionary["test_features"] - train_mean
    #     ) / train_std

    #     train_labels_std = data_dictionary["train_labels"].std()
    #     train_labels_mean = data_dictionary["train_labels"].mean()
    #     data_dictionary["train_labels"] = (
    #         data_dictionary["train_labels"] - train_labels_mean
    #     ) / train_labels_std
    #     data_dictionary["test_labels"] = (
    #         data_dictionary["test_labels"] - train_labels_mean
    #     ) / train_labels_std

    #     for item in train_std.keys():
    #         self.data[item + "_std"] = train_std[item]
    #         self.data[item + "_mean"] = train_mean[item]

    #     self.data["labels_std"] = train_labels_std
    #     self.data["labels_mean"] = train_labels_mean

    #     return data_dictionary

    # def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame:
    #     """
    #     Normalizes a set of data using the mean and standard deviation from
    #     the associated training data.
    #     :params:
    #     :df: Dataframe to be standardized
    #     """

    #     for item in df.keys():
    #         df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"]

    #     return df

@ -491,9 +491,6 @@ class IFreqaiModel(ABC):

        model = self.train(unfiltered_dataframe, pair, dk)

        dk.get_current_trade_database()
        self.analyze_trade_database(dk, pair)

        self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
        dk.set_new_model_names(pair, new_trained_timerange)
        self.dd.pair_dict[pair]["first"] = False

@ -612,20 +609,3 @@ class IFreqaiModel(ABC):
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (i.e. SVM and/or DI index)
        """

    def analyze_trade_database(self, dk: FreqaiDataKitchen, pair: str) -> None:
        """
        User analyzes the trade database here and returns summary stats which will be passed back
        to the strategy for reinforcement learning or for additional adaptive metrics for use
        in entry/exit signals. Store these metrics in dk.data['extra_returns_per_train'] and
        they will format themselves into the dataframe as an additional column in the user
        strategy. User has access to the current trade database in dk.trade_database_df.
        """
        # if dk.trade_database_df.empty:
        #     logger.warning(f'No trades found for {pair} to analyze DB')
        #     return

        # total_profit = dk.trade_database_df['close_profit_abs'].sum()
        # dk.data['extra_returns_per_train']['total_profit'] = total_profit

        return