From 4b28d0495f46337700686f1b430827336285511a Mon Sep 17 00:00:00 2001 From: robcaulk Date: Tue, 6 Sep 2022 19:46:58 +0200 Subject: [PATCH] fix timestamping, move imports, add words to doc --- docs/freqai.md | 2 +- freqtrade/freqai/data_kitchen.py | 26 ++++++++++++-------------- freqtrade/freqai/freqai_interface.py | 15 +++++++-------- tests/freqai/conftest.py | 12 +++++------- 4 files changed, 25 insertions(+), 30 deletions(-) diff --git a/docs/freqai.md b/docs/freqai.md index d504c93d6..a8379106a 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -116,7 +116,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi | `outlier_protection_percentage` | If more than `outlier_protection_percentage` fraction of points are removed as outliers, FreqAI will log a warning message and ignore outlier detection while keeping the original dataset intact.
**Datatype:** float. Default: `30` | `reverse_train_test_order` | If true, FreqAI will train on the latest data split and test on historical split of the data. This allows the model to be trained up to the most recent data point, while avoiding overfitting. However, users should be careful to understand unorthodox nature of this parameter before employing it.
**Datatype:** bool. Default: False | `inlier_metric_window` | If set, FreqAI will add the `inlier_metric` to the training feature set and set the lookback to be the `inlier_metric_window`. Details of how the `inlier_metric` is computed can be found [here](#using-the-inliermetric)
**Datatype:** int. Default: 0 -| `noise_standard_deviation` | If > 0, FreqAI adds noise to the training features. FreqAI generates random deviates from a gaussian distribution with a standard deviation of `noise_standard_deviation` and adds them to all data points. Value should be kept relative to the normalized space between -1 and 1). In other words, since data is always normalized between -1 and 1 in FreqAI, the user can expect a `noise_standard_deviation: 0.05` to see 32% of data randomly increased/decreased by more than 2.5%. Good for preventing overfitting.
**Datatype:** int. Default: 0 +| `noise_standard_deviation` | If > 0, FreqAI adds noise to the training features. FreqAI generates random deviates from a Gaussian distribution with a standard deviation of `noise_standard_deviation` and adds them to all data points. The value should be chosen relative to the normalized space between -1 and 1. In other words, since data is always normalized between -1 and 1 in FreqAI, the user can expect `noise_standard_deviation: 0.05` to result in 32% of the data being randomly increased/decreased by more than 2.5% (i.e. the percent of data falling outside the first standard deviation). Good for preventing overfitting.
**Datatype:** float. Default: 0 | | **Data split parameters** | `data_split_parameters` | Include any additional parameters available from Scikit-learn `test_train_split()`, which are shown [here](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html) (external website).
**Datatype:** Dictionary. | `test_size` | Fraction of data that should be used for testing instead of training.
**Datatype:** Positive float < 1. diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index fce9e8480..8ef2d6aea 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -2,12 +2,14 @@ import copy import logging import shutil from datetime import datetime, timezone +from math import cos, sin from pathlib import Path from typing import Any, Dict, List, Tuple import numpy as np import numpy.typing as npt import pandas as pd +import scipy.stats as stats from pandas import DataFrame from sklearn import linear_model from sklearn.cluster import DBSCAN @@ -401,8 +403,8 @@ class FreqaiDataKitchen: timerange_train.stopts = timerange_train.startts + train_period_days first = False - start = datetime.utcfromtimestamp(timerange_train.startts) - stop = datetime.utcfromtimestamp(timerange_train.stopts) + start = datetime.fromtimestamp(timerange_train.startts, tz=timezone.utc) + stop = datetime.fromtimestamp(timerange_train.stopts, tz=timezone.utc) tr_training_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")) tr_training_list_timerange.append(copy.deepcopy(timerange_train)) @@ -415,8 +417,8 @@ class FreqaiDataKitchen: if timerange_backtest.stopts > config_timerange.stopts: timerange_backtest.stopts = config_timerange.stopts - start = datetime.utcfromtimestamp(timerange_backtest.startts) - stop = datetime.utcfromtimestamp(timerange_backtest.stopts) + start = datetime.fromtimestamp(timerange_backtest.startts, tz=timezone.utc) + stop = datetime.fromtimestamp(timerange_backtest.stopts, tz=timezone.utc) tr_backtesting_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")) tr_backtesting_list_timerange.append(copy.deepcopy(timerange_backtest)) @@ -630,8 +632,6 @@ class FreqaiDataKitchen: is an outlier. """ - from math import cos, sin - if predict: if not self.data['DBSCAN_eps']: return @@ -732,8 +732,6 @@ class FreqaiDataKitchen: into previous timepoints. 
""" - import scipy.stats as ss - no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"] if set_ == 'train': @@ -778,8 +776,8 @@ class FreqaiDataKitchen: inliers = pd.DataFrame(index=distances.index) for key in distances.keys(): current_distances = distances[key].dropna() - fit_params = ss.weibull_min.fit(current_distances) - quantiles = ss.weibull_min.cdf(current_distances, *fit_params) + fit_params = stats.weibull_min.fit(current_distances) + quantiles = stats.weibull_min.cdf(current_distances, *fit_params) df_inlier = pd.DataFrame( {key: quantiles}, index=distances.index @@ -794,8 +792,8 @@ class FreqaiDataKitchen: index=compute_df.index ) - inlier_metric = 2 * (inlier_metric - inlier_metric.min()) / \ - (inlier_metric.max() - inlier_metric.min()) - 1 + inlier_metric = (2 * (inlier_metric - inlier_metric.min()) / + (inlier_metric.max() - inlier_metric.min()) - 1) if set_ in ('train', 'test'): inlier_metric = inlier_metric.iloc[no_prev_pts:] @@ -956,8 +954,8 @@ class FreqaiDataKitchen: backtest_timerange.startts = ( backtest_timerange.startts - backtest_period_days * SECONDS_IN_DAY ) - start = datetime.utcfromtimestamp(backtest_timerange.startts) - stop = datetime.utcfromtimestamp(backtest_timerange.stopts) + start = datetime.fromtimestamp(backtest_timerange.startts, tz=timezone.utc) + stop = datetime.fromtimestamp(backtest_timerange.stopts, tz=timezone.utc) full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d") self.full_path = Path( diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index fd0554248..9b3e853ef 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -1,10 +1,9 @@ -# import contextlib import logging import shutil import threading import time from abc import ABC, abstractmethod -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path from threading import Lock from typing import Any, Dict, 
Tuple @@ -214,12 +213,12 @@ class IFreqaiModel(ABC): dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) trained_timestamp = tr_train - tr_train_startts_str = datetime.utcfromtimestamp(tr_train.startts).strftime( - "%Y-%m-%d %H:%M:%S" - ) - tr_train_stopts_str = datetime.utcfromtimestamp(tr_train.stopts).strftime( - "%Y-%m-%d %H:%M:%S" - ) + tr_train_startts_str = datetime.fromtimestamp( + tr_train.startts, + tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S") + tr_train_stopts_str = datetime.fromtimestamp( + tr_train.stopts, + tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S") logger.info( f"Training {metadata['pair']}, {self.pair_it}/{self.total_pairs} pairs" f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} " diff --git a/tests/freqai/conftest.py b/tests/freqai/conftest.py index 6528347e8..ffdc52ebc 100644 --- a/tests/freqai/conftest.py +++ b/tests/freqai/conftest.py @@ -93,12 +93,11 @@ def make_unfiltered_dataframe(mocker, freqai_conf): freqai.live = True freqai.dk = FreqaiDataKitchen(freqai_conf) freqai.dk.pair = "ADA/BTC" - timerange = TimeRange.parse_timerange("20180110-20180130") - freqai.dd.load_all_pair_histories(timerange, freqai.dk) + data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(data_load_timerange, freqai.dk) freqai.dd.pair_dict = MagicMock() - - data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + new_timerange = TimeRange.parse_timerange("20180120-20180130") corr_dataframes, base_dataframes = freqai.dd.get_base_and_corr_dataframes( @@ -125,12 +124,11 @@ def make_data_dictionary(mocker, freqai_conf): freqai.live = True freqai.dk = FreqaiDataKitchen(freqai_conf) freqai.dk.pair = "ADA/BTC" - timerange = TimeRange.parse_timerange("20180110-20180130") - freqai.dd.load_all_pair_histories(timerange, freqai.dk) + data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(data_load_timerange, 
freqai.dk) freqai.dd.pair_dict = MagicMock() - data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") corr_dataframes, base_dataframes = freqai.dd.get_base_and_corr_dataframes(