fix timestamping, move imports, add words to doc

This commit is contained in:
robcaulk 2022-09-06 19:46:58 +02:00
parent fa8d5b9834
commit 4b28d0495f
4 changed files with 25 additions and 30 deletions

View File

@ -116,7 +116,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi
| `outlier_protection_percentage` | If more than `outlier_protection_percentage` percent of the points are removed as outliers, FreqAI will log a warning message and ignore outlier detection while keeping the original dataset intact. <br> **Datatype:** float. Default: `30` | `outlier_protection_percentage` | If more than `outlier_protection_percentage` percent of the points are removed as outliers, FreqAI will log a warning message and ignore outlier detection while keeping the original dataset intact. <br> **Datatype:** float. Default: `30`
| `reverse_train_test_order` | If true, FreqAI will train on the latest data split and test on the historical split of the data. This allows the model to be trained up to the most recent data point, while avoiding overfitting. However, users should be careful to understand the unorthodox nature of this parameter before employing it. <br> **Datatype:** bool. Default: False | `reverse_train_test_order` | If true, FreqAI will train on the latest data split and test on the historical split of the data. This allows the model to be trained up to the most recent data point, while avoiding overfitting. However, users should be careful to understand the unorthodox nature of this parameter before employing it. <br> **Datatype:** bool. Default: False
| `inlier_metric_window` | If set, FreqAI will add the `inlier_metric` to the training feature set and set the lookback to be the `inlier_metric_window`. Details of how the `inlier_metric` is computed can be found [here](#using-the-inliermetric) <br> **Datatype:** int. Default: 0 | `inlier_metric_window` | If set, FreqAI will add the `inlier_metric` to the training feature set and set the lookback to be the `inlier_metric_window`. Details of how the `inlier_metric` is computed can be found [here](#using-the-inliermetric) <br> **Datatype:** int. Default: 0
| `noise_standard_deviation` | If > 0, FreqAI adds noise to the training features. FreqAI generates random deviates from a Gaussian distribution with a standard deviation of `noise_standard_deviation` and adds them to all data points. The value should be kept relative to the normalized space between -1 and 1. In other words, since data is always normalized between -1 and 1 in FreqAI, the user can expect a `noise_standard_deviation: 0.05` to see 32% of data randomly increased/decreased by more than 2.5%. Good for preventing overfitting. <br> **Datatype:** float. Default: 0 | `noise_standard_deviation` | If > 0, FreqAI adds noise to the training features. FreqAI generates random deviates from a Gaussian distribution with a standard deviation of `noise_standard_deviation` and adds them to all data points. The value should be kept relative to the normalized space between -1 and 1. In other words, since data is always normalized between -1 and 1 in FreqAI, the user can expect a `noise_standard_deviation: 0.05` to see 32% of data randomly increased/decreased by more than 2.5% (i.e. the percent of data falling outside the first standard deviation). Good for preventing overfitting. <br> **Datatype:** float. Default: 0
| | **Data split parameters** | | **Data split parameters**
| `data_split_parameters` | Include any additional parameters available from Scikit-learn `train_test_split()`, which are shown [here](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html) (external website). <br> **Datatype:** Dictionary. | `data_split_parameters` | Include any additional parameters available from Scikit-learn `train_test_split()`, which are shown [here](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html) (external website). <br> **Datatype:** Dictionary.
| `test_size` | Fraction of data that should be used for testing instead of training. <br> **Datatype:** Positive float < 1. | `test_size` | Fraction of data that should be used for testing instead of training. <br> **Datatype:** Positive float < 1.

View File

@ -2,12 +2,14 @@ import copy
import logging import logging
import shutil import shutil
from datetime import datetime, timezone from datetime import datetime, timezone
from math import cos, sin
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Tuple from typing import Any, Dict, List, Tuple
import numpy as np import numpy as np
import numpy.typing as npt import numpy.typing as npt
import pandas as pd import pandas as pd
import scipy.stats as stats
from pandas import DataFrame from pandas import DataFrame
from sklearn import linear_model from sklearn import linear_model
from sklearn.cluster import DBSCAN from sklearn.cluster import DBSCAN
@ -401,8 +403,8 @@ class FreqaiDataKitchen:
timerange_train.stopts = timerange_train.startts + train_period_days timerange_train.stopts = timerange_train.startts + train_period_days
first = False first = False
start = datetime.utcfromtimestamp(timerange_train.startts) start = datetime.fromtimestamp(timerange_train.startts, tz=timezone.utc)
stop = datetime.utcfromtimestamp(timerange_train.stopts) stop = datetime.fromtimestamp(timerange_train.stopts, tz=timezone.utc)
tr_training_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")) tr_training_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
tr_training_list_timerange.append(copy.deepcopy(timerange_train)) tr_training_list_timerange.append(copy.deepcopy(timerange_train))
@ -415,8 +417,8 @@ class FreqaiDataKitchen:
if timerange_backtest.stopts > config_timerange.stopts: if timerange_backtest.stopts > config_timerange.stopts:
timerange_backtest.stopts = config_timerange.stopts timerange_backtest.stopts = config_timerange.stopts
start = datetime.utcfromtimestamp(timerange_backtest.startts) start = datetime.fromtimestamp(timerange_backtest.startts, tz=timezone.utc)
stop = datetime.utcfromtimestamp(timerange_backtest.stopts) stop = datetime.fromtimestamp(timerange_backtest.stopts, tz=timezone.utc)
tr_backtesting_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")) tr_backtesting_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
tr_backtesting_list_timerange.append(copy.deepcopy(timerange_backtest)) tr_backtesting_list_timerange.append(copy.deepcopy(timerange_backtest))
@ -630,8 +632,6 @@ class FreqaiDataKitchen:
is an outlier. is an outlier.
""" """
from math import cos, sin
if predict: if predict:
if not self.data['DBSCAN_eps']: if not self.data['DBSCAN_eps']:
return return
@ -732,8 +732,6 @@ class FreqaiDataKitchen:
into previous timepoints. into previous timepoints.
""" """
import scipy.stats as ss
no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"] no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"]
if set_ == 'train': if set_ == 'train':
@ -778,8 +776,8 @@ class FreqaiDataKitchen:
inliers = pd.DataFrame(index=distances.index) inliers = pd.DataFrame(index=distances.index)
for key in distances.keys(): for key in distances.keys():
current_distances = distances[key].dropna() current_distances = distances[key].dropna()
fit_params = ss.weibull_min.fit(current_distances) fit_params = stats.weibull_min.fit(current_distances)
quantiles = ss.weibull_min.cdf(current_distances, *fit_params) quantiles = stats.weibull_min.cdf(current_distances, *fit_params)
df_inlier = pd.DataFrame( df_inlier = pd.DataFrame(
{key: quantiles}, index=distances.index {key: quantiles}, index=distances.index
@ -794,8 +792,8 @@ class FreqaiDataKitchen:
index=compute_df.index index=compute_df.index
) )
inlier_metric = 2 * (inlier_metric - inlier_metric.min()) / \ inlier_metric = (2 * (inlier_metric - inlier_metric.min()) /
(inlier_metric.max() - inlier_metric.min()) - 1 (inlier_metric.max() - inlier_metric.min()) - 1)
if set_ in ('train', 'test'): if set_ in ('train', 'test'):
inlier_metric = inlier_metric.iloc[no_prev_pts:] inlier_metric = inlier_metric.iloc[no_prev_pts:]
@ -956,8 +954,8 @@ class FreqaiDataKitchen:
backtest_timerange.startts = ( backtest_timerange.startts = (
backtest_timerange.startts - backtest_period_days * SECONDS_IN_DAY backtest_timerange.startts - backtest_period_days * SECONDS_IN_DAY
) )
start = datetime.utcfromtimestamp(backtest_timerange.startts) start = datetime.fromtimestamp(backtest_timerange.startts, tz=timezone.utc)
stop = datetime.utcfromtimestamp(backtest_timerange.stopts) stop = datetime.fromtimestamp(backtest_timerange.stopts, tz=timezone.utc)
full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d") full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
self.full_path = Path( self.full_path = Path(

View File

@ -1,10 +1,9 @@
# import contextlib
import logging import logging
import shutil import shutil
import threading import threading
import time import time
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from datetime import datetime from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from threading import Lock from threading import Lock
from typing import Any, Dict, Tuple from typing import Any, Dict, Tuple
@ -214,12 +213,12 @@ class IFreqaiModel(ABC):
dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)
trained_timestamp = tr_train trained_timestamp = tr_train
tr_train_startts_str = datetime.utcfromtimestamp(tr_train.startts).strftime( tr_train_startts_str = datetime.fromtimestamp(
"%Y-%m-%d %H:%M:%S" tr_train.startts,
) tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
tr_train_stopts_str = datetime.utcfromtimestamp(tr_train.stopts).strftime( tr_train_stopts_str = datetime.fromtimestamp(
"%Y-%m-%d %H:%M:%S" tr_train.stopts,
) tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
logger.info( logger.info(
f"Training {metadata['pair']}, {self.pair_it}/{self.total_pairs} pairs" f"Training {metadata['pair']}, {self.pair_it}/{self.total_pairs} pairs"
f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} " f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} "

View File

@ -93,12 +93,11 @@ def make_unfiltered_dataframe(mocker, freqai_conf):
freqai.live = True freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf) freqai.dk = FreqaiDataKitchen(freqai_conf)
freqai.dk.pair = "ADA/BTC" freqai.dk.pair = "ADA/BTC"
timerange = TimeRange.parse_timerange("20180110-20180130") data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
freqai.dd.load_all_pair_histories(timerange, freqai.dk) freqai.dd.load_all_pair_histories(data_load_timerange, freqai.dk)
freqai.dd.pair_dict = MagicMock() freqai.dd.pair_dict = MagicMock()
data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130")
corr_dataframes, base_dataframes = freqai.dd.get_base_and_corr_dataframes( corr_dataframes, base_dataframes = freqai.dd.get_base_and_corr_dataframes(
@ -125,12 +124,11 @@ def make_data_dictionary(mocker, freqai_conf):
freqai.live = True freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf) freqai.dk = FreqaiDataKitchen(freqai_conf)
freqai.dk.pair = "ADA/BTC" freqai.dk.pair = "ADA/BTC"
timerange = TimeRange.parse_timerange("20180110-20180130") data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
freqai.dd.load_all_pair_histories(timerange, freqai.dk) freqai.dd.load_all_pair_histories(data_load_timerange, freqai.dk)
freqai.dd.pair_dict = MagicMock() freqai.dd.pair_dict = MagicMock()
data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130")
corr_dataframes, base_dataframes = freqai.dd.get_base_and_corr_dataframes( corr_dataframes, base_dataframes = freqai.dd.get_base_and_corr_dataframes(