Merge pull request #7759 from wizrds/fix/pd-mem-leak

Fix Pandas to_json memory leak
This commit is contained in:
Matthias 2022-11-18 20:24:16 +01:00 committed by GitHub
commit 4de9a46618
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 33 additions and 21 deletions

View File

@ -10,7 +10,8 @@ from typing import Any, Dict, Iterator, List, Mapping, Union
from typing.io import IO
from urllib.parse import urlparse
import pandas
import orjson
import pandas as pd
import rapidjson
from freqtrade.constants import DECIMAL_PER_COIN_FALLBACK, DECIMALS_PER_COIN
@ -256,29 +257,37 @@ def parse_db_uri_for_logging(uri: str):
return parsed_db_uri.geturl().replace(f':{pwd}@', ':*****@')
def dataframe_to_json(dataframe: pandas.DataFrame) -> str:
def dataframe_to_json(dataframe: pd.DataFrame) -> str:
"""
Serialize a DataFrame for transmission over the wire using JSON
:param dataframe: A pandas DataFrame
:returns: A JSON string of the pandas DataFrame
"""
return dataframe.to_json(orient='split')
# https://github.com/pandas-dev/pandas/issues/24889
# https://github.com/pandas-dev/pandas/issues/40443
# We need to convert to a dict to avoid mem leak
def default(z):
if isinstance(z, pd.Timestamp):
return z.timestamp() * 1e3
raise TypeError
return str(orjson.dumps(dataframe.to_dict(orient='split'), default=default), 'utf-8')
def json_to_dataframe(data: str) -> pandas.DataFrame:
def json_to_dataframe(data: str) -> pd.DataFrame:
"""
Deserialize JSON into a DataFrame
:param data: A JSON string
:returns: A pandas DataFrame from the JSON string
"""
dataframe = pandas.read_json(data, orient='split')
dataframe = pd.read_json(data, orient='split')
if 'date' in dataframe.columns:
dataframe['date'] = pandas.to_datetime(dataframe['date'], unit='ms', utc=True)
dataframe['date'] = pd.to_datetime(dataframe['date'], unit='ms', utc=True)
return dataframe
def remove_entry_exit_signals(dataframe: pandas.DataFrame):
def remove_entry_exit_signals(dataframe: pd.DataFrame):
"""
Remove Entry and Exit signals from a DataFrame

View File

@ -77,21 +77,24 @@ class WebSocketChannel:
# until self.drain_timeout for the relay to drain the outgoing queue
# We can't use asyncio.wait_for here because the queue may have been created with a
# different eventloop
start = time.time()
while self.queue.full():
await asyncio.sleep(1)
if (time.time() - start) > self.drain_timeout:
if not self.is_closed():
start = time.time()
while self.queue.full():
await asyncio.sleep(1)
if (time.time() - start) > self.drain_timeout:
return False
# If for some reason the queue is still full, just return False
try:
self.queue.put_nowait(data)
except asyncio.QueueFull:
return False
# If for some reason the queue is still full, just return False
try:
self.queue.put_nowait(data)
except asyncio.QueueFull:
# If we got here everything is ok
return True
else:
return False
# If we got here everything is ok
return True
async def recv(self):
"""
Receive data on the wrapped websocket
@ -109,14 +112,14 @@ class WebSocketChannel:
Close the WebSocketChannel
"""
self._closed.set()
self._relay_task.cancel()
try:
await self.raw_websocket.close()
except Exception:
pass
self._closed.set()
self._relay_task.cancel()
def is_closed(self) -> bool:
"""
Closed flag