386 lines
13 KiB
Python
386 lines
13 KiB
Python
"""
|
|
This module manages replicate mode communication
|
|
"""
|
|
import asyncio
|
|
import logging
|
|
import secrets
|
|
import socket
|
|
from threading import Thread
|
|
from typing import Any, Coroutine, Dict, Union
|
|
|
|
import websockets
|
|
from fastapi import Depends
|
|
from fastapi import WebSocket as FastAPIWebSocket
|
|
from fastapi import WebSocketDisconnect, status
|
|
|
|
from freqtrade.enums import LeaderMessageType, ReplicateModeType, RPCMessageType
|
|
from freqtrade.rpc import RPC, RPCHandler
|
|
from freqtrade.rpc.replicate.channel import ChannelManager
|
|
from freqtrade.rpc.replicate.thread_queue import Queue as ThreadedQueue
|
|
from freqtrade.rpc.replicate.utils import is_websocket_alive
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ReplicateController(RPCHandler):
|
|
""" This class handles all websocket communication """
|
|
|
|
def __init__(
    self,
    rpc: RPC,
    config: Dict[str, Any],
    api_server: Union[Any, None] = None
) -> None:
    """
    Set up the replicate controller and start it in the configured mode.

    Initialises the RPCHandler base class, reads the ``replicate`` section
    of the configuration, starts the background event loop thread and then
    launches either the leader or the follower logic.

    :param rpc: instance of RPC Helper class
    :param config: Configuration object
    :param api_server: the ApiServer instance; required despite the default
    :raises RuntimeError: if no API server was supplied
    :raises ValueError: if follower mode is configured without any leader
    :return: None
    """
    super().__init__(rpc, config)

    self.api_server = api_server
    if not self.api_server:
        raise RuntimeError("The API server must be enabled for replicate to work")

    # State of the background event loop; populated by start_threaded_loop()
    # and, for the queue, by the leader broadcast coroutine.
    self._loop = None
    self._running = False
    self._thread = None
    self._queue = None

    self.channel_manager = ChannelManager()

    # What the config should look like
    # "replicate": {
    #     "enabled": true,
    #     "mode": "follower",
    #     "leaders": [
    #         {
    #             "url": "ws://localhost:8080/replicate/ws",
    #             "token": "test"
    #         }
    #     ]
    # }

    # "replicate": {
    #     "enabled": true,
    #     "mode": "leader",
    #     "api_key": "test"
    # }
    settings = config.get('replicate', {})
    self.replicate_config = settings

    # The mode is looked up by member name, defaulting to leader.
    mode_name = settings.get('mode', 'leader').lower()
    self.mode = ReplicateModeType[mode_name]

    self.leaders_list = settings.get('leaders', [])
    self.push_throttle_secs = settings.get('push_throttle_secs', 1)

    # Follower-side timing knobs (seconds)
    self.reply_timeout = settings.get('follower_reply_timeout', 10)
    self.ping_timeout = settings.get('follower_ping_timeout', 2)
    self.sleep_time = settings.get('follower_sleep_time', 1)

    is_follower = self.mode == ReplicateModeType.follower
    if is_follower and len(self.leaders_list) == 0:
        raise ValueError("You must specify at least 1 leader in follower mode.")

    # This is only used by the leader, the followers use the tokens specified
    # in each of the leaders
    # If you do not specify an API key in the config, one will be randomly
    # generated and logged on startup
    fallback_key = secrets.token_urlsafe(16)
    self.secret_api_key = settings.get('api_key', fallback_key)

    self.start_threaded_loop()

    if self.mode == ReplicateModeType.follower:
        self.start_follower_mode()
    elif self.mode == ReplicateModeType.leader:
        self.start_leader_mode()
|
|
|
|
def start_threaded_loop(self):
    """
    Start the main internal loop in another thread to run coroutines

    :raises RuntimeError: if a loop thread has already been started
    """
    # Guard first: creating the new loop before this check would overwrite
    # the reference to the loop the existing thread is running, and leak
    # the freshly created one.
    if self._thread:
        raise RuntimeError("A loop is already running")

    self._loop = asyncio.new_event_loop()
    self._thread = Thread(target=self._loop.run_forever)
    self._thread.start()
    self._running = True
|
|
|
|
def submit_coroutine(self, coroutine: Coroutine):
    """
    Schedule a coroutine on the background event loop.

    :param coroutine: the coroutine object to run in the threaded loop
    :raises RuntimeError: if the controller was shut down, or the loop has
        not been started
    :return: the future returned by ``asyncio.run_coroutine_threadsafe``,
        or None when scheduling itself failed
    """
    if not self._running:
        raise RuntimeError("Cannot schedule new futures after shutdown")

    loop = self._loop
    if not (loop and loop.is_running()):
        raise RuntimeError("Loop must be started before any function can be submitted")

    logger.debug(f"Running coroutine {coroutine!r} in loop")
    try:
        future = asyncio.run_coroutine_threadsafe(coroutine, loop)
    except Exception as exc:
        # Scheduling failures are logged and reported as None
        logger.error(f"Error running coroutine - {exc!s}")
        return None
    return future
|
|
|
|
def cleanup(self) -> None:
    """
    Cleanup pending module resources.

    Cancels every task running on the background loop, disconnects all
    channels, stops the loop and waits for its thread to finish. Does
    nothing when the loop thread was never started.
    """
    import inspect

    if not self._thread:
        return

    loop = self._loop
    if loop is not None and loop.is_running():
        self._running = False

        async def _shutdown() -> None:
            # Runs on the loop thread: asyncio tasks are not thread-safe, so
            # they must not be enumerated or cancelled from the caller's
            # thread.
            me = asyncio.current_task()
            pending = [task for task in asyncio.all_tasks() if task is not me]
            for task in pending:
                task.cancel()
            if pending:
                # Give the tasks a chance to process their cancellation
                # before the loop is stopped.
                await asyncio.gather(*pending, return_exceptions=True)

            # disconnect_all may be a plain function or a coroutine function
            # (its definition is not visible here), so support both.
            outcome = self.channel_manager.disconnect_all()
            if inspect.isawaitable(outcome):
                await outcome

        try:
            asyncio.run_coroutine_threadsafe(_shutdown(), loop).result()
        except Exception as e:
            logger.error("Exception occurred while shutting down replicate loop: ")
            logger.exception(e)
        finally:
            # This must be called threadsafe, otherwise would not work
            loop.call_soon_threadsafe(loop.stop)

    self._thread.join()
|
|
|
|
def send_msg(self, msg: Dict[str, Any]) -> None:
    """
    Forward an RPC message to the followers.

    Only messages of type ``RPCMessageType.EMIT_DATA`` are queued for
    broadcast; every other message type is ignored.

    :param msg: RPC message; EMIT_DATA messages must carry ``data_type``
        and ``data`` keys
    """
    if msg["type"] != RPCMessageType.EMIT_DATA:
        return

    payload = {
        "type": msg["data_type"],
        "content": msg["data"],
    }
    self._send_message(payload)
|
|
|
|
# ----------------------- LEADER LOGIC ------------------------------
|
|
|
|
def start_leader_mode(self):
    """
    Announce leader mode, register the websocket endpoint and schedule the
    leader coroutine on the background loop.
    """
    separator = "-" * 15

    logger.info("Running rpc.replicate in Leader mode")
    # NOTE: the API key is written to the log in plain text, framed by
    # separator lines.
    for line in (separator, f"API_KEY: {self.secret_api_key}", separator):
        logger.info(line)

    self.register_leader_endpoint()

    leader_coro = self.leader_loop()
    self.submit_coroutine(leader_coro)
|
|
|
|
async def leader_loop(self):
    """
    Entry coroutine for leader mode.

    For now its only job is to broadcast queued data to the followers.
    Any exception raised while doing so is logged rather than propagated.
    """
    try:
        await self._broadcast_queue_data()
    except Exception as exc:
        logger.error("Exception occurred in leader loop: ")
        logger.exception(exc)
|
|
|
|
def _send_message(self, data: Dict[Any, Any]):
|
|
"""
|
|
Add data to the internal queue to be broadcasted. This func will block
|
|
if the queue is full. This is meant to be called in the main thread.
|
|
"""
|
|
|
|
if self._queue:
|
|
self._queue.put(data)
|
|
else:
|
|
logger.warning("Can not send data, leader loop has not started yet!")
|
|
|
|
async def _broadcast_queue_data(self):
    """
    Drain the internal queue, broadcasting every item to all channels.

    Runs while the controller is marked as running, pausing for
    ``push_throttle_secs`` after each broadcast. Cancellation ends the
    coroutine quietly.
    """
    # Instantiate the queue in this coroutine so it's attached to our loop
    self._queue = ThreadedQueue()
    outbox = self._queue.async_q

    try:
        while self._running:
            # Wait for the next item, push it to everyone, then throttle
            payload = await outbox.get()
            await self.channel_manager.broadcast(payload)
            await asyncio.sleep(self.push_throttle_secs)
    except asyncio.CancelledError:
        # Silently stop
        return
|
|
|
|
async def get_api_token(
    self,
    websocket: FastAPIWebSocket,
    token: Union[str, None] = None
):
    """
    Extract the API key from query param. Must match the
    set secret_api_key or the websocket connection will be closed.

    :param websocket: the incoming websocket connection
    :param token: the value of the ``token`` query parameter, if any
    :return: the token when it matches; otherwise None, after the
        websocket was closed with a policy-violation code
    """
    expected = self.secret_api_key

    # The token comes from an untrusted client: compare in constant time so
    # the check does not leak how much of the key matched. Both sides are
    # encoded to bytes because compare_digest rejects non-ASCII str values.
    if (
        isinstance(token, str)
        and isinstance(expected, str)
        and secrets.compare_digest(token.encode("utf-8"), expected.encode("utf-8"))
    ):
        return token

    logger.info("Denying websocket request...")
    await websocket.close(code=status.WS_1008_POLICY_VIOLATION)
|
|
|
|
def register_leader_endpoint(self, path: str = "/replicate/ws"):
    """
    Register the leader websocket endpoint on the ApiServer application.

    :param path: The endpoint path
    :raises RuntimeError: if no ApiServer is available
    """
    if not self.api_server:
        raise RuntimeError("The leader needs the ApiServer to be active")

    app = self.api_server.app

    # Every accepted connection is handed to leader_endpoint_loop; the
    # api_key dependency runs get_api_token for each connection.
    @app.websocket(path)
    async def leader_endpoint(
        websocket: FastAPIWebSocket,
        api_key: str = Depends(self.get_api_token)
    ):
        await self.leader_endpoint_loop(websocket)
|
|
|
|
async def leader_endpoint_loop(self, websocket: FastAPIWebSocket):
    """
    Serve one follower connection on the leader's websocket endpoint.

    Registers the websocket with the channel manager and keeps receiving
    from the channel until it is closed. Disconnects and errors are logged
    and the websocket is removed from the channel manager.

    :param websocket: the follower's websocket connection
    """
    try:
        # Nothing to serve if the connection is already gone
        if not is_websocket_alive(websocket):
            return

        logger.info(f"Follower connected - {websocket.client}")
        channel = await self.channel_manager.on_connect(websocket)

        # Send initial data here

        # Keep connection open until explicitly closed, and sleep
        try:
            while not channel.is_closed():
                await channel.recv()

        except WebSocketDisconnect:
            # Handle client disconnects
            logger.info(f"Follower disconnected - {websocket.client}")
            await self.channel_manager.on_disconnect(websocket)
        except Exception as exc:
            logger.info(f"Follower connection failed - {websocket.client}")
            logger.exception(exc)
            # Handle cases like -
            # RuntimeError('Cannot call "send" once a closed message has been sent')
            await self.channel_manager.on_disconnect(websocket)

    except Exception:
        # Last resort: anything raised above, including by the handlers
        logger.error(f"Failed to serve - {websocket.client}")
        await self.channel_manager.on_disconnect(websocket)
|
|
|
|
# -------------------------------FOLLOWER LOGIC----------------------------
|
|
|
|
def start_follower_mode(self):
    """
    Run the ReplicateController as a follower by scheduling the follower
    coroutine on the background loop.
    """
    logger.info("Starting rpc.replicate in Follower mode")

    follower_coro = self.follower_loop()
    self.submit_coroutine(follower_coro)
|
|
|
|
async def follower_loop(self):
    """
    Entry coroutine for follower mode.

    Kicks off the connections to all configured leaders. Any exception
    raised while doing so is logged rather than propagated.
    """
    try:
        await self._connect_to_leaders()
    except Exception as exc:
        logger.error("Exception occurred in follower loop: ")
        logger.exception(exc)
|
|
|
|
async def _connect_to_leaders(self):
|
|
rpc_lock = asyncio.Lock()
|
|
|
|
logger.info("Starting connections to Leaders...")
|
|
await asyncio.wait(
|
|
[
|
|
self._handle_leader_connection(leader, rpc_lock)
|
|
for leader in self.leaders_list
|
|
]
|
|
)
|
|
|
|
async def _handle_leader_connection(self, leader, lock):
    """
    Given a leader, connect and wait on data. If connection is lost,
    it will attempt to reconnect.

    :param leader: leader configuration with ``url`` and ``token`` keys
    :param lock: asyncio lock held while a received message is handled
    """
    url, token = leader["url"], leader["token"]

    websocket_url = f"{url}?token={token}"

    logger.info(f"Attempting to connect to leader at: {url}")
    # TODO: limit the amount of connection retries
    while True:
        try:
            async with websockets.connect(websocket_url) as ws:
                channel = await self.channel_manager.on_connect(ws)
                # NOTE(review): the channel is not removed from the channel
                # manager when this connection is dropped - confirm whether
                # on_disconnect should be called before reconnecting.
                while True:
                    try:
                        data = await asyncio.wait_for(
                            channel.recv(),
                            timeout=self.reply_timeout
                        )
                    except (asyncio.TimeoutError, websockets.exceptions.ConnectionClosed):
                        # We haven't received data yet. Just check the connection and continue.
                        try:
                            # ping
                            ping = await channel.ping()
                            await asyncio.wait_for(ping, timeout=self.ping_timeout)
                            logger.info(f"Connection to {url} still alive...")
                            continue
                        except Exception:
                            logger.info(f"Ping error {url} - retrying in {self.sleep_time}s")
                            # The sleep must be awaited, otherwise the
                            # coroutine is discarded and no back-off happens.
                            await asyncio.sleep(self.sleep_time)
                            # Leave the receive loop; the outer loop reconnects
                            break

                    # asyncio.Lock only supports "async with"; a plain
                    # "with" statement fails at runtime.
                    async with lock:
                        await self._handle_leader_message(data)

        except socket.gaierror:
            logger.info(f"Socket error - retrying connection in {self.sleep_time}s")
            await asyncio.sleep(self.sleep_time)
        except ConnectionRefusedError:
            logger.info(f"Connection Refused - retrying connection in {self.sleep_time}s")
            await asyncio.sleep(self.sleep_time)
|
|
|
|
async def _handle_leader_message(self, message):
    """
    Dispatch a message received from a leader to the handler registered
    for its ``type``; unknown types go to the default handler.

    :param message: the received message, read via ``message.get("type")``
    :return: whatever the selected handler returns
    """
    message_type = message.get("type")

    dispatch = {
        LeaderMessageType.Pairlist.value: self._handle_pairlist_message,
        LeaderMessageType.Dataframe.value: self._handle_dataframe_message,
    }

    handle = dispatch.get(message_type, self._handle_default_message)
    return await handle(message)
|
|
|
|
async def _handle_default_message(self, message):
    """Fallback handler: log messages whose type has no dedicated handler."""
    summary = f"Default message handled: {message}"
    logger.info(summary)
|
|
|
|
async def _handle_pairlist_message(self, message):
    """Handle a pairlist message from a leader; currently it is only logged."""
    summary = f"Pairlist message handled: {message}"
    logger.info(summary)
|
|
|
|
async def _handle_dataframe_message(self, message):
    """Handle a dataframe message from a leader; currently it is only logged."""
    summary = f"Dataframe message handled: {message}"
    logger.info(summary)
|