stable/freqtrade/rpc/replicate/__init__.py

"""
This module manages replicate mode communication
"""
import asyncio
import logging
import secrets
import socket
from threading import Event, Thread
from typing import Any, Coroutine, Dict, Union

import websockets
from fastapi import Depends
from fastapi import WebSocket as FastAPIWebSocket
from fastapi import WebSocketDisconnect, status

from freqtrade.enums import LeaderMessageType, ReplicateModeType, RPCMessageType
from freqtrade.rpc import RPC, RPCHandler
from freqtrade.rpc.replicate.channel import ChannelManager
from freqtrade.rpc.replicate.thread_queue import Queue as ThreadedQueue
from freqtrade.rpc.replicate.utils import is_websocket_alive


logger = logging.getLogger(__name__)


class ReplicateController(RPCHandler):
    """  This class handles all websocket communication """

    def __init__(
        self,
        rpc: RPC,
        config: Dict[str, Any],
        api_server: Union[Any, None] = None
    ) -> None:
        """
        Init the ReplicateController class, and init the super class RPCHandler.

        Reads the `replicate` section of the config, starts the background
        event loop thread and submits the main loop coroutine to it.

        :param rpc: instance of RPC Helper class
        :param config: Configuration object
        :param api_server: ApiServer instance, mandatory for replicate mode
        :raises RuntimeError: if no `api_server` is passed
        :raises ValueError: if in follower mode and no leaders are configured
        :return: None
        """
        super().__init__(rpc, config)

        self.freqtrade = rpc._freqtrade
        self.api_server = api_server

        if not self.api_server:
            raise RuntimeError("The API server must be enabled for replicate to work")

        self._loop = None
        self._running = False
        self._thread = None
        self._queue = None

        self._stop_event = Event()
        self._follower_tasks = None
        # `_connect_to_leaders` stores its tasks under this public name; define
        # it up front so the attribute exists before the follower loop runs
        self.follower_tasks = None

        self.channel_manager = ChannelManager()

        self.replicate_config = config.get('replicate', {})

        # What the config should look like
        # "replicate": {
        #     "enabled": true,
        #     "mode": "follower",
        #     "leaders": [
        #       {
        #         "url": "ws://localhost:8080/replicate/ws",
        #         "token": "test"
        #       }
        #     ]
        # }

        # "replicate": {
        #     "enabled": true,
        #     "mode": "leader",
        #     "api_key": "test"
        # }

        self.mode = ReplicateModeType[self.replicate_config.get('mode', 'leader').lower()]

        self.leaders_list = self.replicate_config.get('leaders', [])
        self.push_throttle_secs = self.replicate_config.get('push_throttle_secs', 1)

        self.reply_timeout = self.replicate_config.get('follower_reply_timeout', 10)
        self.ping_timeout = self.replicate_config.get('follower_ping_timeout', 2)
        self.sleep_time = self.replicate_config.get('follower_sleep_time', 1)

        # Truthiness check also covers an explicit `"leaders": null`, which
        # would otherwise fail in len() with an unrelated TypeError
        if self.mode == ReplicateModeType.follower and not self.leaders_list:
            raise ValueError("You must specify at least 1 leader in follower mode.")

        # This is only used by the leader, the followers use the tokens specified
        # in each of the leaders
        # If you do not specify an API key in the config, one will be randomly
        # generated and logged on startup
        default_api_key = secrets.token_urlsafe(16)
        self.secret_api_key = self.replicate_config.get('api_key', default_api_key)

        self.start_threaded_loop()

        self.start()

    def start_threaded_loop(self):
        """
        Start the main internal loop in another thread to run coroutines
        """
        self._loop = asyncio.new_event_loop()

        if not self._thread:
            self._thread = Thread(target=self._loop.run_forever)
            self._thread.start()
            self._running = True
        else:
            raise RuntimeError("A loop is already running")

    def submit_coroutine(self, coroutine: Coroutine):
        """
        Submit a coroutine to the threaded loop
        """
        if not self._running:
            raise RuntimeError("Cannot schedule new futures after shutdown")

        if not self._loop or not self._loop.is_running():
            raise RuntimeError("Loop must be started before any function can"
                               " be submitted")

        try:
            return asyncio.run_coroutine_threadsafe(coroutine, self._loop)
        except Exception as e:
            logger.error(f"Error running coroutine - {str(e)}")
            return None

    async def main_loop(self):
        """
        Main loop coro

        Picks the leader or follower coroutine matching `self.mode`, awaits
        it, and stops the event loop once it ends - also on cancellation.
        """
        try:
            if self.mode == ReplicateModeType.leader:
                mode_loop = self.leader_loop
            elif self.mode == ReplicateModeType.follower:
                mode_loop = self.follower_loop
            else:
                # Unknown mode: nothing to run, just fall through to the stop
                mode_loop = None

            if mode_loop is not None:
                await mode_loop()

        except asyncio.CancelledError:
            # Cancellation is the regular shutdown path
            pass
        finally:
            self._loop.stop()

    def start(self):
        """
        Start the controller main loop
        """
        self.submit_coroutine(self.main_loop())

    def cleanup(self) -> None:
        """
        Cleanup pending module resources.
        """
        if self._thread:
            if self._loop.is_running():

                self._running = False

                # Tell all coroutines submitted to the loop they're cancelled
                pending = asyncio.all_tasks(loop=self._loop)
                for task in pending:
                    task.cancel()

                self._loop.call_soon_threadsafe(self.channel_manager.disconnect_all)

            self._thread.join()

    def send_msg(self, msg: Dict[str, Any]) -> None:
        """
        Support RPC calls

        Only EMIT_DATA messages are forwarded; their `data_type` and `data`
        fields are passed on to `send_message`. Anything else is ignored.
        """
        is_emit = msg["type"] == RPCMessageType.EMIT_DATA
        if not is_emit:
            return

        payload = {field: msg.get(field) for field in ("data_type", "data")}
        self.send_message(payload)

    def send_message(self, msg: Dict[str, Any]) -> None:
        """ Push message through """

        # We should probably do some type of schema validation here

        if self.channel_manager.has_channels():
            self._send_message(msg)
        else:
            logger.debug("No listening followers, skipping...")
            pass

    def _send_message(self, msg: Dict[Any, Any]):
        """
        Add data to the internal queue to be broadcasted. This func will block
        if the queue is full. This is meant to be called in the main thread.
        """

        if self._queue:
            queue = self._queue.sync_q
            queue.put(msg)  # This will block if the queue is full
        else:
            logger.warning("Can not send data, leader loop has not started yet!")

    def is_leader(self):
        """
        Leader flag
        """
        return self.enabled() and self.mode == ReplicateModeType.leader

    def enabled(self):
        """
        Enabled flag
        """
        return self.replicate_config.get('enabled', False)

    # ----------------------- LEADER LOGIC ------------------------------

    async def leader_loop(self):
        """
        Main leader coroutine

        Logs the startup banner (including the API key), registers the
        websocket endpoint on the ApiServer and then broadcasts queued data
        until the broadcast coroutine ends. Exceptions raised while
        broadcasting are logged, not re-raised.
        """

        separator = "-" * 15
        banner = (
            "Running rpc.replicate in Leader mode",
            separator,
            f"API_KEY: {self.secret_api_key}",
            separator,
        )
        for line in banner:
            logger.info(line)

        self.register_leader_endpoint()

        try:
            await self._broadcast_queue_data()
        except Exception as e:
            logger.error("Exception occurred in Leader loop: ")
            logger.exception(e)

    async def _broadcast_queue_data(self):
        """
        Drain the internal queue, broadcasting each item through the channel
        manager and pausing `push_throttle_secs` between items. Runs while
        `self._running` is set; returns quietly when cancelled.
        """
        # Instantiate the queue in this coroutine so it's attached to our loop
        self._queue = ThreadedQueue()
        incoming = self._queue.async_q

        try:
            while self._running:
                # Wait for the next item
                payload = await incoming.get()

                logger.info(f"Found data - broadcasting: {payload}")

                # Push it to every connected channel
                await self.channel_manager.broadcast(payload)

                # Throttle before picking up the next item
                await asyncio.sleep(self.push_throttle_secs)
        except asyncio.CancelledError:
            # Silently stop
            return

    async def get_api_token(
        self,
        websocket: FastAPIWebSocket,
        token: Union[str, None] = None
    ):
        """
        Extract the API key from query param. Must match the
        set secret_api_key or the websocket connection will be closed.
        """
        if token == self.secret_api_key:
            return token
        else:
            logger.info("Denying websocket request...")
            await websocket.close(code=status.WS_1008_POLICY_VIOLATION)

    def register_leader_endpoint(self, path: str = "/replicate/ws"):
        """
        Attach and start the main leader loop to the ApiServer

        :param path: The endpoint path
        """

        if not self.api_server:
            raise RuntimeError("The leader needs the ApiServer to be active")

        # The endpoint function for running the main leader loop
        @self.api_server.app.websocket(path)
        async def leader_endpoint(
            websocket: FastAPIWebSocket,
            api_key: str = Depends(self.get_api_token)
        ):
            await self.leader_endpoint_loop(websocket)

    async def leader_endpoint_loop(self, websocket: FastAPIWebSocket):
        """
        The WebSocket endpoint served by the ApiServer. This handles connections,
        and adding them to the channel manager.

        Exceptions are logged and answered with `on_disconnect`; none are
        re-raised to the caller.

        :param websocket: The follower's websocket connection
        """
        try:
            if not is_websocket_alive(websocket):
                return

            logger.info(f"Follower connected - {websocket.client}")
            channel = await self.channel_manager.on_connect(websocket)

            # Send initial data here
            # Data is being broadcasted right away as soon as startup,
            # we may not have to send initial data at all. Further testing
            # required.

            # Keep connection open until explicitly closed, and sleep
            # NOTE(review): when the loop ends because the channel reports
            # closed, `on_disconnect` is not called - confirm the channel
            # manager drops closed channels on its own.
            try:
                while not channel.is_closed():
                    await channel.recv()

            except WebSocketDisconnect:
                # Handle client disconnects
                logger.info(f"Follower disconnected - {websocket.client}")
                await self.channel_manager.on_disconnect(websocket)
            except Exception as e:
                logger.info(f"Follower connection failed - {websocket.client}")
                logger.exception(e)
                # Handle cases like -
                # RuntimeError('Cannot call "send" once a closed message has been sent')
                await self.channel_manager.on_disconnect(websocket)

        except Exception as e:
            logger.error(f"Failed to serve - {websocket.client}")
            # Log the cause too, as the inner handler does, so the failure
            # can be diagnosed
            logger.exception(e)
            await self.channel_manager.on_disconnect(websocket)

    # -------------------------------FOLLOWER LOGIC----------------------------

    async def follower_loop(self):
        """
        Main follower coroutine

        This starts all of the follower connection coros
        """

        logger.info("Starting rpc.replicate in Follower mode")

        try:
            results = await self._connect_to_leaders()
        except Exception as e:
            logger.error("Exception occurred in Follower loop: ")
            logger.exception(e)
        finally:
            for result in results:
                if isinstance(result, Exception):
                    logger.debug(f"Exception in Follower loop: {result}")

    async def _connect_to_leaders(self):
        """
        For each leader in `self.leaders_list` create a connection and
        listen for data.
        """
        rpc_lock = asyncio.Lock()

        logger.info("Starting connections to Leaders...")

        self.follower_tasks = [
            self._loop.create_task(self._handle_leader_connection(leader, rpc_lock))
            for leader in self.leaders_list
        ]
        return await asyncio.gather(*self.follower_tasks, return_exceptions=True)

    async def _handle_leader_connection(self, leader, lock):
        """
        Given a leader, connect and wait on data. If connection is lost,
        it will attempt to reconnect.

        :param leader: Leader entry from the config, must contain the keys
            `url` and `token`
        :param lock: Lock held while a received message is being handled
        """
        try:
            url, token = leader["url"], leader["token"]

            websocket_url = f"{url}?token={token}"

            logger.info(f"Attempting to connect to Leader at: {url}")
            # TODO: limit the amount of connection retries
            while True:
                try:
                    async with websockets.connect(websocket_url) as ws:
                        channel = await self.channel_manager.on_connect(ws)
                        while True:
                            try:
                                data = await asyncio.wait_for(
                                    channel.recv(),
                                    timeout=self.reply_timeout
                                )
                            except (asyncio.TimeoutError, websockets.exceptions.ConnectionClosed):
                                # We haven't received data yet. Check the connection and continue.
                                try:
                                    # ping
                                    ping = await channel.ping()
                                    await asyncio.wait_for(ping, timeout=self.ping_timeout)
                                    logger.debug(f"Connection to {url} still alive...")
                                    continue
                                except Exception:
                                    logger.info(
                                        f"Ping error {url} - retrying in {self.sleep_time}s")
                                    # Must be awaited: a bare `asyncio.sleep(...)`
                                    # only creates the coroutine and reconnects
                                    # immediately without any back-off
                                    await asyncio.sleep(self.sleep_time)
                                    # Leave the receive loop so the outer loop
                                    # opens a fresh connection
                                    break

                            async with lock:
                                # Acquire lock so only 1 coro handling at a time
                                # as we might call the RPC module in the main thread
                                await self._handle_leader_message(data)

                except socket.gaierror:
                    logger.info(f"Socket error - retrying connection in {self.sleep_time}s")
                    await asyncio.sleep(self.sleep_time)
                    continue
                except ConnectionRefusedError:
                    logger.info(f"Connection Refused - retrying connection in {self.sleep_time}s")
                    await asyncio.sleep(self.sleep_time)
                    continue

        except asyncio.CancelledError:
            # Cancellation is the regular way to stop this connection
            pass

    async def _handle_leader_message(self, message: Dict[str, Any]):
        """
        Process one message received from a leader.

        Pairlist messages are handed to the first pairlist handler; any
        other message type is only logged.

        :param message: Message with the keys `data_type` and `data`
        """
        message_type = message.get('data_type')
        payload = message.get('data')

        logger.info(f"Received message from Leader: {message_type} - {payload}")

        if message_type != LeaderMessageType.pairlist:
            return

        # Add the data to the ExternalPairlist
        first_handler = self.freqtrade.pairlists._pairlist_handlers[0]
        first_handler.add_pairlist_data(payload)