Coverage for source/environment/trading_environment.py: 93%

134 statements  


# environment/trading_environment.py

from gym import Env
from gym.spaces import Discrete, Box
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import math
import random
from types import SimpleNamespace
from typing import Optional
import copy

from .broker import Broker
from .reward_validator_base import RewardValidatorBase

class TradingEnvironment(Env):
    """
    Implements a stock market environment in which an actor can perform actions (place orders).
    It is used to train neural network models with a reinforcement learning approach and can be
    configured to award points and impose penalties in several ways.
    """

    TRAIN_MODE = 'train'
    TEST_MODE = 'test'

    def __init__(self, data_path: str, initial_budget: float, max_amount_of_trades: int, window_size: int,
                 validator: RewardValidatorBase, sell_stop_loss: float, sell_take_profit: float,
                 buy_stop_loss: float, buy_take_profit: float, test_ratio: float = 0.2, penalty_starts: int = 0,
                 penalty_stops: int = 10, static_reward_adjustment: float = 1) -> None:
        """
        Class constructor. Allows all crucial constants, reward validation methods,
        environmental penalty policies, etc. to be defined.

        Parameters:
            data_path (str): Path to the CSV data that should be used as the environmental stock market.
            initial_budget (float): Initial budget for the trader to start from.
            max_amount_of_trades (int): Maximum number of trades that can be ongoing at the same time.
                Setting this constant prevents traders from placing orders randomly and defines
                the amount of money that can be assigned to a single order at a given iteration.
            window_size (int): Constant defining how far into the past the trader can look
                at a given iteration.
            validator (RewardValidatorBase): Validator implementing the policy used to award points
                for closed trades.
            sell_stop_loss (float): Constant defining the losing boundary at which a sell order
                (short) is closed.
            sell_take_profit (float): Constant defining the winning boundary at which a sell order
                (short) is closed.
            buy_stop_loss (float): Constant defining the losing boundary at which a buy order
                (long) is closed.
            buy_take_profit (float): Constant defining the winning boundary at which a buy order
                (long) is closed.
            test_ratio (float): Ratio of the data that should be used for testing purposes.
            penalty_starts (int): Constant defining how many trading periods the trader can go without
                placing an order before a penalty is imposed. In the range between the start and stop
                constants the penalty is calculated as a percentage of a positive reward and subtracted
                from the actual reward.
            penalty_stops (int): Constant defining the trading period at which the penalty no longer
                increases. The reward for trading periods exceeding the penalty stop constant will
                equal minus the static reward adjustment.
            static_reward_adjustment (float): Constant used to penalize the trader for bad choices or
                reward it for good ones.
        """

        if test_ratio < 0.0 or test_ratio >= 1.0:
            raise ValueError(f"Invalid test_ratio: {test_ratio}. It should be in range [0, 1).")

        self.__data: dict[str, pd.DataFrame] = self.__load_data(data_path, test_ratio)
        self.__mode = TradingEnvironment.TRAIN_MODE
        self.__broker: Broker = Broker()
        self.__validator: RewardValidatorBase = validator

        self.__trading_data: SimpleNamespace = SimpleNamespace()
        self.__trading_data.current_budget: float = initial_budget
        self.__trading_data.currently_invested: float = 0
        self.__trading_data.no_trades_placed_for: int = 0
        self.__trading_data.currently_placed_trades: int = 0

        self.__trading_consts = SimpleNamespace()
        self.__trading_consts.INITIAL_BUDGET: float = initial_budget
        self.__trading_consts.MAX_AMOUNT_OF_TRADES: int = max_amount_of_trades
        self.__trading_consts.WINDOW_SIZE: int = window_size
        self.__trading_consts.SELL_STOP_LOSS: float = sell_stop_loss
        self.__trading_consts.SELL_TAKE_PROFIT: float = sell_take_profit
        self.__trading_consts.BUY_STOP_LOSS: float = buy_stop_loss
        self.__trading_consts.BUY_TAKE_PROFIT: float = buy_take_profit
        self.__trading_consts.STATIC_REWARD_ADJUSTMENT: float = static_reward_adjustment
        self.__trading_consts.PENALTY_STARTS: int = penalty_starts
        self.__trading_consts.PENALTY_STOPS: int = penalty_stops
        # Equals 0 at break-even (normalized budget of 1) and approaches 1 as the budget grows.
        self.__trading_consts.PROFITABILITY_FUNCTION = lambda x: -1.0 * math.exp(-x + 1) + 1
        # Ramps from roughly 0 at penalty_starts up to 1 at penalty_stops, capped at 1 beyond that.
        self.__trading_consts.PENALTY_FUNCTION = lambda x: \
            min(1, 1 - math.tanh(-3.0 * (x - penalty_stops) / (penalty_stops - penalty_starts)))

        self.current_iteration: int = self.__trading_consts.WINDOW_SIZE
        self.state: list[float] = self.__prepare_state_data()
        self.action_space: Discrete = Discrete(3)
        self.observation_space: Box = Box(low=np.ones(len(self.state)) * -3,
                                          high=np.ones(len(self.state)) * 3,
                                          dtype=np.float64)
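
    # A minimal construction sketch (illustrative only; 'data/stock.csv' and
    # SomePointsValidator are placeholder names, not part of this module):
    #
    #     validator = SomePointsValidator()
    #     env = TradingEnvironment(data_path='data/stock.csv', initial_budget=10000.0,
    #                              max_amount_of_trades=5, window_size=10, validator=validator,
    #                              sell_stop_loss=0.95, sell_take_profit=1.05,
    #                              buy_stop_loss=0.95, buy_take_profit=1.05)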

    def __load_data(self, data_path: str, test_size: float) -> dict[str, pd.DataFrame]:
        """
        Loads data from a CSV file and splits it into training and testing sets based on the
        specified test size ratio.

        Parameters:
            data_path (str): Path to the CSV file containing the stock market data.
            test_size (float): Ratio of the data to be used for testing.

        Returns:
            (dict[str, pd.DataFrame]): Dictionary containing training and testing data frames.
        """

        data_frame = pd.read_csv(data_path)
        dividing_index = int(len(data_frame) * (1 - test_size))

        return {
            TradingEnvironment.TRAIN_MODE: data_frame.iloc[:dividing_index],
            TradingEnvironment.TEST_MODE: data_frame.iloc[dividing_index:]
        }
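
    # Note: the split is chronological (no shuffling), so the test set is always the
    # most recent (test_size) fraction of the rows.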

    def __prepare_state_data(self) -> list[float]:
        """
        Calculates state data as a list of floats representing the current iteration's observation.
        An observation contains all input data restricted to the window size, plus a couple of
        coefficients giving insight into the current budget and orders situation.

        Returns:
            (list[float]): List with current observations for the environment.
        """

        current_market_data = self.__data[self.__mode].iloc[self.current_iteration - self.__trading_consts.WINDOW_SIZE : self.current_iteration]
        current_market_data_no_index = current_market_data.select_dtypes(include=[np.number])
        normalized_current_market_data_values = pd.DataFrame(StandardScaler().fit_transform(current_market_data_no_index),
                                                             columns=current_market_data_no_index.columns).values
        current_market_data_list = normalized_current_market_data_values.ravel().tolist()

        current_normalized_budget = 1.0 * self.__trading_data.current_budget / self.__trading_consts.INITIAL_BUDGET
        current_profitability_coeff = self.__trading_consts.PROFITABILITY_FUNCTION(current_normalized_budget)
        current_trades_occupancy_coeff = 1.0 * self.__trading_data.currently_placed_trades / self.__trading_consts.MAX_AMOUNT_OF_TRADES
        current_no_trades_penalty_coeff = self.__trading_consts.PENALTY_FUNCTION(self.__trading_data.no_trades_placed_for)
        current_inner_state_list = [current_profitability_coeff, current_trades_occupancy_coeff, current_no_trades_penalty_coeff]

        return current_market_data_list + current_inner_state_list
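
    # The resulting observation is a flat list of WINDOW_SIZE * <numeric columns> normalized
    # market values followed by the three coefficients above; e.g. (assumed shapes) a window
    # of 10 rows over 5 numeric columns yields a state of length 10 * 5 + 3 = 53.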

    def set_mode(self, mode: str) -> None:
        """
        Sets the mode of the environment to either TRAIN_MODE or TEST_MODE.

        Parameters:
            mode (str): Mode to set for the environment.

        Raises:
            ValueError: If the provided mode is not valid.
        """

        if mode not in [TradingEnvironment.TRAIN_MODE, TradingEnvironment.TEST_MODE]:
            raise ValueError(f"Invalid mode: {mode}. Use TradingEnvironment.TRAIN_MODE or TradingEnvironment.TEST_MODE.")
        self.__mode = mode
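
    # e.g. env.set_mode(TradingEnvironment.TEST_MODE) switches subsequent steps
    # to the held-out testing split.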

    def get_mode(self) -> str:
        """
        Mode getter.

        Returns:
            (str): Current mode of the environment.
        """

        return copy.copy(self.__mode)

    def get_trading_data(self) -> SimpleNamespace:
        """
        Trading data getter.

        Returns:
            (SimpleNamespace): Copy of the namespace with all trading data.
        """

        return copy.copy(self.__trading_data)

    def get_trading_consts(self) -> SimpleNamespace:
        """
        Trading constants getter.

        Returns:
            (SimpleNamespace): Copy of the namespace with all trading constants.
        """

        return copy.copy(self.__trading_consts)

    def get_broker(self) -> Broker:
        """
        Broker getter.

        Returns:
            (Broker): Copy of the broker used by the environment.
        """

        return copy.copy(self.__broker)

    def get_environment_length(self) -> int:
        """
        Environment length getter.

        Returns:
            (int): Length of the environment.
        """

        return len(self.__data[self.__mode])

    def get_environment_spatial_data_dimension(self) -> tuple[int, int]:
        """
        Environment spatial data dimensionality getter.

        Returns:
            (tuple[int, int]): Dimensions of spatial data in the environment, i.e. the
                window size and the number of data columns minus one (the index column).
        """

        return (self.__trading_consts.WINDOW_SIZE, self.__data[self.__mode].shape[1] - 1)

    def get_data_for_iteration(self, columns: list[str], start: int, stop: int, step: int = 1) -> list[float]:
        """
        Getter for data over specified iterations.

        Parameters:
            columns (list[str]): List of column names to extract from the data.
            start (int): Start iteration index.
            stop (int): Stop iteration index (inclusive, since pandas label-based slicing is used).
            step (int): Step between iterations. Default is 1.

        Returns:
            (list[float]): Copy of the part of the data with the specified columns
                over the specified iterations.
        """

        return copy.copy(self.__data[self.__mode].loc[start:stop:step, columns].values.ravel().tolist())
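
    # For example (assuming the default integer index), get_data_for_iteration(['close'], 100, 110)
    # returns the eleven 'close' values for rows labelled 100 through 110, because .loc
    # slicing includes the stop label.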

    def step(self, action: int) -> tuple[list[float], float, bool, dict]:
        """
        Performs the specified action on the environment, which results in the generation of a
        new observation. This function causes trades to be handled, the reward to be calculated
        and the environment to be updated.

        Parameters:
            action (int): Number specifying the action. Possible values are 0 for the buy action,
                1 for the wait action and 2 for the sell action.

        Returns:
            (tuple[list[float], float, bool, dict]): Tuple containing the next observation
                state, the reward, a finish indication and an additional info dictionary.
        """

        self.current_iteration += 1
        self.state = self.__prepare_state_data()

        # Ratio of the current close to the previous one, used to update open orders.
        close_changes = self.__data[self.__mode].iloc[self.current_iteration - 2 : self.current_iteration]['close'].values
        stock_change_coeff = 1 + (close_changes[1] - close_changes[0]) / close_changes[0]
        closed_orders = self.__broker.update_orders(stock_change_coeff)

        reward = self.__validator.validate_orders(closed_orders)
        self.__trading_data.currently_placed_trades -= len(closed_orders)
        self.__trading_data.current_budget += np.sum([trade.current_value for trade in closed_orders])
        self.__trading_data.currently_invested -= np.sum([trade.initial_value for trade in closed_orders])

        number_of_possible_trades = self.__trading_consts.MAX_AMOUNT_OF_TRADES - self.__trading_data.currently_placed_trades
        money_to_trade = 0
        if number_of_possible_trades > 0:
            money_to_trade = 1.0 / number_of_possible_trades * self.__trading_data.current_budget

        # Long (buy) orders close on the buy-side boundaries; short (sell) orders on the sell-side ones.
        if action == 0:
            is_buy_order = True
            stop_loss = self.__trading_consts.BUY_STOP_LOSS
            take_profit = self.__trading_consts.BUY_TAKE_PROFIT
        elif action == 2:
            is_buy_order = False
            stop_loss = self.__trading_consts.SELL_STOP_LOSS
            take_profit = self.__trading_consts.SELL_TAKE_PROFIT

        if action != 1:
            if number_of_possible_trades > 0:
                self.__trading_data.current_budget -= money_to_trade
                self.__trading_data.currently_invested += money_to_trade
                self.__broker.place_order(money_to_trade, is_buy_order, stop_loss, take_profit)
                self.__trading_data.currently_placed_trades += 1
                self.__trading_data.no_trades_placed_for = 0
                reward += self.__trading_consts.STATIC_REWARD_ADJUSTMENT
            else:
                self.__trading_data.no_trades_placed_for += 1
                reward -= self.__trading_consts.STATIC_REWARD_ADJUSTMENT
        else:
            self.__trading_data.no_trades_placed_for += 1
            if number_of_possible_trades == 0:
                reward += self.__trading_consts.STATIC_REWARD_ADJUSTMENT

        if number_of_possible_trades > 0:
            # Scale positive rewards down the longer the trader stays inactive.
            reward *= (1 - self.__trading_consts.PENALTY_FUNCTION(self.__trading_data.no_trades_placed_for)) \
                if reward > 0 else 1
            if self.__trading_consts.PENALTY_STOPS < self.__trading_data.no_trades_placed_for:
                reward -= self.__trading_consts.STATIC_REWARD_ADJUSTMENT
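
        # For example, with penalty_starts=0 and penalty_stops=10 (assumed values), five idle
        # periods give PENALTY_FUNCTION(5) = 1 - tanh(1.5) ≈ 0.095, so a positive reward keeps
        # roughly 90% of its value; at ten or more idle periods it is wiped out entirely.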

        # An episode ends when the data runs out, the budget grows tenfold,
        # or total funds drop below 80% of the initial budget.
        if (self.current_iteration >= len(self.__data[self.__mode]) or
                self.__trading_data.current_budget > 10 * self.__trading_consts.INITIAL_BUDGET or
                (self.__trading_data.current_budget + self.__trading_data.currently_invested) / self.__trading_consts.INITIAL_BUDGET < 0.8):
            done = True
        else:
            done = False

        info = {'coeff': stock_change_coeff,
                'iteration': self.current_iteration,
                'number_of_closed_orders': len(closed_orders),
                'money_to_trade': money_to_trade,
                'action': action,
                'current_budget': self.__trading_data.current_budget,
                'currently_invested': self.__trading_data.currently_invested,
                'no_trades_placed_for': self.__trading_data.no_trades_placed_for,
                'currently_placed_trades': self.__trading_data.currently_placed_trades}

        return self.state, reward, done, info

    def render(self) -> None:
        """
        Renders environment visualization. Will be implemented later.
        """

        # TODO: Visualization to be implemented
        pass

    def reset(self, randkey: Optional[int] = None) -> list[float]:
        """
        Resets the environment. Typically used when the environment is finished,
        i.e. when there are no more steps to be taken within the environment
        or the finish conditions are fulfilled.

        Parameters:
            randkey (Optional[int]): Value indicating which iteration
                should be treated as the starting point after the reset.

        Returns:
            (list[float]): Current iteration observation state.
        """

        if randkey is None:
            randkey = random.randint(self.__trading_consts.WINDOW_SIZE, len(self.__data[self.__mode]) - 1)
        self.__trading_data.current_budget = self.__trading_consts.INITIAL_BUDGET
        self.__trading_data.currently_invested = 0
        self.__trading_data.no_trades_placed_for = 0
        self.__trading_data.currently_placed_trades = 0
        self.__broker.reset()
        self.current_iteration = randkey
        self.state = self.__prepare_state_data()

        return self.state
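
# A minimal episode-loop sketch (illustrative only; assumes `env` was constructed as in
# the example near __init__ and uses the classic gym step()/reset() API shown above):
#
#     state = env.reset()
#     done = False
#     while not done:
#         action = env.action_space.sample()  # stand-in for a trained policy
#         state, reward, done, info = env.step(action)
#     print(info['current_budget'])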