Coverage for source/environment/trading_environment.py: 23%

155 statements  


# environment/trading_environment.py

# global imports
from gym import Env
from gym.spaces import Discrete, Box
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import math
import random
from types import SimpleNamespace
from typing import Optional
import copy
from tensorflow.keras.utils import to_categorical

# local imports
from source.environment import Broker
from source.environment import RewardValidatorBase
from source.environment import LabelAnnotatorBase

class TradingEnvironment(Env):
    """
    Implements a stock market environment in which an actor can perform actions (place orders).
    It is used to train neural network models with a reinforcement learning approach. It can
    be configured to award points and impose penalties in several ways.
    """

    TRAIN_MODE = 'train'
    TEST_MODE = 'test'

    def __init__(self, data_path: str, initial_budget: float, max_amount_of_trades: int, window_size: int,
                 validator: RewardValidatorBase, label_annotator: LabelAnnotatorBase, sell_stop_loss: float,
                 sell_take_profit: float, buy_stop_loss: float, buy_take_profit: float, test_ratio: float = 0.2,
                 penalty_starts: int = 0, penalty_stops: int = 10, static_reward_adjustment: float = 1) -> None:
        """
        Class constructor. Allows defining all crucial constants, reward validation methods,
        environmental penalty policies, etc.

        Parameters:
            data_path (str): Path to CSV data that should be used as the environmental stock market.
            initial_budget (float): Initial budget for the trader to start from.
            max_amount_of_trades (int): Max amount of trades that can be ongoing at the same time.
                Setting this constant prevents traders from placing orders randomly and defines
                the amount of money that can be assigned to a single order at a certain iteration.
            window_size (int): Constant defining how far into the past the trader can look
                at a certain iteration.
            validator (RewardValidatorBase): Validator implementing the policy used to award points
                for closed trades.
            label_annotator (LabelAnnotatorBase): Annotator implementing the policy used to label
                data with target values. It provides supervised agents with information about
                the target class value for a certain iteration.
            sell_stop_loss (float): Constant defining the losing boundary at which a sell order
                (short) is closed.
            sell_take_profit (float): Constant defining the winning boundary at which a sell order
                (short) is closed.
            buy_stop_loss (float): Constant defining the losing boundary at which a buy order
                (long) is closed.
            buy_take_profit (float): Constant defining the winning boundary at which a buy order
                (long) is closed.
            test_ratio (float): Ratio of the data that should be used for testing purposes.
            penalty_starts (int): Constant defining how many trading periods the trader can go
                without placing an order before a penalty is imposed. The penalty in the range
                between the start and stop constants is calculated as a percentage of the
                positive reward and subtracted from the actual reward.
            penalty_stops (int): Constant defining the trading period at which the penalty stops
                increasing. The reward for trading periods exceeding this constant equals minus
                the static reward adjustment.
            static_reward_adjustment (float): Constant used to penalize the trader for bad choices
                or reward it for good ones.
        """

        if test_ratio < 0.0 or test_ratio >= 1.0:
            raise ValueError(f"Invalid test_ratio: {test_ratio}. It should be in range [0, 1).")

        self.__data: dict[str, pd.DataFrame] = self.__load_data(data_path, test_ratio)
        self.__mode = TradingEnvironment.TRAIN_MODE
        self.__broker: Broker = Broker()
        self.__validator: RewardValidatorBase = validator
        self.__label_annotator: LabelAnnotatorBase = label_annotator

        self.__trading_data: SimpleNamespace = SimpleNamespace()
        self.__trading_data.current_budget: float = initial_budget
        self.__trading_data.currently_invested: float = 0
        self.__trading_data.no_trades_placed_for: int = 0
        self.__trading_data.currently_placed_trades: int = 0

        self.__trading_consts = SimpleNamespace()
        self.__trading_consts.INITIAL_BUDGET: float = initial_budget
        self.__trading_consts.MAX_AMOUNT_OF_TRADES: int = max_amount_of_trades
        self.__trading_consts.WINDOW_SIZE: int = window_size
        self.__trading_consts.SELL_STOP_LOSS: float = sell_stop_loss
        self.__trading_consts.SELL_TAKE_PROFIT: float = sell_take_profit
        self.__trading_consts.BUY_STOP_LOSS: float = buy_stop_loss
        self.__trading_consts.BUY_TAKE_PROFIT: float = buy_take_profit
        self.__trading_consts.STATIC_REWARD_ADJUSTMENT: float = static_reward_adjustment
        self.__trading_consts.PENALTY_STARTS: int = penalty_starts
        self.__trading_consts.PENALTY_STOPS: int = penalty_stops
        # Equals 0 when the budget matches the initial one (x = 1), is negative below it
        # and asymptotically approaches 1 above it.
        self.__trading_consts.PROFITABILITY_FUNCTION = lambda x: -1.0 * math.exp(-x + 1) + 1
        # Rises from ~0 at penalty_starts to 1 at penalty_stops and is capped at 1 beyond it.
        self.__trading_consts.PENALTY_FUNCTION = lambda x: \
            min(1, 1 - math.tanh(-3.0 * (x - penalty_stops) / (penalty_stops - penalty_starts)))
        self.__trading_consts.OUTPUT_CLASSES: dict = vars(self.__label_annotator.get_output_classes())

        self.current_iteration: int = self.__trading_consts.WINDOW_SIZE
        self.state: list[float] = self.__prepare_state_data()
        self.action_space: Discrete = Discrete(3)
        self.observation_space: Box = Box(low=np.ones(len(self.state)) * -3,
                                          high=np.ones(len(self.state)) * 3,
                                          dtype=np.float64)
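
    # A minimal construction sketch, assuming hypothetical concrete implementations
    # ('PointsRewardValidator', 'TrendLabelAnnotator') and a placeholder CSV path;
    # none of these names come from this module.
    #
    #   validator = PointsRewardValidator()
    #   annotator = TrendLabelAnnotator()
    #   env = TradingEnvironment(data_path='data/stock.csv', initial_budget=10000.0,
    #                            max_amount_of_trades=5, window_size=30,
    #                            validator=validator, label_annotator=annotator,
    #                            sell_stop_loss=0.95, sell_take_profit=1.05,
    #                            buy_stop_loss=0.95, buy_take_profit=1.05)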

    def __load_data(self, data_path: str, test_size: float) -> dict[str, pd.DataFrame]:
        """
        Loads data from a CSV file and splits it into training and testing sets based on
        the specified test size ratio.

        Parameters:
            data_path (str): Path to the CSV file containing the stock market data.
            test_size (float): Ratio of the data to be used for testing.

        Returns:
            (dict[str, pd.DataFrame]): Dictionary containing training and testing data frames.
        """

        data_frame = pd.read_csv(data_path)
        dividing_index = int(len(data_frame) * (1 - test_size))

        return {
            TradingEnvironment.TRAIN_MODE: data_frame.iloc[:dividing_index].reset_index(drop=True),
            TradingEnvironment.TEST_MODE: data_frame.iloc[dividing_index:].reset_index(drop=True)
        }
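
    # Worked example: with 1000 rows and test_size=0.2 the dividing index is
    # int(1000 * 0.8) = 800, so rows [0, 800) form the training frame and
    # rows [800, 1000) the testing frame, each re-indexed from zero.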

    def __prepare_labeled_data(self) -> tuple[pd.DataFrame, pd.Series]:
        """
        Prepares windowed observations (market data only, without trading coefficients)
        together with the target labels produced by the label annotator, shifted so that
        each observation window is aligned with the label of the iteration it precedes.

        Returns:
            (tuple[pd.DataFrame, pd.Series]): Data frame with feature columns and series
                with corresponding labels.
        """

        new_rows = []
        for i in range(self.current_iteration, self.get_environment_length()):
            data_row = self.__prepare_state_data(slice(i - self.__trading_consts.WINDOW_SIZE, i), include_trading_data=False)
            new_rows.append(data_row)

        new_data = pd.DataFrame(new_rows, columns=[f"feature_{i}" for i in range(len(new_rows[0]))])
        labels = self.__label_annotator.annotate(self.__data[self.__mode]).shift(-self.current_iteration)

        return new_data, labels.dropna()
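
    # Alignment sketch: the window for iteration i covers rows [i - WINDOW_SIZE, i),
    # and shift(-current_iteration) pairs each such window with the label annotated
    # at iteration i; trailing rows left without labels by the shift are dropped.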

    def __prepare_state_data(self, index: Optional[slice] = None, include_trading_data: bool = True) -> list[float]:
        """
        Calculates state data as a list of floats representing the current iteration's
        observation. The observation contains all input data refined to the window size
        and a couple of coefficients giving insight into the current budget and orders
        situation.

        Parameters:
            index (Optional[slice]): Slice of data rows to build the observation from.
                Defaults to the window ending at the current iteration.
            include_trading_data (bool): Whether to append the budget and order
                coefficients to the market data.

        Returns:
            (list[float]): List with current observations for the environment.
        """

        if index is None:
            index = slice(self.current_iteration - self.__trading_consts.WINDOW_SIZE, self.current_iteration)

        current_market_data = self.__data[self.__mode].iloc[index]
        current_market_data_no_index = current_market_data.select_dtypes(include=[np.number])
        normalized_current_market_data_values = pd.DataFrame(StandardScaler().fit_transform(current_market_data_no_index),
                                                             columns=current_market_data_no_index.columns).values
        current_market_data_list = normalized_current_market_data_values.ravel().tolist()

        if include_trading_data:
            current_normalized_budget = 1.0 * self.__trading_data.current_budget / self.__trading_consts.INITIAL_BUDGET
            current_profitability_coeff = self.__trading_consts.PROFITABILITY_FUNCTION(current_normalized_budget)
            current_trades_occupancy_coeff = 1.0 * self.__trading_data.currently_placed_trades / self.__trading_consts.MAX_AMOUNT_OF_TRADES
            current_no_trades_penalty_coeff = self.__trading_consts.PENALTY_FUNCTION(self.__trading_data.no_trades_placed_for)
            current_inner_state_list = [current_profitability_coeff, current_trades_occupancy_coeff, current_no_trades_penalty_coeff]
            current_market_data_list += current_inner_state_list

        return current_market_data_list
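
    # Sizing sketch: with WINDOW_SIZE=30 and, say, 5 numeric columns, the flattened
    # market slice yields 30 * 5 = 150 values; include_trading_data=True appends the
    # three coefficients for a 153-element state. The column count is an assumption
    # for illustration only.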

    def set_mode(self, mode: str) -> None:
        """
        Sets the mode of the environment to either TRAIN_MODE or TEST_MODE.

        Parameters:
            mode (str): Mode to set for the environment.

        Raises:
            ValueError: If the provided mode is not valid.
        """

        if mode not in [TradingEnvironment.TRAIN_MODE, TradingEnvironment.TEST_MODE]:
            raise ValueError(f"Invalid mode: {mode}. Use TradingEnvironment.TRAIN_MODE or TradingEnvironment.TEST_MODE.")
        self.__mode = mode
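
    # Usage sketch: switch to the held-out split before evaluation and reset, since
    # the current iteration may be out of range for the shorter test frame.
    #
    #   env.set_mode(TradingEnvironment.TEST_MODE)
    #   state = env.reset()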

    def get_mode(self) -> str:
        """
        Mode getter.

        Returns:
            (str): Current mode of the environment.
        """

        return copy.copy(self.__mode)

    def get_trading_data(self) -> SimpleNamespace:
        """
        Trading data getter.

        Returns:
            (SimpleNamespace): Copy of the namespace with all trading data.
        """

        return copy.copy(self.__trading_data)

    def get_trading_consts(self) -> SimpleNamespace:
        """
        Trading constants getter.

        Returns:
            (SimpleNamespace): Copy of the namespace with all trading constants.
        """

        return copy.copy(self.__trading_consts)

    def get_broker(self) -> Broker:
        """
        Broker getter.

        Returns:
            (Broker): Copy of the broker used by the environment.
        """

        return copy.copy(self.__broker)

    def get_environment_length(self) -> int:
        """
        Environment length getter.

        Returns:
            (int): Length of the environment in the current mode.
        """

        return len(self.__data[self.__mode])

    def get_environment_spatial_data_dimension(self) -> tuple[int, int]:
        """
        Environment spatial data dimensionality getter.

        Returns:
            (tuple[int, int]): Dimension of spatial data in the environment, i.e. the
                window size and the number of data columns excluding the index column.
        """

        return (self.__trading_consts.WINDOW_SIZE, self.__data[self.__mode].shape[1] - 1)
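
    # Note: the '- 1' assumes exactly one non-numeric column (e.g. a date column)
    # that __prepare_state_data later drops when selecting numeric dtypes.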

    def get_labeled_data(self) -> tuple[np.ndarray, np.ndarray]:
        """
        Labeled data getter. Wraps the prepared observations and labels into arrays
        suitable for supervised training.

        Returns:
            (tuple[np.ndarray, np.ndarray]): Input observations expanded with a channel
                axis and one-hot encoded output labels.
        """

        input_data, output_data = self.__prepare_labeled_data()
        input_data = np.expand_dims(np.array(input_data), axis=1)
        output_data = to_categorical(np.array(output_data),
                                     num_classes=len(self.__trading_consts.OUTPUT_CLASSES))
        return copy.copy((input_data, output_data))
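
    # Shape sketch: with N labeled windows, F features per window and C output
    # classes, input_data has shape (N, 1, F) and output_data has shape (N, C).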

    def get_data_for_iteration(self, columns: list[str], start: int, stop: int, step: int = 1) -> list[float]:
        """
        Getter for data over a given range of iterations.

        Parameters:
            columns (list[str]): List of column names to extract from the data.
            start (int): Start iteration index.
            stop (int): Stop iteration index.
            step (int): Step between iterations. Default is 1.

        Returns:
            (list[float]): Copy of the part of the data with the specified columns
                over the specified iterations.
        """

        return copy.copy(self.__data[self.__mode].loc[start:stop:step, columns].values.ravel().tolist())
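
    # Pandas caveat: .loc slicing is label-based and inclusive of 'stop', so this
    # returns the row at 'stop' as well, unlike a half-open Python range.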

    def step(self, action: int) -> tuple[list[float], float, bool, dict]:
        """
        Performs the specified action on the environment. It results in the generation
        of new observations. This function causes trades to be handled, the reward to be
        calculated and the environment to be updated.

        Parameters:
            action (int): Number specifying the action: 0 for buy, 1 for wait
                and 2 for sell.

        Returns:
            (tuple[list[float], float, bool, dict]): Tuple containing the next observation
                state, reward, finish indication and additional info dictionary.
        """

        self.current_iteration += 1
        self.state = self.__prepare_state_data()

        close_changes = self.__data[self.__mode].iloc[self.current_iteration - 2 : self.current_iteration]['close'].values
        stock_change_coeff = 1 + (close_changes[1] - close_changes[0]) / close_changes[0]
        closed_orders = self.__broker.update_orders(stock_change_coeff)

        reward = self.__validator.validate_orders(closed_orders)
        self.__trading_data.currently_placed_trades -= len(closed_orders)
        self.__trading_data.current_budget += np.sum([trade.current_value for trade in closed_orders])
        self.__trading_data.currently_invested -= np.sum([trade.initial_value for trade in closed_orders])


        number_of_possible_trades = self.__trading_consts.MAX_AMOUNT_OF_TRADES - self.__trading_data.currently_placed_trades
        money_to_trade = 0
        if number_of_possible_trades > 0:
            money_to_trade = 1.0 / number_of_possible_trades * self.__trading_data.current_budget

        # Action 0 places a buy (long) order and action 2 a sell (short) order, each with
        # its corresponding boundaries (the original code used the sell boundaries for buy
        # orders and vice versa, contradicting the constructor documentation).
        if action == 0:
            is_buy_order = True
            stop_loss = self.__trading_consts.BUY_STOP_LOSS
            take_profit = self.__trading_consts.BUY_TAKE_PROFIT
        elif action == 2:
            is_buy_order = False
            stop_loss = self.__trading_consts.SELL_STOP_LOSS
            take_profit = self.__trading_consts.SELL_TAKE_PROFIT

        if action != 1:
            if number_of_possible_trades > 0:
                self.__trading_data.current_budget -= money_to_trade
                self.__trading_data.currently_invested += money_to_trade
                self.__broker.place_order(money_to_trade, is_buy_order, stop_loss, take_profit)
                self.__trading_data.currently_placed_trades += 1
                self.__trading_data.no_trades_placed_for = 0
                reward += self.__trading_consts.STATIC_REWARD_ADJUSTMENT
            else:
                self.__trading_data.no_trades_placed_for += 1
                reward -= self.__trading_consts.STATIC_REWARD_ADJUSTMENT
        else:
            self.__trading_data.no_trades_placed_for += 1
            if number_of_possible_trades == 0:
                reward += self.__trading_consts.STATIC_REWARD_ADJUSTMENT


        if number_of_possible_trades > 0:
            reward *= (1 - self.__trading_consts.PENALTY_FUNCTION(self.__trading_data.no_trades_placed_for)) \
                if reward > 0 else 1
            if self.__trading_consts.PENALTY_STOPS < self.__trading_data.no_trades_placed_for:
                reward -= self.__trading_consts.STATIC_REWARD_ADJUSTMENT

        if (self.current_iteration >= self.get_environment_length() - 1 or
                self.__trading_data.current_budget > 10 * self.__trading_consts.INITIAL_BUDGET or
                (self.__trading_data.current_budget + self.__trading_data.currently_invested) / self.__trading_consts.INITIAL_BUDGET < 0.8):
            done = True
        else:
            done = False

        info = {'coeff': stock_change_coeff,
                'iteration': self.current_iteration,
                'number_of_closed_orders': len(closed_orders),
                'money_to_trade': money_to_trade,
                'action': action,
                'current_budget': self.__trading_data.current_budget,
                'currently_invested': self.__trading_data.currently_invested,
                'no_trades_placed_for': self.__trading_data.no_trades_placed_for,
                'currently_placed_trades': self.__trading_data.currently_placed_trades}

        return self.state, reward, done, info
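
    # Interaction sketch (hypothetical driver code) using the classic Gym step API
    # implemented above; the random policy is a placeholder.
    #
    #   state = env.reset()
    #   done = False
    #   while not done:
    #       action = env.action_space.sample()
    #       state, reward, done, info = env.step(action)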

    def render(self) -> None:
        """
        Renders environment visualization. Will be implemented later.
        """

        # TODO: Visualization to be implemented
        pass

    def reset(self, randkey: Optional[int] = None) -> list[float]:
        """
        Resets the environment. Typically used when the environment is finished,
        i.e. when there are no more steps to be taken within the environment
        or the finish conditions are fulfilled.

        Parameters:
            randkey (Optional[int]): Value indicating which iteration
                should be treated as the starting point after the reset.

        Returns:
            (list[float]): Current iteration observation state.
        """

        if randkey is None:
            randkey = random.randint(self.__trading_consts.WINDOW_SIZE, self.get_environment_length() - 1)
        self.__trading_data.current_budget = self.__trading_consts.INITIAL_BUDGET
        self.__trading_data.currently_invested = 0
        self.__trading_data.no_trades_placed_for = 0
        self.__trading_data.currently_placed_trades = 0
        self.__broker.reset()
        self.current_iteration = randkey
        self.state = self.__prepare_state_data()

        return self.state
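
    # Deterministic restart sketch: passing randkey pins the starting iteration, e.g.
    #
    #   state = env.reset(randkey=env.get_trading_consts().WINDOW_SIZE)
    #
    # replays the earliest valid window instead of a randomly chosen one.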