Mungojerrie  1.0
Gym.hh
1 #ifndef GYM_HH_
2 #define GYM_HH_
3 
46 #include "Model.hh"
47 #include <boost/functional/hash.hpp>
48 #include <boost/archive/text_oarchive.hpp>
49 #include <boost/archive/text_iarchive.hpp>
50 #include <boost/serialization/vector.hpp>
51 
52 using GymObservation = struct GymObservation {
53  friend class boost::serialization::access;
54 
55  Node first;
56  State second;
57  short int trackerState;
58  std::vector<State> third; // Additional automata states
59 
60  GymObservation(Node first, State second, short int trackerState=0, std::vector<State> third={}) : first(first),
61  second(second),
62  trackerState(trackerState),
63  third(third) {}
64  GymObservation(void) {
65  first = 0;
66  second = 0;
67  trackerState = 0;
68  third = {};
69  }
70  bool operator==(GymObservation const & other) const
71  {
72  if (third.size() != other.third.size()) {
73  return false;
74  }
75  bool isEqual = true;
76  for (size_t index = 0; index < third.size(); index++) {
77  isEqual = isEqual && (third[index] == other.third[index]);
78  }
79  return (first == other.first &&
80  second == other.second &&
81  trackerState == other.trackerState &&
82  isEqual);
83  }
84  bool operator<(GymObservation const & other) const
85  {
86  bool thirdL = false;
87  if (third.size() < other.third.size()) {
88  thirdL = true;
89  } else if (third.size() > other.third.size()) {
90  thirdL = false;
91  } else {
92  for (size_t index = 0; index < third.size(); index++) {
93  if (third[index] < other.third[index]) {
94  thirdL = true;
95  break;
96  } else if (other.third[index] < third[index]) break; // first differing element decides; stop scanning
97  }
98  }
99  return (first < other.first) ||
100  (first == other.first && second < other.second) ||
101  (first == other.first && second == other.second && trackerState < other.trackerState) ||
102  (first == other.first && second == other.second && trackerState == other.trackerState && thirdL);
103  }
104  template<class Archive>
105  void serialize(Archive & ar, const unsigned int version) {
106  (void)version;
107  ar & first;
108  ar & second;
109  ar & trackerState;
110  ar & third;
111  }
112 };
113 
114 struct GymObservationHasher {
115  std::size_t operator()(GymObservation const & observation) const
116  {
117  std::size_t seed = 0;
118  boost::hash_combine(seed, boost::hash_value(observation.first));
119  boost::hash_combine(seed, boost::hash_value(observation.second));
120  boost::hash_combine(seed, boost::hash_value(observation.trackerState));
121  boost::hash_combine(seed, boost::hash_value(observation.third));
122  return seed;
123  }
124 };
125 
126 struct GymObservationCompare {
127  bool operator()(GymObservation const & first, GymObservation const & second) const
128  {
129  return first < second;
130  // std::tuple<Node, State, short int>{first.first, first.second, first.trackerState} <
131  // std::tuple<Node, State, short int>{second.first, second.second, second.trackerState};
132  }
133 };
134 
135 using GymAction = struct GymAction {
136  friend class boost::serialization::access;
137 
138  std::pair<Action, State> first;
139  State second;
140  std::vector<State> third;
141 
142  GymAction(std::pair<Action, State> first, State second, std::vector<State> third={}) : first(first),
143  second(second),
144  third(third) {}
145 
146  GymAction(void) {
147  first = {0, 0};
148  second = 0;
149  third = {};
150  }
151 
152  bool operator==(GymAction const & other) const {
153  if (third.size() != other.third.size()) {
154  return false;
155  }
156  bool isEqual = true;
157  for (size_t index = 0; index < third.size(); index++) {
158  isEqual = isEqual && (third[index] == other.third[index]);
159  }
160  return (first == other.first && second == other.second && isEqual);
161  }
162 
163  bool operator<(GymAction const & other) const {
164  bool thirdL = false;
165  if (third.size() < other.third.size()) {
166  thirdL = true;
167  } else if (third.size() > other.third.size()) {
168  thirdL = false;
169  } else {
170  for (size_t index = 0; index < third.size(); index++) {
171  if (third[index] < other.third[index]) {
172  thirdL = true;
173  break;
174  } else if (other.third[index] < third[index]) break; // first differing element decides; stop scanning
175  }
176  }
177  return (first < other.first) ||
178  (first == other.first && second < other.second) ||
179  (first == other.first && second == other.second && thirdL);
180  }
181 
182  template<class Archive>
183  void serialize(Archive & ar, const unsigned int version) {
184  (void)version;
185  ar & first;
186  ar & second;
187  ar & third;
188  }
189 };
190 
191 using GymTransitionTuple = struct GymTransitionTuple {
192  GymObservation S;
193  GymAction A;
194  Reward R;
195  GymObservation sPrime;
196 };
197 
198 // using GymAction = std::pair<std::pair<Action, State>, State>; /// ((model action, automaton next state), automaton epsilon)
199 static State constexpr invalidState = ~0;
200 static std::pair<Action, State> constexpr invalidAction = {-1, -1};
201 static GymObservation const terminalState = {(Node)-1, (State)-1, (short int)-1};
202 
203 using Qtype = std::unordered_map<GymObservation, std::map<GymAction, double>, GymObservationHasher>;
204 using OrderedQtype = std::map<GymObservation, std::map<GymAction, double>, GymObservationCompare>;
205 
206 struct GymOptions {
207  GymOptions() {
208  episodeLength = 30;
209  zeta = 0.99;
210  tolerance = std::vector<double>{0.01};
211  rewardType = GymRewardTypes::default_type;
212  priEpsilon = 0.001;
213  concatActionsInCSV = false;
214  fInvScale = 0.01;
215  randInit = false;
216  }
217  enum class GymRewardTypes {
218  default_type = 0,
219  prism = 1,
221  zeta_reach = 2,
222  zeta_acc = 3,
224  zeta_discount = 4,
226  reward_on_acc = 5,
227  multi_discount = 6,
228  parity = 7,
229  pri_tracker = 8,
230  lexo = 9,
231  avg = 10
232  };
233  unsigned int episodeLength;
234  double zeta;
235  double gammaB;
236  std::vector<double> tolerance;
237  double priEpsilon;
238  GymRewardTypes rewardType;
239  bool noResetOnAcc;
241  bool terminalUpdateOnTimeLimit;
243  bool p1NotStrategic;
246  bool concatActionsInCSV;
247  double fInvScale;
248  double resetPenalty;
249  bool randInit;
250 };
251 
253 class Gym {
254 public:
256  struct GymInfo{
257  GymObservation observation;
258  Reward reward;
259  BDD letter;
260  bool done;
261  std::vector<GymAction> actions;
262  Player player;
263  bool discountOverride = false;
264  double discount = 0.0;
265  bool terminationOverride = false;
266  };
267 
269  Gym(Model model, Parity dpw, GymOptions options, std::vector<Parity> exdpws={});
271  void resetStats(void);
273  void printStats(void) const;
277  void printDotLearn(Qtype const & Q, std::string filename = std::string("-")) const;
281  void printPrismLearn(Qtype const & Q, std::string filename = std::string("-")) const;
287  std::map<double, std::map<unsigned int, double>> getProbabilityOfSat(Qtype const & Q, bool statsOn) const;
298  Model QtoModel(Qtype const & Q, double tolerance, bool p1strategic, bool statsOn, double priEpsilon=0.001, unsigned int objIndex=0) const;
300  std::vector<GymTransitionTuple> getParallelUpdates(GymObservation S, GymAction A, GymObservation sPrime);
302  std::vector<GymAction> getAllActions(GymObservation obs);
304  std::vector<GymAction> getActions(void);
306  GymInfo reset(void);
308  GymInfo step(GymAction);
310  Model const & getModel(void) const {
311  return model;
312  }
314  Player getPlayer(GymObservation const & observation) const;
316  std::string toString(GymObservation const & observation) const;
318  std::string toString(GymAction const & action) const;
320  std::string toString(Qtype const & Q) const;
322  void saveQ(Qtype const & Q, std::string saveType, std::string filename) const;
324  void saveQ(Qtype const & Q1, Qtype const & Q2, std::string saveType, std::string filename) const;
326  void saveQ(Qtype const & Q, double Rbar, std::string saveType, std::string filename) const;
328  void loadQ(Qtype & Q, std::string saveType, std::string filename) const;
330  void loadQ(Qtype & Q1, Qtype & Q2, std::string saveType, std::string filename) const;
332  void loadQ(Qtype & Q, double & Rbar, std::string saveType, std::string filename) const;
334  void saveStrat(Qtype & Q, std::string filename) const;
336  void saveStratBDP(Qtype & Q, std::string filename) const;
337 
338 private:
342  std::size_t getProductID(void) const;
343 
344  Model model;
345  Parity automaton;
346  std::vector<Parity> exauto;
347  GymOptions::GymRewardTypes rewardType;
348  unsigned int episodeStep;
349  unsigned int episodeLength;
350  double zeta;
351  double gammaB;
352  double fScale;
353  double resetPenalty;
354  std::vector<double> tolerance;
355  double priEpsilon;
356  Priority maxPriority;
357  int numAccept;
358  int acceptingEps;
359  int trappedEps;
360  long numSteps;
361  double cumulativeRew;
362  long numEpisodes;
363  bool resetOnAcc;
364  bool noTerminalUpdate;
365  bool p1strategic;
366  bool concatActionsInCSV;
367  bool randInit;
368  Node modelState;
369  State autoState;
370  std::vector<State> exAutoState;
371  short trackerState;
372  StateSet traps;
373 };
374 
375 #endif
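Taken together, the declarations above form a small reinforcement-learning interface: reset() starts an episode on the product of model and objective automaton, step() advances it, and GymInfo reports the observation, reward, available actions, and termination flag. The sketch below shows one episode under stated assumptions: runEpisode, the fixed first-action choice, and the no-op Q-table update are illustrative placeholders, and the Model and Parity objects are assumed to be built elsewhere by the Mungojerrie front end.

#include "Gym.hh"

// Minimal sketch of one episode; the learning update is left as a placeholder.
void runEpisode(Model const & model, Parity const & dpw)
{
  GymOptions options;                   // defaults: episodeLength = 30, zeta = 0.99, ...
  options.episodeLength = 100;
  options.rewardType = GymOptions::GymRewardTypes::zeta_reach;

  Gym gym(model, dpw, options);
  Qtype Q;                              // Q-table: observation -> (action -> value)

  Gym::GymInfo info = gym.reset();
  double total = 0.0;
  while (!info.done) {
    if (info.actions.empty()) break;    // no action available in the current observation
    GymAction a = info.actions.front(); // a real agent would choose based on Q
    info = gym.step(a);
    total += info.reward;
    Q[info.observation][a] += 0.0;      // Q-learning / Sarsa update would go here
  }
  gym.printStats();
}

Which reward shaping step() applies is governed by GymOptions::rewardType; the other defaults set in the GymOptions constructor are kept unless overridden as above.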
Gym::GymInfo
Class for encapsulation of return values of Gym.
Definition: Gym.hh:256
GymObservationCompare
Definition: Gym.hh:126
GymOptions::GymRewardTypes::zeta_acc
@ zeta_acc
Player
int Player
Type of players.
Definition: Model.hh:68
GymOptions::GymRewardTypes::multi_discount
@ multi_discount
GymOptions::GymRewardTypes::prism
@ prism
GymOptions
Definition: Gym.hh:206
GymOptions::noResetOnAcc
bool noResetOnAcc
Definition: Gym.hh:239
Gym::toString
std::string toString(GymObservation const &observation) const
Returns a string representing GymObservation.
Definition: Gym.cc:1037
Gym::resetStats
void resetStats(void)
Resets statistics.
Definition: Gym.cc:215
Gym::reset
GymInfo reset(void)
Resets environment.
Definition: Gym.cc:703
GymOptions::GymRewardTypes::parity
@ parity
Gym::getAllActions
std::vector< GymAction > getAllActions(GymObservation obs)
Returns all available actions from a particular state.
Definition: Gym.cc:580
GymOptions::GymRewardTypes::default_type
@ default_type
GymOptions::GymRewardTypes
GymRewardTypes
Definition: Gym.hh:217
Node
unsigned long Node
Type of nodes.
Definition: Model.hh:58
GymObservationHasher
Definition: Gym.hh:114
Gym::printPrismLearn
void printPrismLearn(Qtype const &Q, std::string filename=std::string("-")) const
Prints the PRISM representation of learned pruned product graph.
Definition: Gym.cc:272
Gym::printDotLearn
void printDotLearn(Qtype const &Q, std::string filename=std::string("-")) const
Prints the dot representation of learned pruned product graph.
Definition: Gym.cc:250
Gym::saveQ
void saveQ(Qtype const &Q, std::string saveType, std::string filename) const
Saves Q-table.
Gym::getProbabilityOfSat
std::map< double, std::map< unsigned int, double > > getProbabilityOfSat(Qtype const &Q, bool statsOn) const
Returns the probability of satisfaction in the learned pruned product graph for each tolerance and pr...
Definition: Gym.cc:294
Model.hh
Markov Decision Processes and Stochastic Games.
GymOptions::GymRewardTypes::reward_on_acc
@ reward_on_acc
Gym::step
GymInfo step(GymAction)
Takes step in environment according to action.
Definition: Gym.cc:753
GymOptions::GymRewardTypes::zeta_reach
@ zeta_reach
Gym::saveStratBDP
void saveStratBDP(Qtype &Q, std::string filename) const
Saves mixed strategy from Q-table to file for all visited states.
Definition: Gym.cc:1256
GymOptions::GymRewardTypes::pri_tracker
@ pri_tracker
Gym
Class for a learning interface.
Definition: Gym.hh:253
GymOptions::GymRewardTypes::lexo
@ lexo
Gym::getParallelUpdates
std::vector< GymTransitionTuple > getParallelUpdates(GymObservation S, GymAction A, GymObservation sPrime)
Returns all transition tuples that were possible in the product given that the MDP transition is fixed.
Definition: Gym.cc:620
Gym::getActions
std::vector< GymAction > getActions(void)
Returns available actions from current state, excluding special actions.
Definition: Gym.cc:678
Model
Class for MDPs and related structures.
Definition: Model.hh:82
GymOptions::GymRewardTypes::zeta_discount
@ zeta_discount
Gym::loadQ
void loadQ(Qtype &Q, std::string saveType, std::string filename) const
Loads Q-table.
Gym::Gym
Gym(Model model, Parity dpw, GymOptions options, std::vector< Parity > exdpws={})
Constructor from model.
Definition: Gym.cc:54
GymOptions::p1NotStrategic
bool p1NotStrategic
Definition: Gym.hh:243
Gym::QtoModel
Model QtoModel(Qtype const &Q, double tolerance, bool p1strategic, bool statsOn, double priEpsilon=0.001, unsigned int objIndex=0) const
Constructs Markov chain from Q-table.
Definition: Gym.cc:320
Reward
double Reward
Type of rewards.
Definition: Model.hh:75
Gym::getModel
const Model & getModel(void) const
Returns a reference to model.
Definition: Gym.hh:310
GymOptions::terminalUpdateOnTimeLimit
bool terminalUpdateOnTimeLimit
Definition: Gym.hh:241
Gym::saveStrat
void saveStrat(Qtype &Q, std::string filename) const
Saves mixed strategy from Q-table to CSV file for all visited states.
Definition: Gym.cc:1183
Parity
Class of parity automata.
Definition: Parity.hh:85
Gym::printStats
void printStats(void) const
Prints statistics about a run.
Definition: Gym.cc:225
trackerState
short int trackerState
Priority-tracker state (0 if unused).
Definition: Gym.hh:57
second
State second
Automaton state.
Definition: Gym.hh:56
third
std::vector< State > third
Additional automata states.
Definition: Gym.hh:58
Gym::getPlayer
Player getPlayer(GymObservation const &observation) const
Returns the player controlling state GymObservation.
Definition: Gym.cc:1030
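Once training has produced a Q-table, the const members declared above can be used to evaluate and export the induced strategy. A minimal sketch, assuming a trained gym and Q; the tolerance and Boolean flags passed here are illustrative values, not recommended settings.

#include "Gym.hh"

// Sketch: inspect a learned Q-table through the declared const interface.
void evaluate(Gym const & gym, Qtype const & Q)
{
  // Probability of satisfaction in the learned pruned product,
  // keyed by tolerance and objective index.
  std::map<double, std::map<unsigned int, double>> prob =
      gym.getProbabilityOfSat(Q, true /* statsOn */);

  // Markov chain induced by the strategy extracted from Q.
  Model chain = gym.QtoModel(Q, 0.01 /* tolerance */, false /* p1strategic */, false /* statsOn */);

  // Dot rendering of the learned pruned product ("-" writes to standard output).
  gym.printDotLearn(Q);

  (void) prob; (void) chain;            // silence unused-variable warnings in this sketch
}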