// Listing extracted from docs/v1_0/Gym_8hh_source.html

#ifndef GYM_HH_

#define GYM_HH_


#include "Model.hh"

#include <boost/functional/hash.hpp>

#include <boost/archive/text_oarchive.hpp>

#include <boost/archive/text_iarchive.hpp>

#include <boost/serialization/vector.hpp>


using GymObservation = struct GymObservation {

  friend class boost::serialization::access;


  Node first;

  State second;

  short int trackerState;

  std::vector<State> third; // Additional automata states


  GymObservation(Node first, State second, short int trackerState=0, std::vector<State> third={}) : first(first),

                                                                                                    second(second),

                                                                                                    trackerState(trackerState),

                                                                                                    third(third) {}

  GymObservation(void) {

    first = 0;

    second = 0;

    trackerState = 0;

    third = {};

  }

  bool operator==(GymObservation const & other) const

  {

    if (third.size() != other.third.size()) {

      return false;

    }

    bool isEqual = true;

    for (size_t index = 0; index < third.size(); index++) {

      isEqual = isEqual && (third[index] == other.third[index]);

    }

    return (first == other.first &&

            second == other.second &&

            trackerState == other.trackerState &&

            isEqual);

  }

  bool operator<(GymObservation const & other) const

  {

    bool thirdL = false;

    if (third.size() < other.third.size()) {

      thirdL = true;

    } else if (third.size() > other.third.size()) {

      thirdL = false;

    } else {

      for (size_t index = 0; index < third.size(); index++) {

        if (third[index] < other.third[index]) {

          thirdL = true;

          break;

        }

      }

    }

    return (first < other.first) ||

           (first == other.first && second < other.second) ||

           (first == other.first && second == other.second && trackerState < other.trackerState) ||

           (first == other.first && second == other.second && trackerState == other.trackerState && thirdL);

  }

  template<class Archive>

  void serialize(Archive & ar, const unsigned int version) {

    (void)version;

    ar & first;

    ar & second;

    ar & trackerState;

    ar & third;

  }

};


/// Hash functor for GymObservation, suitable for unordered containers.
/// Hashes each of the four fields with boost::hash_value and folds the
/// results, in declaration order, into one value with boost::hash_combine.
struct GymObservationHasher {
  std::size_t operator()(GymObservation const & observation) const
  {
    // Per-field hashes, computed up front.
    std::size_t const firstHash = boost::hash_value(observation.first);
    std::size_t const secondHash = boost::hash_value(observation.second);
    std::size_t const trackerHash = boost::hash_value(observation.trackerState);
    std::size_t const thirdHash = boost::hash_value(observation.third);

    // Fold them together; the combination order is part of the hash value.
    std::size_t digest = 0;
    boost::hash_combine(digest, firstHash);
    boost::hash_combine(digest, secondHash);
    boost::hash_combine(digest, trackerHash);
    boost::hash_combine(digest, thirdHash);
    return digest;
  }
};


// Ordering functor for GymObservation keys. It simply forwards to
// GymObservation::operator<, so the ordering is whatever that operator defines.
struct GymObservationCompare {

  // Returns true when `first` is ordered before `second`.
  bool operator()(GymObservation const & first, GymObservation const & second) const

  {

    return first < second;

    // Earlier tuple-based comparison kept for reference; it only looked at
    // first, second and trackerState (not `third`):
    // std::tuple<Node, State, short int>{first.first, first.second, first.trackerState} <

    //   std::tuple<Node, State, short int>{second.first, second.second, second.trackerState};

  }

};


using GymAction = struct GymAction {

  friend class boost::serialization::access;


  std::pair<Action, State> first;

  State second;

  std::vector<State> third;


  GymAction(std::pair<Action, State> first, State second, std::vector<State> third={}) : first(first),

                                                                                         second(second),

                                                                                         third(third) {}


  GymAction(void) {

    first = {0, 0};

    second = 0;

    third = {};

  }


  bool operator==(GymAction const & other) const {

    if (third.size() != other.third.size()) {

      return false;

    }

    bool isEqual = true;

    for (size_t index = 0; index < third.size(); index++) {

      isEqual = isEqual && (third[index] == other.third[index]);

    }

    return (first == other.first && second == other.second && isEqual);

  }


  bool operator<(GymAction const & other) const {

    bool thirdL = false;

    if (third.size() < other.third.size()) {

      thirdL = true;

    } else if (third.size() > other.third.size()) {

      thirdL = false;

    } else{

      for (size_t index = 0; index < third.size(); index++) {

        if (third[index] < other.third[index]) {

          thirdL = true;

          break;

        }

      }

    }

    return (first < other.first) ||

           (first == other.first && second < other.second) ||

           (first == other.first && second == other.second && thirdL);

  }


  template<class Archive>

  void serialize(Archive & ar, const unsigned int version) {

    (void)version;

    ar & first;

    ar & second;

    ar & third;

  }

};


// One transition record: two observations, an action and a reward.
// Gym::getParallelUpdates returns a vector of these. The field names suggest
// the usual (state, action, reward, next state) tuple -- presumably S is the
// observation before the action and sPrime the one after; confirm in Gym.cc.
using GymTransitionTuple = struct GymTransitionTuple {

  GymObservation S;       // observation the action was taken from (presumed)

  GymAction A;            // action taken

  Reward R;               // reward associated with the transition

  GymObservation sPrime;  // resulting observation (presumed)

};


// using GymAction = std::pair<std::pair<Action, State>, State>; /// ((model action, automaton next state), automaton epsilon)

// Sentinel automaton state, initialised from ~0 (all bits set for an integer State).
static constexpr State invalidState = ~0;

// Sentinel action: both components are initialised from -1.
static constexpr std::pair<Action, State> invalidAction = {-1, -1};

// Sentinel observation with node, automaton state and tracker state all
// converted from -1; `third` takes its default (empty) value.
static const GymObservation terminalState = {static_cast<Node>(-1),
                                             static_cast<State>(-1),
                                             static_cast<short int>(-1)};


// Q-table: maps an observation to a per-action table of double values.
// Hash-based outer map (GymObservationHasher); inner map ordered by
// GymAction::operator<.
using Qtype = std::unordered_map<GymObservation, std::map<GymAction, double>, GymObservationHasher>;

// Same shape as Qtype but with the outer map ordered through
// GymObservationCompare, giving a deterministic iteration order.
using OrderedQtype = std::map<GymObservation, std::map<GymAction, double>, GymObservationCompare>;


/// Option bundle passed to the Gym constructor.
///
/// The default constructor gives every field a deterministic value so that a
/// freshly constructed GymOptions can be read safely.
struct GymOptions {
  /// Selector for the reward scheme. The enumerator values are explicit and
  /// must stay stable.
  enum class GymRewardTypes {
    default_type = 0,
    prism = 1,
    zeta_reach = 2,
    zeta_acc = 3,
    zeta_discount = 4,
    reward_on_acc = 5,
    multi_discount = 6,
    parity = 7,
    pri_tracker = 8,
    lexo = 9,
    avg = 10
  };

  /// Sets the defaults. gammaB, noResetOnAcc, terminalUpdateOnTimeLimit,
  /// p1NotStrategic and resetPenalty get neutral 0.0 / false values here;
  /// NOTE(review): these are placeholders that guarantee defined reads --
  /// callers are expected to assign the values they actually want.
  GymOptions()
    : episodeLength(30),
      zeta(0.99),
      gammaB(0.0),
      tolerance{0.01},
      priEpsilon(0.001),
      rewardType(GymRewardTypes::default_type),
      noResetOnAcc(false),
      terminalUpdateOnTimeLimit(false),
      p1NotStrategic(false),
      concatActionsInCSV(false),
      fInvScale(0.01),
      resetPenalty(0.0),
      randInit(false) {}

  unsigned int episodeLength;       ///< default 30
  double zeta;                      ///< default 0.99
  double gammaB;                    ///< placeholder default 0.0
  std::vector<double> tolerance;    ///< default {0.01}
  double priEpsilon;                ///< default 0.001
  GymRewardTypes rewardType;        ///< default GymRewardTypes::default_type
  bool noResetOnAcc;                ///< placeholder default false
  bool terminalUpdateOnTimeLimit;   ///< placeholder default false
  bool p1NotStrategic;              ///< placeholder default false
  bool concatActionsInCSV;          ///< default false
  double fInvScale;                 ///< default 0.01
  double resetPenalty;              ///< placeholder default 0.0
  bool randInit;                    ///< default false
};


// Learning environment built from a Model, a Parity automaton, a GymOptions
// bundle and an optional list of extra Parity automata (see the constructor).
// Only declarations are visible here; behaviour lives in the implementation
// file, so the semantic notes below are hedged where they are inferred from
// names rather than from code.
class Gym {

public:

  // Result bundle returned by reset() and step().
  struct GymInfo{

    // Current observation.
    GymObservation observation;

    // Reward attached to this result.
    Reward reward;

    // BDD-valued label; presumably the letter read by the automaton -- confirm.
    BDD letter;

    // Presumably true when the episode has ended -- confirm.
    bool done;

    // Actions offered to the caller; presumably those available next.
    std::vector<GymAction> actions;

    // Player associated with this result (see getPlayer()).
    Player player;

    // The three fields below carry in-class defaults (false / 0.0 / false).
    // Presumably `discount` is only meaningful when discountOverride is set.
    bool discountOverride = false;

    double discount = 0.0;

    bool terminationOverride = false;

  };


  // Builds the environment. `exdpws` defaults to an empty list of extra automata.
  Gym(Model model, Parity dpw, GymOptions options, std::vector<Parity> exdpws={});

  // Statistics helpers; presumably operate on the counters declared in the
  // private section (numAccept, acceptingEps, trappedEps, numSteps, ...).
  void resetStats(void);

  void printStats(void) const;

  // Output helpers for a Q-table. `filename` defaults to "-".
  // NOTE(review): "-" presumably means standard output -- confirm.
  void printDotLearn(Qtype const & Q, std::string filename = std::string("-")) const;

  void printPrismLearn(Qtype const & Q, std::string filename = std::string("-")) const;

  // Returns a nested map of doubles keyed by double, then by unsigned int.
  // NOTE(review): key meanings are not visible here; confirm in Gym.cc.
  std::map<double, std::map<unsigned int, double>> getProbabilityOfSat(Qtype const & Q, bool statsOn) const;

  // Builds a Model from a Q-table. priEpsilon defaults to 0.001, objIndex to 0.
  Model QtoModel(Qtype const & Q, double tolerance, bool p1strategic, bool statsOn, double priEpsilon=0.001, unsigned int objIndex=0) const;

  // Returns a list of transition tuples for the given (S, A, sPrime) triple.
  std::vector<GymTransitionTuple> getParallelUpdates(GymObservation S, GymAction A, GymObservation sPrime);

  // Action queries: one for an explicit observation, one without arguments
  // (presumably for the environment's current state).
  std::vector<GymAction> getAllActions(GymObservation obs);

  std::vector<GymAction> getActions(void);

  // Episode control; both return a GymInfo.
  GymInfo reset(void);

  GymInfo step(GymAction);

  // Read-only access to the stored model.
  Model const & getModel(void) const {

    return model;

  }

  // Player associated with an observation.
  Player getPlayer(GymObservation const & observation) const;

  // String renderings of an observation, an action and a whole Q-table.
  std::string toString(GymObservation const & observation) const;

  std::string toString(GymAction const & action) const;

  std::string toString(Qtype const & Q) const;

  // Q-table persistence. Overloads cover one table, two tables, and one table
  // plus a double `Rbar`; loadQ mirrors saveQ with output references.
  // NOTE(review): accepted `saveType` strings are not visible here.
  void saveQ(Qtype const & Q, std::string saveType, std::string filename) const;

  void saveQ(Qtype const & Q1, Qtype const & Q2, std::string saveType, std::string filename) const;

  void saveQ(Qtype const & Q, double Rbar, std::string saveType, std::string filename) const;

  void loadQ(Qtype & Q, std::string saveType, std::string filename) const;

  void loadQ(Qtype & Q1, Qtype & Q2, std::string saveType, std::string filename) const;

  void loadQ(Qtype & Q, double & Rbar, std::string saveType, std::string filename) const;

  // Strategy export. NOTE(review): these take Q by non-const reference, unlike
  // the other output helpers -- check whether they modify Q.
  void saveStrat(Qtype & Q, std::string filename) const;

  void saveStratBDP(Qtype & Q, std::string filename) const;


private:

  // Identifier for the model/automaton product; computation not visible here.
  std::size_t getProductID(void) const;


  // Environment definition.
  Model model;

  Parity automaton;

  std::vector<Parity> exauto;

  // Settings. Several names differ from their GymOptions counterparts
  // (fScale vs fInvScale, resetOnAcc vs noResetOnAcc, noTerminalUpdate vs
  // terminalUpdateOnTimeLimit, p1strategic vs p1NotStrategic); presumably the
  // constructor derives or inverts them -- confirm in Gym.cc.
  GymOptions::GymRewardTypes rewardType;

  unsigned int episodeStep;

  unsigned int episodeLength;

  double zeta;

  double gammaB;

  double fScale;

  double resetPenalty;

  std::vector<double> tolerance;

  double priEpsilon;

  Priority maxPriority;

  // Statistics counters.
  int numAccept;

  int acceptingEps;

  int trappedEps;

  long numSteps;

  double cumulativeRew;

  long numEpisodes;

  // Behaviour flags.
  bool resetOnAcc;

  bool noTerminalUpdate;

  bool p1strategic;

  bool concatActionsInCSV;

  bool randInit;

  // Current position; the types match the four GymObservation fields
  // (first, second, third, trackerState).
  Node modelState;

  State autoState;

  std::vector<State> exAutoState;

  short trackerState;

  // Set of states; presumably trap states of the automaton -- confirm.
  StateSet traps;

};


#endif