1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 2 /* */ 3 /* This file is part of the program and library */ 4 /* SCIP --- Solving Constraint Integer Programs */ 5 /* */ 6 /* Copyright (c) 2002-2023 Zuse Institute Berlin (ZIB) */ 7 /* */ 8 /* Licensed under the Apache License, Version 2.0 (the "License"); */ 9 /* you may not use this file except in compliance with the License. */ 10 /* You may obtain a copy of the License at */ 11 /* */ 12 /* http://www.apache.org/licenses/LICENSE-2.0 */ 13 /* */ 14 /* Unless required by applicable law or agreed to in writing, software */ 15 /* distributed under the License is distributed on an "AS IS" BASIS, */ 16 /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ 17 /* See the License for the specific language governing permissions and */ 18 /* limitations under the License. */ 19 /* */ 20 /* You should have received a copy of the Apache-2.0 license */ 21 /* along with SCIP; see the file LICENSE. If not visit scipopt.org. */ 22 /* */ 23 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 24 25 /**@file pub_bandit_epsgreedy.h 26 * @ingroup PublicBanditMethods 27 * @brief public methods for the epsilon greedy bandit selector 28 * @author Gregor Hendel 29 */ 30 31 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/ 32 33 #ifndef SRC_SCIP_PUB_BANDIT_EPSGREEDY_H_ 34 #define SRC_SCIP_PUB_BANDIT_EPSGREEDY_H_ 35 36 37 #include "scip/def.h" 38 #include "scip/type_scip.h" 39 #include "scip/type_bandit.h" 40 41 #ifdef __cplusplus 42 extern "C" { 43 #endif 44 45 /**@addtogroup PublicBanditMethods 46 * 47 * ## Epsilon greedy 48 * 49 * Epsilon greedy is a randomized algorithm for the multi-armed bandit problem. 50 * 51 * In every iteration, it either 52 * selects an action uniformly at random with 53 * probability \f$ \varepsilon_t\f$ 54 * or it greedily exploits the best action seen so far with 55 * probability \f$ 1 - \varepsilon_t \f$. 56 * In this implementation, \f$ \varepsilon_t \f$ decreases over time 57 * (number of selections performed), controlled by the epsilon parameter. 58 * 59 * @{ 60 */ 61 62 /** create and resets an epsilon greedy bandit algorithm */ 63 SCIP_EXPORT 64 SCIP_RETCODE SCIPcreateBanditEpsgreedy( 65 SCIP* scip, /**< SCIP data structure */ 66 SCIP_BANDIT** epsgreedy, /**< pointer to store the epsilon greedy bandit algorithm */ 67 SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */ 68 SCIP_Real eps, /**< parameter to increase probability for exploration between all actions */ 69 SCIP_Bool usemodification, /**< TRUE if modified eps greedy should be used */ 70 SCIP_Bool preferrecent, /**< should the weights be updated in an exponentially decaying way? */ 71 SCIP_Real decayfactor, /**< the factor to reduce the weight of older observations if exponential decay is enabled */ 72 int avglim, /**< nonnegative limit on observation number before the exponential decay starts, 73 * only relevant if exponential decay is enabled 74 */ 75 int nactions, /**< the number of possible actions */ 76 unsigned int initseed /**< initial seed for random number generation */ 77 ); 78 79 /** get weights array of epsilon greedy bandit algorithm */ 80 SCIP_EXPORT 81 SCIP_Real* SCIPgetWeightsEpsgreedy( 82 SCIP_BANDIT* epsgreedy /**< epsilon greedy bandit algorithm */ 83 ); 84 85 /** set epsilon parameter of epsilon greedy bandit algorithm */ 86 SCIP_EXPORT 87 void SCIPsetEpsilonEpsgreedy( 88 SCIP_BANDIT* epsgreedy, /**< epsilon greedy bandit algorithm */ 89 SCIP_Real eps /**< parameter to increase probability for exploration between all actions */ 90 ); 91 92 /** @} */ 93 94 95 96 #ifdef __cplusplus 97 } 98 #endif 99 100 #endif 101