1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 2 /* */ 3 /* This file is part of the program and library */ 4 /* SCIP --- Solving Constraint Integer Programs */ 5 /* */ 6 /* Copyright (c) 2002-2023 Zuse Institute Berlin (ZIB) */ 7 /* */ 8 /* Licensed under the Apache License, Version 2.0 (the "License"); */ 9 /* you may not use this file except in compliance with the License. */ 10 /* You may obtain a copy of the License at */ 11 /* */ 12 /* http://www.apache.org/licenses/LICENSE-2.0 */ 13 /* */ 14 /* Unless required by applicable law or agreed to in writing, software */ 15 /* distributed under the License is distributed on an "AS IS" BASIS, */ 16 /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ 17 /* See the License for the specific language governing permissions and */ 18 /* limitations under the License. */ 19 /* */ 20 /* You should have received a copy of the Apache-2.0 license */ 21 /* along with SCIP; see the file LICENSE. If not visit scipopt.org. */ 22 /* */ 23 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 24 25 /**@file pub_bandit_exp3.h 26 * @ingroup PublicBanditMethods 27 * @brief public methods for Exp.3 28 * @author Gregor Hendel 29 */ 30 31 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/ 32 33 #ifndef SRC_SCIP_PUB_BANDIT_EXP3_H_ 34 #define SRC_SCIP_PUB_BANDIT_EXP3_H_ 35 36 #include "scip/def.h" 37 #include "scip/type_bandit.h" 38 #include "scip/type_retcode.h" 39 #include "scip/type_scip.h" 40 41 #ifdef __cplusplus 42 extern "C" { 43 #endif 44 45 /**@addtogroup PublicBanditMethods 46 * 47 * ## Exp.3 48 * 49 * Exp.3 is a randomized selection method for the multi-armed bandit problem 50 * 51 * Exp3 maintains a probability distribution 52 * according to which an action is drawn 53 * in every iteration. 54 * The probability distribution is a mixture between 55 * a uniform distribution and a softmax distribution 56 * based on the cumulative rewards of the actions. 57 * The weight of the uniform distribution in the mixture 58 * is controlled by the parameter \f$ \gamma \f$, ie., 59 * setting \f$ \gamma = 1\f$ uses a uniform distribution 60 * in every selection step. 61 * The cumulative reward for the actions can be 62 * fine-tuned by adding a general bias for all actions. 63 * The bias is given by the parameter \f$ \beta \f$. 64 * 65 * @{ 66 */ 67 68 /** creates and resets an Exp.3 bandit algorithm using \p scip pointer */ 69 SCIP_EXPORT 70 SCIP_RETCODE SCIPcreateBanditExp3( 71 SCIP* scip, /**< SCIP data structure */ 72 SCIP_BANDIT** exp3, /**< pointer to store bandit algorithm */ 73 SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */ 74 SCIP_Real gammaparam, /**< weight between uniform (gamma ~ 1) and weight driven (gamma ~ 0) probability distribution */ 75 SCIP_Real beta, /**< gain offset between 0 and 1 at every observation */ 76 int nactions, /**< the positive number of actions for this bandit algorithm */ 77 unsigned int initseed /**< initial seed for random number generation */ 78 ); 79 80 /** set gamma parameter of Exp.3 bandit algorithm to increase weight of uniform distribution */ 81 SCIP_EXPORT 82 void SCIPsetGammaExp3( 83 SCIP_BANDIT* exp3, /**< bandit algorithm */ 84 SCIP_Real gammaparam /**< weight between uniform (gamma ~ 1) and weight driven (gamma ~ 0) probability distribution */ 85 ); 86 87 /** set beta parameter of Exp.3 bandit algorithm to increase gain offset for actions that were not played */ 88 SCIP_EXPORT 89 void SCIPsetBetaExp3( 90 SCIP_BANDIT* exp3, /**< bandit algorithm */ 91 SCIP_Real beta /**< gain offset between 0 and 1 at every observation */ 92 ); 93 94 /** returns probability to play an action */ 95 SCIP_EXPORT 96 SCIP_Real SCIPgetProbabilityExp3( 97 SCIP_BANDIT* exp3, /**< bandit algorithm */ 98 int action /**< index of the requested action */ 99 ); 100 101 /** @}*/ 102 103 #ifdef __cplusplus 104 } 105 #endif 106 107 #endif 108