1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 2 /* */ 3 /* This file is part of the program and library */ 4 /* SCIP --- Solving Constraint Integer Programs */ 5 /* */ 6 /* Copyright (c) 2002-2023 Zuse Institute Berlin (ZIB) */ 7 /* */ 8 /* Licensed under the Apache License, Version 2.0 (the "License"); */ 9 /* you may not use this file except in compliance with the License. */ 10 /* You may obtain a copy of the License at */ 11 /* */ 12 /* http://www.apache.org/licenses/LICENSE-2.0 */ 13 /* */ 14 /* Unless required by applicable law or agreed to in writing, software */ 15 /* distributed under the License is distributed on an "AS IS" BASIS, */ 16 /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ 17 /* See the License for the specific language governing permissions and */ 18 /* limitations under the License. */ 19 /* */ 20 /* You should have received a copy of the Apache-2.0 license */ 21 /* along with SCIP; see the file LICENSE. If not visit scipopt.org. */ 22 /* */ 23 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 24 25 /**@file pub_bandit_ucb.h 26 * @ingroup PublicBanditMethods 27 * @brief public methods for UCB bandit selection 28 * @author Gregor Hendel 29 */ 30 31 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/ 32 33 #ifndef SRC_SCIP_PUB_BANDIT_UCB_H_ 34 #define SRC_SCIP_PUB_BANDIT_UCB_H_ 35 36 #include "scip/def.h" 37 #include "scip/type_bandit.h" 38 #include "scip/type_retcode.h" 39 #include "scip/type_scip.h" 40 41 #ifdef __cplusplus 42 extern "C" { 43 #endif 44 45 46 /**@addtogroup PublicBanditMethods 47 * 48 * ## Upper Confidence Bounds (UCB) 49 * 50 * UCB (Upper confidence bounds) is a deterministic 51 * selection algorithm for the multi-armed bandit problem. 52 * In every iteration, UCB selects the action that maximizes 53 * a tradeoff between its performance in the past 54 * and a variance term. 55 * The influence of the variance (confidence width) can be 56 * controlled by the parameter \f$ \alpha \f$. 57 * 58 * @{ 59 */ 60 61 62 /** create and reset UCB bandit algorithm */ 63 SCIP_EXPORT 64 SCIP_RETCODE SCIPcreateBanditUcb( 65 SCIP* scip, /**< SCIP data structure */ 66 SCIP_BANDIT** ucb, /**< pointer to store bandit algorithm */ 67 SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */ 68 SCIP_Real alpha, /**< parameter to increase confidence width */ 69 int nactions, /**< the positive number of actions for this bandit algorithm */ 70 unsigned int initseed /**< initial random number seed */ 71 ); 72 73 /** returns the upper confidence bound of a selected action */ 74 SCIP_EXPORT 75 SCIP_Real SCIPgetConfidenceBoundUcb( 76 SCIP_BANDIT* ucb, /**< UCB bandit algorithm */ 77 int action /**< index of the queried action */ 78 ); 79 80 /** return start permutation of the UCB bandit algorithm */ 81 SCIP_EXPORT 82 int* SCIPgetStartPermutationUcb( 83 SCIP_BANDIT* ucb /**< UCB bandit algorithm */ 84 ); 85 86 /** @}*/ 87 88 89 #ifdef __cplusplus 90 } 91 #endif 92 93 #endif 94