1    	/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2    	/*                                                                           */
3    	/*                  This file is part of the program and library             */
4    	/*         SCIP --- Solving Constraint Integer Programs                      */
5    	/*                                                                           */
6    	/*  Copyright (c) 2002-2023 Zuse Institute Berlin (ZIB)                      */
7    	/*                                                                           */
8    	/*  Licensed under the Apache License, Version 2.0 (the "License");          */
9    	/*  you may not use this file except in compliance with the License.         */
10   	/*  You may obtain a copy of the License at                                  */
11   	/*                                                                           */
12   	/*      http://www.apache.org/licenses/LICENSE-2.0                           */
13   	/*                                                                           */
14   	/*  Unless required by applicable law or agreed to in writing, software      */
15   	/*  distributed under the License is distributed on an "AS IS" BASIS,        */
16   	/*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
17   	/*  See the License for the specific language governing permissions and      */
18   	/*  limitations under the License.                                           */
19   	/*                                                                           */
20   	/*  You should have received a copy of the Apache-2.0 license                */
21   	/*  along with SCIP; see the file LICENSE. If not visit scipopt.org.         */
22   	/*                                                                           */
23   	/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24   	
25   	/**@file   pub_bandit_exp3.h
26   	 * @ingroup PublicBanditMethods
27   	 * @brief  public methods for Exp.3
28   	 * @author Gregor Hendel
29   	 */
30   	
31   	/*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
32   	
33   	#ifndef SRC_SCIP_PUB_BANDIT_EXP3_H_
34   	#define SRC_SCIP_PUB_BANDIT_EXP3_H_
35   	
36   	#include "scip/def.h"
37   	#include "scip/type_bandit.h"
38   	#include "scip/type_retcode.h"
39   	#include "scip/type_scip.h"
40   	
41   	#ifdef __cplusplus
42   	extern "C" {
43   	#endif
44   	
45   	/**@addtogroup PublicBanditMethods
46   	 *
47   	 * ## Exp.3
48   	 *
49   	 * Exp.3 is a randomized selection method for the multi-armed bandit problem.
50   	 *
51   	 * Exp.3 maintains a probability distribution
52   	 * according to which an action is drawn
53   	 * in every iteration.
54   	 * The probability distribution is a mixture between
55   	 * a uniform distribution and a softmax distribution
56   	 * based on the cumulative rewards of the actions.
57   	 * The weight of the uniform distribution in the mixture
58   	 * is controlled by the parameter \f$ \gamma \f$, i.e.,
59   	 * setting \f$ \gamma = 1\f$ uses a uniform distribution
60   	 * in every selection step.
61   	 * The cumulative reward for the actions can be
62   	 * fine-tuned by adding a general bias for all actions.
63   	 * The bias is given by the parameter \f$ \beta \f$.
64   	 *
65   	 * @{
66   	 */
67   	
68   	/** creates and resets an Exp.3 bandit algorithm using the \p scip pointer; the new bandit algorithm is stored in \p exp3 */
69   	SCIP_EXPORT
70   	SCIP_RETCODE SCIPcreateBanditExp3(
71   	   SCIP*                 scip,               /**< SCIP data structure */
72   	   SCIP_BANDIT**         exp3,               /**< pointer to store the created bandit algorithm */
73   	   SCIP_Real*            priorities,         /**< nonnegative priorities for each action (presumably an array with one entry per action), or NULL if not needed */
74   	   SCIP_Real             gammaparam,         /**< weight between uniform (gamma ~ 1) and weight-driven (gamma ~ 0) probability distribution */
75   	   SCIP_Real             beta,               /**< gain offset between 0 and 1 at every observation */
76   	   int                   nactions,           /**< the positive number of actions for this bandit algorithm */
77   	   unsigned int          initseed            /**< initial seed for random number generation */
78   	   );
79   	
80   	/** sets the gamma parameter of an Exp.3 bandit algorithm; a larger gamma increases the weight of the uniform distribution */
81   	SCIP_EXPORT
82   	void SCIPsetGammaExp3(
83   	   SCIP_BANDIT*          exp3,               /**< bandit algorithm */
84   	   SCIP_Real             gammaparam          /**< weight between uniform (gamma ~ 1) and weight-driven (gamma ~ 0) probability distribution */
85   	   );
86   	
87   	/** sets the beta parameter of an Exp.3 bandit algorithm, which controls the gain offset for actions that were not played */
88   	SCIP_EXPORT
89   	void SCIPsetBetaExp3(
90   	   SCIP_BANDIT*          exp3,               /**< bandit algorithm */
91   	   SCIP_Real             beta                /**< gain offset between 0 and 1 at every observation */
92   	   );
93   	
94   	/** returns the probability with which the Exp.3 bandit algorithm plays the given action */
95   	SCIP_EXPORT
96   	SCIP_Real SCIPgetProbabilityExp3(
97   	   SCIP_BANDIT*          exp3,               /**< bandit algorithm */
98   	   int                   action              /**< index of the requested action (presumably 0-based and smaller than the number of actions; confirm against the implementation) */
99   	   );
100  	
101  	/** @}*/
102  	
103  	#ifdef __cplusplus
104  	}
105  	#endif
106  	
107  	#endif
108