|
马上注册,结交更多好友,享用更多功能,让你轻松玩转社区。
您需要 登录 才可以下载或查看,没有账号?我要加入
x
下面的强化学习代码中,epsilon_greedy 选择怎么改成 Boltzmann 选择?
- #include <stdio.h>
- #include <time.h>
- #include <stdlib.h>
- #include <math.h>
- int move(int a, int &x, int &y, int x_size);
- int xy2s(int x, int y, int x_size);
- int select_action(int s, int num_a, double** Qtable);
- double max_Qval(int s, int num_a, double** Qtable);
- int epsilon_greedy(int epsilon,int s, int num_a, double** Qtable);
- int main()
- {
- int x_size;//x軸方向,迷路大小(x_size=10 时,x=0~9)
- int y_size;
- double alpha, gamma;
- int x, y, x_init, y_init;
- int **maze;
- int num_step;//Q值更新次数
- int num_trial;//运行回数
- int i,j,k;
- int a,s,sd;
- int num_a;
- int num_s;
- double **Qtable;
- int reward;
- double Qmax;
- int epsilon;
-
- //初期值
- alpha=0.5;
- gamma=0.9;
- epsilon=10;
- x_size=10;
- y_size=9;
- x_init=1;
- y_init=1;
- num_step=100;
- num_trial=500;
- num_a=4;
- num_s=x_size*y_size;
- int *suc_sum;
- int *suc_sum2;
-
- suc_sum= new int [(int)(num_trial/100)];
- suc_sum2= new int [(int)(num_trial/100)];
- //初始化
- srand( (unsigned)time( NULL ) );
- //Q-table
- Qtable=new double*[num_s];
- for(i=0;i<num_s;i++){
- Qtable[i]=new double[num_a];
- }
-
- //Q-table初始化
- for(i=0;i<num_s;i++){
- for(j=0;j<num_a;j++){
- Qtable[i][j]=0.1;
- }
- }
-
- //迷路
- maze=new int*[x_size];
- for(i=0;i<x_size;i++){
- maze[i]=new int[y_size];
- }
-
- //迷路初始化(设置迷路墙壁)
- for(i=0;i<x_size;i++){
- for(j=0;j<y_size;j++){
- if(i==0 || j==0 || i==(x_size-1) || j==(y_size-1)){
- maze[i][j]=-1;
- for(k=0;k<num_a;k++){
- Qtable[xy2s(i,j,x_size)][k]=0.0;
- }
- }
- else{
- maze[i][j]=0;
- }
- }
- }
-
- //设置墙壁
- maze[1][2]=-1;
- maze[2][2]=-1;
- maze[3][2]=-1;
- maze[5][4]=-1;
- maze[6][4]=-1;
- maze[6][3]=-1;
- maze[7][3]=-1;
- maze[8][3]=-1;
- maze[2][6]=-1;
- maze[2][7]=-1;
- maze[3][6]=-1;
- maze[6][7]=-1;
- for(i=0;i<x_size;i++){
- for(j=0;j<y_size;j++){
- if (maze[i][j]==-1){
- for(k=0;k<num_a;k++){
- Qtable[xy2s(i,j,x_size)][k]=0.0;
- }
- }
- }
- }
- //设置maze
- maze[8][6]=10;
-
- for(i=0;i<x_size;i++){
- for(j=0;j<y_size;j++){
- printf("%3d",maze[i][j]);
- }
- printf("\n");
- }
- //初期设定
- x=x_init;
- y=y_init;
- s=xy2s(x,y,x_size);
- for(i=0;i<num_trial/100;i++){
- suc_sum[i]=0;
- }
- for(i=0;i<num_trial/100;i++){
- suc_sum2[i]=0;
- }
-
- //开始学习
- for(i=0;i<num_trial;i++){
- printf("trial=%d\n",i);
- for(j=0;j<num_step;j++){
- [size=3][color=red][b] a=epsilon_greedy(epsilon,s,num_a,Qtable);//<--改为 Boltzmann选择[/b][/color][/size]
- sd = move(a,x,y,x_size);
- reward=maze[x][y];
- Qmax=max_Qval(sd,num_a,Qtable);
- Qtable[s][a]=(1 - alpha) * Qtable[s][a] + alpha * ((double)reward + gamma * Qmax);
- if(reward<0){
- //失败
- x=x_init;
- y=y_init;
- s=xy2s(x,y,x_size);
- printf("失败\n");
- break;
- }
- else if(reward>0){
- //成功
- x=x_init;
- y=y_init;
- s=xy2s(x,y,x_size);
- printf("成功\n");
- suc_sum[i/100]++;
- break;
- }
- else{
- //继续
- s=sd;
- }
- }
-
- //验证
- if (i%100==0){
- for(k=0;k<100;k++){
- //初期设定
- x=x_init;
- y=y_init;
- s=xy2s(x,y,x_size);
- for(j=0;j<num_step;j++){
- a=select_action(s,num_a,Qtable);
- sd = move(a,x,y,x_size);
- reward=maze[x][y];
- if(reward<0){
- //失败
- x=x_init;
- y=y_init;
- s=xy2s(x,y,x_size);
- //printf("失败\n");
- break;
- }
- else if(reward>0){
- //成功
- x=x_init;
- y=y_init;
- s=xy2s(x,y,x_size);
- suc_sum2[i/100]++;
- //printf("成功\n");
- break;
- }
- else{
-
- s=sd;
- }
- }
- }
- }
- }
-
- //追加
- //方向
- for(x=0;x<x_size;x++){
- for(y=0;y<y_size;y++){
- s=xy2s(x,y,x_size);
- Qmax=max_Qval(s,num_a,Qtable);
- if(Qmax==0){
- printf("%3d",maze[x][y]);
- }
- else{
- a=select_action(s,num_a,Qtable);
- if(a==0){
- printf(" →");
- }
- else if(a==1){
- printf(" ↓");
- }
- else if(a==2){
- printf(" ←");
- }
- else{
- printf(" ↑");
- }
- }
- }
- printf("\n");
- }
- for(i=0;i<num_trial/100;i++){
- printf("%d-%d epsilon_greedy:%d/100, select_action:%d/100\n",i*100,i*100+99,suc_sum[i],suc_sum2[i]);
- }
- //追加到这里
- for(i=0;i<num_s;i++){
- delete[] Qtable[i];
- }
- delete[] Qtable;
- for(i=0;i<x_size;i++){
- delete[] maze[i];
- }
- delete[] maze;
- return 0;
- }
- int move(int a, int &x, int &y, int x_size){
- if( a == 0){
- y = y + 1;
- }
- else if( a == 1 ){
- x = x + 1;
- }
- else if( a == 2 ){
- y = y - 1;
- }
- else{
- x = x - 1;
- }
- int sd;
- sd = xy2s(x,y,x_size);
-
- return sd;
- }
/*
 * Map grid coordinates (x, y) to a single state index, x + y * x_size.
 */
int xy2s(int x, int y, int x_size){
    return y * x_size + x;
}
/*
 * Greedy action selection with random tie-breaking.
 *
 * Returns the index of an action with the largest Q-value in row s of
 * Qtable. When several actions share that largest value, one of them is
 * picked using a single rand() call.
 *
 * s      : state index (row of Qtable)
 * num_a  : number of actions (columns of the row); row[0] is always read
 * Qtable : table of Q-values, indexed as Qtable[state][action]
 *
 * No memory is allocated: the first pass finds the maximum and counts the
 * ties, the second pass walks to the chosen tie.
 */
int select_action(int s, int num_a, double** Qtable){
    const double *row = Qtable[s];

    // Pass 1: largest value, its first position, and how many actions tie.
    double best = row[0];
    int first_best = 0;
    int num_best = 1;
    for (int i = 1; i < num_a; i++) {
        if (row[i] > best) {
            best = row[i];
            first_best = i;
            num_best = 1;
        }
        else if (row[i] == best) {
            num_best++;
        }
    }

    // Pass 2: skip ahead to the randomly chosen tied action. Every tie lies
    // at or after first_best, so the scan can start right behind it.
    int skip = rand() % num_best;
    int a = first_best;
    for (int i = first_best + 1; skip > 0 && i < num_a; i++) {
        if (row[i] == best) {
            a = i;
            skip--;
        }
    }
    return a;
}
/*
 * Return the largest Q-value among the num_a actions of state s,
 * i.e. the maximum of Qtable[s][0 .. num_a-1].
 */
double max_Qval(int s, int num_a, double** Qtable){
    const double *row = Qtable[s];
    double best = row[0];
    for (int k = 1; k < num_a; ++k) {
        if (best < row[k]) {
            best = row[k];
        }
    }
    return best;
}
- int epsilon_greedy(int epsilon, int s, int num_a, double** Qtable){
- int a;
- if(epsilon > rand()%100){
-
- a=rand()%num_a;
- //printf("rand选择行动\n");
- }
- else{
- //选择最大Q值的行动
- a=select_action(s,num_a,Qtable);
- }
- return a;
- }
复制代码
[ 本帖最后由 风花雪月 于 2008-1-18 10:39 编辑 ] |
-
-
QL2.cpp
5.46 KB, 下载次数: 3
Boltzmann选择
|