Java GUI实战:Swing 实现可视化马尔可夫决策(三)

Java GUI实战:Swing 实现可视化马尔可夫决策(三)

寻路

根据问题描述,Agent 从起点出发,在每个状态下都朝期望状态值最大的方向行动,并在经过的方格上留下当前步数。

寻路代码

showRoute.addActionListener(new ActionListener() {
    /**
     * Simulates one walk of the agent from the start cell and writes the step number
     * into every cell it visits.
     *
     * In each state the agent picks the action (up, down, left, right) with the largest
     * expected value under the current {@code values}; the chosen action then lands on
     * one of three neighbouring cells with probability 0.2 / 0.6 / 0.2. A move that would
     * leave the grid or enter a stone (reward 2) leaves the agent where it is. Falling
     * into a hole (reward -1) sends the agent back to the start.
     *
     * The walk is capped at {@code maxSteps}: without a cap the loop never ends when the
     * exit is unreachable (for example when all values are still 0 and the agent keeps
     * choosing "up" on the top row), which would freeze the UI thread.
     */
    @Override
    public void actionPerformed(ActionEvent e) {
        final int maxSteps = 10000;
        // {row, column} offsets of the three possible outcomes of each action,
        // indexed as 0 = up, 1 = down, 2 = left, 3 = right.
        final int[][][] drift = {
            {{-1, -1}, {-1, 0}, {-1, 1}},
            {{1, -1}, {1, 0}, {1, 1}},
            {{-1, -1}, {0, -1}, {1, -1}},
            {{-1, 1}, {0, 1}, {1, 1}}
        };

        // Reset the display: terminal cells show their reward, all others their value.
        stepCount[0] = 0;
        for (int i = 0; i < 25; i++) {
            if (rewards[i] == 1 || rewards[i] == -1) {
                grids.get(i).setText(rewards[i] + "");
            } else {
                grids.get(i).setText(values[i] + "");
            }
        }

        int k = startIndex[0];
        double gamma = gammaa[0];
        while (rewards[k] != 1 && stepCount[0] < maxSteps) {   // not at the exit yet
            stepCount[0]++;
            grids.get(k).setText(stepCount[0] + "");
            if (rewards[k] == -1) {
                k = startIndex[0];  // fell into a hole: restart from the start cell
                continue;
            }
            int i = k / 5;
            int j = k % 5;

            // q[a][b] is the discounted return of trying to reach the neighbour at
            // row offset a-1 and column offset b-1; blocked moves bounce back to k.
            double[][] q = new double[3][3];
            for (int di = -1; di <= 1; di++) {
                for (int dj = -1; dj <= 1; dj++) {
                    int t = k + 5 * di + dj;
                    if (!checkBorder(i + di, j + dj) || rewards[t] == 2) {
                        t = k;
                    }
                    q[di + 1][dj + 1] = gamma * values[t] + rewards[t];
                }
            }
            double up = 0.2 * q[0][0] + 0.6 * q[0][1] + 0.2 * q[0][2];
            double down = 0.2 * q[2][0] + 0.6 * q[2][1] + 0.2 * q[2][2];
            double left = 0.2 * q[0][0] + 0.2 * q[2][0] + 0.6 * q[1][0];
            double right = 0.2 * q[0][2] + 0.2 * q[2][2] + 0.6 * q[1][2];

            // Ties are resolved in the order up, down, left, right.
            double maxReward = maxInFour(up, down, right, left);
            int action = maxReward == up ? 0 : maxReward == down ? 1 : maxReward == left ? 2 : 3;

            double seed = Math.random();
            int[] move = drift[action][seed < 0.2 ? 0 : seed < 0.8 ? 1 : 2];
            int target = k + 5 * move[0] + move[1];
            if (checkBorder(i + move[0], j + move[1]) && rewards[target] != 2) {
                k = target;
            }
        }

        if (rewards[k] == 1) {
            stepCountShow.setText(stepCount[0] + 1 + "");
        } else {
            // Step cap reached without finding the exit.
            stepCountShow.setText(">" + stepCount[0]);
        }
    }
});

PLAY按钮绑定

// PLAY: unlock value iteration (NEXT) and lock the maze editor (APPLY).
start.addActionListener(event -> {
    next.setEnabled(true);
    edit.setEnabled(false);
});

整体代码

package com.company;

import javax.swing.*;
import java.awt.*;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.util.ArrayList;


/**
 * Swing demo that visualises value iteration and greedy route finding on a 5x5 grid world.
 *
 * <p>Cell kinds are encoded in the {@code rewards} array: 1 marks the exit, -1 a hole,
 * 2 a stone (impassable), 0.25 gold and 0 a plain cell. Every action (up, down, left,
 * right) reaches its intended neighbour with probability 0.6 and each of the two adjacent
 * diagonal neighbours with probability 0.2; a move that would leave the grid or enter a
 * stone leaves the agent in place.
 */
public class Main {

    /** Side length of the square grid. */
    private static final int SIZE = 5;
    /** Total number of cells. */
    private static final int CELLS = SIZE * SIZE;

    /** Reward values that double as cell-kind markers. */
    private static final double EXIT = 1;
    private static final double HOLE = -1;
    private static final double STONE = 2;
    private static final double GOLD = 0.25;

    /** Upper bound on simulated steps so that SHOW ROUTE can never hang the UI. */
    private static final int MAX_ROUTE_STEPS = 10000;

    /** Action indices used by {@link #actionValues} and {@link #DRIFT}. */
    private static final int UP = 0;
    private static final int DOWN = 1;
    private static final int LEFT = 2;
    private static final int RIGHT = 3;

    /**
     * {row, column} offsets of the three possible outcomes of each action, in the order
     * probability 0.2, 0.6, 0.2.
     */
    private static final int[][][] DRIFT = {
        {{-1, -1}, {-1, 0}, {-1, 1}},   // up
        {{1, -1}, {1, 0}, {1, 1}},      // down
        {{-1, -1}, {0, -1}, {1, -1}},   // left
        {{-1, 1}, {0, 1}, {1, 1}}       // right
    };

    /**
     * Starts the demo. The window is built on the event dispatch thread because Swing
     * components must only be created and modified there.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SwingUtilities.invokeLater(Main::buildUi);
    }

    /** Builds the window, the default maze and all button handlers. */
    private static void buildUi() {
        JFrame mainWindow = new JFrame("Markov Show");
        mainWindow.setSize(500, 700);
        mainWindow.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
        mainWindow.setLocationRelativeTo(null);

        // 5 rows of maze cells followed by control rows.
        JPanel panel = new JPanel(new GridLayout(8, 5));

        ArrayList<JLabel> grids = new ArrayList<>();
        for (int i = 0; i < CELLS; i++) {
            JLabel label = new JLabel(i + "", SwingConstants.CENTER);
            label.setBorder(BorderFactory.createLineBorder(Color.BLACK));
            label.setBackground(Color.white);
            label.setOpaque(true);
            grids.add(label);
            panel.add(label);
        }

        // Default maze colours.
        int[] startIndex = {0};
        grids.get(0).setBackground(Color.BLUE);     // start
        grids.get(8).setBackground(Color.YELLOW);   // gold
        grids.get(15).setBackground(Color.GRAY);    // holes
        grids.get(17).setBackground(Color.GRAY);
        grids.get(22).setBackground(Color.GRAY);
        grids.get(24).setBackground(Color.RED);     // exit

        // Iteration row: NEXT button and the "N = <count>" display.
        JButton next = new JButton("NEXT");
        next.setEnabled(false);
        int[] count = {0};
        JLabel showN = new JLabel(count[0] + "");
        panel.add(next);
        panel.add(new JLabel(""));
        panel.add(new JLabel("N"));
        panel.add(new JLabel("="));
        panel.add(showN);

        int[] stepCount = {0};
        double gamma = 0.25;    // discount factor

        // Maze editor row and playback row.
        JButton edit = new JButton("APPLY");
        JTextField editX = new JTextField("enter x");
        JTextField editY = new JTextField("enter y");
        JTextField editAttr = new JTextField("enter attribute");
        JButton start = new JButton("PLAY");
        JButton restart = new JButton("REPLAY");
        JButton showRoute = new JButton("SHOW ROUTE");
        JLabel stepCountShow = new JLabel(stepCount[0] + "");
        panel.add(edit);
        panel.add(new JLabel(""));
        panel.add(editX);
        panel.add(editY);
        panel.add(editAttr);
        panel.add(start);
        panel.add(restart);
        panel.add(showRoute);
        panel.add(new JLabel("Total steps:"));
        panel.add(stepCountShow);

        // Default maze rewards; values start at 0.
        double[] values = new double[CELLS];
        double[] rewards = new double[CELLS];
        rewards[8] = GOLD;
        rewards[15] = HOLE;
        rewards[17] = HOLE;
        rewards[22] = HOLE;
        rewards[24] = EXIT;
        refreshLabels(grids, values, rewards);

        // APPLY: set the kind of cell (x, y).
        // attribute: 0 empty, 1 start, 2 exit, 3 gold, 4 hole, 5 stone.
        edit.addActionListener(e -> {
            int x;
            int y;
            int attr;
            try {
                x = Integer.parseInt(editX.getText().trim());
                y = Integer.parseInt(editY.getText().trim());
                attr = Integer.parseInt(editAttr.getText().trim());
            } catch (NumberFormatException ex) {
                JOptionPane.showMessageDialog(mainWindow, "x, y and attribute must be integers");
                return;
            }
            if (!checkBorder(x, y)) {
                JOptionPane.showMessageDialog(mainWindow, "x and y must be between 0 and 4");
                return;
            }
            double reward;
            Color color;
            switch (attr) {
                case 0:
                    reward = 0;
                    color = Color.white;
                    break;
                case 1:
                    reward = 0;
                    color = Color.BLUE;
                    break;
                case 2:
                    reward = EXIT;
                    color = Color.RED;
                    break;
                case 3:
                    reward = GOLD;
                    color = Color.YELLOW;
                    break;
                case 4:
                    reward = HOLE;
                    color = Color.GRAY;
                    break;
                case 5:
                    reward = STONE;
                    color = Color.BLACK;
                    break;
                default:
                    JOptionPane.showMessageDialog(mainWindow, "attribute must be between 0 and 5");
                    return;
            }
            int cell = x * SIZE + y;
            if (attr == 1) {
                // There is only one start: un-paint the previous one if it is still a plain cell.
                int previous = startIndex[0];
                if (previous != cell && rewards[previous] == 0) {
                    grids.get(previous).setBackground(Color.white);
                }
                startIndex[0] = cell;
            }
            // A stone shows 2.0 until the next iteration resets its value to 0.
            values[cell] = (attr == 5) ? STONE : 0;
            rewards[cell] = reward;
            grids.get(cell).setBackground(color);
            refreshLabels(grids, values, rewards);
        });

        // PLAY: freeze the maze and allow value iteration.
        start.addActionListener(e -> {
            edit.setEnabled(false);
            next.setEnabled(true);
        });

        // NEXT: one sweep of value iteration.
        next.addActionListener(e -> {
            nextValues(values, rewards, gamma);
            refreshLabels(grids, values, rewards);
            count[0]++;
            showN.setText(count[0] + "");
        });

        // REPLAY: wipe the maze and all counters, and re-enable editing.
        restart.addActionListener(e -> {
            for (int i = 0; i < CELLS; i++) {
                values[i] = 0;
                rewards[i] = 0;
                grids.get(i).setBackground(Color.white);
                grids.get(i).setText("0.0");
            }
            count[0] = 0;
            stepCount[0] = 0;
            showN.setText(count[0] + "");
            stepCountShow.setText(stepCount[0] + "");
            edit.setEnabled(true);
            next.setEnabled(false);
        });

        // SHOW ROUTE: simulate one greedy walk and number the visited cells.
        showRoute.addActionListener(e -> {
            refreshLabels(grids, values, rewards);
            ArrayList<Integer> path =
                    traceRoute(values, rewards, gamma, startIndex[0], MAX_ROUTE_STEPS);
            boolean reached = !path.isEmpty() && rewards[path.get(path.size() - 1)] == EXIT;
            // The exit itself keeps its reward label; only cells walked through are numbered.
            int walked = reached ? path.size() - 1 : path.size();
            for (int n = 0; n < walked; n++) {
                grids.get(path.get(n)).setText((n + 1) + "");
            }
            stepCount[0] = walked;
            stepCountShow.setText(reached ? (walked + 1) + "" : ">" + walked);
        });

        mainWindow.setContentPane(panel);
        mainWindow.setVisible(true);
    }

    /**
     * Shows the reward on exit and hole cells and the current value on every other cell.
     */
    private static void refreshLabels(ArrayList<JLabel> grids, double[] values, double[] rewards) {
        for (int i = 0; i < CELLS; i++) {
            boolean terminal = rewards[i] == EXIT || rewards[i] == HOLE;
            grids.get(i).setText((terminal ? rewards[i] : values[i]) + "");
        }
    }

    /**
     * Performs one synchronous sweep of value iteration, overwriting {@code values}.
     *
     * <p>Exit, hole and stone cells get value 0; every other cell gets the largest
     * expected return over the four actions.
     *
     * @param values  current state values (length 25), replaced in place by the new values
     * @param rewards per-cell rewards / cell-kind markers (length 25)
     * @param gamma   discount factor
     */
    public static void nextValues(double[] values, double[] rewards, double gamma) {
        double[] newVal = new double[CELLS];
        for (int k = 0; k < CELLS; k++) {
            if (rewards[k] == EXIT || rewards[k] == HOLE || rewards[k] == STONE) {
                newVal[k] = 0;
                continue;
            }
            double[] q = actionValues(values, rewards, gamma, k);
            newVal[k] = maxInFour(q[UP], q[DOWN], q[LEFT], q[RIGHT]);
        }
        System.arraycopy(newVal, 0, values, 0, CELLS);
    }

    /**
     * Simulates one walk of the greedy agent and returns the cells it occupies, one per step.
     *
     * <p>In each state the agent picks the action with the largest expected value (ties are
     * resolved in the order up, down, left, right) and then lands on one of that action's
     * three outcomes with probability 0.2 / 0.6 / 0.2; a blocked outcome leaves it in place.
     * Entering a hole sends it back to {@code start} on the following step.
     *
     * @param values   current state values (length 25)
     * @param rewards  per-cell rewards / cell-kind markers (length 25)
     * @param gamma    discount factor
     * @param start    index of the start cell
     * @param maxSteps maximum number of steps to simulate before giving up
     * @return the visited cells in order; the last element is the exit cell if and only if
     *         the exit was reached, otherwise the list holds exactly {@code maxSteps} cells
     */
    public static ArrayList<Integer> traceRoute(double[] values, double[] rewards, double gamma,
                                                int start, int maxSteps) {
        ArrayList<Integer> path = new ArrayList<>();
        int k = start;
        while (rewards[k] != EXIT && path.size() < maxSteps) {
            path.add(k);
            if (rewards[k] == HOLE) {
                k = start;  // dead: start over
                continue;
            }
            double[] q = actionValues(values, rewards, gamma, k);
            double best = maxInFour(q[UP], q[DOWN], q[RIGHT], q[LEFT]);
            int action = best == q[UP] ? UP : best == q[DOWN] ? DOWN : best == q[LEFT] ? LEFT : RIGHT;

            double seed = Math.random();
            int[] move = DRIFT[action][seed < 0.2 ? 0 : seed < 0.8 ? 1 : 2];
            int row = k / SIZE + move[0];
            int col = k % SIZE + move[1];
            if (checkBorder(row, col) && rewards[row * SIZE + col] != STONE) {
                k = row * SIZE + col;
            }
        }
        if (rewards[k] == EXIT) {
            path.add(k);
        }
        return path;
    }

    /**
     * Computes the expected return of each action taken from cell {@code k}.
     *
     * @return a four-element array indexed by {@link #UP}, {@link #DOWN}, {@link #LEFT},
     *         {@link #RIGHT}
     */
    private static double[] actionValues(double[] values, double[] rewards, double gamma, int k) {
        int row = k / SIZE;
        int col = k % SIZE;
        double ul = outcome(values, rewards, gamma, k, row - 1, col - 1);
        double u = outcome(values, rewards, gamma, k, row - 1, col);
        double ur = outcome(values, rewards, gamma, k, row - 1, col + 1);
        double dl = outcome(values, rewards, gamma, k, row + 1, col - 1);
        double d = outcome(values, rewards, gamma, k, row + 1, col);
        double dr = outcome(values, rewards, gamma, k, row + 1, col + 1);
        double l = outcome(values, rewards, gamma, k, row, col - 1);
        double r = outcome(values, rewards, gamma, k, row, col + 1);
        return new double[] {
            0.2 * ul + 0.6 * u + 0.2 * ur,
            0.2 * dl + 0.6 * d + 0.2 * dr,
            0.2 * ul + 0.2 * dl + 0.6 * l,
            0.2 * ur + 0.2 * dr + 0.6 * r
        };
    }

    /**
     * Returns the discounted return of trying to move from cell {@code k} to (row, col).
     * If the target is outside the grid or a stone, the agent stays on {@code k} and
     * that cell's value and reward are used instead.
     */
    private static double outcome(double[] values, double[] rewards, double gamma,
                                  int k, int row, int col) {
        int target = k;
        if (checkBorder(row, col) && rewards[row * SIZE + col] != STONE) {
            target = row * SIZE + col;
        }
        return gamma * values[target] + rewards[target];
    }

    /**
     * Tells whether (i, j) is a valid cell of the 5x5 grid.
     *
     * @param i row index
     * @param j column index
     * @return {@code true} if both indices are within 0..4
     */
    public static boolean checkBorder(int i, int j) {
        return 0 <= i && i <= 4 && 0 <= j && j <= 4;
    }

    /**
     * Returns the largest of four values.
     */
    public static double maxInFour(double d1, double d2, double d3, double d4) {
        double result = d1;
        if (result < d2) {
            result = d2;
        }
        if (result < d3) {
            result = d3;
        }
        if (result < d4) {
            result = d4;
        }
        return result;
    }

}

整个程序只有一个 Main 类,代码量不大,只有几百行。

发布了113 篇原创文章 · 获赞 73 · 访问量 19万+

猜你喜欢

转载自blog.csdn.net/swy_swy_swy/article/details/105272847