Train_cpo.py saves data to be used for training

307fd299 · Bryson Howell · f8a5256a · 307fd299 · 307fd299 · 307fd299
Commit 307fd299 authored May 02, 2024 by Bryson Howell
10 changed files
--- a/deeprl_data/paths.png
+++ b/deeprl_data/paths.png
--- a/deeprl_data/ring_model.png
+++ b/deeprl_data/ring_model.png
--- a/deeprl_data/ring_prob.npy
+++ b/deeprl_data/ring_prob.npy
--- a/deeprl_data/robot0_waypoints.npy
+++ b/deeprl_data/robot0_waypoints.npy
--- a/deeprl_data/robot1_waypoints.npy
+++ b/deeprl_data/robot1_waypoints.npy
--- a/deeprl_data/robot2_waypoints.npy
+++ b/deeprl_data/robot2_waypoints.npy
--- a/mrmh_model/montecarlo.py
+++ b/mrmh_model/montecarlo.py
@@ -258,6 +258,7 @@ class MonteCarlo(params.Default):
        show = False
        if(show):
            print('Environment shape = %d x %d' % (self._x_shape,self._y_shape))
+            print(np.shape(self.p))
            plt.imshow(self.p)
            plt.title('Selected heatmap')
            plt.show()

--- a/sargym/sar_gym.py
+++ b/sargym/sar_gym.py
+import gym
+from gym import spaces
+import pygame
+import numpy as np
+class GridWorldSAR(gym.Env):
+    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 4}
+    def __init__(self, render_mode=None, size=5):
+        self.size = size  # The size of the square grid
+        self.window_size = 512  # The size of the PyGame window
+        # Observations are dictionaries with the agent's and the target's location.
+        # Each location is encoded as an element of {0, ..., `size`}^2, i.e. MultiDiscrete([size, size]).
+        self.observation_space = spaces.Dict(
+            {
+                "agent": spaces.Box(0, size - 1, shape=(2,), dtype=int),
+                "target": spaces.Box(0, size - 1, shape=(2,), dtype=int),
+            }
+        )
+        #Need a continuous action space for path planning...?
+        self.action_space = spaces.Discrete(4)
+        assert render_mode is None or render_mode in self.metadata["render_modes"]
+        self.render_mode = render_mode
+        """
+        If human-rendering is used, `self.window` will be a reference
+        to the window that we draw to. `self.clock` will be a clock that is used
+        to ensure that the environment is rendered at the correct framerate in
+        human-mode. They will remain `None` until human-mode is used for the
+        first time.
+        """
+        self.window = None
+        self.clock = None
--- a/test_trustplanner.py
+++ b/test_trustplanner.py
@@ -108,6 +108,15 @@ def objective_printer(rgp_object = None, comp_time = 0.0, iteration = 0, folder
    return save_dict
+#Save paths from rgp as numpy array
+def gym_paths(rgp):
+    #New format: x, y, z, robot,
+    #np.save(rgp.)
+    return
 def main(iteration = 0, parameters = -1):
@@ -135,7 +144,22 @@ def main(iteration = 0, parameters = -1):
    rgp.optimize_risk_cost_grad(_show_detail=True)
-    robot_paths_local = waypoint_maker.write_file(rgp_object = rgp, terrain_class = mc.terrain, filename = 'waypoints.json') # write waypoints to file
+    #Printing values to test how paths look
+    print("Path lengths = ")
+    #print(rgp.robot_path_len) #[75, 75, 75]
+    print("Robot fixed paths shape = (6,3)")
+    print(np.shape(rgp.robot_fixed_paths))
+    print("Robot paths shape = (225,3)")
+    print(np.shape(rgp.robot_paths))
+    print("Min Risk Paths shape = (225,3)")
+    print(np.shape(rgp.min_risk_paths))
+    print(rgp.min_risk_paths[0])
+    #Save path info for RL environment
+    gym_paths(rgp)
+    #Save the waypoints from the optimized paths
+    robot_paths_local = waypoint_maker.write_file(rgp_object = rgp, terrain_class = mc.terrain, filename = 'waypointsTest.json') # write waypoints to file
    # write serialized files for plotter to run
    # with open("scouter/bozoo_save_{}.pkl".format(params['anchor_point']),'wb') as f:
@@ -193,8 +217,8 @@ if __name__ == "__main__":
        start_time = time.time()
        params = ({
-            'lp_model': 'stripes',    #custom, ring, trust, stripes
+            'lp_model': 'custom',    #custom, ring, trust, stripes
-            'opt_iterations': 1000,
+            'opt_iterations': 100,
            'path_style': 'basic',
            'stats_name': 'kentland',
            'anchor_point': [37.197730, -80.585233], # kentland
@@ -203,8 +227,9 @@ if __name__ == "__main__":
            'num_humans' : h_max,
            'lp_filename': kentland_heatmap,
            'lin_feat_filename': kentland_linfeat,
+            'save_data': True
        })
-        params['save_folder'] = 'trust_{}_n{}_s{}_{}'.format(
+        params['save_folder'] = 'trust-planner/{}_n{}_s{}_{}'.format(
            params['stats_name'],n_max,s_max,params['path_style'])
        params = Default(params).params

--- a/train_cpo.py
+++ b/train_cpo.py
@@ -8,6 +8,7 @@ import waypoint_maker
 import matplotlib.pyplot as plt
 from scouter.terrain_viewer import plot_all
 from matplotlib.backends.backend_pdf import PdfPages
+from scipy import ndimage
 from mrmh_model import montecarlo as MC
 import numpy as np
@@ -29,7 +30,7 @@ kentland_linfeat = '../ags_grabber/matlab_data/BW_LFandInac_Zelev_kentland.mat'
 hmpark_linfeat = '../ags_grabber/matlab_data/BW_LFandInac_Zelev_hmpark.mat'
-def main():
+def create_data():
    #Set up environment parameters
    n_max = 3   #Number of robots
@@ -39,7 +40,7 @@ def main():
    #Dictionary of parameters
    params = ({
-        'lp_model': 'stripes',    #custom, ring, trust, stripes
+        'lp_model': 'ring',    #custom, ring, trust, stripes
        'opt_iterations': 1000,
        'path_style': 'basic',
        'stats_name': 'kentland',
@@ -49,8 +50,10 @@ def main():
        'num_humans' : h_max,
        'lp_filename': kentland_heatmap,
        'lin_feat_filename': kentland_linfeat,
+        'res': 25,
+        'save_data': True
    })
-    params['save_folder'] = 'deeprl_{}_n{}_s{}_{}'.format(
+    params['save_folder'] = 'deeprl/{}_n{}_s{}_{}'.format(
        params['stats_name'],n_max,s_max,params['path_style'])
    params = Default(params).params
@@ -59,10 +62,97 @@ def main():
    mc = MC.MonteCarlo(params=params) # calls terrain builder
    mc.run_experiment()
-    #Make Gym Environment
+    #Put Ring search values and paths into one matrix for the gym Environment (have to fix for full res)
+    gridsize_x = mc._x_shape*params['res']
+    gridsize_y = mc._y_shape*params['res']
+    sar_fullgrid = np.zeros((gridsize_x,gridsize_y),dtype=float)
+    [x_crds,y_crds] = np.meshgrid(np.linspace(params['xlims'][0], params['xlims'][1], gridsize_x),
+                      np.linspace(params['ylims'][0], params['ylims'][1], gridsize_y))
+    dist_mat = np.sqrt(x_crds**2 + y_crds**2)
+    #Find cutoff points for ring model
+    maxdist = np.max(dist_mat)
+    mindist = np.min(dist_mat)
+    adjust = 0.10*gridsize_x    #center ring is 10% of total distance
+    cutoff = np.linspace(mindist+adjust, maxdist, 4)
+    print("New Ring boundaries = ")
+    print(cutoff)
+    #params['ring_mobi'] = 25*[0.6e3, 1.8e3, 3.2e3, 9.9e3]
+    params['ring_mobi'] = cutoff
+    sar_fullgrid[dist_mat <= params['ring_mobi'][0]] += 0.25 # center ring (95%)
+    sar_fullgrid[dist_mat <= params['ring_mobi'][1]] += 0.25 # middle ring (70%)
+    sar_fullgrid[dist_mat <= params['ring_mobi'][2]] += 0.25 # outter ring (45%)
+    sar_fullgrid[dist_mat <= params['ring_mobi'][3]] += 0.20 # containment ring (20%)
+    sar_fullgrid = ndimage.gaussian_filter(sar_fullgrid, sigma=64) # to make betta heatmap
+    # normalize heatmap to sum to 1
+    sar_fullgrid = sar_fullgrid - np.min(sar_fullgrid)
+    sar_fullgrid = sar_fullgrid/np.max(sar_fullgrid)
+    #For CPO 'penalty' will be 1-ring_prob for a grid.
+    #Show heatmap and save numpy array
+    saveprobs = False
+    if(saveprobs):
+        plt.imshow(sar_fullgrid)
+        plt.title('Selected heatmap')
+        plt.show()
+        np.save('./deeprl_data/ring_prob.npy',sar_fullgrid)
+    stime = time.time()
+    planner = planning.Planning(params, on_terrain=mc.terrain, mode='TOTALDIST') # also calls terrain builder...
+    rgp = robotgp.RobotGP(mc, planner, _stime = stime, parameters = params)
+    rgp.collect_trainData() # sets out paths for each robot
+    rgp.optimize_risk_cost_grad(_show_detail=True)
+    #Show optimized waypoints on matrix
+    sar_paths = np.zeros((gridsize_x,gridsize_y),dtype=float)
+    robot_paths = []
+    start_idx = 0
+    robot_idx = 0
+    for path_len in rgp.robot_path_len:
+        end_idx = start_idx + path_len
+        rpath = rgp.min_risk_paths[start_idx:end_idx]
+        rpath = np.int32(np.rint(rpath))
+        print('Path shape')
+        print(np.shape(rpath))
+        waypoints = np.zeros((np.shape(rpath)[0],2),dtype=np.int32())
+        #Coordinates are from (-xlim, xlim) and (-ylim, ylim). Transform so 0,0 is corner like grid
+        for i in range(0,np.shape(rpath)[0]):
+            x = rpath[i][0]
+            y = rpath[i][1]
+            sar_paths[x+599][y+599] += 1
+            waypoints[i][0] = x + params['xlims'][1]
+            waypoints[i][1] = y + params['ylims'][1]
+        #Plot path to make sure we're right
+        plt.scatter(waypoints[:,0], waypoints[:,1])
+        #save numpy array
+        title = './deeprl_data/robot' + str(robot_idx) + '_waypoints.npy'
+        np.save(title,waypoints)
+        #Indices
+        start_idx = end_idx
+        robot_idx += 1
+    #Show waypoints (should be overhead view)
+    plt.show()
+    plt.imshow(sar_paths)
+    plt.title('Optimized Path Waypoints')
+    plt.show()
+def main():
+    create_data()
+    #Load robot paths from file
+    #Make Gym Environment
+    return