Adding comments, trying to fix memory error with backward() call

1af694d9 · Bryson Howell · 94053c2f · 1af694d9 · 1af694d9 · 1af694d9
Commit 1af694d9 authored Feb 24, 2024 by Bryson Howell
26 changed files
--- a/__pycache__/arcgis_terrain.cpython-310.pyc
+++ b/__pycache__/arcgis_terrain.cpython-310.pyc
--- a/__pycache__/plotting_utils.cpython-310.pyc
+++ b/__pycache__/plotting_utils.cpython-310.pyc
--- a/__pycache__/waypoint_maker.cpython-310.pyc
+++ b/__pycache__/waypoint_maker.cpython-310.pyc
--- a/arcgis_terrain.py
+++ b/arcgis_terrain.py
@@ -106,10 +106,7 @@ def centroid_calc(points_meters):
 def get_terrain_map(lat_lon = [0,0], sample_dist = 10, extent = 100, heading = 0, show_plot = False, verbosity = True):
-    # gis = GIS("pro")
    # gis = GIS(url="http://virginiatech.maps.arcgis.com", client_id="rluxzSWjZS6TfeXs", username="hlarkin3_virginiatech", password="arcgisheintzman97#26640", verify_cert=False)
-    #gis = GIS(username="larkinheintzman",password="Meepp97#26640")
-    #gis = GIS()
    #gis = GIS(url="https://virginiatech.maps.arcgis.com", username="blhowell_virginiatech", password="Cc4f2LzUBC66q_P")
    gis = GIS(api_key="AAPKe7cb1ab4f2ba44748cf53ac4f30d0caavu5t_-uMdWP8SRjtoea3s66-cRyFZmMTNx4rqy2w5sjwegU_tbJyBesd0LCMmdtV")
    if verbosity:

--- a/bezier_interp/__pycache__/__init__.cpython-310.pyc
+++ b/bezier_interp/__pycache__/__init__.cpython-310.pyc
--- a/bezier_interp/__pycache__/bezier.cpython-310.pyc
+++ b/bezier_interp/__pycache__/bezier.cpython-310.pyc
--- a/bezier_interp/__pycache__/fitCurves.cpython-310.pyc
+++ b/bezier_interp/__pycache__/fitCurves.cpython-310.pyc
--- a/gp/__pycache__/beziergp.cpython-310.pyc
+++ b/gp/__pycache__/beziergp.cpython-310.pyc
--- a/gp/__pycache__/robotgp.cpython-310.pyc
+++ b/gp/__pycache__/robotgp.cpython-310.pyc
--- a/gp/robotgp.py
+++ b/gp/robotgp.py
@@ -83,7 +83,10 @@ class RobotGP(torch.nn.Module):
        self._stime = _stime if _stime is not None else time.time()
+    # NOTE: the memory problem shows up here. The hook is triggered by backward() (forward too?).
+    # TODO: work out what this hook is actually doing and why we need it.
    def _hook(self, tensor, _min=-1e10, _max=1e10):
+        print(tensor)
        tensor.requires_grad_(True)
        #tensor.retain_grad()
        tensor.register_hook(lambda grad: grad.clamp_(min=_min, max=_max))
@@ -360,6 +363,8 @@ class RobotGP(torch.nn.Module):
        self.scaled_path_length_cost = self.lengthcost_scaling * self.path_len_cost
        self.risk_cost = self.scaled_risk_cost + self.scaled_path_length_cost
        self.risk_cost = self._hook(self.risk_cost)
+        print("!!Ran hook on Risk cost, why are we doing it again...\n\n")
+        print(self.risk_cost)
    def compute_risk_cost_batchless(self, yst_mu, yst_cov):
        # kind of assuming that risk cost would be different if evaluated without batches included?
@@ -388,7 +393,7 @@ class RobotGP(torch.nn.Module):
        self.planner.robotClass.__all_robots__ = [] # reset robot list
        self.planner.robotClass.num_robots = 0 # reset robot num
        self.mc_handle.searcher_class.searchers_list = [] # reset searcher list
-        # print('cleaned garbage!!!')
+        # print('cleaned garbage!')
    def time_to_find(self, robot_paths, searcher_paths):
        # figure out if lp is found and how long it took. do while loop to get good measure.
@@ -484,6 +489,7 @@ class RobotGP(torch.nn.Module):
        # _thread.start_new_thread(_input_thread, (a_list,))
        # print('\nRisk cost function optimization')
+        # TODO: get this (presumably the memory ratio printed below) to not be 0.95
        self._iter = 0
        mem_ratio = torch.cuda.memory_allocated()/torch.cuda.max_memory_allocated()
        print('\nOptimization memory ratio: {:8.6f}'.format(mem_ratio))
@@ -491,6 +497,7 @@ class RobotGP(torch.nn.Module):
            print("oh no")
        while self._iter_since_update < self._max_iter:
+            print('iter %d' % self._iter) #Run into problems at iter 0
            stime = time.time()
            self.optimizer.zero_grad()
@@ -501,6 +508,7 @@ class RobotGP(torch.nn.Module):
            self.compute_risk_cost()
            #self.risk_cost.backward(retain_graph=True)
+            print('\n\n\n\nRunning backward on risk cost...')
            self.risk_cost.backward(retain_graph=False) # testing if this is required
            # no_nans_p_grad = _find_nans(self.robot_points.grad).shape[0] == 0

--- a/larrt/__pycache__/__init__.cpython-310.pyc
+++ b/larrt/__pycache__/__init__.cpython-310.pyc
--- a/larrt/__pycache__/planning.cpython-310.pyc
+++ b/larrt/__pycache__/planning.cpython-310.pyc
--- a/larrt/__pycache__/robot.cpython-310.pyc
+++ b/larrt/__pycache__/robot.cpython-310.pyc
--- a/larrt/__pycache__/searchspace.cpython-310.pyc
+++ b/larrt/__pycache__/searchspace.cpython-310.pyc
--- a/larrt/__pycache__/tree_boost.cpython-310.pyc
+++ b/larrt/__pycache__/tree_boost.cpython-310.pyc
--- a/mrmh_model/__pycache__/__init__.cpython-310.pyc
+++ b/mrmh_model/__pycache__/__init__.cpython-310.pyc
--- a/mrmh_model/__pycache__/human.cpython-310.pyc
+++ b/mrmh_model/__pycache__/human.cpython-310.pyc
--- a/mrmh_model/__pycache__/lp_model.cpython-310.pyc
+++ b/mrmh_model/__pycache__/lp_model.cpython-310.pyc
--- a/mrmh_model/__pycache__/montecarlo.cpython-310.pyc
+++ b/mrmh_model/__pycache__/montecarlo.cpython-310.pyc
--- a/mrmh_model/__pycache__/params.cpython-310.pyc
+++ b/mrmh_model/__pycache__/params.cpython-310.pyc
--- a/mrmh_model/__pycache__/searcher.cpython-310.pyc
+++ b/mrmh_model/__pycache__/searcher.cpython-310.pyc
--- a/mrmh_model/__pycache__/space.cpython-310.pyc
+++ b/mrmh_model/__pycache__/space.cpython-310.pyc
--- a/mrmh_model/__pycache__/terrain.cpython-310.pyc
+++ b/mrmh_model/__pycache__/terrain.cpython-310.pyc
--- a/mrmh_model/terrain.py
+++ b/mrmh_model/terrain.py
@@ -62,7 +62,7 @@ class Terrain(space.Space):
        elif terrainType == 'real':
            # load actual terrain from gps points
-            print("collecting terrain data ...")
+            print("collecting GIS terrain data ...")
            terrain_location = self.params.get('anchor_point', False)
            [e,_,x,y,data,cen_pt] = get_terrain_map(terrain_location, sample_dist = self.res,
                                      extent = self._xrange, heading=self.params.get('heading'), show_plot=False, verbosity=False)

--- a/scouter/__pycache__/terrain_viewer.cpython-310.pyc
+++ b/scouter/__pycache__/terrain_viewer.cpython-310.pyc
--- a/test_robotgp.py
+++ b/test_robotgp.py
@@ -202,10 +202,6 @@ if __name__ == "__main__":
    kentland_linfeat = '../ags_grabber/matlab_data/BW_LFandInac_Zelev_kentland.mat'
    hmpark_linfeat = '../ags_grabber/matlab_data/BW_LFandInac_Zelev_hmpark.mat'
-    #kentland_linfeat = '~/trustSAR/ags_grabber/matlab_data/BW_LFandInac_Zelev_kentland.mat'
-    #hmpark_linfeat = '~/trustSAR/ags_grabber/matlab_data/BW_LFandInac_Zelev_hmpark.mat'
    # KENTLAND case
    if True:
@@ -237,7 +233,7 @@ if __name__ == "__main__":
                    counter = 0
                    while counter < avg_runs and global_fails <= global_fail_max: # number of averaging runs
-                            #print(torch.cuda.get_device_name())
+                            print('Running on ' + torch.cuda.get_device_name())
                            torch.cuda.empty_cache()
                            torch.cuda.ipc_collect()
                            try: