# Source code for model_architectures.rnn_base_model.optimization.hydra_optimization_plots

"""Automatic plotting for Hydra hyperparameter optimization.

Functions
---------
plot_optimization_history
    Plot Hydra multi-run optimization process history.
"""
#
#                                                                       Modules
# =============================================================================
# Standard
import sys
import pathlib
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Add project root directory to sys.path
root_dir = str(pathlib.Path(__file__).parents[3])
if root_dir not in sys.path:
    sys.path.insert(0, root_dir)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
import os
import re
# Third-party
import numpy as np
import matplotlib.pyplot as plt
# Local
from ioput.plots import scatter_xy_data, save_figure
#
#                                                          Authorship & Credits
# =============================================================================
__author__ = 'Bernardo Ferreira (bernardo_ferreira@brown.edu)'
__credits__ = ['Bernardo Ferreira', ]
__status__ = 'Stable'
# =============================================================================
#
# =============================================================================
def plot_optimization_history(optim_history, optim_metric, is_log_metric=False,
                              objective_scale='linear', is_data_labels=False,
                              filename=None, save_dir=None, is_save_fig=False,
                              is_stdout_display=False, is_latex=False,
                              is_verbose=False):
    """Plot Hydra multi-run optimization process history.

    Assumes that each Hydra multi-run optimization process generates a
    'job_summary.dat' file for each job (in hydra_cfg.runtime.output_dir)
    with data formatted as < optim_metric >: < value >.

    Parameters
    ----------
    optim_history : dict
        One or more multi-run optimization processes (key, str) jobs
        directories (item, str). The multi-run job directory is set in Hydra
        configuration file (hydra.sweep.dir). Dictionary keys are taken as
        labels in the corresponding optimization processes history plot.
    optim_metric : str
        The metric whose optimization process history is to be plotted. Must
        be available from all optimization processes jobs summary data files
        in the format < optim_metric >: < value >.
    is_log_metric : bool, default=False
        Applies logarithm to optimization metric values if True, keeps
        original metric values otherwise.
    objective_scale : {'linear', 'log'}, default='linear'
        Optimization metric values axis scale type.
    is_data_labels : bool, default=False
        If True, then plot data labels according with optimization processes
        dictionary keys.
    filename : str, default=None
        Figure name. If None, then figure name is set as
        optimization_history_{optim_metric}.
    save_dir : str, default=None
        Directory where figure is saved. If None, then figure is saved in
        current working directory.
    is_save_fig : bool, default=False
        Save figure.
    is_stdout_display : bool, default=False
        True if displaying figure to standard output device, False otherwise.
    is_latex : bool, default=False
        If True, then render all strings in LaTeX. If LaTex is not available,
        then this option is silently set to False and all input strings are
        processed to remove $(...)$ enclosure.
    is_verbose : bool, default=False
        If True, enable verbose output.

    Raises
    ------
    RuntimeError
        If an optimization process directory does not exist, contains no
        job directories, has a gap in the consecutive job ID numbering,
        is missing a job summary file, or a job summary file does not
        contain the optimization metric.
    """
    if is_verbose:
        print('\nPlot Hydra multi-run optimization processes'
              '\n-------------------------------------------')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Get number of optimization processes
    n_optim_history = len(optim_history.keys())
    # Initialize maximum number of jobs (function evaluations)
    max_n_jobs = 0
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Initialize optimization processes data
    optim_data = {}
    # Initialize data labels
    data_labels = None
    if is_data_labels:
        data_labels = []
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Loop over optimization processes
    for label, optim_dir in optim_history.items():
        if is_verbose:
            print(f'\n> Process: {label}')
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Check optimization process directory
        if not os.path.isdir(optim_dir):
            raise RuntimeError('The optimization jobs directory has not been '
                               'found:\n\n' + optim_dir)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Get job directories (Hydra names them with the integer job ID) and
        # sort them numerically
        directory_list = [x for x in os.listdir(optim_dir)
                          if re.search(r'^(\d+)$', x)]
        directory_list = sorted(directory_list, key=int)
        # Check directory
        if not directory_list:
            raise RuntimeError('No job files have been found in optimization '
                               'process directory:\n\n' + optim_dir)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Initialize optimization metric history
        metric_hist = []
        # Initialize optimization jobs history
        job_hist = []
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Loop over optimization process jobs
        for job_dir in directory_list:
            # Get job ID (directory names have been filtered to digits only)
            job_id = int(job_dir)
            # Check job ID: job directories must be consecutively numbered
            # starting at 0
            if job_id != len(metric_hist):
                expected_dir = os.path.join(os.path.normpath(optim_dir),
                                            str(len(metric_hist)))
                raise RuntimeError(
                    f'Job ID {len(metric_hist)} directory has not been '
                    f'found:\n\n{expected_dir}')
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Get job summary file path
            job_summary_path = os.path.join(os.path.normpath(optim_dir),
                                            f'{job_id}/job_summary.dat')
            # Check job summary file path
            if not os.path.isfile(job_summary_path):
                raise RuntimeError(f'The job summary file path has not '
                                   f'been found for job ID {job_id}:'
                                   f'\n\n {job_summary_path}')
            # Look for optimization metric value (first matching line).
            # Context manager fixes the original's leaked file handle.
            metric = None
            with open(job_summary_path, 'r') as job_summary_file:
                for line in job_summary_file:
                    if str(optim_metric) in line:
                        metric = float(line.split()[-1])
                        break
            # Append job ID and optimization metric to history
            if metric is None:
                raise RuntimeError(f'Optimization metric {optim_metric} '
                                   f'has not been found in job summary '
                                   f'file:\n\n{job_summary_path}')
            job_hist.append(job_id)
            metric_hist.append(metric)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Update maximum number of jobs
        max_n_jobs = max(max_n_jobs, len(metric_hist))
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Assemble optimization metric history
        optim_data[label] = metric_hist
        # Assemble data label
        if is_data_labels:
            data_labels.append(label)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        if is_verbose:
            # Display best (minimum) performance over all jobs
            best_idx = min(range(len(metric_hist)),
                           key=lambda k: metric_hist[k])
            print(f'\n > Best performance: {metric_hist[best_idx]:15.8e} '
                  f'(job #{job_hist[best_idx]})')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if is_verbose:
        print('\n> Generating optimization processes plot...')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Initialize data array (object array; processes with fewer jobs than
    # max_n_jobs are padded with None)
    data_xy = np.full((max_n_jobs, 2*n_optim_history), fill_value=None)
    # Build data array
    for i, (label, data_hist) in enumerate(optim_data.items()):
        # Get optimization process number of jobs
        n_jobs = len(data_hist)
        # Assemble optimization process history to data array.
        # BUG FIX: the original assigned the leftover loop variable
        # metric_hist (last process scanned) instead of this process's
        # data_hist, so all columns duplicated the last process's history.
        data_xy[:n_jobs, 2*i] = list(range(n_jobs))
        if is_log_metric:
            data_xy[:n_jobs, 2*i + 1] = list(np.log(data_hist))
        else:
            data_xy[:n_jobs, 2*i + 1] = list(data_hist)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Set axes limits and scale
    x_lims = (0, max_n_jobs)
    y_lims = (None, None)
    y_scale = objective_scale
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Set axes labels
    x_label = 'Jobs (Function Evaluations)'
    if is_log_metric:
        y_label = f'log({optim_metric})'
    else:
        y_label = f'{optim_metric.capitalize()}'
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Set title
    title = None
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Plot optimization processes history
    figure, axes = scatter_xy_data(
        data_xy, data_labels=data_labels, x_lims=x_lims, y_lims=y_lims,
        title=title, x_label=x_label, y_label=y_label, y_scale=y_scale,
        x_tick_format='int', is_latex=is_latex)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Build best-so-far history (based on the first optimization process
    # columns). BUG FIX: skip None padding rows — when the first process has
    # fewer jobs than max_n_jobs the original raised TypeError on
    # None < float.
    data_min_xy = np.copy(data_xy)
    data_min = data_xy[0, 1]
    for i in range(data_min_xy.shape[0]):
        if data_xy[i, 1] is not None and data_xy[i, 1] < data_min:
            data_min = data_xy[i, 1]
        data_min_xy[i, 1] = data_min
    # Plot best-so-far history
    axes.plot(data_min_xy[:, 0], data_min_xy[:, 1], color='#EE7733',
              label='Best-so-far', linestyle='-')
    # Plot best-so-far legend
    legend = axes.legend(loc='upper right', frameon=True, fancybox=True,
                         facecolor='inherit', edgecolor='inherit',
                         fontsize=8, framealpha=1.0)
    legend.set_zorder(10)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Display figure
    if is_stdout_display:
        plt.show()
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Set default figure name
    if filename is None:
        filename = f'optimization_history_{optim_metric}'
    # Save figure
    if is_save_fig:
        save_figure(figure, filename, format='pdf', save_dir=save_dir)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Close plot
    plt.close(figure)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if is_verbose:
        print()
# =============================================================================
if __name__ == "__main__":
    # Example driver: plot a single hard-coded optimization process.
    # Jobs directory of the multi-run optimization process (hydra.sweep.dir)
    jobs_dir = ('/home/username/Documents/hyperparameter_opt/'
                'optimize_gru_material_model_composite_rve/2025-01-23/18-56-06')
    optim_history = {'label': jobs_dir}
    # Directory where the generated figure is saved
    save_dir = '/home/username/Documents/hyperparameter_opt'
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Optimization metric to plot and its axis scale
    optim_metric = 'objective'
    objective_scale = 'linear'
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Plot optimization process history
    plot_optimization_history(optim_history, optim_metric,
                              objective_scale=objective_scale,
                              save_dir=save_dir, is_save_fig=True,
                              is_stdout_display=False, is_latex=True)