# Standard
import sys
import pathlib
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Add project root directory to sys.path
root_dir = str(pathlib.Path(__file__).parents[1])
if root_dir not in sys.path:
    sys.path.insert(0, root_dir)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
import os
import re
import pickle
# Third-party
import torch
import numpy as np
import matplotlib.pyplot as plt
# Local
from utilities.prediction_metrics import compute_prediction_metrics
from ioput.iostandard import make_directory
from ioput.plots import plot_xy_data, save_figure
# =============================================================================
# Summary: Output mean prediction metrics from prediction directories
# =============================================================================
def compute_processes_prediction_metrics(predictions_dirs,
                                         mean_prediction_metrics,
                                         save_dir=None, is_save_file=False,
                                         is_display_results=False):
    """Compute mean prediction metrics for multiple prediction processes.

    Parameters
    ----------
    predictions_dirs : dict
        For each prediction process (key, str), store the directory (item, str)
        where the corresponding samples predictions results files are stored.
    mean_prediction_metrics : list[str]
        Mean prediction metrics.
    save_dir : str, default=None
        Directory where file with mean prediction metrics is saved.
    is_save_file : bool, default=False
        If True, then save file with mean prediction metrics in the saving
        directory.
    is_display_results : bool, default=False
        If True, then display mean prediction metrics to standard output
        device.
        
    Returns
    -------
    processes_results : dict
        For each prediction process (key, str), store the corresponding
        mean prediction metrics data (item, dict).
    """
    # Initialize display
    if is_display_results:
        print('\nMean prediction metrics'
              '\n-----------------------')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Initialize prediction processes results
    processes_results = {}
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Loop over predictions processes directories
    for process_label, predictions_dir in predictions_dirs.items():
        # Compute mean prediction metrics
        n_sample, mean_metrics_results = \
            compute_directory_prediction_metrics(predictions_dir,
                                                 mean_prediction_metrics)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Store prediction process results
        processes_results[process_label] = {
            'n_sample': n_sample,
            'mean_metrics_results': mean_metrics_results}
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Save file with mean prediction metrics
        if is_save_file:
            # Write prediction metrics file
            write_mean_metrics_results_file(save_dir, n_sample,
                                            mean_metrics_results,
                                            process_label=process_label,
                                            is_overwrite=False)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Display mean prediction metrics
        if is_display_results:
            # Get formatted mean prediction metrics
            formatted_results = \
                format_mean_metrics_results(n_sample, mean_metrics_results,
                                            process_label=process_label)
            # Display
            sys.stdout.writelines(formatted_results)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    return processes_results
# =============================================================================
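# Example usage of compute_processes_prediction_metrics (a minimal sketch;
# the directory paths below are hypothetical):
#
#     predictions_dirs = {'n10': '/path/to/n10/predictions',
#                         'n20': '/path/to/n20/predictions'}
#     processes_results = compute_processes_prediction_metrics(
#         predictions_dirs, ['rmse', 'nrmse'], save_dir='/path/to/save_dir',
#         is_save_file=True, is_display_results=True)
#
# processes_results['n10']['mean_metrics_results']['rmse'] then holds the
# samples mean RMSE tensor of prediction process 'n10'.
# =============================================================================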
def compute_directory_prediction_metrics(
        predictions_dir, mean_prediction_metrics, process_label='',
        is_save_file=False, is_display_results=False):
    """Compute mean prediction metrics for given prediction directory.
    
    Parameters
    ----------
    predictions_dir : str
        Directory where samples predictions results files are stored.
    mean_prediction_metrics : list[str]
        Mean prediction metrics.
    process_label : str, default=''
        Prediction process label.
    is_save_file : bool, default=False
        If True, then save file with mean prediction metrics in predictions
        dedicated subdirectory.
    is_display_results : bool, default=False
        If True, then display mean prediction metrics to standard output
        device.

    Returns
    -------
    n_sample : int
        Number of samples.
    mean_metrics_results : dict
        Samples mean value (item, torch.Tensor) of each prediction metric
        (key, str).
    """
    # Get samples prediction files
    prediction_file_paths, _ = \
        get_samples_prediction_files(predictions_dir)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Get number of samples
    n_sample = len(prediction_file_paths)
    # Compute samples mean prediction metrics
    mean_metrics_results = \
        compute_mean_prediction_metrics(prediction_file_paths,
                                        mean_prediction_metrics)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Save file with mean prediction metrics
    if is_save_file:
        # Set prediction metrics directory
        prediction_metrics_dir = set_prediction_metrics_dir(predictions_dir)
        # Write prediction metrics file
        write_mean_metrics_results_file(prediction_metrics_dir, n_sample,
                                        mean_metrics_results,
                                        process_label=process_label,
                                        is_overwrite=True)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Display mean prediction metrics
    if is_display_results:
        # Get formatted mean prediction metrics
        formatted_results = \
            format_mean_metrics_results(n_sample, mean_metrics_results,
                                        process_label=process_label)
        # Display
        sys.stdout.writelines(formatted_results)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    return n_sample, mean_metrics_results
# =============================================================================
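# Example usage of compute_directory_prediction_metrics (a minimal sketch;
# the directory path below is hypothetical):
#
#     n_sample, mean_metrics_results = compute_directory_prediction_metrics(
#         '/path/to/predictions', ['rmse', 'nrmse'], is_save_file=True,
#         is_display_results=True)
#
# With is_save_file=True, the metrics file is written to the
# 'prediction_metrics' subdirectory created inside the predictions directory.
# =============================================================================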
def set_prediction_metrics_dir(predictions_dir):
    """Set prediction metrics directory.
    
    Parameters
    ----------
    predictions_dir : str
        Directory where samples predictions results files are stored.
    
    Returns
    -------
    prediction_metrics_dir : str
        Prediction metrics directory.
    """
    # Check sample predictions directory
    if not os.path.isdir(predictions_dir):
        raise RuntimeError('The samples predictions directory has not been '
                           'found:\n\n' + predictions_dir)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Set prediction metrics directory
    prediction_metrics_dir = os.path.join(os.path.normpath(predictions_dir),
                                          'prediction_metrics')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Create prediction metrics directory
    make_directory(prediction_metrics_dir, is_overwrite=True)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    return prediction_metrics_dir
# =============================================================================
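# For instance (hypothetical path), set_prediction_metrics_dir(
# '/path/to/predictions') returns '/path/to/predictions/prediction_metrics',
# creating that directory and overwriting it if it already exists.
# =============================================================================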
def get_samples_prediction_files(predictions_dir):
    """Get samples prediction files from prediction directory.
    
    Parameters
    ----------
    predictions_dir : str
        Directory where samples predictions results files are stored.
    
    Returns
    -------
    prediction_file_paths : list[str]
        Samples prediction files paths.
    prediction_files_ids : list[int]
        Samples IDs.
    """
    # Get files in samples predictions results directory
    directory_list = os.listdir(predictions_dir)
    # Check directory
    if not directory_list:
        raise RuntimeError('No files have been found in directory where '
                           'samples predictions results files are stored.')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Initialize samples prediction files paths and samples IDs
    prediction_file_paths = []
    prediction_files_ids = []
    # Loop over files
    for filename in directory_list:
        # Check if file is sample prediction file (escape the dot so that
        # only '.pkl' files are matched)
        sample_match = re.search(r'^prediction_sample_([0-9]+)\.pkl$',
                                 filename)
        # Store sample prediction file and ID
        if sample_match is not None:
            # Store sample file path
            prediction_file_paths.append(
                os.path.join(os.path.normpath(predictions_dir), filename))
            # Store sample ID
            prediction_files_ids.append(int(sample_match.groups()[0]))
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    return prediction_file_paths, prediction_files_ids
# =============================================================================
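# Example of the file naming convention matched by
# get_samples_prediction_files (a minimal sketch; directory and files are
# hypothetical): given a directory containing
#
#     prediction_sample_0.pkl, prediction_sample_1.pkl, notes.txt
#
# the call
#
#     file_paths, sample_ids = get_samples_prediction_files('/path/to/dir')
#
# returns the two matching '.pkl' file paths and sample_ids == [0, 1], while
# any file not matching 'prediction_sample_<ID>.pkl' is silently skipped.
# =============================================================================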
def compute_mean_prediction_metrics(prediction_file_paths,
                                    mean_prediction_metrics):
    """Compute samples mean prediction metrics from prediction files.

    Parameters
    ----------
    prediction_file_paths : list[str]
        Samples prediction files paths.
    mean_prediction_metrics : list[str]
        Mean prediction metrics.

    Returns
    -------
    mean_metrics_results : dict
        Samples mean value (item, torch.Tensor) of each prediction metric
        (key, str).
    """
    # Initialize samples prediction metrics
    samples_metrics_results = {x: [] for x in mean_prediction_metrics}
    # Loop over samples prediction files
    for sample_prediction_path in prediction_file_paths:
        # Compute sample prediction metrics
        sample_metrics_results = \
            compute_prediction_metrics(sample_prediction_path,
                                       ['rmse', 'mav_gt'])
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Get sample prediction metrics
        sample_rmse = sample_metrics_results['rmse']
        sample_mav_gt = sample_metrics_results['mav_gt']
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Loop over mean prediction metrics
        for metric in mean_prediction_metrics:
            if metric == 'rmse':
                # Collect sample Root Mean Squared Error (RMSE)
                samples_metrics_results[metric].append(sample_rmse)
            elif metric == 'nrmse':
                # Compute sample Normalized Root Mean Squared Error (NRMSE)
                samples_metrics_results[metric].append(
                    sample_rmse/sample_mav_gt)
            else:
                raise RuntimeError(f'Unknown mean prediction metric: '
                                   f'\'{metric}\'')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Initialize mean prediction metrics
    mean_metrics_results = {}
    # Loop over mean prediction metrics
    for metric in mean_prediction_metrics:
        # Compute mean prediction metric
        if len(samples_metrics_results[metric]) > 0:
            mean_metrics_results[metric] = torch.mean(
                torch.stack(samples_metrics_results[metric], dim=0), dim=0)
        else:
            mean_metrics_results[metric] = torch.empty(0)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    return mean_metrics_results
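# Illustration of the aggregation above (a minimal sketch with made-up
# per-feature tensors):
#
#     sample_rmse_1 = torch.tensor([0.10, 0.20])
#     sample_rmse_2 = torch.tensor([0.30, 0.40])
#     # Stacking yields shape (n_sample, n_feature); the mean over dim=0
#     # averages each feature across samples
#     mean_rmse = torch.mean(torch.stack([sample_rmse_1, sample_rmse_2],
#                                        dim=0), dim=0)
#     # > tensor([0.2000, 0.3000])
#
# The 'nrmse' entries follow the same aggregation after the per-sample
# division of RMSE by the ground-truth mean absolute value ('mav_gt').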
# =============================================================================
def write_mean_metrics_results_file(save_dir, n_sample, mean_metrics_results,
                                    process_label='',
                                    filename='mean_prediction_metrics',
                                    is_overwrite=False):
    """Write file with mean prediction metrics.

    Parameters
    ----------
    save_dir : str
        Directory where file with mean prediction metrics is saved.
    n_sample : int
        Number of samples.
    mean_metrics_results : dict
        Samples mean value (item, torch.Tensor) of each prediction metric
        (key, str).
    process_label : str, default=''
        Prediction process label.
    filename : str, default='mean_prediction_metrics'
        File name.
    is_overwrite : bool, default=False
        If True, then overwrite existing file.
    """
    # Check saving directory
    if not os.path.exists(save_dir):
        raise RuntimeError(f'The saving directory has not been found:'
                           f'\n\n{save_dir}')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Set file path
    file_path = os.path.join(os.path.normpath(save_dir), f'{filename}.dat')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Initialize file content
    file_content = []
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Set default opening mode
    open_mode = 'w'
    # Set appending mode if file exists and should not be overwritten
    if os.path.isfile(file_path) and not is_overwrite:
        open_mode = 'a'
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Set file header
    if open_mode == 'w':
        string = 'Mean prediction metrics'
        sep = len(string)*'-'
        file_content += [f'\n{string}\n{sep}\n',]
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Get formatted mean prediction metrics
    formatted_results = \
        format_mean_metrics_results(n_sample, mean_metrics_results,
                                    process_label=process_label)
    # Add formatted mean prediction metrics to file content
    file_content += formatted_results
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Write file (close file handle upon completion)
    with open(file_path, open_mode) as results_file:
        results_file.writelines(file_content)
# =============================================================================
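# The file written by write_mean_metrics_results_file looks like the
# following (illustrative values; one block per prediction process is
# appended when the file is not overwritten):
#
#     Mean prediction metrics
#     -----------------------
#
#     > Process: n10
#      > n_sample = 100
#      > rmse: [ 1.00000000e-02,  2.00000000e-02]
#      > nrmse: [ 5.00000000e-02,  1.00000000e-01]
#
# =============================================================================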
def read_mean_metrics_results_file(file_path, n_sample=None):
    """Read mean prediction metrics from file.

    Parameters
    ----------
    file_path : str
        Mean prediction metrics file path.
    n_sample : int, default=None
        Number of samples for which mean prediction metrics are read from
        file. If None, then return the first mean prediction metrics found.

    Returns
    -------
    mean_metrics_results : dict
        Samples mean value (item, torch.Tensor) of each prediction metric
        (key, str).
    """
    # Check mean prediction metrics file path
    if not os.path.isfile(file_path):
        raise RuntimeError('Mean prediction metrics file has not been '
                           'found:\n\n' + file_path)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Initialize mean prediction metrics
    mean_metrics_results = {}
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Open mean prediction metrics file (close file handle upon completion)
    with open(file_path, 'r') as input_file:
        # Initialize search flag
        is_keyword_found = False
        # Search for number of samples keyword and collect mean prediction
        # metrics
        for line in input_file:
            if bool(re.search(r'n_sample =', line, re.IGNORECASE)):
                # Collect number of samples
                n_sample_read = \
                    int(re.search(r'n_sample\s*=\s*(\d+)', line).group(1))
                # Check number of samples
                if n_sample is None or n_sample_read == n_sample:
                    # Start processing data
                    is_keyword_found = True
            elif is_keyword_found and (bool(re.search(r'^[*][A-Z]+', line))
                                       or line.strip() == ''):
                # Finished processing data
                break
            elif is_keyword_found:
                # Collect metric data (assumes values are formatted in
                # scientific notation)
                prefix_pattern = r'^\s*>\s*(\w+):\s*'
                number_list_pattern = (r'\[\s*'
                                       r'((?:-?\d*\.?\d*(?:e[-+]?\d+)'
                                       r'?(?:\s*,\s*-?\d*\.?\d*'
                                       r'(?:e[-+]?\d+)?)*))'
                                       r'\s*\]')
                metric_data = \
                    re.search(prefix_pattern + number_list_pattern, line)
                # Extract metric name and values
                metric = metric_data.group(1)
                metric_results_str = metric_data.group(2)
                metric_results = torch.tensor(
                    [float(val) for val in metric_results_str.split(',')
                     if val.strip()])
                # Store mean prediction results
                mean_metrics_results[metric] = metric_results
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Check mean prediction metrics
    if len(mean_metrics_results.keys()) == 0:
        raise RuntimeError('The mean prediction metrics have not been '
                           'successfully read from the following file:'
                           f'\n\n{file_path}')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    return mean_metrics_results
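# Example usage of read_mean_metrics_results_file (a minimal sketch; the
# file path below is hypothetical):
#
#     mean_metrics_results = read_mean_metrics_results_file(
#         '/path/to/prediction_metrics/mean_prediction_metrics.dat',
#         n_sample=100)
#     # mean_metrics_results['rmse'] -> per-feature mean RMSE (torch.Tensor)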
# =============================================================================
def format_mean_metrics_results(n_sample, mean_metrics_results,
                                process_label=''):
    """Format samples mean prediction metrics.

    Parameters
    ----------
    n_sample : int
        Number of samples.
    mean_metrics_results : dict
        Samples mean value (item, torch.Tensor) of each prediction metric
        (key, str).
    process_label : str, default=''
        Prediction process label.

    Returns
    -------
    formatted_results : list[str]
        Formatted samples mean prediction metrics results.
    """
    # Initialize formatted results
    formatted_results = []
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Set process label
    formatted_results += [f'\n> Process: {process_label}']
    # Add number of samples
    formatted_results += [f'\n > n_sample = {n_sample}']
    # Loop over mean prediction metrics
    for metric, metric_results in mean_metrics_results.items():
        # Convert mean prediction metrics to list
        metric_results_list = \
            ', '.join([f'{x:15.8e}' for x in metric_results.tolist()])
        # Add mean prediction metric results
        formatted_results += [f'\n > {metric}: [{metric_results_list}]']
    # Add blank line
    formatted_results += ['\n\n',]
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    return formatted_results
# =============================================================================
def plot_prediction_metrics_convergence(
        processes_results, mean_prediction_metrics,
        metric_features_labels=None, save_dir=None, is_save_plot_data=False,
        is_save_fig=False, is_stdout_display=False, is_latex=False):
    """Plot mean prediction metrics convergence analysis.

    Only prediction processes labeled 'nX', where X is a given training
    data set size, are processed.

    Parameters
    ----------
    processes_results : dict
        For each prediction process (key, str), store the corresponding
        mean prediction metrics data (item, dict).
    mean_prediction_metrics : list[str]
        Mean prediction metrics.
    metric_features_labels : dict, default=None
        For each prediction metric (key, str), store the corresponding
        features labels (item, list[str]).
    save_dir : str, default=None
        Directory where data set plots are saved.
    is_save_plot_data : bool, default=False
        Save plot data. Plot data is stored in a file with a single
        dictionary where each item corresponds to a relevant variable used
        to generate the plot. If the figure directory is provided, then
        plot data is saved in the same directory, otherwise it is saved in
        the current working directory.
    is_save_fig : bool, default=False
        Save figure.
    is_stdout_display : bool, default=False
        True if displaying figure to standard output device, False
        otherwise.
    is_latex : bool, default=False
        If True, then render all strings in LaTeX. If LaTeX is not
        available, then this option is silently set to False and all input
        strings are processed to remove $(...)$ enclosure.
""" # Initialize training data set sizes training_sizes = [] # Collect training data set sizes for process_label in processes_results.keys(): # Check if process label is training data set size training_size = re.search(r'^n([0-9]+)$', process_label) # Store prediction process training data set size if id is not None: # Get training data set size training_size = int(training_size.groups()[0]) # Store training data set size training_sizes.append(training_size) # Get number of training data set sizes n_size = len(training_sizes) # Sort training data set sizes training_sizes = sorted(training_sizes) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Loop over mean prediction metrics for metric in mean_prediction_metrics: # Initialize mean prediction metric convergence data metric_convergence_data = [] # Loop over training data set sizes for training_size in training_sizes: # Get mean prediction metrics data mean_metrics_results = \ processes_results[f'n{training_size}']['mean_metrics_results'] # Get metric data metric_data = mean_metrics_results[metric] # Store metric data metric_convergence_data.append(metric_data) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Build convergence metric tensor metric_convergence = torch.vstack(metric_convergence_data).numpy() # Get metric number of dimensions n_metric_dim = metric_convergence.shape[1] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Initialize data array data_xy = np.zeros((n_size, 2*n_metric_dim)) # Loop over metric dimensions for i in range(n_metric_dim): # Assemble training data set size data data_xy[:, 2*i] = training_sizes # Assemble metric dimension data data_xy[:, 2*i+1] = metric_convergence[:, i] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Set data labels if (isinstance(metric_features_labels, dict) and metric in metric_features_labels.keys()): # Get metric features labels data_labels = metric_features_labels[metric] else: # Set default metric features labels if n_metric_dim > 1: data_labels = [f'Feature {i}' for i in range(n_metric_dim)] else: data_labels = None # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Set axes labels x_label = 'Training data set size' y_label = metric.upper() # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Plot data figure, _ = plot_xy_data( data_xy, data_labels=data_labels, x_label=x_label, y_label=y_label, x_scale='log', y_scale='linear', marker='o', markersize=3, is_latex=is_latex) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Set filename filename = f'mean_{metric}_convergence' # Save figure if is_save_fig: save_figure(figure, filename, format='pdf', save_dir=save_dir) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Save plot data if is_save_plot_data: # Set current working directory to save plot data if save_dir is None: save_dir = os.getcwd() # Set plot data subdirectory plot_data_dir = \ os.path.join(os.path.normpath(save_dir), 'plot_data') # Create plot data directory if not os.path.isdir(plot_data_dir): make_directory(plot_data_dir) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Build plot data plot_data = {} plot_data['data_xy'] = data_xy plot_data['x_label'] = x_label plot_data['y_label'] = y_label # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Set plot data file path plot_data_file_path = os.path.join( plot_data_dir, filename + '_data' + 
            # Save plot data
            with open(plot_data_file_path, 'wb') as data_file:
                pickle.dump(plot_data, data_file)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Display figure
        if is_stdout_display:
            plt.show()
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Close plot
        plt.close('all')
# =============================================================================
if __name__ == "__main__":
    # Set whether to process multiple prediction processes
    is_multiple_processes = False
    # Set uncertainty quantification and model realization
    is_uncertainty_quantification = False
    uq_model = 2
    # Set mean prediction metrics to be plotted
    mean_prediction_metrics = ['rmse', 'nrmse',]
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if is_multiple_processes:
        # Set testing type
        testing_type = ('in_distribution', 'out_distribution')[0]
        # Set training data set sizes
        training_sizes = (10, 20, 40, 80, 160, 320, 640, 1280, 2560)
        # Set convergence analysis base directory
        base_dir = ('/home/username/Documents/brown/projects/'
                    'test_output_metric/strain_to_stress')
        # Set saving directory
        save_dir = os.path.join(os.path.normpath(base_dir),
                                'prediction_metrics')
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Initialize prediction processes directories
        predictions_dirs = {}
        # Loop over training data set sizes
        for training_size in training_sizes:
            # Set model base directory
            model_base_dir = os.path.join(os.path.normpath(base_dir),
                                          f'n{training_size}')
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Set prediction data set directory
            if is_uncertainty_quantification:
                prediction_dir = os.path.join(
                    os.path.normpath(model_base_dir),
                    f'uncertainty_quantification/model_{uq_model}/'
                    f'7_prediction/{testing_type}/')
            else:
                prediction_dir = os.path.join(
                    os.path.normpath(model_base_dir),
                    f'7_prediction/{testing_type}/'
                    f'prediction_set_0')
            # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            # Store prediction directory
            if os.path.isdir(prediction_dir):
                predictions_dirs[f'n{training_size}'] = prediction_dir
            else:
                raise RuntimeError('The prediction directory has not been '
                                   'found:\n\n' + prediction_dir)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Create saving directory (overwrite existing directory)
        make_directory(save_dir, is_overwrite=True)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Compute mean prediction metrics for multiple prediction processes
        processes_results = compute_processes_prediction_metrics(
            predictions_dirs, mean_prediction_metrics, save_dir=save_dir,
            is_save_file=True, is_display_results=True)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Set mean prediction metrics features labels
        metric_features_labels = {
            metric: [f'Stress {x}'
                     for x in ('11', '22', '33', '12', '23', '13')]
            for metric in mean_prediction_metrics}
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Plot mean prediction metrics convergence analysis
        plot_prediction_metrics_convergence(
            processes_results, mean_prediction_metrics,
            metric_features_labels=metric_features_labels, save_dir=save_dir,
            is_save_fig=True, is_save_plot_data=True, is_stdout_display=False,
            is_latex=True)
    else:
        # Set predictions directory
        predictions_dir = ('/home/username/Documents/brown/projects/'
                           'test_output_metric/prediction_set_0')
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Compute mean prediction metrics for given prediction directory
        _, _ = compute_directory_prediction_metrics(
            predictions_dir, mean_prediction_metrics, is_save_file=True,
            is_display_results=True)
# =============================================================================
"""Plot miscellaneous options:

1. Add GRU reference to mean prediction metrics plots (paste in
   plot_xy_data()). Change the number of colors to the number of labels!

    # Set reference data file path
    data_file_path = ('/home/username/Documents/brown/projects/'
                      'darpa_paper_examples/local/hybrid_models/dp_plus_gru/'
                      'dp_2d50_plus_gru/'
                      'prediction_metrics_gru_reference_data/'
                      'plot_data/mean_rmse_convergence_data.pkl')
    # Load reference model data
    with open(data_file_path, 'rb') as dataset_file:
        model_data = pickle.load(dataset_file)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Get reference model loss convergence data
    data_xy_ref = model_data['data_xy']
    # Loop over data sets
    for i in range(n_datasets):
        # Plot reference data set
        axes.plot(data_xy_ref[:, 2*i], data_xy_ref[:, 2*i + 1], label=None,
                  linestyle='--', marker=None, markersize=markersize,
                  markeredgecolor=markeredgecolor,
                  markeredgewidth=markeredgewidth, alpha=0.8, zorder=1)
"""