# Standard
import sys
import pathlib
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Add project root directory to sys.path
root_dir = str(pathlib.Path(__file__).parents[1])
if root_dir not in sys.path:
sys.path.insert(0, root_dir)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
import os
import re
import pickle
# Third-party
import torch
import numpy as np
import matplotlib.pyplot as plt
# Local
from utilities.prediction_metrics import compute_prediction_metrics
from ioput.iostandard import make_directory
from ioput.plots import plot_xy_data, save_figure
# =============================================================================
# Summary: Output mean prediction metrics from prediction directories
# =============================================================================
def compute_processes_prediction_metrics(predictions_dirs,
mean_prediction_metrics,
save_dir=None, is_save_file=False,
is_display_results=False):
"""Compute mean prediction metrics for multiple prediction processes.
Parameters
----------
predictions_dirs : dict
For each prediction process (key, str), store the directory (item, str)
where the corresponding samples predictions results files are stored.
mean_prediction_metrics : list[str]
Mean prediction metrics.
save_dir : str, default=None
Directory where file with mean prediction metrics is saved.
    is_save_file : bool, default=False
        If True, then save file with mean prediction metrics in save_dir (one
        appended block per prediction process).
is_display_results : bool, default=False
If True, then display mean prediction metrics to standard output
device.
Returns
-------
processes_results : dict
For each prediction process (key, str), store the corresponding
mean prediction metrics data (item, dict).
"""
# Initialize display
if is_display_results:
print('\nMean prediction metrics'
'\n-----------------------')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Initialize prediction processes results
processes_results = {}
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Loop over predictions processes directories
for process_label, predictions_dir in predictions_dirs.items():
# Compute mean prediction metrics
n_sample, mean_metrics_results = \
compute_directory_prediction_metrics(predictions_dir,
mean_prediction_metrics)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Store prediction process results
processes_results[process_label] = {
'n_sample': n_sample,
'mean_metrics_results': mean_metrics_results}
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Save file with mean prediction metrics
if is_save_file:
# Write prediction metrics file
write_mean_metrics_results_file(save_dir, n_sample,
mean_metrics_results,
process_label=process_label,
is_overwrite=False)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Display mean prediction metrics
if is_display_results:
# Get formatted mean prediction metrics
formatted_results = \
format_mean_metrics_results(n_sample, mean_metrics_results,
process_label=process_label)
# Display
sys.stdout.writelines(formatted_results)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
return processes_results
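# Example usage (an illustrative sketch; the directory paths and process
# labels below are hypothetical):
#
#   predictions_dirs = {'n10': '/path/to/n10/predictions',
#                       'n20': '/path/to/n20/predictions'}
#   processes_results = compute_processes_prediction_metrics(
#       predictions_dirs, ['rmse', 'nrmse'], is_display_results=True)
#   rmse_n10 = processes_results['n10']['mean_metrics_results']['rmse']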
# =============================================================================
def compute_directory_prediction_metrics(
predictions_dir, mean_prediction_metrics, process_label='',
is_save_file=False, is_display_results=False):
"""Compute mean prediction metrics for given prediction directory.
Parameters
----------
predictions_dir : str
Directory where samples predictions results files are stored.
mean_prediction_metrics : list[str]
Mean prediction metrics.
process_label : str, default=''
Prediction process label.
is_save_file : bool, default=False
If True, then save file with mean prediction metrics in predictions
dedicated subdirectory.
is_display_results : bool, default=False
If True, then display mean prediction metrics to standard output
device.
Returns
-------
n_sample : int
Number of samples.
mean_metrics_results : dict
Samples mean value (item, torch.Tensor) of each prediction metric
(key, str).
"""
# Get samples prediction files
prediction_file_paths, _ = \
get_samples_prediction_files(predictions_dir)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Get number of samples
n_sample = len(prediction_file_paths)
# Compute samples mean prediction metrics
mean_metrics_results = \
compute_mean_prediction_metrics(prediction_file_paths,
mean_prediction_metrics)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Save file with mean prediction metrics
if is_save_file:
# Set prediction metrics directory
prediction_metrics_dir = set_prediction_metrics_dir(predictions_dir)
# Write prediction metrics file
write_mean_metrics_results_file(prediction_metrics_dir, n_sample,
mean_metrics_results,
process_label=process_label,
is_overwrite=True)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Display mean prediction metrics
if is_display_results:
# Get formatted mean prediction metrics
formatted_results = \
format_mean_metrics_results(n_sample, mean_metrics_results,
process_label=process_label)
# Display
sys.stdout.writelines(formatted_results)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
return n_sample, mean_metrics_results
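# Example (illustrative; predictions_dir is hypothetical): for a directory
# containing 'prediction_sample_0.pkl', ..., 'prediction_sample_9.pkl', this
# returns n_sample=10 together with the metrics averaged over those samples,
# optionally writing them to '<predictions_dir>/prediction_metrics/'.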
# =============================================================================
def set_prediction_metrics_dir(predictions_dir):
"""Set prediction metrics directory.
Parameters
----------
predictions_dir : str
Directory where samples predictions results files are stored.
Returns
-------
prediction_metrics_dir : str
Prediction metrics directory.
"""
# Check sample predictions directory
if not os.path.isdir(predictions_dir):
raise RuntimeError('The samples predictions directory has not been '
'found:\n\n' + predictions_dir)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set prediction metrics directory
    prediction_metrics_dir = os.path.join(os.path.normpath(predictions_dir),
                                          'prediction_metrics')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Create prediction metrics directory
make_directory(prediction_metrics_dir, is_overwrite=True)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
return prediction_metrics_dir
# =============================================================================
def get_samples_prediction_files(predictions_dir):
"""Get samples prediction files from prediction directory.
Parameters
----------
predictions_dir : str
Directory where samples predictions results files are stored.
Returns
-------
prediction_file_paths : list[str]
Samples prediction files paths.
prediction_files_ids : list[int]
Samples IDs.
"""
# Get files in samples predictions results directory
directory_list = os.listdir(predictions_dir)
# Check directory
if not directory_list:
raise RuntimeError('No files have been found in directory where '
'samples predictions results files are stored.')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Initialize samples prediction files paths and samples IDs
prediction_file_paths = []
prediction_files_ids = []
# Loop over files
    for filename in directory_list:
        # Check if file is sample prediction file
        match = re.search(r'^prediction_sample_([0-9]+)\.pkl$', filename)
        # Store sample prediction file and ID
        if match is not None:
            # Store sample file path
            prediction_file_paths.append(
                os.path.join(os.path.normpath(predictions_dir), filename))
            # Store sample ID
            prediction_files_ids.append(int(match.group(1)))
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
return prediction_file_paths, prediction_files_ids
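# Note: only files named 'prediction_sample_<id>.pkl' are collected. For
# example, a directory containing 'prediction_sample_0.pkl',
# 'prediction_sample_1.pkl' and an unrelated 'summary.txt' (hypothetical)
# yields two file paths and the sample IDs [0, 1].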
# =============================================================================
def compute_mean_prediction_metrics(prediction_file_paths,
                                    mean_prediction_metrics):
"""Compute samples mean prediction metrics from prediction files.
Parameters
----------
prediction_file_paths : list[str]
Samples prediction files paths.
mean_prediction_metrics : list[str]
Mean prediction metrics.
Returns
-------
mean_metrics_results : dict
Samples mean value (item, torch.Tensor) of each prediction metric
(key, str).
"""
# Initialize samples prediction metrics
samples_metrics_results = {x: [] for x in mean_prediction_metrics}
# Loop over samples prediction files
for sample_prediction_path in prediction_file_paths:
# Compute sample prediction metrics
sample_metrics_results = \
compute_prediction_metrics(sample_prediction_path,
['rmse', 'mav_gt'])
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Get sample prediction metrics
sample_rmse = sample_metrics_results['rmse']
sample_mav_gt = sample_metrics_results['mav_gt']
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Loop over mean prediction metrics
for metric in mean_prediction_metrics:
if metric == 'rmse':
# Collect sample Root Mean Squared Error (RMSE)
samples_metrics_results[metric].append(sample_rmse)
elif metric == 'nrmse':
# Compute sample Normalized Root Mean Squared Error (NRMSE)
samples_metrics_results[metric].append(
sample_rmse/sample_mav_gt)
else:
raise RuntimeError(f'Unknown mean prediction metric: '
f'\'{metric}\'')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Initialize mean prediction metrics
mean_metrics_results = {}
# Loop over mean prediction metrics
for metric in mean_prediction_metrics:
# Compute mean prediction metric
if len(samples_metrics_results[metric]) > 0:
mean_metrics_results[metric] = torch.mean(
torch.stack(samples_metrics_results[metric], dim=0), dim=0)
else:
mean_metrics_results[metric] = torch.empty(0)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
return mean_metrics_results
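# Illustration of the averaging step above: each sample metric is a tensor of
# per-feature values and the mean is taken over samples, e.g.,
#
#   >>> samples = [torch.tensor([1.0, 2.0]), torch.tensor([3.0, 4.0])]
#   >>> torch.mean(torch.stack(samples, dim=0), dim=0)
#   tensor([2., 3.])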
# =============================================================================
def write_mean_metrics_results_file(save_dir, n_sample, mean_metrics_results,
process_label='',
filename='mean_prediction_metrics',
is_overwrite=False):
"""Write file with mean prediction metrics.
Parameters
----------
save_dir : str
Directory where file with mean prediction metrics is saved.
n_sample : int
Number of samples.
mean_metrics_results : dict
Samples mean value (item, torch.Tensor) of each prediction metric
(key, str).
process_label : str, default=''
Prediction process label.
filename : str, default='mean_prediction_metrics'
File name.
is_overwrite : bool, default=False
If True, then overwrite existing file.
"""
# Check saving directory
    if save_dir is None or not os.path.isdir(save_dir):
raise RuntimeError(f'The saving directory has not been found:'
f'\n\n{save_dir}')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set file path
file_path = os.path.join(os.path.normpath(save_dir), f'{filename}.dat')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Initialize file content
file_content = []
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set default opening mode
open_mode = 'w'
# Set appending mode
if os.path.isfile(file_path) and not is_overwrite:
open_mode = 'a'
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set file header
if open_mode == 'w':
string = 'Mean prediction metrics'
sep = len(string)*'-'
file_content += [f'\n{string}\n{sep}\n',]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Get formatted mean prediction metrics
formatted_results = \
format_mean_metrics_results(n_sample, mean_metrics_results,
process_label=process_label)
# Add formatted mean prediction metrics to file content
file_content += formatted_results
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Write file
    with open(file_path, open_mode) as output_file:
        output_file.writelines(file_content)
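# The resulting file content looks as follows (metric values are
# illustrative), with one block appended per prediction process when
# is_overwrite=False:
#
#   Mean prediction metrics
#   -----------------------
#
#   > Process: n10
#    > n_sample = 100
#    > rmse: [ 1.23456789e-02,  4.56789012e-03]
#    > nrmse: [ 2.34567890e-01,  5.67890123e-02]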
# =============================================================================
def read_mean_metrics_results_file(file_path, n_sample=None):
"""Read mean prediction metrics from file.
Parameters
----------
file_path : str
Mean prediction metrics file path.
n_sample : int, default=None
Number of samples for which mean prediction metrics are read from file.
If None, then return the first mean prediction metrics found.
Returns
-------
mean_metrics_results : dict
Samples mean value (item, torch.Tensor) of each prediction metric
(key, str).
"""
# Check mean prediction metrics file path
if not os.path.isfile(file_path):
raise RuntimeError('Mean prediction metrics file has not been '
'found:\n\n' + file_path)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Initialize mean prediction metrics
mean_metrics_results = {}
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Open mean prediction metrics file
    _input_file = open(file_path, 'r')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Initialize search flag
    is_keyword_found = False
# Search for number of samples keyword and collect mean prediction metrics
    for line in _input_file:
        n_sample_match = re.search(r'n_sample\s*=\s*(\d+)', line,
                                   re.IGNORECASE)
        if n_sample_match is not None:
            # Collect number of samples
            n_sample_read = int(n_sample_match.group(1))
# Check number of samples
if n_sample is None or n_sample_read == n_sample:
# Start processing data
is_keyword_found = True
        elif is_keyword_found and (bool(re.search(r'^[*][A-Z]+', line))
                                   or line.strip() == ''):
# Finished processing data
break
elif is_keyword_found:
# Collect metric data (assumes values are formatted in scientific
# notation)
prefix_pattern = r'^\s*>\s*(\w+):\s*'
number_list_pattern = (r'\[\s*'
r'((?:-?\d*\.?\d*(?:e[-+]?\d+)'
r'?(?:\s*,\s*-?\d*\.?\d*(?:e[-+]?\d+)?)*))'
r'\s*\]')
            metric_data = \
                re.search(prefix_pattern + number_list_pattern, line)
            # Skip lines that do not match the expected metric format
            if metric_data is None:
                continue
            # Extract metric name and values
            metric = metric_data.group(1)
            metric_results_str = metric_data.group(2)
            metric_results = torch.tensor(
                [float(val) for val in metric_results_str.split(',')
                 if val.strip()])
            # Store mean prediction results
            mean_metrics_results[metric] = metric_results
    # Close mean prediction metrics file
    _input_file.close()
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Check mean prediction metrics
    if not mean_metrics_results:
raise RuntimeError('The mean prediction metrics have not been '
'successfully read from the following file:'
f'\n\n{file_path}')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
return mean_metrics_results
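# Example usage (round-trip with write_mean_metrics_results_file; the file
# path below is hypothetical):
#
#   mean_metrics_results = read_mean_metrics_results_file(
#       'prediction_metrics/mean_prediction_metrics.dat', n_sample=100)
#   mean_rmse = mean_metrics_results['rmse']  # torch.Tensor of feature means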
# =============================================================================
def format_mean_metrics_results(n_sample, mean_metrics_results,
process_label=''):
"""Format samples mean prediction metrics.
Parameters
----------
n_sample : int
Number of samples.
mean_metrics_results : dict
Samples mean value (item, torch.Tensor) of each prediction metric
(key, str).
process_label : str, default=''
Prediction process label.
Returns
-------
formatted_results : list[str]
Formatted samples mean prediction metrics results.
"""
# Initialize formatted results
formatted_results = []
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set process label
formatted_results += [f'\n> Process: {process_label}']
# Add number of samples
formatted_results += [f'\n > n_sample = {n_sample}']
# Loop over mean prediction metrics
for metric, metric_results in mean_metrics_results.items():
# Convert mean prediction metrics to list
metric_results_list = \
', '.join([f'{x:15.8e}' for x in metric_results.tolist()])
# Add mean prediction metric results
formatted_results += [f'\n > {metric}: [{metric_results_list}]']
# Add blankline
formatted_results += ['\n\n',]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
return formatted_results
# =============================================================================
def plot_prediction_metrics_convergence(
processes_results, mean_prediction_metrics,
metric_features_labels=None, save_dir=None, is_save_plot_data=False,
is_save_fig=False, is_stdout_display=False, is_latex=False):
"""Plot mean prediction metrics convergence analysis.
    Only prediction processes labeled 'nX', where X is the training data set
    size, are processed.
Parameters
----------
processes_results : dict
For each prediction process (key, str), store the corresponding
mean prediction metrics data (item, dict).
mean_prediction_metrics : list[str]
Mean prediction metrics.
    metric_features_labels : dict, default=None
        For each prediction metric (key, str), store the corresponding
        features labels (item, list[str]).
    save_dir : str, default=None
        Directory where data set plots are saved.
    is_save_plot_data : bool, default=False
        Save plot data. Plot data is stored in a file with a single dictionary
        where each item corresponds to a relevant variable used to generate
        the plot. If the figure directory is provided, then plot data is saved
        in the same directory, otherwise it is saved in the current working
        directory.
    is_save_fig : bool, default=False
        Save figure.
    is_stdout_display : bool, default=False
        True if displaying figure to standard output device, False otherwise.
    is_latex : bool, default=False
        If True, then render all strings in LaTeX. If LaTeX is not available,
        then this option is silently set to False and all input strings are
        processed to remove $(...)$ enclosure.
"""
# Initialize training data set sizes
training_sizes = []
# Collect training data set sizes
    for process_label in processes_results.keys():
        # Check if process label encodes a training data set size
        size_match = re.search(r'^n([0-9]+)$', process_label)
        # Store prediction process training data set size
        if size_match is not None:
            # Get training data set size
            training_size = int(size_match.group(1))
            # Store training data set size
            training_sizes.append(training_size)
# Get number of training data set sizes
n_size = len(training_sizes)
# Sort training data set sizes
training_sizes = sorted(training_sizes)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Loop over mean prediction metrics
for metric in mean_prediction_metrics:
# Initialize mean prediction metric convergence data
metric_convergence_data = []
# Loop over training data set sizes
for training_size in training_sizes:
# Get mean prediction metrics data
mean_metrics_results = \
processes_results[f'n{training_size}']['mean_metrics_results']
# Get metric data
metric_data = mean_metrics_results[metric]
# Store metric data
metric_convergence_data.append(metric_data)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Build convergence metric tensor
metric_convergence = torch.vstack(metric_convergence_data).numpy()
# Get metric number of dimensions
n_metric_dim = metric_convergence.shape[1]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Initialize data array
data_xy = np.zeros((n_size, 2*n_metric_dim))
# Loop over metric dimensions
for i in range(n_metric_dim):
# Assemble training data set size data
data_xy[:, 2*i] = training_sizes
# Assemble metric dimension data
data_xy[:, 2*i+1] = metric_convergence[:, i]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set data labels
if (isinstance(metric_features_labels, dict)
and metric in metric_features_labels.keys()):
# Get metric features labels
data_labels = metric_features_labels[metric]
else:
# Set default metric features labels
if n_metric_dim > 1:
data_labels = [f'Feature {i}' for i in range(n_metric_dim)]
else:
data_labels = None
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set axes labels
x_label = 'Training data set size'
y_label = metric.upper()
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Plot data
figure, _ = plot_xy_data(
data_xy, data_labels=data_labels, x_label=x_label, y_label=y_label,
x_scale='log', y_scale='linear', marker='o', markersize=3,
is_latex=is_latex)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set filename
filename = f'mean_{metric}_convergence'
# Save figure
if is_save_fig:
save_figure(figure, filename, format='pdf', save_dir=save_dir)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Save plot data
if is_save_plot_data:
# Set current working directory to save plot data
if save_dir is None:
save_dir = os.getcwd()
# Set plot data subdirectory
plot_data_dir = \
os.path.join(os.path.normpath(save_dir), 'plot_data')
# Create plot data directory
if not os.path.isdir(plot_data_dir):
make_directory(plot_data_dir)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Build plot data
plot_data = {}
plot_data['data_xy'] = data_xy
plot_data['x_label'] = x_label
plot_data['y_label'] = y_label
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set plot data file path
plot_data_file_path = os.path.join(
plot_data_dir, filename + '_data' + '.pkl')
            # Save plot data file
with open(plot_data_file_path, 'wb') as data_file:
pickle.dump(plot_data, data_file)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Display figure
if is_stdout_display:
plt.show()
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Close plot
plt.close('all')
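# Example of reading back a plot data file saved above (the relative path is
# hypothetical; keys match the plot_data dictionary built in this function):
#
#   with open('plot_data/mean_rmse_convergence_data.pkl', 'rb') as data_file:
#       plot_data = pickle.load(data_file)
#   data_xy = plot_data['data_xy']  # columns alternate (size, metric value)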
# =============================================================================
if __name__ == "__main__":
# Set computation processes
is_multiple_processes = False
# Set uncertainty quantification and model realization
is_uncertainty_quantification = False
uq_model = 2
# Set mean predictions metrics to be plotted
mean_prediction_metrics = ['rmse', 'nrmse',]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if is_multiple_processes:
# Set testing type
testing_type = ('in_distribution', 'out_distribution')[0]
# Set training data set sizes
training_sizes = (10, 20, 40, 80, 160, 320, 640, 1280, 2560)
# Set convergence analysis base directory
base_dir = ('/home/username/Documents/brown/projects/'
'test_output_metric/strain_to_stress')
# Set saving directory
save_dir = os.path.join(os.path.normpath(base_dir),
'prediction_metrics')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Initialize prediction processes directories
predictions_dirs = {}
# Loop over training data set sizes
for training_size in training_sizes:
# Set model base directory
model_base_dir = os.path.join(os.path.normpath(base_dir),
f'n{training_size}')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set prediction data set directory
if is_uncertainty_quantification:
prediction_dir = os.path.join(
os.path.normpath(model_base_dir),
f'uncertainty_quantification/model_{uq_model}/'
f'7_prediction/{testing_type}/')
else:
prediction_dir = os.path.join(
os.path.normpath(model_base_dir),
f'7_prediction/{testing_type}/'
f'prediction_set_0')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Store prediction directory
if os.path.isdir(prediction_dir):
predictions_dirs[f'n{training_size}'] = prediction_dir
else:
raise RuntimeError('The prediction directory has not been '
'found:\n\n' + prediction_dir)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Create saving directory (overwrite existing directory)
make_directory(save_dir, is_overwrite=True)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Compute mean prediction metrics for multiple prediction processes
processes_results = compute_processes_prediction_metrics(
predictions_dirs, mean_prediction_metrics, save_dir=save_dir,
is_save_file=True, is_display_results=True)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set mean prediction metrics features labels
metric_features_labels = {
metric: [f'Stress {x}'
for x in ('11', '22', '33', '12', '23', '13')]
for metric in mean_prediction_metrics}
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Plot mean prediction metrics convergence analysis
plot_prediction_metrics_convergence(
processes_results, mean_prediction_metrics,
metric_features_labels=metric_features_labels, save_dir=save_dir,
is_save_fig=True, is_save_plot_data=True, is_stdout_display=False,
is_latex=True)
else:
# Set predictions directory
predictions_dir = ('/home/username/Documents/brown/projects/'
'test_output_metric/prediction_set_0')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Compute mean prediction metrics for given prediction directory
_, _ = compute_directory_prediction_metrics(
predictions_dir, mean_prediction_metrics, is_save_file=True,
is_display_results=True)
# =============================================================================
"""Plot miscellaneous options:
1. Add GRU reference to mean prediction metrics plots (paste in plot_xy_data())
Change the number of colors to number of labels!
# Set reference data file path
data_file_path = ('/home/username/Documents/brown/projects/'
'darpa_paper_examples/local/hybrid_models/dp_plus_gru/'
'dp_2d50_plus_gru/prediction_metrics_gru_reference_data/'
'plot_data/mean_rmse_convergence_data.pkl')
# Load reference model data
with open(data_file_path, 'rb') as dataset_file:
model_data = pickle.load(dataset_file)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Get reference model loss convergence data
data_xy_ref = model_data['data_xy']
# Loop over data sets
for i in range(n_datasets):
# Plot reference data set
axes.plot(data_xy_ref[:, 2*i], data_xy_ref[:, 2*i + 1],
label=None, linestyle='--',
marker=None, markersize=markersize,
markeredgecolor=markeredgecolor,
markeredgewidth=markeredgewidth,
alpha=0.8, zorder=1)
"""