"""Post-process of RNN-based model predictions.
Functions
---------
build_prediction_data_arrays
Build samples predictions data arrays with predictions and ground-truth.
build_time_series_predictions_data
Build time series prediction and ground-truth data arrays.
"""
#
# Modules
# =============================================================================
# Standard
import os
import re
# Third-party
import numpy as np
# Local
from time_series_data.time_dataset import load_dataset
from model_architectures.procedures.model_prediction import \
load_sample_predictions
#
# Authorship & Credits
# =============================================================================
__author__ = 'Bernardo Ferreira (bernardo_ferreira@brown.edu)'
__credits__ = ['Bernardo Ferreira', ]
__status__ = 'Stable'
# =============================================================================
#
# =============================================================================
def build_prediction_data_arrays(predictions_dir, prediction_type,
                                 prediction_labels, samples_ids='all'):
    """Build samples predictions data arrays with predictions and ground-truth.

    Specific output features indexes cannot be automatically inferred and must
    be set according with the particular output features of the model for
    suitable extraction.

    Sample prediction results are read from files named
    'prediction_sample_<ID>.pkl' stored in the predictions directory. For
    each sample, predictions are read from the 'features_out' entry and the
    ground-truth from the 'targets' entry of the loaded results. Components
    predictions ('stress_comps', 'p_strain_comps') are extracted from the
    first len(prediction_labels) output features, while the accumulated
    plastic strain ('acc_p_strain') is extracted from the output feature
    whose index is the length of the 'stress_comps_order' entry.

    Parameters
    ----------
    predictions_dir : str
        Directory where samples predictions results files are stored.
    prediction_type : {'stress_comps', 'acc_p_strain', 'p_strain_comps'}
        Type of prediction data arrays:

        'stress_comps' : Stress components paths

        'acc_p_strain' : Accumulated plastic strain

        'p_strain_comps' : Plastic strain components paths

    prediction_labels : tuple[str]
        Labels of prediction data arrays. Only the number of labels is used
        (number of components of 'stress_comps' and 'p_strain_comps').
    samples_ids : {'all', list[int]}, default='all'
        Samples IDs whose prediction results are collated in each prediction
        data array. If 'all', then all available samples are collated in
        ascending sample ID order.

    Returns
    -------
    prediction_data_arrays : list[numpy.ndarray(2d)]
        Prediction components data arrays. Each data array collates data from
        all specified samples and is stored as a numpy.ndarray(2d) of shape
        (n_points, 2), where data_array[:, 0] stores the ground-truth
        and data_array[:, 1] stores the predictions.

    Raises
    ------
    RuntimeError
        If the predictions directory is not found or does not contain any
        sample results file, if the samples IDs or the prediction type are
        invalid, if the results file of a requested sample is not found, or
        if the ground-truth of a requested sample is not available.
    """
    # Check sample predictions directory
    if not os.path.isdir(predictions_dir):
        raise RuntimeError(f'The samples predictions directory has not been '
                           f'found:\n\n{predictions_dir}')
    # Check samples IDs (explicit string check avoids ambiguous element-wise
    # comparisons if an array-like is provided by mistake)
    is_all_samples = isinstance(samples_ids, str) and samples_ids == 'all'
    if not is_all_samples and not isinstance(samples_ids, list):
        raise RuntimeError('Samples IDs must be specified as "all" or as '
                           'list[int].')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Get files in samples predictions results directory
    directory_list = os.listdir(predictions_dir)
    # Check directory
    if not directory_list:
        raise RuntimeError('No files have been found in directory where '
                           'samples predictions results files are stored.')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Get prediction files samples IDs (file extension dot is escaped so that
    # only genuine '.pkl' files are accepted)
    sample_file_regex = re.compile(r'^prediction_sample_([0-9]+)\.pkl$')
    file_matches = (sample_file_regex.search(filename)
                    for filename in directory_list)
    # Sort samples IDs, as os.listdir returns entries in arbitrary order
    prediction_files_ids = sorted(
        int(file_match.group(1)) for file_match in file_matches
        if file_match is not None)
    # Check prediction files
    if not prediction_files_ids:
        raise RuntimeError('No sample results files have been found in '
                           'directory where samples predictions results files '
                           'are stored.')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Set all available samples
    if is_all_samples:
        samples_ids = prediction_files_ids
    # Set available samples lookup
    available_ids = set(prediction_files_ids)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Set ground-truth description of each prediction type
    ground_truth_names = {
        'stress_comps': 'Stress components path',
        'acc_p_strain': 'Accumulated plastic strain path',
        'p_strain_comps': 'Plastic strain components path'}
    # Check prediction type
    if prediction_type not in ground_truth_names:
        raise RuntimeError('Unknown prediction data array type.')
    # Set number of prediction components
    if prediction_type == 'acc_p_strain':
        n_data_arrays = 1
    else:
        n_data_arrays = len(prediction_labels)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Initialize samples data arrays of each prediction component (collected
    # in lists and concatenated only once after looping over samples)
    samples_data_arrays = [[] for _ in range(n_data_arrays)]
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Loop over samples
    for sample_id in samples_ids:
        # Check if sample ID prediction results file is available
        if sample_id not in available_ids:
            raise RuntimeError(f'The prediction results file for sample '
                               f'{sample_id} has not been found in directory: '
                               f'\n\n{predictions_dir}')
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Set sample predictions file path
        sample_prediction_path = \
            os.path.join(os.path.normpath(predictions_dir),
                         f'prediction_sample_{sample_id}.pkl')
        # Load sample predictions
        sample_results = load_sample_predictions(sample_prediction_path)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Get sample predictions and ground-truth
        predictions = sample_results['features_out']
        targets = sample_results['targets']
        # Check availability of ground-truth (must be done before indexing,
        # otherwise unavailable ground-truth raises an unrelated TypeError)
        if targets is None:
            raise RuntimeError(f'{ground_truth_names[prediction_type]} '
                               f'ground-truth is not available for sample '
                               f'{sample_id}.')
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Set output features indexes of prediction components
        if prediction_type == 'acc_p_strain':
            # Accumulated plastic strain feature index is set from the number
            # of stress components stored in sample results
            features_idxs = (len(sample_results['stress_comps_order']),)
        else:
            features_idxs = range(n_data_arrays)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Loop over prediction components
        for i, feature_idx in enumerate(features_idxs):
            # Build sample data array (ground-truth, prediction)
            data_array = np.concatenate(
                (targets[:, feature_idx].reshape((-1, 1)),
                 predictions[:, feature_idx].reshape((-1, 1))), axis=1)
            # Assemble sample prediction data
            samples_data_arrays[i].append(data_array)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Build prediction components data arrays (leading empty array sets the
    # (0, 2) shape when no samples are collated)
    prediction_data_arrays = [
        np.concatenate([np.empty((0, 2)), *component_data_arrays], axis=0)
        for component_data_arrays in samples_data_arrays]
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    return prediction_data_arrays
# =============================================================================
def build_time_series_predictions_data(dataset_file_path, predictions_dir,
                                       prediction_type, prediction_labels,
                                       samples_ids='all',
                                       is_uncertainty_quantification=False):
    """Build time series prediction and ground-truth data arrays.

    Specific output features indexes cannot be automatically inferred and must
    be set according with the particular output features of the model for
    suitable extraction.

    Sample prediction results are read from files named
    'prediction_sample_<ID>.pkl'. For each sample, predictions are read from
    the 'features_out' entry and the ground-truth from the 'targets' entry of
    the loaded results, while the discrete time is read from the 'time_hist'
    entry of the corresponding testing data set sample. Components
    predictions ('stress_comps', 'p_strain_comps') are extracted from the
    first len(prediction_labels) output features, while the accumulated
    plastic strain ('acc_p_strain') is extracted from the output feature
    whose index is the length of the 'stress_comps_order' entry.

    Parameters
    ----------
    dataset_file_path : str
        Time series testing data set file path.
    predictions_dir : str
        Directory where samples predictions results files are stored.
    prediction_type : {'stress_comps', 'acc_p_strain', 'p_strain_comps'}
        Type of prediction data arrays:

        'stress_comps' : Stress components paths

        'acc_p_strain' : Accumulated plastic strain

        'p_strain_comps' : Plastic strain components paths

    prediction_labels : tuple[str]
        Labels of prediction data arrays. Only the number of labels is used
        (number of components of 'stress_comps' and 'p_strain_comps').
    samples_ids : {'all', list[int]}, default='all'
        Samples for which the data arrays with the time series prediction
        and ground-truth are built. If 'all', then all available samples are
        processed in ascending sample ID order.
    is_uncertainty_quantification : bool, default=False
        If True, then build the prediction data arrays for each sample
        accounting for one or more model samples. Model samples directories
        ('model_<N>') are searched in the 'uncertainty_quantification'
        directory of the model base directory (second-level parent directory
        of the data set file), and each model sample prediction directory is
        set as '<model sample directory>/7_prediction/<testing type>', where
        the testing type is the name of the parent directory of the provided
        prediction directory. The available samples are probed from the
        first model sample prediction directory.

    Returns
    -------
    prediction_data_arrays : list[dict]
        Prediction components data arrays for each sample. Each prediction
        component is stored as a dictionary, where the data array
        (item, np.ndarray(2d)) of each sample (key, str) is stored as a
        numpy.ndarray(2d) of shape (sequence_length, 2 + n_predictions), where
        data_array[:, 0] stores the time series discrete time, data_array[:, 1]
        stores the time series ground-truth and data_array[:, 2:] stores the
        time series predictions (one column per model sample, a single column
        if not accounting for uncertainty quantification).

    Raises
    ------
    RuntimeError
        If the samples IDs or the prediction type are invalid, if the
        predictions directory or the model samples directories are not found,
        if no sample results files are found, if a sample ID is outside of
        the data set, if the results file of a requested sample is not found,
        or if the ground-truth of a requested sample is not available.
    """
    # Check samples IDs (explicit string check avoids ambiguous element-wise
    # comparisons if an array-like is provided by mistake)
    is_all_samples = isinstance(samples_ids, str) and samples_ids == 'all'
    if not is_all_samples and not isinstance(samples_ids, list):
        raise RuntimeError('Samples IDs must be specified as "all" or as '
                           'list[int].')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Set ground-truth description of each prediction type
    ground_truth_names = {
        'stress_comps': 'Stress component path',
        'acc_p_strain': 'Accumulated plastic strain path',
        'p_strain_comps': 'Plastic strain component path'}
    # Check prediction type
    if prediction_type not in ground_truth_names:
        raise RuntimeError('Unknown prediction data array type.')
    # Set number of prediction components
    if prediction_type == 'acc_p_strain':
        n_pred_comps = 1
    else:
        n_pred_comps = len(prediction_labels)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Check sample predictions directory
    if (not is_uncertainty_quantification
            and not os.path.isdir(predictions_dir)):
        raise RuntimeError(f'The samples predictions directory has not been '
                           f'found:\n\n{predictions_dir}')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Set samples predictions directories (one per model sample)
    if is_uncertainty_quantification:
        # Get model base directory
        model_base_dir = os.path.dirname(os.path.dirname(dataset_file_path))
        # Set model uncertainty quantification directory
        uq_directory = os.path.join(os.path.normpath(model_base_dir),
                                    'uncertainty_quantification')
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Get model samples directories (regular files matching the model
        # sample naming pattern are discarded)
        model_dir_regex = re.compile(r'^model_[0-9]+')
        model_sample_dirnames = [
            dirname for dirname in os.listdir(uq_directory)
            if model_dir_regex.search(dirname)
            and os.path.isdir(os.path.join(uq_directory, dirname))]
        # Check model samples directories
        if not model_sample_dirnames:
            raise RuntimeError(f'No model samples directories have been '
                               f'found in the uncertainty quantification '
                               f'directory:\n\n{uq_directory}')
        # Sort model samples directories (trailing integer)
        model_sample_dirnames.sort(
            key=lambda x: int(re.search(r'(\d+)\D*$', x).group(1)))
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Extract testing type from prediction subdirectory
        testing_type = os.path.basename(os.path.dirname(predictions_dir))
        # Set model samples prediction directories
        samples_prediction_dirs = [
            os.path.join(os.path.normpath(uq_directory), dirname,
                         '7_prediction', testing_type)
            for dirname in model_sample_dirnames]
    else:
        samples_prediction_dirs = [predictions_dir]
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Load testing data set (only after the inexpensive checks above)
    test_dataset = load_dataset(dataset_file_path)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Get files in samples predictions results directory (first model sample
    # directory is probed if accounting for uncertainty quantification)
    probe_dir = samples_prediction_dirs[0]
    directory_list = os.listdir(probe_dir)
    # Check directory
    if not directory_list:
        raise RuntimeError('No files have been found in directory where '
                           'samples predictions results files are stored.')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Get prediction files samples IDs (file extension dot is escaped so that
    # only genuine '.pkl' files are accepted)
    sample_file_regex = re.compile(r'^prediction_sample_([0-9]+)\.pkl$')
    file_matches = (sample_file_regex.search(filename)
                    for filename in directory_list)
    # Sort samples IDs, as os.listdir returns entries in arbitrary order
    prediction_files_ids = sorted(
        int(file_match.group(1)) for file_match in file_matches
        if file_match is not None)
    # Check prediction files
    if not prediction_files_ids:
        raise RuntimeError('No sample results files have been found in '
                           'directory where samples predictions results files '
                           'are stored.')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Set all available samples
    if is_all_samples:
        samples_ids = prediction_files_ids
    # Set available samples lookup
    available_ids = set(prediction_files_ids)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Check samples IDs against data set size (skipped if there are no
    # samples to process, as the maximum of an empty list is not defined)
    if samples_ids and max(samples_ids) >= len(test_dataset):
        raise RuntimeError(f'Sample ID ({max(samples_ids)}) is outside of the '
                           f'data set of size {len(test_dataset)}.')
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Initialize prediction components data
    prediction_data_arrays = [{} for _ in range(n_pred_comps)]
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Loop over samples
    for sample_id in samples_ids:
        # Check if sample prediction results file is available
        if sample_id not in available_ids:
            raise RuntimeError(f'The prediction results file for sample '
                               f'{sample_id} has not been found in directory: '
                               f'\n\n{probe_dir}')
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Get time series discrete time
        time_path = test_dataset[sample_id]['time_hist']
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Load sample predictions of each model sample
        models_sample_results = [
            load_sample_predictions(
                os.path.join(os.path.normpath(sample_pred_dir),
                             f'prediction_sample_{sample_id}.pkl'))
            for sample_pred_dir in samples_prediction_dirs]
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Get sample ground-truth (taken from first model sample)
        targets = models_sample_results[0]['targets']
        # Check availability of ground-truth (must be done before indexing,
        # otherwise unavailable ground-truth raises an unrelated TypeError)
        if targets is None:
            raise RuntimeError(f'{ground_truth_names[prediction_type]} '
                               f'ground-truth is not available for sample '
                               f'{sample_id}.')
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Set output features indexes of prediction components
        if prediction_type == 'acc_p_strain':
            # Accumulated plastic strain feature index is set from the number
            # of stress components stored in the last loaded model sample
            # results (assumes that all model samples share the same output
            # features - TODO confirm)
            features_idxs = \
                (len(models_sample_results[-1]['stress_comps_order']),)
        else:
            features_idxs = range(n_pred_comps)
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # Loop over prediction components
        for i, feature_idx in enumerate(features_idxs):
            # Assemble time series discrete time and ground-truth
            data_columns = [time_path.reshape((-1, 1)),
                            targets[:, feature_idx].reshape((-1, 1))]
            # Assemble time series prediction of each model sample
            data_columns.extend(
                sample_results['features_out'][:, feature_idx].reshape((-1, 1))
                for sample_results in models_sample_results)
            # Assemble prediction component sample data
            prediction_data_arrays[i][str(sample_id)] = \
                np.concatenate(data_columns, axis=1)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    return prediction_data_arrays