"""Local model update: RNN material model.
Functions
---------
perform_model_standard_training
Perform standard training of RNN-based model.
generate_standard_training_plots
Generate plots of standard training of model.
set_default_model_parameters
Set default model initialization parameters.
set_default_training_options
Set default model training options.
"""
#
# Modules
# =============================================================================
# Standard
import sys
import pathlib
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Add project root directory to sys.path
root_dir = str(pathlib.Path(__file__).parents[3])
if root_dir not in sys.path:
sys.path.insert(0, root_dir)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
import os
# Third-party
import torch
# Local
from time_series_data.time_dataset import load_dataset, \
concatenate_dataset_features, sum_dataset_features, \
add_dataset_feature_init
from model_architectures.rnn_base_model.train.training import train_model
from model_architectures.procedures.model_training import \
    read_loss_history_from_file, read_lr_history_from_file, \
    plot_training_loss_history, plot_training_loss_and_lr_history
from model_architectures.procedures.model_summary import get_model_summary
from model_architectures.materials.strain_features import add_strain_features
from ioput.iostandard import make_directory, find_unique_file_with_regex
#
# Authorship & Credits
# =============================================================================
__author__ = 'Bernardo Ferreira (bernardo_ferreira@brown.edu)'
__credits__ = ['Bernardo Ferreira', ]
__status__ = 'Stable'
# =============================================================================
#
# =============================================================================
def perform_model_standard_training(train_dataset_file_path, model_directory,
val_dataset_file_path=None,
device_type='cpu', is_verbose=False):
"""Perform standard training of RNN-based model.
Parameters
----------
train_dataset_file_path : str
Training data set file path.
model_directory : str
Directory where model is stored.
val_dataset_file_path : str, default=None
Validation data set file path.
device_type : {'cpu', 'cuda'}, default='cpu'
Type of device on which torch.Tensor is allocated.
is_verbose : bool, default=False
If True, enable verbose output.
"""
# Get model default initialization parameters
model_init_args = set_default_model_parameters(model_directory,
device_type)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set default model training options
opt_algorithm, lr_init, lr_scheduler_type, lr_scheduler_kwargs, \
loss_nature, loss_type, loss_kwargs, data_scaling_type, \
data_scaling_parameters, is_sampler_shuffle, is_early_stopping, \
early_stopping_kwargs = set_default_training_options()
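    # Note: several of these defaults (lr_init, lr_scheduler_type,
    # lr_scheduler_kwargs, loss_type, loss_kwargs, is_early_stopping and
    # early_stopping_kwargs) are overridden further below in this function.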
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Initialize new strain-based feature
strain_features_labels = None
# Initialize features concatenation/summing flags
features_in_build = 'cat'
features_out_build = 'cat'
# Set data features for training
features_option = 'strain_to_stress'
if features_option == 'strain_to_stress':
# Set input features
new_label_in = 'features_in'
features_in_list = ('strain_path',)
features_in_build = 'cat'
# Set output features
new_label_out = 'features_out'
features_out_list = ('stress_path',)
features_out_build = 'cat'
# Set number of input and output features
model_init_args['n_features_in'] = 6
model_init_args['n_features_out'] = 6
elif features_option == 'strain_i1_i2_to_stress':
# Set new strain-based features labels
strain_features_labels = ('i1_strain', 'i2_strain')
# Set input features
new_label_in = 'features_in'
features_in_list = ('strain_path', *strain_features_labels)
features_in_build = 'cat'
# Set output features
new_label_out = 'features_out'
features_out_list = ('stress_path',)
features_out_build = 'cat'
# Set number of input and output features
model_init_args['n_features_in'] = 8
model_init_args['n_features_out'] = 6
elif features_option == 'strain_to_p_strain':
# Set input features
new_label_in = 'features_in'
features_in_list = ('strain_path',)
features_in_weights = {'strain_path': 1.0,}
features_in_build = 'sum'
# Set output features
new_label_out = 'features_out'
features_out_list = ('strain_path', 'e_strain_mf')
features_out_weights = {'strain_path': 1.0, 'e_strain_mf': -1.0}
features_out_build = 'sum'
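        # Note: with these weights, the output feature is built as the total
        # strain path minus the elastic strain path, i.e., the plastic strain
        # path implied by an additive (small-strain) decomposition.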
# Set number of input and output features
model_init_args['n_features_in'] = 6
model_init_args['n_features_out'] = 6
elif features_option == 'strain_i1_i2_to_p_strain':
# Set new strain-based features labels
strain_features_labels = ('i1_strain', 'i2_strain')
# Set input features
new_label_in = 'features_in'
features_in_list = ('strain_path', *strain_features_labels)
features_in_build = 'cat'
# Set output features
new_label_out = 'features_out'
features_out_list = ('strain_path', 'e_strain_mf')
features_out_weights = {'strain_path': 1.0, 'e_strain_mf': -1.0}
features_out_build = 'sum'
# Set number of input and output features
model_init_args['n_features_in'] = 8
model_init_args['n_features_out'] = 6
elif features_option == 'stress_acc_p_strain':
# Set input features
new_label_in = 'features_in'
features_in_list = ('strain_path',)
features_in_build = 'cat'
# Set output features
new_label_out = 'features_out'
features_out_list = ('stress_path', 'acc_p_strain')
features_out_build = 'cat'
# Set number of input and output features
model_init_args['n_features_in'] = 6
model_init_args['n_features_out'] = 7
elif features_option == 'strain_vf_to_stress':
# Set input features
new_label_in = 'features_in'
features_in_list = ('strain_path', 'vf_path')
features_in_build = 'cat'
# Set output features
new_label_out = 'features_out'
features_out_list = ('stress_path',)
features_out_build = 'cat'
# Set number of input and output features
model_init_args['n_features_in'] = 7
model_init_args['n_features_out'] = 6
elif features_option == 'strain_temperature_composition_to_stress':
# Set input features
new_label_in = 'features_in'
features_in_list = ('strain_path', 'temperature_hist',
'composition_hist')
features_in_build = 'cat'
# Set output features
new_label_out = 'features_out'
features_out_list = ('stress_path',)
features_out_build = 'cat'
# Set number of input and output features
model_init_args['n_features_in'] = 8
model_init_args['n_features_out'] = 6
    else:
        raise RuntimeError(f'Unknown features option: {features_option}.')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set hidden state initialization
hidden_features_in = torch.zeros((model_init_args['n_recurrent_layers'],
model_init_args['hidden_layer_size']))
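    # Note: each sample gets a zero initial hidden state of shape
    # (n_recurrent_layers, hidden_layer_size), matching torch.nn.GRU's
    # unbatched h_0 of shape (num_layers, hidden_size); the batch dimension
    # is presumably added downstream when samples are collated.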
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set model training options:
# Set number of epochs
n_max_epochs = 200
# Set batch size
batch_size = 32
# Set learning rate
lr_init = 1.0e-03
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Compute exponential decay (learning rate scheduler)
lr_end = 1.0e-5
gamma = (lr_end/lr_init)**(1/n_max_epochs)
# Set learning rate scheduler
lr_scheduler_type = 'explr'
lr_scheduler_kwargs = {'gamma': gamma}
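    # Derivation (for reference): ExponentialLR multiplies the learning rate
    # by gamma once per epoch, so lr(k) = lr_init*gamma**k. Choosing
    # gamma = (lr_end/lr_init)**(1/n_max_epochs) therefore drives the
    # learning rate from lr_init down to lr_end over n_max_epochs epochs.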
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set early stopping
is_early_stopping = True
# Set early stopping parameters
if is_early_stopping:
# Check validation data set file path
        if val_dataset_file_path is None:
            raise RuntimeError('A validation data set file path must be '
                               'provided when early stopping is enabled.')
else:
# Load validation data set
val_dataset = load_dataset(val_dataset_file_path)
# Compute new strain-based features
if strain_features_labels is not None:
# Loop over strain-based features
for strain_feature_label in strain_features_labels:
# Add strain-based feature to data set
val_dataset = add_strain_features(
val_dataset, strain_feature_label)
# Set validation data set features
if features_in_build == 'cat':
val_dataset = concatenate_dataset_features(
val_dataset, new_label_in, features_in_list,
is_remove_features=False)
elif features_in_build == 'sum':
val_dataset = sum_dataset_features(
val_dataset, new_label_in, features_in_list,
features_weights=features_in_weights,
is_remove_features=False)
if features_out_build == 'cat':
val_dataset = concatenate_dataset_features(
val_dataset, new_label_out, features_out_list,
is_remove_features=False)
elif features_out_build == 'sum':
val_dataset = sum_dataset_features(
val_dataset, new_label_out, features_out_list,
features_weights=features_out_weights,
is_remove_features=False)
# Add hidden state initialization to data set
val_dataset = add_dataset_feature_init(
val_dataset, 'hidden_features_in', hidden_features_in)
# Set early stopping parameters
early_stopping_kwargs = {'validation_dataset': val_dataset,
'validation_frequency': 1,
'trigger_tolerance': 20,
                                     'improvement_tolerance': 1e-2}
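            # Note (assumed semantics): the validation loss is evaluated
            # every 'validation_frequency' epochs and training halts once no
            # relative improvement above 'improvement_tolerance' has been
            # observed for 'trigger_tolerance' consecutive evaluations.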
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Load training data set
train_dataset = load_dataset(train_dataset_file_path)
# Compute new strain-based features
if strain_features_labels is not None:
# Loop over strain-based features
for strain_feature_label in strain_features_labels:
# Add strain-based feature to data set
train_dataset = add_strain_features(
train_dataset, strain_feature_label)
# Set training data set features
if features_in_build == 'cat':
train_dataset = concatenate_dataset_features(
train_dataset, new_label_in, features_in_list,
is_remove_features=False)
elif features_in_build == 'sum':
train_dataset = sum_dataset_features(
train_dataset, new_label_in, features_in_list,
features_weights=features_in_weights, is_remove_features=False)
if features_out_build == 'cat':
train_dataset = concatenate_dataset_features(
train_dataset, new_label_out, features_out_list,
is_remove_features=False)
elif features_out_build == 'sum':
train_dataset = sum_dataset_features(
train_dataset, new_label_out, features_out_list,
features_weights=features_out_weights, is_remove_features=False)
# Add hidden state initialization to data set
train_dataset = add_dataset_feature_init(
train_dataset, 'hidden_features_in', hidden_features_in)
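    # Note: the training features above are built with the same labels,
    # weights and build mode as the validation features, so that the early
    # stopping evaluation sees the same input/output layout as training.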
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set loss type
loss_type = 'mse'
# Set loss parameters
loss_kwargs = {}
# Set model state loading
model_load_state = None
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Training of RNN-based model
model, _, _ = train_model(n_max_epochs, train_dataset, model_init_args,
lr_init, opt_algorithm=opt_algorithm,
lr_scheduler_type=lr_scheduler_type,
lr_scheduler_kwargs=lr_scheduler_kwargs,
loss_nature=loss_nature, loss_type=loss_type,
loss_kwargs=loss_kwargs,
data_scaling_type=data_scaling_type,
data_scaling_parameters=data_scaling_parameters,
batch_size=batch_size,
is_sampler_shuffle=is_sampler_shuffle,
is_early_stopping=is_early_stopping,
early_stopping_kwargs=early_stopping_kwargs,
model_load_state=model_load_state,
save_every=None,
dataset_file_path=train_dataset_file_path,
device_type=device_type, seed=None,
is_verbose=is_verbose)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Generate plots of model training process
generate_standard_training_plots(model_directory)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Display summary of PyTorch model
_ = get_model_summary(model, device_type=device_type,
is_verbose=is_verbose)
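# Example (illustrative only; the file paths below are hypothetical):
#
#     device_type = 'cuda' if torch.cuda.is_available() else 'cpu'
#     perform_model_standard_training(
#         '<case_study>/1_training_dataset/ss_paths_dataset_n100.pkl',
#         '<case_study>/3_model',
#         val_dataset_file_path=(
#             '<case_study>/2_validation_dataset/ss_paths_dataset_n20.pkl'),
#         device_type=device_type, is_verbose=True)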
# =============================================================================
def generate_standard_training_plots(model_directory):
"""Generate plots of standard training of model.
Parameters
----------
model_directory : str
Directory where material patch model is stored.
"""
# Set loss history record file path
loss_record_path = os.path.join(model_directory, 'loss_history_record.pkl')
# Read training process training and validation loss history
loss_nature, loss_type, training_loss_history, validation_loss_history = \
read_loss_history_from_file(loss_record_path)
# Build training process loss history
loss_histories = {}
loss_histories['Training'] = training_loss_history
if validation_loss_history is not None:
loss_histories['Validation'] = validation_loss_history
# Read training process learning rate history
lr_scheduler_type, lr_history_epochs = \
read_lr_history_from_file(loss_record_path)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Create plot directory
plot_dir = os.path.join(os.path.normpath(model_directory), 'plots')
if not os.path.isdir(plot_dir):
make_directory(plot_dir)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Plot model training process loss history
plot_training_loss_history(loss_histories, loss_type.upper(),
loss_scale='log', save_dir=plot_dir,
is_save_fig=True, is_stdout_display=False,
is_latex=True)
# Plot model training process loss and learning rate histories
plot_training_loss_and_lr_history(training_loss_history,
lr_history_epochs, loss_type=None,
is_log_loss=False, loss_scale='log',
lr_type=lr_scheduler_type,
save_dir=plot_dir, is_save_fig=True,
is_stdout_display=False, is_latex=True)
# =============================================================================
def set_default_model_parameters(model_directory, device_type='cpu'):
"""Set default model initialization parameters.
Parameters
----------
model_directory : str
Directory where model is stored.
device_type : {'cpu', 'cuda'}, default='cpu'
Type of device on which torch.Tensor is allocated.
Returns
-------
model_init_args : dict
Model class initialization parameters (check GRURNNModel).
"""
# Set model name
model_name = 'gru_material_model'
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set number of input features
n_features_in = 6
# Set number of output features
n_features_out = 6
# Set hidden layer size
hidden_layer_size = 444
# Set number of recurrent layers (stacked RNN)
n_recurrent_layers = 3
# Set dropout probability
dropout = 0
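    # (torch.nn.GRU applies dropout between stacked recurrent layers only,
    # never after the last layer; a value of 0 disables it.)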
# Set model input and output features normalization
is_model_in_normalized = True
is_model_out_normalized = True
# Set GRU model source
gru_model_source = 'torch'
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Build model initialization parameters
model_init_args = {'n_features_in': n_features_in,
'n_features_out': n_features_out,
'hidden_layer_size': hidden_layer_size,
'n_recurrent_layers': n_recurrent_layers,
'dropout': dropout,
'model_directory': model_directory,
'model_name': model_name,
'is_model_in_normalized': is_model_in_normalized,
'is_model_out_normalized': is_model_out_normalized,
'gru_model_source': gru_model_source,
'device_type': device_type}
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
return model_init_args
# =============================================================================
def set_default_training_options():
"""Set default model training options.
Returns
-------
opt_algorithm : {'adam',}
Optimization algorithm:
'adam' : Adam (torch.optim.Adam)
lr_init : float
Initial value optimizer learning rate. Constant learning rate value if
no learning rate scheduler is specified (lr_scheduler_type=None).
lr_scheduler_type : {'steplr', 'explr', 'linlr'}
Type of learning rate scheduler:
'steplr' : Step-based decay (torch.optim.lr_scheduler.SetpLR)
'explr' : Exponential decay (torch.optim.lr_scheduler.ExponentialLR)
'linlr' : Linear decay (torch.optim.lr_scheduler.LinearLR)
lr_scheduler_kwargs : dict
Arguments of torch.optim.lr_scheduler.LRScheduler initializer.
loss_nature : {'features_out',}, default='features_out'
Loss nature:
'features_out' : Based on output features
loss_type : {'mse',}
Loss function type:
'mse' : MSE (torch.nn.MSELoss)
loss_kwargs : dict
Arguments of torch.nn._Loss initializer.
data_scaling_type : {'min-max', 'mean-std'}
Type of data scaling. Min-Max scaling ('min-max') or
standardization ('mean-std').
data_scaling_parameters : dict
Data scaling parameters (item, dict) for each features type
(key, str). For 'min-max' data scaling, the parameters are the
'minimum' and 'maximum' features normalization tensors, as well as
the 'norm_minimum' and 'norm_maximum' normalization bounds. For
'mean-std' data scaling, the parameters are the 'mean' and 'std'
features normalization tensors.
is_sampler_shuffle : bool
If True, shuffles data set samples at every epoch.
is_early_stopping : bool
If True, then training process is halted when early stopping criterion
is triggered.
early_stopping_kwargs : dict
Early stopping criterion parameters (key, str, item, value).
"""
opt_algorithm = 'adam'
lr_init = 1.0e-04
lr_scheduler_type = None
lr_scheduler_kwargs = None
loss_nature = 'features_out'
loss_type = 'mse'
loss_kwargs = {}
    data_scaling_type = 'mean-std'
    data_scaling_parameters = {}
is_sampler_shuffle = False
is_early_stopping = True
early_stopping_kwargs = {'validation_dataset': None,
'validation_frequency': 1,
'trigger_tolerance': 20,
                             'improvement_tolerance': 1e-2}
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
return opt_algorithm, lr_init, lr_scheduler_type, lr_scheduler_kwargs, \
loss_nature, loss_type, loss_kwargs, data_scaling_type, \
data_scaling_parameters, is_sampler_shuffle, is_early_stopping, \
early_stopping_kwargs
# =============================================================================
if __name__ == "__main__":
# Set computation processes
is_standard_training = True
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set case studies base directory
base_dir = ('/home/username/Documents/brown/projects/'
'darpa_project/8_global_random_specimen/von_mises/'
'1_local_vanilla_GRU/strain_to_stress')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Initialize case study directories
case_study_dirs = []
    # Set case study directories (manually toggle the desired branch)
    if False:
# Set training data set sizes
training_sizes = (10, 20, 40, 80, 160, 320, 640, 1280, 2560)
# Set case study directories
case_study_dirs += [os.path.join(os.path.normpath(base_dir), f'n{n}/')
for n in training_sizes]
elif False:
case_study_dirs += [os.path.join(os.path.normpath(base_dir),
                                         'n2560/0_pretraining/'),]
else:
case_study_dirs += [base_dir,]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Loop over case study directories
for case_study_dir in case_study_dirs:
# Check case study directory
if not os.path.isdir(case_study_dir):
raise RuntimeError('The case study directory has not been found:'
'\n\n' + case_study_dir)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set training data set directory
training_dataset_dir = os.path.join(os.path.normpath(case_study_dir),
'1_training_dataset')
# Get training data set file path
        regex = (r'^ss_paths_dataset_n[0-9]+\.pkl$',)
is_file_found, train_dataset_file_path = \
find_unique_file_with_regex(training_dataset_dir, regex)
# Check data set file
if not is_file_found:
raise RuntimeError(f'Training data set file has not been found '
f'in data set directory:\n\n'
f'{training_dataset_dir}')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set model directory
model_directory = \
os.path.join(os.path.normpath(case_study_dir), '3_model')
# Create model directory
if is_standard_training:
# Create model directory (overwrite)
make_directory(model_directory, is_overwrite=True)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set validation data set directory
val_dataset_directory = os.path.join(os.path.normpath(case_study_dir),
'2_validation_dataset')
# Get validation data set file path
        regex = (r'^ss_paths_dataset_n[0-9]+\.pkl$',)
is_file_found, val_dataset_file_path = \
find_unique_file_with_regex(val_dataset_directory, regex)
# Check data set file
if not is_file_found:
raise RuntimeError(f'Validation data set file has not been found '
f'in data set directory:\n\n'
f'{val_dataset_directory}')
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Set device type
if torch.cuda.is_available():
device_type = 'cuda'
else:
device_type = 'cpu'
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Perform standard training of model
if is_standard_training:
perform_model_standard_training(
train_dataset_file_path, model_directory,
val_dataset_file_path=val_dataset_file_path,
device_type=device_type, is_verbose=True)