Table of contents:
- Deep learning hyperparameter tuning
  - Grid search
    - Example 1: grid-searching regression model hyperparameters
    - Example 2: grid search with Keras
  - Random search
  - Bayesian search
- Hyperparameter tuning frameworks
  - Optuna: a deep learning hyperparameter optimization framework
  - NVIDIA NeMo: a hyperparameter optimization framework for large models
- Hyperparameter tuning theory:
  - Black-box optimization: a summary of recent advances in hyperparameter optimization algorithms

All content below is reposted; please contact me for removal in case of infringement.
Deep Learning Hyperparameter Tuning
References:
- PyTorch: grid-searching the best LSTM parameters
- Python: tuning parameters with grid search
- Keras deep learning hyperparameter optimization: official handbook
- Keras deep learning hyperparameter optimization handbook (CSDN blog edition)
- "Hyperparameter search not efficient enough? Get to know these strategies"
- Using Bayesian optimization for deep neural network hyperparameter tuning
Grid Search
Example 1: grid-searching regression model hyperparameters
# grid search cnn for airline passengers
from math import sqrt
from numpy import array, mean
from pandas import DataFrame, concat, read_csv
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv1D, MaxPooling1D

# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    return data[:-n_test], data[-n_test:]

# transform list into supervised learning format
def series_to_supervised(data, n_in=1, n_out=1):
    df = DataFrame(data)
    cols = list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
    # put it all together
    agg = concat(cols, axis=1)
    # drop rows with NaN values
    agg.dropna(inplace=True)
    return agg.values

# root mean squared error or rmse
def measure_rmse(actual, predicted):
    return sqrt(mean_squared_error(actual, predicted))

# difference dataset
def difference(data, order):
    return [data[i] - data[i - order] for i in range(order, len(data))]

# fit a model
def model_fit(train, config):
    # unpack config
    n_input, n_filters, n_kernel, n_epochs, n_batch, n_diff = config
    # prepare data
    if n_diff > 0:
        train = difference(train, n_diff)
    # transform series into supervised format
    data = series_to_supervised(train, n_in=n_input)
    # separate inputs and outputs
    train_x, train_y = data[:, :-1], data[:, -1]
    # reshape input data into [samples, timesteps, features]
    n_features = 1
    train_x = train_x.reshape((train_x.shape[0], train_x.shape[1], n_features))
    # define model
    model = Sequential()
    model.add(Conv1D(filters=n_filters, kernel_size=n_kernel, activation='relu', input_shape=(n_input, n_features)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(1))
    model.compile(loss='mse', optimizer='adam')
    # fit
    model.fit(train_x, train_y, epochs=n_epochs, batch_size=n_batch, verbose=0)
    return model

# forecast with the fit model
def model_predict(model, history, config):
    # unpack config
    n_input, _, _, _, _, n_diff = config
    # prepare data
    correction = 0.0
    if n_diff > 0:
        correction = history[-n_diff]
        history = difference(history, n_diff)
    x_input = array(history[-n_input:]).reshape((1, n_input, 1))
    # forecast
    yhat = model.predict(x_input, verbose=0)
    return correction + yhat[0]

# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, cfg):
    predictions = list()
    # split dataset
    train, test = train_test_split(data, n_test)
    # fit model
    model = model_fit(train, cfg)
    # seed history with training dataset
    history = [x for x in train]
    # step over each time-step in the test set
    for i in range(len(test)):
        # make a forecast for the history so far
        yhat = model_predict(model, history, cfg)
        # store forecast in list of predictions
        predictions.append(yhat)
        # add actual observation to history for the next loop
        history.append(test[i])
    # estimate prediction error
    error = measure_rmse(test, predictions)
    print(' > %.3f' % error)
    return error

# score a model, return None on failure
def repeat_evaluate(data, config, n_test, n_repeats=10):
    # convert config to a key
    key = str(config)
    # fit and evaluate the model n times
    scores = [walk_forward_validation(data, n_test, config) for _ in range(n_repeats)]
    # summarize score
    result = mean(scores)
    print('> Model[%s] %.3f' % (key, result))
    return (key, result)

# grid search configs
def grid_search(data, cfg_list, n_test):
    # evaluate configs
    scores = [repeat_evaluate(data, cfg, n_test) for cfg in cfg_list]
    # sort configs by error, asc
    scores.sort(key=lambda tup: tup[1])
    return scores

# create a list of configs to try
def model_configs():
    # define scope of configs
    n_input = [12]
    n_filters = [64]
    n_kernels = [3, 5]
    n_epochs = [100]
    n_batch = [1, 150]
    n_diff = [0, 12]
    # create configs
    configs = list()
    for a in n_input:
        for b in n_filters:
            for c in n_kernels:
                for d in n_epochs:
                    for e in n_batch:
                        for f in n_diff:
                            cfg = [a, b, c, d, e, f]
                            configs.append(cfg)
    print('Total configs: %d' % len(configs))
    return configs

# define dataset
# download: https://raw.githubusercontent.com/jbrownlee/Datasets/master/airline-passengers.csv
series = read_csv('airline-passengers.csv', header=0, index_col=0)
data = series.values
# data split
n_test = 12
# model configs
cfg_list = model_configs()
# grid search
scores = grid_search(data, cfg_list, n_test)
print('done')
# list top 3 configs
for cfg, error in scores[:3]:
    print(cfg, error)

Example 2: grid search with Keras (tuning batch size and epochs)
# Use scikit-learn to grid search the batch size and epochs
import numpy as np
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier

# Function to create model, required for KerasClassifier
def create_model():
    # create model
    model = Sequential()
    model.add(Dense(12, input_shape=(8,), activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# fix random seed for reproducibility
seed = 7
tf.random.set_seed(seed)
# load dataset
dataset = np.loadtxt('pima-indians-diabetes.csv', delimiter=',')
# split into input (X) and output (Y) variables
X = dataset[:, 0:8]
Y = dataset[:, 8]
# create model
model = KerasClassifier(model=create_model, verbose=0)
# define the grid search parameters
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
grid_result = grid.fit(X, Y)
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

More examples: https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/

Random Search
The following is a template; the `hparams` names, `dvalue` defaults, and `...` elisions are placeholders to be filled in for your own model. A concrete sketch follows below.

# Load the dataset
X, Y = load_dataset()

# Create model for KerasClassifier
def create_model(hparams1=dvalue, hparams2=dvalue, ..., hparamsn=dvalue):
    # Model definition
    ...

model = KerasClassifier(build_fn=create_model)

# Specify parameters and distributions to sample from
hparams1 = randint(1, 100)
hparams2 = ['elu', 'relu', ...]
...
hparamsn = uniform(0, 1)

# Prepare the dict for the search
param_dist = dict(hparams1=hparams1, hparams2=hparams2, ..., hparamsn=hparamsn)

# Search in action!
n_iter_search = 16  # Number of parameter settings that are sampled
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_dist,
                                   n_iter=n_iter_search, n_jobs=..., cv=..., verbose=...)
random_search.fit(X, Y)

# Show the results
print("Best: %f using %s" % (random_search.best_score_, random_search.best_params_))
means = random_search.cv_results_['mean_test_score']
stds = random_search.cv_results_['std_test_score']
params = random_search.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
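As a concrete illustration of the template, here is a minimal sketch that random-searches batch size and epochs for the Pima Indians diabetes model from Example 2. It assumes the create_model function and CSV file from that example; the randint distribution comes from scipy.stats.

# Minimal random-search sketch (assumes create_model and the dataset from Example 2)
import numpy as np
from scipy.stats import randint
from sklearn.model_selection import RandomizedSearchCV
from scikeras.wrappers import KerasClassifier

dataset = np.loadtxt('pima-indians-diabetes.csv', delimiter=',')
X, Y = dataset[:, 0:8], dataset[:, 8]

model = KerasClassifier(model=create_model, verbose=0)  # create_model as defined in Example 2
param_dist = dict(
    batch_size=randint(10, 101),  # sample integers uniformly from [10, 100]
    epochs=[10, 50, 100],         # a list is sampled uniformly as-is
)
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_dist,
                                   n_iter=16, n_jobs=-1, cv=3, random_state=7)
random_result = random_search.fit(X, Y)
print("Best: %f using %s" % (random_result.best_score_, random_result.best_params_))

Unlike grid search, RandomizedSearchCV evaluates only n_iter sampled configurations instead of enumerating the full grid, which is usually far cheaper for a similar-quality result.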
Bayesian Search

Prepare the data:

from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# split into train, validation and test sets
train_x, val_x, train_y, val_y = train_test_split(train_images, train_labels, stratify=train_labels, random_state=48, test_size=0.05)
(test_x, test_y) = (test_images, test_labels)

# normalize pixels to range 0-1
train_x = train_x / 255.0
val_x = val_x / 255.0
test_x = test_x / 255.0

# one-hot encode target variable
train_y = to_categorical(train_y)
val_y = to_categorical(val_y)
test_y = to_categorical(test_y)

# flatten the images to 784-pixel vectors for the MLP
# (the Dense input_shape=(784,) below requires it)
train_x = train_x.reshape(-1, 784)
val_x = val_x.reshape(-1, 784)
test_x = test_x.reshape(-1, 784)

# pip3 install keras-tuner

Tune and fetch the best parameters (MLP version):

import keras_tuner as kt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import Adam, SGD

random_seed = 42  # assumed; the source uses `random_seed` without defining it
# assumed early-stopping callback; the source uses `callback` without defining it
callback = [tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)]

# keras-tuner expects a model-building function that takes an `hp` object
def build_mlp(hp):
    model = Sequential()
    model.add(Dense(units=hp.Int('dense-bot', min_value=50, max_value=350, step=50),
                    input_shape=(784,), activation='relu'))
    for i in range(hp.Int('num_dense_layers', 1, 2)):
        model.add(Dense(units=hp.Int('dense_' + str(i), min_value=50, max_value=100, step=25), activation='relu'))
        model.add(Dropout(hp.Choice('dropout_' + str(i), values=[0.0, 0.1, 0.2])))
    model.add(Dense(10, activation='softmax'))
    hp_optimizer = hp.Choice('Optimizer', values=['Adam', 'SGD'])
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-1, 1e-2, 1e-3])
    # build the optimizer object so the sampled learning rate is actually used
    if hp_optimizer == 'Adam':
        optimizer = Adam(learning_rate=hp_learning_rate)
    else:  # 'SGD'
        optimizer = SGD(learning_rate=hp_learning_rate, nesterov=True, momentum=0.9)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

tuner_mlp = kt.tuners.BayesianOptimization(
    build_mlp,
    seed=random_seed,
    objective='val_loss',
    max_trials=30,
    directory='.',
    project_name='tuning-mlp')
tuner_mlp.search(train_x, train_y, epochs=50, batch_size=32, validation_data=(val_x, val_y), callbacks=callback)
best_mlp_hyperparameters = tuner_mlp.get_best_hyperparameters(1)[0]
print("Best Hyper-parameters")
# best_mlp_hyperparameters.values

Train the model with the best parameters:

model_mlp = Sequential()
model_mlp.add(Dense(best_mlp_hyperparameters['dense-bot'], input_shape=(784,), activation='relu'))
for i in range(best_mlp_hyperparameters['num_dense_layers']):
    model_mlp.add(Dense(units=best_mlp_hyperparameters['dense_' + str(i)], activation='relu'))
    model_mlp.add(Dropout(rate=best_mlp_hyperparameters['dropout_' + str(i)]))
model_mlp.add(Dense(10, activation='softmax'))
model_mlp.compile(optimizer=best_mlp_hyperparameters['Optimizer'], loss='categorical_crossentropy', metrics=['accuracy'])
history_mlp = model_mlp.fit(train_x, train_y, epochs=100, batch_size=32, validation_data=(val_x, val_y), callbacks=callback)
# or, equivalently:
# model_mlp = tuner_mlp.hypermodel.build(best_mlp_hyperparameters)
# history_mlp = model_mlp.fit(train_x, train_y, epochs=100, batch_size=32, validation_data=(val_x, val_y), callbacks=callback)

Evaluate on the test set:

mlp_test_loss, mlp_test_acc = model_mlp.evaluate(test_x, test_y, verbose=2)
print('\nTest accuracy:', mlp_test_acc)
# Test accuracy: 0.8823

CNN version. Baseline model:

# reshape back to 28x28 images with a channel dimension for the CNNs
train_x = train_x.reshape(-1, 28, 28, 1)
val_x = val_x.reshape(-1, 28, 28, 1)
test_x = test_x.reshape(-1, 28, 28, 1)

model_cnn = Sequential()
model_cnn.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model_cnn.add(MaxPooling2D((2, 2)))
model_cnn.add(Flatten())
model_cnn.add(Dense(100, activation='relu'))
model_cnn.add(Dense(10, activation='softmax'))
model_cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

Bayesian search over the hyperparameters:

def build_cnn(hp):
    model = Sequential()
    model.add(Input(shape=(28, 28, 1)))
    for i in range(hp.Int('num_blocks', 1, 2)):
        hp_padding = hp.Choice('padding_' + str(i), values=['valid', 'same'])
        hp_filters = hp.Choice('filters_' + str(i), values=[32, 64])
        model.add(Conv2D(hp_filters, (3, 3), padding=hp_padding, activation='relu', kernel_initializer='he_uniform'))
        model.add(MaxPooling2D((2, 2)))
        model.add(Dropout(hp.Choice('dropout_' + str(i), values=[0.0, 0.1, 0.2])))
    model.add(Flatten())
    hp_units = hp.Int('units', min_value=25, max_value=150, step=25)
    model.add(Dense(hp_units, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(10, activation='softmax'))
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3])
    hp_optimizer = hp.Choice('Optimizer', values=['Adam', 'SGD'])
    if hp_optimizer == 'Adam':
        optimizer = Adam(learning_rate=hp_learning_rate)
    else:  # 'SGD'
        optimizer = SGD(learning_rate=hp_learning_rate, nesterov=True, momentum=0.9)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

tuner_cnn = kt.tuners.BayesianOptimization(
    build_cnn,
    objective='val_loss',
    max_trials=100,
    directory='.',
    project_name='tuning-cnn')
# run the search and fetch the best trial, mirroring the MLP flow above
tuner_cnn.search(train_x, train_y, epochs=50, batch_size=32, validation_data=(val_x, val_y), callbacks=callback)
best_cnn_hyperparameters = tuner_cnn.get_best_hyperparameters(1)[0]

Train the model with the best hyperparameters:

model_cnn = Sequential()
model_cnn.add(Input(shape=(28, 28, 1)))
for i in range(best_cnn_hyperparameters['num_blocks']):
    hp_padding = best_cnn_hyperparameters['padding_' + str(i)]
    hp_filters = best_cnn_hyperparameters['filters_' + str(i)]
    model_cnn.add(Conv2D(hp_filters, (3, 3), padding=hp_padding, activation='relu', kernel_initializer='he_uniform'))
    model_cnn.add(MaxPooling2D((2, 2)))
    model_cnn.add(Dropout(best_cnn_hyperparameters['dropout_' + str(i)]))
model_cnn.add(Flatten())
model_cnn.add(Dense(best_cnn_hyperparameters['units'], activation='relu', kernel_initializer='he_uniform'))
model_cnn.add(Dense(10, activation='softmax'))
model_cnn.compile(optimizer=best_cnn_hyperparameters['Optimizer'], loss='categorical_crossentropy', metrics=['accuracy'])
print(model_cnn.summary())
history_cnn = model_cnn.fit(train_x, train_y, epochs=50, batch_size=32, validation_data=(val_x, val_y), callbacks=callback)

cnn_test_loss, cnn_test_acc = model_cnn.evaluate(test_x, test_y, verbose=2)
print('\nTest accuracy:', cnn_test_acc)
# Test accuracy: 0.92

Hyperparameter Tuning Frameworks
- Optuna: deep learning hyperparameter optimization
- NVIDIA NeMo: large-model training optimization and automatic hyperparameter search analysis: https://github.com/NVIDIA/NeMo-Framework-Launcher
Optuna: a deep learning hyperparameter optimization framework
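Before the longer examples below, here is the canonical Optuna quickstart: you define an objective over a `trial` object, and a study samples trials to optimize it (this mirrors the minimal example from Optuna's own documentation).

import optuna

def objective(trial):
    # search x in [-10, 10] and minimize (x - 2)^2
    x = trial.suggest_float('x', -10, 10)
    return (x - 2) ** 2

study = optuna.create_study()  # direction defaults to 'minimize'
study.optimize(objective, n_trials=100)
print(study.best_params)  # should be close to {'x': 2}

The examples that follow use exactly this objective/study pattern, plus search-space definition, pruning, and visualization.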
import os
import optuna
import plotly
from optuna.trial import TrialState
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from torchvision import datasets
from torchvision import transforms
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_parallel_coordinate
import sklearn.datasets
import sklearn.linear_model
import sklearn.model_selection

# The code below defines the search space for the SGDClassifier hyperparameters
# alpha and max_iter, and for the loss function.
def objective(trial):
    iris = sklearn.datasets.load_iris()
    classes = list(set(iris.target))
    train_x, valid_x, train_y, valid_y = sklearn.model_selection.train_test_split(
        iris.data, iris.target, test_size=0.25, random_state=0)
    # define the search space
    alpha = trial.suggest_loguniform('alpha', 1e-5, 1e-1)
    max_iter = trial.suggest_int('max_iter', 64, 192, step=64)
    loss = trial.suggest_categorical('loss', ['hinge', 'log', 'perceptron'])
    clf = sklearn.linear_model.SGDClassifier(alpha=alpha, max_iter=max_iter, loss=loss)
    # train and report validation accuracy
    clf.fit(train_x, train_y)
    return clf.score(valid_x, valid_y)

# The code below defines the search space for the learning rate,
# the optimizer, and the number of units n_unit.
def objective(trial):
    params = {
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1),
        'optimizer': trial.suggest_categorical('optimizer', ['Adam', 'RMSprop', 'SGD']),
        'n_unit': trial.suggest_int('n_unit', 4, 18)
    }
    model = build_model(params)                   # user-supplied model constructor
    accuracy = train_and_evaluate(params, model)  # user-supplied training loop
    return accuracy
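Here build_model and train_and_evaluate are user-supplied helpers. As a self-contained sketch of the same pattern in PyTorch (which this section's imports already pull in), the tiny two-layer network and synthetic dataset below are assumptions for illustration only:

import torch
import torch.nn as nn
import torch.optim as optim
import optuna

# synthetic binary-classification data (assumption, just to make the sketch runnable)
X = torch.randn(512, 20)
y = (X[:, 0] > 0).long()

def build_model(params):
    return nn.Sequential(nn.Linear(20, params['n_unit']), nn.ReLU(),
                         nn.Linear(params['n_unit'], 2))

def train_and_evaluate(params, model):
    opt_cls = getattr(optim, params['optimizer'])  # Adam / RMSprop / SGD
    optimizer = opt_cls(model.parameters(), lr=params['learning_rate'])
    loss_fn = nn.CrossEntropyLoss()
    for _ in range(50):  # a few epochs of full-batch training
        optimizer.zero_grad()
        loss = loss_fn(model(X), y)
        loss.backward()
        optimizer.step()
    with torch.no_grad():  # training-set accuracy, good enough for a sketch
        accuracy = (model(X).argmax(dim=1) == y).float().mean().item()
    return accuracy

def objective(trial):
    params = {
        'learning_rate': trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True),
        'optimizer': trial.suggest_categorical('optimizer', ['Adam', 'RMSprop', 'SGD']),
        'n_unit': trial.suggest_int('n_unit', 4, 18),
    }
    model = build_model(params)
    return train_and_evaluate(params, model)

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)
print(study.best_trial.params)

# Record intermediate values during training so that unpromising trials can be pruned: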
def objective(trial):
    iris = sklearn.datasets.load_iris()
    classes = list(set(iris.target))
    train_x, valid_x, train_y, valid_y = sklearn.model_selection.train_test_split(
        iris.data, iris.target, test_size=0.25, random_state=0)

    alpha = trial.suggest_loguniform('alpha', 1e-5, 1e-1)
    max_iter = trial.suggest_int('max_iter', 64, 192, step=64)
    loss = trial.suggest_categorical('loss', ['hinge', 'log', 'perceptron'])
    clf = sklearn.linear_model.SGDClassifier(alpha=alpha, max_iter=max_iter, loss=loss)

    for step in range(100):
        clf.partial_fit(train_x, train_y, classes=classes)
        # report the intermediate objective value and prune if it is unpromising
        intermediate_value = 1.0 - clf.score(valid_x, valid_y)
        trial.report(intermediate_value, step)
        if trial.should_prune():
            raise optuna.TrialPruned()
    return 1.0 - clf.score(valid_x, valid_y)

# Create the optimization process, reusing the objective defined above.
# `path` is an RDB storage URL, e.g. 'sqlite:///optuna.db'
study = optuna.create_study(storage=path, study_name='first', pruner=optuna.pruners.MedianPruner())
# study = optuna.study.load_study(study_name='first', storage=path)
study.optimize(objective, n_trials=20)
print('Study statistics: ')
# collect pruned and completed trials via TrialState (imported above)
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])
print('  Number of finished trials: ', len(study.trials))
print('  Number of pruned trials: ', len(pruned_trials))
print('  Number of complete trials: ', len(complete_trials))

print('Best trial:')
trial = study.best_trial
print('  Value: ', trial.value)
print('  Params: ')
for key, value in trial.params.items():
    print('    {}: {}'.format(key, value))

# Visualize the search results
optuna.visualization.plot_contour(study)
# if the figure does not display, try writing it to an HTML file instead:
vis_path = r'result-vis/'
# use parameter names that actually exist in your study
graph_cout = optuna.visualization.plot_contour(study, params=['n_layers', 'lr'])
plotly.offline.plot(graph_cout, filename=vis_path + 'graph_cout.html')

plot_optimization_history(study)
# if the figure does not display, try:
history = plot_optimization_history(study)
plotly.offline.plot(history, filename=vis_path + 'history.html')

plot_intermediate_values(study)
# if the figure does not display, try:
intermed = plot_intermediate_values(study)
plotly.offline.plot(intermed, filename=vis_path + 'intermed.html')

plot_slice(study, params=['alpha', 'max_iter', 'loss'])
# if the figure does not display, try:
slices = plot_slice(study)
plotly.offline.plot(slices, filename=vis_path + 'slices.html')

plot_parallel_coordinate(study, params=['alpha', 'max_iter', 'loss'])
# if the figure does not display, try:
paraller = plot_parallel_coordinate(study)
plotly.offline.plot(paraller, filename=vis_path + 'paraller.html')

NVIDIA NeMo: a hyperparameter optimization framework for large models
User guide: see the NVIDIA NeMo user manual.