Merge MLP And CNN in Keras

In the post (https://statcompute.wordpress.com/2017/01/08/an-example-of-merge-layer-in-keras), it was shown how to build a merge-layer DNN by using the Keras Sequential model. In the example below, I tried to scratch a merge-layer DNN with the Keras functional API in both R and Python. In particular, the merge-layer DNN is the average of a multilayer perceptron network and a 1D convolutional network, just for fun and curiosity. Since the purpose of this exercise is to explore the network structure and the use case of Keras API, I didn’t bother to mess around with parameters.


library(keras)
df <- read.csv("credit_count.txt")
Y <- matrix(df[df$CARDHLDR == 1, ]$DEFAULT)
X <- scale(df[df$CARDHLDR == 1, ][3:14])
inputs <- layer_input(shape = c(ncol(X)))
mlp <- inputs %>%
layer_dense(units = 64, activation = 'relu', kernel_initializer = 'he_uniform') %>%
layer_dropout(rate = 0.2, seed = 1) %>%
layer_dense(units = 64, activation = 'relu', kernel_initializer = 'he_uniform') %>%
layer_dropout(rate = 0.2, seed = 1) %>%
layer_dense(1, activation = 'sigmoid')
cnv <- inputs %>%
layer_reshape(c(ncol(X), 1)) %>%
layer_conv_1d(32, 4, activation = 'relu', padding = "same", kernel_initializer = 'he_uniform') %>%
layer_max_pooling_1d(2) %>%
layer_spatial_dropout_1d(0.2) %>%
layer_flatten() %>%
layer_dense(1, activation = 'sigmoid')
avg <- layer_average(c(mlp, cnv))
mdl <- keras_model(inputs = inputs, outputs = avg)
mdl %>% compile(optimizer = optimizer_sgd(lr = 0.1, momentum = 0.9), loss = 'binary_crossentropy', metrics = c('binary_accuracy'))
mdl %>% fit(x = X, y = Y, epochs = 50, batch_size = 1000, verbose = 0)
mdl %>% predict(x = X)

view raw

keras_average.R

hosted with ❤ by GitHub


from numpy.random import seed
from pandas import read_csv, DataFrame
from sklearn.preprocessing import scale
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers.merge import average
from keras.layers import Input, Dense, Flatten, Reshape, Dropout, SpatialDropout1D
from keras.models import Model
from keras.optimizers import SGD
from keras.utils import plot_model
df = read_csv("credit_count.txt")
Y = df[df.CARDHLDR == 1].DEFAULT
X = scale(df[df.CARDHLDR == 1].iloc[:, 2:12])
D = 0.2
S = 1
seed(S)
### INPUT DATA
inputs = Input(shape = (X.shape[1],))
### DEFINE A MULTILAYER PERCEPTRON NETWORK
mlp_net = Dense(64, activation = 'relu', kernel_initializer = 'he_uniform')(inputs)
mlp_net = Dropout(rate = D, seed = S)(mlp_net)
mlp_net = Dense(64, activation = 'relu', kernel_initializer = 'he_uniform')(mlp_net)
mlp_net = Dropout(rate = D, seed = S)(mlp_net)
mlp_out = Dense(1, activation = 'sigmoid')(mlp_net)
mlp_mdl = Model(inputs = inputs, outputs = mlp_out)
### DEFINE A CONVOLUTIONAL NETWORK
cnv_net = Reshape((X.shape[1], 1))(inputs)
cnv_net = Conv1D(32, 4, activation = 'relu', padding = "same", kernel_initializer = 'he_uniform')(cnv_net)
cnv_net = MaxPooling1D(2)(cnv_net)
cnv_net = SpatialDropout1D(D)(cnv_net)
cnv_net = Flatten()(cnv_net)
cnv_out = Dense(1, activation = 'sigmoid')(cnv_net)
cnv_mdl = Model(inputs = inputs, outputs = cnv_out)
### COMBINE MLP AND CNV
con_out = average([mlp_out, cnv_out])
con_mdl = Model(inputs = inputs, outputs = con_out)
sgd = SGD(lr = 0.1, momentum = 0.9)
con_mdl.compile(optimizer = sgd, loss = 'binary_crossentropy', metrics = ['binary_accuracy'])
con_mdl.fit(X, Y, batch_size = 2000, epochs = 50, verbose = 0)
plot_model(con_mdl, to_file = 'model.png', show_shapes = True, show_layer_names = True)

model