import pandas as pd
from sklearn.datasets import fetch_20newsgroups
# Fetch the predefined train and test splits of the 20 newsgroups corpus.
newsgroups_train = fetch_20newsgroups(subset='train')
newsgroups_test = fetch_20newsgroups(subset='test')

# One row per post: the raw text and its class label.
train = pd.DataFrame({
    'article': newsgroups_train.data,
    'category': newsgroups_train.target,
})
test = pd.DataFrame({
    'article': newsgroups_test.data,
    'category': newsgroups_test.target,
})
import numpy as np
from keras.models import Model
from keras.layers import Dense, Embedding, Input, LSTM, Bidirectional, GlobalMaxPool1D, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing import text, sequence
# --- Hyperparameters -------------------------------------------------------
max_features = 10000  # tokenizer vocabulary cap; also the Embedding input size
maxlen = 100          # every sequence is padded/truncated to this many tokens
embed_size = 512      # width of the embedding vectors
batch_size = 64
epochs = 100

# --- Raw text and labels ---------------------------------------------------
y = train['category'].values
train_sentences = train['article'].values
test_sentences = test['article'].values

# --- Text -> integer sequences ---------------------------------------------
# The vocabulary is learned from the training articles only.
tokenizer = text.Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(train_sentences.tolist())
train_tokenized, test_tokenized = (
    tokenizer.texts_to_sequences(docs)
    for docs in (train_sentences, test_sentences)
)

# NOTE: from here on `train` and `test` are the padded integer arrays and no
# longer the DataFrames they were bound to above.
train = sequence.pad_sequences(train_tokenized, maxlen=maxlen)
test = sequence.pad_sequences(test_tokenized, maxlen=maxlen)
def get_model():
    """Build and compile the bidirectional-LSTM text classifier.

    The architecture is: embedding -> bidirectional LSTM (full sequence
    output) -> global max pooling over time -> dropout -> dense ReLU ->
    dropout -> 20-way softmax.

    Reads the module-level settings ``maxlen``, ``max_features`` and
    ``embed_size``.

    Returns:
        A compiled Keras ``Model`` that takes sequences of ``maxlen`` token
        ids and outputs a probability distribution over 20 classes.
    """
    inp = Input(shape=(maxlen, ))
    x = Embedding(max_features, embed_size)(inp)
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    # Collapse the time axis by keeping the strongest activation per feature.
    x = GlobalMaxPool1D()(x)
    x = Dropout(0.2)(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(20, activation='softmax')(x)
    model = Model(inputs=inp, outputs=x)
    # The training script fits this model on integer class ids (not one-hot
    # vectors), so the sparse variant of the loss is required;
    # plain 'categorical_crossentropy' expects targets of shape (n, 20).
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
file_path = 'model.hdf5'

model = get_model()

# Stop once the validation loss has not improved for 10 epochs, and keep only
# the weights of the best epoch (lowest validation loss) on disk.
early = EarlyStopping(monitor='val_loss', mode='min', patience=10)
checkpoint = ModelCheckpoint(
    file_path,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)
callbacks = [checkpoint, early]

# The validation split passed to fit() is what produces `val_loss`.
model.fit(
    train,
    y,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.15,
    callbacks=callbacks,
)

# Restore the checkpointed weights before predicting on the test set.
# `y_test` holds the model's predictions, not the true test labels.
model.load_weights(file_path)
y_test = model.predict(test)
job:
  name: "newsgroups"
  execution:
    image: "tensorflow/tensorflow:1.5.0-gpu"
    command: "pip install keras --upgrade && python training.py"
    completionTime: "10"
    resources:
      cpus: 1
      memory: 10000
      gpus: 1
cf sapml job submit -f newsgroup.yaml code
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
User | Count |
---|---|
4 | |
1 | |
1 | |
1 | |
1 | |
1 |