-- GetUserTimeline function already built as per the Twitter Adapter setup
CREATE COLUMN TABLE NEWS_REAL AS
    (SELECT ID, SCREENNAME, TWEET, CREATEDAT
     FROM GetUserTimeline('BBCNews', 3500, NULL, NULL)
     WHERE RETWEET = 0);
# Python 3 code
import pandas as pd

df = pd.read_csv('../data/Fake.News/Fake/fake.csv',
                 usecols=['uuid', 'author', 'title', 'text', 'language', 'site_url'])
# Filter out the records with UTF-8 issues
bad_uuids = ['97b1c7d89d0c2856afb7d729ad79038ea88b6943',
             'e8077b77ccacf1493c94c9f85d9d47d949b340cb',
             'effbcf6de55f9d987e4d8c5619843aff0219333d',
             'fce977b83e53b59c297a8ef5b3523186bd6837fd',
             '42ac7f1707d16e3c95317025e0cf5d5cd009e373']
df = df[~df.uuid.isin(bad_uuids)]
df['title'].fillna(value="", inplace=True)
df.dropna(axis=0, inplace=True, subset=['text'])
# Trim column lengths to fit the HANA column definitions
df.title = df.title.str.slice(0, 253)
df.text = df.text.str.slice(0, 4999)
df.author = df.author.str.slice(0, 127)
print(df.shape)
df.head()
import sqlalchemy
from sqlalchemy import create_engine

hanaeng = create_engine('hana+pyhdb://USER:PASS@myHANAServer.com:30015')
df.to_sql('NEWS_FAKE', con=hanaeng, index=False,
          dtype={'uuid': sqlalchemy.types.NVARCHAR(length=100),
                 'author': sqlalchemy.types.NVARCHAR(length=128),
                 'title': sqlalchemy.types.NVARCHAR(length=256),
                 'text': sqlalchemy.types.NVARCHAR(length=5000),
                 'language': sqlalchemy.types.NVARCHAR(length=24),
                 'site_url': sqlalchemy.types.NVARCHAR(length=255)})
-- Create combined temp table
CREATE COLUMN TABLE TITLE_TEMP AS
    (SELECT "UUID",
            CASE WHEN TITLE = '' THEN LEFT("TEXT", 140) ELSE LEFT(TITLE, 140) END AS TITLE,
            1 AS FAKE
     FROM "FAKENEWS"."NEWS_FAKE");
-- Add tweets
INSERT INTO TITLE_TEMP
    SELECT ID, LEFT("TWEET", 140), 0 FROM "FAKENEWS"."NEWS_REAL";
-- Randomise order of texts and generate an ID
CREATE COLUMN TABLE TITLE_140 AS
    (SELECT *, ROW_NUMBER() OVER () AS ID
     FROM (SELECT RAND() AS RND1, * FROM TITLE_TEMP ORDER BY RND1));
-- Text Analysis requires a primary key
ALTER TABLE TITLE_140 ADD CONSTRAINT PRIM_KEY_ID PRIMARY KEY (ID);
CREATE FULLTEXT INDEX "TITLE_140_IDX" ON
    "FAKENEWS"."TITLE_140"("TITLE")
    LANGUAGE DETECTION ('EN')
    ASYNC PHRASE INDEX RATIO 0.0
    CONFIGURATION 'LINGANALYSIS_BASIC'
    SEARCH ONLY OFF
    FAST PREPROCESS OFF
    TEXT MINING ON
    TEXT ANALYSIS ON
    TOKEN SEPARATORS '\/;,.:-_()[]<>!?*@+{}="&#$~|';
CREATE VIEW "FAKENEWS"."V_TOKEN_ID" ( "TOKEN_ID", "TA_TOKEN" ) AS
    SELECT
        ROW_NUMBER() OVER () AS TOKEN_ID,
        TA_TOKEN
    FROM (SELECT TA_TOKEN, COUNT(*) AS CNT
          FROM "FAKENEWS"."$TA_TITLE_140_IDX"
          GROUP BY TA_TOKEN
          ORDER BY CNT DESC);
# Connect to HANA and retrieve data
import pyhdb

connection = pyhdb.connect(
    host="10.0.21.182",
    port=30015,
    user="MY_TOP_SECRET_USERNAME",
    password="MY_TOP_SECRET_PASSWORD"
)
cursor = connection.cursor()
cursor.execute("SELECT ID, D.TA_COUNTER, V.TA_TOKEN, V.TOKEN_ID "
               "FROM TBL_TOKEN_ID V "
               "INNER JOIN TBL_TA_TOKENS D ON V.TA_TOKEN = D.TA_TOKEN "
               "ORDER BY ID, D.TA_COUNTER")
# Show first 5 records for testing
#cursor.fetchmany(5)
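The fetched rows still need to be grouped per document and padded to a fixed length of 40 before they can feed the network. A minimal sketch of that step (the grouping code and variable names here are assumptions, not the original implementation):

from collections import defaultdict
from keras.preprocessing.sequence import pad_sequences

# Rows arrive ordered by ID and TA_COUNTER, so appending preserves token order
sequences = defaultdict(list)
for doc_id, counter, token, token_id in cursor.fetchall():
    sequences[doc_id].append(token_id)

# Left-pad (or truncate) every sequence to 40 token ids, matching input_length=40
trn_data = pad_sequences(list(sequences.values()), maxlen=40)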
from keras.models import Sequential
from keras.layers import (Embedding, SpatialDropout1D, Dropout,
                          Convolution1D, MaxPooling1D, Flatten, Dense)

model = Sequential([
    Embedding(5000, 32, input_length=40),
    SpatialDropout1D(0.2),
    Dropout(0.25),
    Convolution1D(64, 5, padding='same', activation='relu'),
    Dropout(0.25),
    MaxPooling1D(),
    Flatten(),
    Dense(100, activation='relu'),
    Dropout(0.7),
    Dense(1, activation='sigmoid', name='prediction')])
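The summary that follows comes from model.summary(). The compile step isn't shown in the original; a sketch consistent with the single sigmoid output and the loss/acc columns in the training log (the optimizer choice is an assumption) would be:

# Assumed compile settings: binary cross-entropy matches the single
# sigmoid output; 'adam' is an assumed optimizer choice.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()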
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
embedding_2 (Embedding)      (None, 40, 32)            160000
_________________________________________________________________
spatial_dropout1d_2 (Spatial (None, 40, 32)            0
_________________________________________________________________
dropout_4 (Dropout)          (None, 40, 32)            0
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 40, 64)            10304
_________________________________________________________________
dropout_5 (Dropout)          (None, 40, 64)            0
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 20, 64)            0
_________________________________________________________________
flatten_2 (Flatten)          (None, 1280)              0
_________________________________________________________________
dense_2 (Dense)              (None, 100)               128100
_________________________________________________________________
dropout_6 (Dropout)          (None, 100)               0
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 101
=================================================================
Total params: 298,505
Trainable params: 298,505
Non-trainable params: 0
_________________________________________________________________
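The log below comes from the training run; the fit call itself isn't shown in the original. A sketch consistent with the sample counts (31498 train vs 7874 validation is roughly an 80/20 split) and 14 epochs, where trn_labels and the batch size are assumptions:

# Assumed call: validation_split=0.2 reproduces the 31498/7874 split
model.fit(trn_data, trn_labels,
          validation_split=0.2,
          epochs=14, batch_size=64)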
Train on 31498 samples, validate on 7874 samples
Epoch 1/14
31498/31498 [==============================] - 2s 60us/step - loss: 0.3160 - acc: 0.8539 - val_loss: 0.2802 - val_acc: 0.9271
Epoch 2/14
31498/31498 [==============================] - 1s 39us/step - loss: 0.0790 - acc: 0.9726 - val_loss: 0.3442 - val_acc: 0.9053
Epoch 3/14
31498/31498 [==============================] - 1s 39us/step - loss: 0.0457 - acc: 0.9846 - val_loss: 0.3334 - val_acc: 0.9347
model.save('FakeNews-v2.h5')
import tensorflow as tf

def preprocess(txt_input):
    sparse_tokenized_input = tf.string_split(txt_input, delimiter=' ')
    tokenized_input = tf.sparse_tensor_to_dense(sparse_tokenized_input, default_value='0')
    token_idxs = tf.string_to_number(tokenized_input, out_type=tf.float32)
    inputlength = tf.size(token_idxs)
    # Max number of words in a sentence is 40
    padding = 40 - inputlength
    # Left-pad with zeros, then slice to exactly 40 token ids
    token_idxs_padded = tf.pad(token_idxs, [[0, 0], [padding, 0]])
    token_idxs_embedding = tf.slice(token_idxs_padded, [0, 0], [1, 40])
    return token_idxs_embedding
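The export code below references a txt_input placeholder that isn't defined above. A minimal sketch of how the preprocessing might be wired in front of the trained model (the placeholder shape and the direct model call are assumptions):

# Hypothetical wiring: a string placeholder runs through preprocess(),
# and the resulting index tensor is fed into the trained Keras model.
txt_input = tf.placeholder(tf.string, shape=[1], name='txt_input')
token_idxs_embedding = preprocess(txt_input)
prediction = model(token_idxs_embedding)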
from keras import backend as K
from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.saved_model.signature_def_utils_impl import predict_signature_def

export_path = 'FakeNews-Serving/17'
builder = saved_model_builder.SavedModelBuilder(export_path)
signature = predict_signature_def(inputs={'text': txt_input},
                                  outputs={'labels': model.output})
with K.get_session() as sess:
    builder.add_meta_graph_and_variables(sess=sess,
                                         tags=[tag_constants.SERVING],
                                         signature_def_map={'predict': signature})
    builder.save()
If the export directory already exists from a previous run, builder.save() aborts with:

AssertionError: Export directory already exists. Please specify a different
export directory: FakeNews-Serving/16
ubuntu@ip-10-0-31-145:~$ saved_model_cli show --dir FakeNews-Serving/17 --signature_def 'serving_default' --all
/usr/lib/python3.5/importlib/_bootstrap.py:222: RuntimeWarning: compiletime version 3.6 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.5
return f(*args, **kwds)
MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:
signature_def['predict']:
The given SavedModel SignatureDef contains the following input(s):
inputs['text'] tensor_info:
dtype: DT_STRING
shape: unknown_rank
name: txt_input:0
The given SavedModel SignatureDef contains the following output(s):
outputs['labels'] tensor_info:
dtype: DT_FLOAT
shape: (-1, 1)
name: prediction_1/Sigmoid:0
Method name is: tensorflow/serving/predict
ubuntu@ip-10-0-31-145:~$ tensorflow_model_server --port=9000 --model_name=fakenews --model_base_path=/home/ubuntu/FakeNews-Serving
2017-12-19 16:05:18.560516: I external/org_tensorflow/tensorflow/contrib/session_bundle/bundle_shim.cc:360] Attempting to load native SavedModelBundle in bundle-shim from: /home/ubuntu/FakeNews-Serving/17
2017-12-19 16:05:18.560541: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:236] Loading SavedModel from: /home/ubuntu/FakeNews-Serving/17
2017-12-19 16:05:18.598911: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:155] Restoring SavedModel bundle.
2017-12-19 16:05:18.623654: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:190] Running LegacyInitOp on SavedModel bundle.
2017-12-19 16:05:18.625504: I external/org_tensorflow/tensorflow/cc/saved_model/loader.cc:284] Loading SavedModel: success. Took 64901 microseconds.
2017-12-19 16:05:18.626184: I tensorflow_serving/core/loader_harness.cc:86] Successfully loaded servable version {name: fakenews version: 17}
E1219 16:05:18.628689844 15683 ev_epoll1_linux.c:1051] grpc epoll fd: 3
2017-12-19 16:05:18.630423: I tensorflow_serving/model_servers/main.cc:288] Running ModelServer at 0.0.0.0:9000 ...
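With the server up, it can be worth testing the servable directly over gRPC before involving HANA. A sketch of such a client for the 2017-era TensorFlow Serving Python API (the client code is an assumption, not part of the original walkthrough; the token string reuses the real-news sequence used later):

from grpc.beta import implementations
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2, prediction_service_pb2

channel = implementations.insecure_channel('localhost', 9000)
stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

request = predict_pb2.PredictRequest()
request.model_spec.name = 'fakenews'
request.model_spec.signature_name = 'predict'
# Space-separated token ids, as expected by the preprocess() graph
request.inputs['text'].CopyFrom(
    tf.contrib.util.make_tensor_proto(['445 4999 400 41 3016 334 1806 1160 4999'],
                                      shape=[1]))
result = stub.Predict(request, 10.0)  # 10-second timeout
print(result.outputs['labels'])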
-- register model
INSERT INTO "_SYS_AFL"."EML_MODEL_CONFIGURATION" VALUES ('fakenews', 'RemoteSource', 'TensorFlowModelServer');
SELECT * FROM "_SYS_AFL"."EML_MODEL_CONFIGURATION";
-- create parameters table (used in subsequent calls)
CREATE TABLE "PARAMETERS" ("Parameter" VARCHAR(100), "Value" VARCHAR(100));
-- apply registered models
CALL "_SYS_AFL"."EML_CTL_PROC" ('UpdateModelConfiguration', "PARAMETERS", ?);
-- verify model is up and running on remote source
TRUNCATE TABLE "PARAMETERS";
INSERT INTO "PARAMETERS" VALUES ('Model', 'fakenews');
CALL "_SYS_AFL"."EML_CHECKDESTINATION_PROC" ("PARAMETERS", ?);
CALL "SYS"."AFLLANG_WRAPPER_PROCEDURE_DROP" ('FAKENEWS', 'CLASSIFY_NEWS');
-- create table types
CREATE TYPE "T_PARAMS" AS TABLE ("Parameter" VARCHAR(100), "Value" VARCHAR(100));
CREATE TYPE "T_DATA" AS TABLE ("text" VARCHAR(256));
CREATE TYPE "T_RESULTS" AS TABLE ("Score1" FLOAT);
-- create signature table then generate stored procedure
CREATE COLUMN TABLE "SIGNATURE" ("POSITION" INTEGER, "SCHEMA_NAME" NVARCHAR(256), "TYPE_NAME" NVARCHAR(256), "PARAMETER_TYPE" VARCHAR(7));
INSERT INTO "SIGNATURE" VALUES (1, 'FAKENEWS', 'T_PARAMS', 'IN');
INSERT INTO "SIGNATURE" VALUES (2, 'FAKENEWS', 'T_DATA', 'IN');
INSERT INTO "SIGNATURE" VALUES (3, 'FAKENEWS', 'T_RESULTS', 'OUT');
CALL "SYS"."AFLLANG_WRAPPER_PROCEDURE_CREATE" ('EML', 'PREDICT', 'FAKENEWS', 'CLASSIFY_NEWS', "SIGNATURE");
-- create tables
CREATE TABLE "PARAMS" LIKE "T_PARAMS";
CREATE TABLE "RESULTS" LIKE "T_RESULTS";
-- run time
-- data to be scored
CREATE VIEW "NEWS_SENTANCE" AS (SELECT '445 4999 400 41 3016 334 1806 1160 4999' as "text" from dummy);
CREATE VIEW "NEWS_SENTANCE_FAKE" AS (SELECT '161 45 145 30 4999 107 1112 947 25' as "text" from dummy);
-- params
TRUNCATE TABLE "PARAMS";
INSERT INTO "PARAMS" VALUES ('Model', 'fakenews');
--INSERT INTO "PARAMS" VALUES ('Model', 'saved_model%predict'); -- mandatory: model name (optional: signature name)
--INSERT INTO "PARAMS" VALUES ('Deadline', '1000'); -- optional: max milliseconds to wait
-- scoring : results inline
CALL "CLASSIFY_NEWS" ("PARAMS", "NEWS_SENTANCE", ?);
print(trn_data[31504])
[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0 445 4999 400 41 3016 334 1806 1160 4999]
model.predict(trn_data[31504:31506])

array([[ 0.03409993],
       [ 0.99994373]], dtype=float32)

With FAKE coded as 1, the first sequence (the real headline behind NEWS_SENTANCE) scores close to 0 and the second close to 1, so the model itself classifies both examples as expected.
-- scoring : results inline
CALL "CLASSIFY_NEWS" ("PARAMS", "NEWS_SENTANCE", ?);

Running this call initially failed:

Could not execute 'CALL "CLASSIFY_NEWS" ("PARAMS", "NEWS_SENTANCE", ?)' in 137 ms 909 µs .
SAP DBTech JDBC: [423]: AFL error: search table error: _SYS_AFL.EML:PREDICT: [423] (range 3) AFL error exception: RPC "Prediction/Metamodel" failed (5)
One fix is to pass the signature name along with the model name, using the model%signature format noted in the parameter comments above:

TRUNCATE TABLE "PARAMS";
INSERT INTO "PARAMS" VALUES ('Model', 'fakenews%predict');

nohup tensorflow_model_server --port=9000 --model_name=fakenews --model_base_path=/home/ubuntu/FakeNews
The HANA trace then surfaced a further failure in the Predict RPC:

[1256]{200896}[7/-1] 2017-12-19 14:52:31.262719 e LJIT cePopCustomLjit.cpp(00639) : _SYS_AFL.EML:PREDICT: [423] (range 3) AFL error exception: RPC "Prediction/Predict" failed (3)
[8590]{200896}[3/-1] 2017-12-19 14:54:31.781945 e AFL_EML impl.cpp(00272) : PredictClient::predict failed (3): indices[0,33] = 9999 is not in [0, 5000)%0A%09 [[Node: embedding_1_1/Gather = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, _output_shapes=[[1,40,32]], validate_indices=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](embedding_1_1/embeddings/read, embedding_1_1/Cast)]]
[8590]{200896}[3/-1] 2017-12-19 14:54:31.782200 e LJIT cePopCustomLjit.cpp(00620) : Llang Runtime Error: Exception::SQLException423: RPC "Prediction/Predict" failed (3)
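The "indices[0,33] = 9999 is not in [0, 5000)" message means a token id outside the embedding's 5,000-word vocabulary reached the graph. One possible guard (an assumption, not the documented fix) is to clamp ids inside preprocess(), mirroring how out-of-vocabulary words were mapped to 4999 during training:

# Clamp any out-of-vocabulary token id to the last embedding row (4999)
token_idxs = tf.minimum(token_idxs, 4999.0)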