
!pip install scikit-learn==0.22.2
!pip install seaborn==0.10.0
import os
import re
import csv
import random
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import joblib
from sklearn.model_selection import train_test_split
import pandas as pd
import sapdi
import shutil
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import json
from sapdi import tracking
%matplotlib inline
# Load the gender dataset from an SAP Data Intelligence data collection.
# The workspace and collection names below are the ones used by this
# notebook; adjust them to match your own tenant.
ws = sapdi.get_workspace(name='suresh-ws')
dc = ws.get_datacollection(name='gender-collection')
# Parse the CSV while the reader is still open; the context manager takes
# care of cleaning the reader up once the DataFrame has been built.
with dc.open('gender.csv').get_reader() as reader:
    df = pd.read_csv(reader)
# Boolean masks over the label column (1 = male, 0 = female, as encoded in
# the column name itself).
is_male = df['male(1-male 0-female)'] == 1
is_female = df['male(1-male 0-female)'] == 0

male = df.loc[is_male]
female = df.loc[is_female]
# Drop the last 13 female rows.
# NOTE(review): presumably this equalises the two group sizes for the
# plots that follow -- confirm against the dataset.
female = female.iloc[:-13]

# Show a random peek and the shape of each group (male first, then female).
for subset in (male, female):
    print(subset.sample(3))
    print(subset.shape)

# Keep weight and height as single-column DataFrames for plotting.
male_weight = male[['weight(kg)']]
male_height = male[['height(cm)']]
print(male_weight.shape, male_height.shape)

female_weight = female[['weight(kg)']]
female_height = female[['height(cm)']]
print(female_weight.shape, female_height.shape)
# Scatter-plot weight, then height, for both groups (male in red, female in
# green) against the row position within each group.
for axis_label, male_values, female_values in (
    ('Weight', male_weight, female_weight),
    ('Height', male_height, female_height),
):
    plt.figure(figsize=(18, 6))
    # Derive the x positions from each series' own length instead of a
    # hard-coded row count, so the plot keeps working when the dataset (or
    # the number of trimmed rows) changes.
    plt.scatter(range(len(male_values)), male_values,
                color='r', alpha=0.5, s=125)
    plt.scatter(range(len(female_values)), female_values,
                color='g', alpha=0.5, s=125)
    plt.xlabel('Range')
    plt.ylabel(axis_label)
    plt.show()
# Separate the label from the features. `pop` removes the label column from
# `df` in place, so `X` (which is the same object as `df`) is left holding
# only the feature columns.
y = df.pop('male(1-male 0-female)')
print(y.sample(n=5))
X = df
print(X.sample(n=5))

# Visualise the pairwise correlation between the features.
feature_corr = X.corr()
sns.heatmap(feature_corr, annot=True)
plt.show()

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
split = train_test_split(X, y, test_size=0.20, random_state=42)
X_train, X_test, y_train, y_test = split
# Train each candidate classifier with default hyper-parameters, report its
# accuracy on the training and test splits, and record one tracking run per
# algorithm in the "gender" run collection.
#
# The order matters: the names assigned inside the loop (`model`,
# `train_accuracy`, `test_accuracy`, `y_pred`, `cm`, `metrics`, `run`) keep
# the values of the LAST iteration, so the AdaBoost model is the one that
# the export step further down serialises.
for estimator_cls in (
    SVC,
    DecisionTreeClassifier,
    RandomForestClassifier,
    AdaBoostClassifier,
):
    model = estimator_cls().fit(X_train, y_train)

    # Accuracy as a percentage.
    train_accuracy = model.score(X_train, y_train) * 100
    test_accuracy = model.score(X_test, y_test) * 100
    print('Accuracy of Training Set: {:.2f}'.format(train_accuracy))
    print('Accuracy of Test Set: {:.2f}'.format(test_accuracy))

    y_pred = model.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)

    metrics = {
        "training_accuracy": train_accuracy,
        "test_accuracy": test_accuracy,
    }

    run = tracking.start_run(run_collection_name="gender")
    try:
        tracking.log_metrics(metrics)
        # The class name doubles as the algorithm tag ("SVC",
        # "DecisionTreeClassifier", ...).
        tracking.set_tags({"algo": estimator_cls.__name__})
    finally:
        # Always close the run, even if logging fails, so a failed call
        # does not leave a run open for the next iteration.
        tracking.end_run()
# Fetch the tracking runs recorded by this notebook in the current scenario
# and tabulate them as one row per run: algorithm tag followed by the
# metric values.
sc = sapdi.get_current_scenario()
run_data = tracking.get_runs(
    scenario=sc,
    notebook=sapdi.scenario.Notebook.get(notebook_id="gender.ipynb"),
)

# NOTE(review): the column labels below assume every run reports exactly two
# metrics, in the order training accuracy then test accuracy -- confirm that
# the tracking service returns metrics in the order they were logged.
lst = [
    [r.tags.get("algo"), *(m.get("value") for m in r.metrics)]
    for r in run_data
]
mdf = pd.DataFrame(lst, columns=['algo', 'train_accuracy', 'test_accuracy'])
mdf
# Working locations for packaging the model, all rooted at the current
# working directory:
#   exporter_content   - staging folder for the pickled model and pip file
#   exported_content   - folder the exporter writes its archive into
#   zip_file_path      - the archive expected inside exported_content
#   unzip_folder_path  - scratch folder used to inspect the archive
curr_dir = os.getcwd()
exporter_content, exported_content, zip_file_path, unzip_folder_path = (
    os.path.join(curr_dir, relative_path)
    for relative_path in (
        "exporter_content",
        "exported_content",
        "exported_content/gender_1.zip",
        "exported_unzip_content",
    )
)
# One path per line, in the same order as they were defined.
print(exporter_content, exported_content, zip_file_path, unzip_folder_path,
      sep="\n")
# Start from an empty staging folder: remove any leftover from a previous
# run, then recreate it. `makedirs` sits outside the `if` so the folder is
# also created on the very first run.
if os.path.exists(exporter_content) and os.path.isdir(exporter_content):
    shutil.rmtree(exporter_content)
os.makedirs(exporter_content)

# Serialise the most recently trained `model` into the staging folder.
joblib.dump(model, os.path.join(exporter_content, 'gender.pkl'))
%%writefile exporter_content/pip_dependencies.txt
scikit-learn==0.22.2
joblib==0.14.1
%%writefile predictor.py
import json
import os

import joblib
from sapdi.serving.pymodel.predictor import AbstractPyModelPredictor


class GenderPredictor(AbstractPyModelPredictor):
    """Serve gender predictions from the pickled classifier."""

    def initialize(self, asset_files_path):
        """Load the classifier from ``gender.pkl`` inside *asset_files_path*."""
        # os.path.join copes with the directory being passed with or
        # without a trailing separator.
        self.classifier = joblib.load(
            os.path.join(asset_files_path, 'gender.pkl'))

    def predict(self, input_dict):
        """Predict the gender label for a single person.

        *input_dict* must provide the keys ``"age"``, ``"weight"`` and
        ``"height"``. Returns ``{'result': {'gender': <int>}}`` where the
        integer is the classifier's predicted label.
        """
        age = input_dict.get("age")
        weight = input_dict.get("weight")
        height = input_dict.get("height")
        # NOTE(review): the feature order (age, weight, height) is assumed
        # to match the column order the model was trained on -- confirm.
        # Height is truncated to a whole number, as in the original code.
        real_value = [[float(age), float(weight), int(height)]]
        predicted = self.classifier.predict(real_value)
        # Cast to a built-in int so the response is JSON serialisable.
        res = int(predicted[0])
        return {'result': {'gender': res}}
from predictor import GenderPredictor

# Smoke-test the predictor locally against the staged pickle before it is
# packaged: load the model, then score one sample request.
predictor = GenderPredictor()
predictor.initialize('exporter_content/')

payload = {
    "age": 37,
    "weight": 75,
    "height": 167,
}
# Left as a bare expression so the notebook displays the returned dict.
predictor.predict(payload)
# Start from an empty output folder: remove any leftover from a previous
# run, then recreate it. `makedirs` sits outside the `if` so the folder is
# also created on the very first run.
if os.path.exists(exported_content) and os.path.isdir(exported_content):
    shutil.rmtree(exported_content)
os.makedirs(exported_content)

from sapdi.serving.pymodel.exporter import PyExporter
from predictor import GenderPredictor

# Package the predictor source, the pickled model and the pip requirements
# into a deployable model under `exported_content`.
exporter = PyExporter()
exporter.save_model(
    name="gender",
    model_dir_path=exported_content,
    func=GenderPredictor(),
    source_path_list=[os.path.join(curr_dir, "predictor.py")],
    asset_path_list=[os.path.join(curr_dir, "exporter_content/gender.pkl")],
    pip_dependency_file_path=os.path.join(
        curr_dir, "exporter_content/pip_dependencies.txt"),
)
# Unpack the exported archive into a fresh scratch folder so its contents
# can be inspected. `makedirs` sits outside the `if` so the folder is also
# created on the very first run.
if os.path.exists(unzip_folder_path) and os.path.isdir(unzip_folder_path):
    shutil.rmtree(unzip_folder_path)
os.makedirs(unzip_folder_path)
shutil.unpack_archive(zip_file_path, extract_dir=unzip_folder_path)
from sapdi.artifact.artifact import Artifact, ArtifactKind, ArtifactFileType

# Register the exported archive as a MODEL artifact; the artifact's file
# name is the archive's base name.
archive_name = os.path.basename(zip_file_path)
artifact = sapdi.create_artifact(
    artifact_name="gender",
    description="Gender Model",
    artifact_kind=ArtifactKind.MODEL,
    file_type=ArtifactFileType.FILE,
    file_name=archive_name,
    upload_content=zip_file_path,
)
print(
    'Model artifact id {}, file {} registered successfully at {} \n'.format(
        artifact.artifact_id,
        zip_file_path,
        artifact.get_uri(),
    )
)
import base64

# Print the Base64 form of "<tenant>\<user>:<password>", i.e. the token
# that goes after "Basic " in an HTTP Authorization header. The value below
# is a placeholder credential.
credential = "dummytenant\\dummyuser:dummypassword"
encoded_credential = base64.b64encode(credential.encode("utf-8"))
print(encoded_credential.decode("utf-8"))
# Call the deployed model over REST with one sample request and print the
# predicted gender. Replace the URL and the Authorization token
# placeholders with the values of your own deployment.
payload = {"age": 37, "weight": 75, "height": 167}
url = "<REST API URL>"
headers = {
    'Content-Type': 'application/json',
    'X-Requested-With': 'Fetch',
    'Authorization': 'Basic <XXXXX>'
}
# A timeout keeps the notebook from hanging forever on an unreachable
# endpoint.
response = requests.post(
    url, headers=headers, data=json.dumps(payload), timeout=30)
# Fail loudly on HTTP errors (bad credentials, wrong URL, ...) instead of
# trying to read a prediction out of an error body.
response.raise_for_status()
interpret = response.json().get("result").get("gender")
# The service returns the classifier's label: 1 is male, anything else is
# reported as female.
print("MALE" if interpret == 1 else "FEMALE")
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
User | Count |
---|---|
12 | |
12 | |
11 | |
11 | |
11 | |
9 | |
8 | |
7 | |
7 | |
7 |