import os
import sys

# Installation root of SAP Predictive Analytics (Automated). Written as a raw
# string so every backslash is a literal path separator; the non-raw form
# relied on invalid escape sequences such as "\P" and "\S".
AA_DIRECTORY = r"C:\Program Files\SAP Predictive Analytics\Desktop\Automated"

# Directory added to the module search path before `import aalib`
# (presumably where the Python 3.5 binding is installed -- confirm locally).
sys.path.append(AA_DIRECTORY + r"\EXE\Clients\Python35")

# Put the C++ client directory first on PATH while keeping whatever PATH
# already contained, instead of replacing the whole variable.
_cpp_client_dir = AA_DIRECTORY + r"\EXE\Clients\CPP"
_inherited_path = os.environ.get('PATH', '')
os.environ['PATH'] = (_cpp_client_dir + os.pathsep + _inherited_path
                      if _inherited_path else _cpp_client_dir)

import aalib
class DefaultContext(aalib.IKxenContext):
    """Non-interactive context object for the aalib stores.

    Only ``userMessage`` has behaviour visible in this script: it returns
    True. The remaining callbacks are no-ops that return None.

    NOTE(review): the bodies of ``__init__``, ``userConfirm``, ``userAskOne``
    and ``stopCallBack`` were missing from this copy of the file; the
    implementations below are minimal placeholders -- confirm against the
    upstream sample.
    """

    def __init__(self):
        # Delegate to the base class so it is initialised normally.
        super().__init__()

    def userMessage(self, iSource, iMessage, iLevel):
        """Accept a message callback; the message is ignored and True returned."""
        return True

    def userConfirm(self, iSource, iPrompt):
        """Confirmation callback; does nothing and returns None."""
        return None

    def userAskOne(self, iSource, iPrompt, iHidden):
        """Single-value prompt callback; does nothing and returns None."""
        # `self` added: without it a call on an instance raises TypeError.
        return None

    def stopCallBack(self, iSource):
        """Stop-request callback; does nothing and returns None."""
        # `self` added for the same reason as in userAskOne.
        return None
# Build the aalib front end and take its factory; the factory creates every
# store used by this script.
frontend = aalib.KxFrontEnd([])
factory = frontend.getFactory()
context = DefaultContext()
factory.setConfiguration("DefaultMessages", "true")

# Open a file store on the C++ client directory and attach the context to it.
# NOTE(review): 'en', 10 and False look like language, message level and a
# flag -- confirm against the aalib documentation.
config_store = factory.createStore("Kxen.FileStore")
config_store.setContext(context, 'en', 10, False)
# Raw string: "\E" and "\C" are not valid escape sequences, so the non-raw
# literal only worked by accident and triggers a warning on newer Pythons.
config_store.openStore(AA_DIRECTORY + r"\EXE\Clients\CPP", "", "")
# Name of the saved model to restore from the store opened below.
model_name = "My Classification Model"
store = factory.createStore("Kxen.FileStore")
# NOTE(review): `folder_name` is not assigned anywhere in the visible part of
# this script; it has to hold the model store directory before this line
# runs -- TODO confirm where it is meant to be defined.
store.openStore(folder_name, "", "")
# Presumably restores the most recently saved version of the named model --
# verify against the aalib documentation.
model = store.restoreLastModelD(model_name)
# Entries read from the model's "Infos" parameter branch, printed one per
# line as "<entry>: <value>".
my_list = ["ModelName", "BuildDate", "BuildData", "ClassName/Default"]
for info_name in my_list:
    info_value = model.getParameter("Infos/" + info_name).getNameValue().value
    print("{}: {}".format(info_name, info_value))
# The name of the first sub-entry under the regression "Results" node is
# used as the target column for the rest of the script.
results_path = "Protocols/Default/Transforms/Kxen.RobustRegression/Results"
result_entries = model.getParameter(results_path).getSubEntries([""])
target_idx = 0
target_col = result_entries[target_idx][0]
# Print, for every category of the target, its weight and frequency within
# the chosen partition, largest weight first.
partition = 'Estimation'
categories_path = "Protocols/Default/Variables/%s/Statistics/%s/Categories" % (target_col, partition)
category_rows = model.getParameter(categories_path).getSubEntries(["", "Weight", "Frequency"])


def _weight_of(row):
    # Index 2 is the "Weight" field requested above.
    return float(row[2])


v_sorted = sorted(category_rows, key=_weight_of, reverse=True)

heading = "Observations within the %s partition" % (partition)
print('\x1b[1m' + heading + '\x1b[0m')

# Convert every row before printing anything, so a malformed value aborts
# the report before any partial output appears.
report_rows = [(row[0], int(row[2]), float(row[3]) * 100) for row in v_sorted]
for category, row_count, share in report_rows:
    print("{} = {} ; {:0} rows ({:0.2f}%)".format(target_col, category, row_count, share))
import pandas as pd

# Table of the target's total weight per sub-entry of its "Statistics" node,
# with each row's share of the overall total.
stats_path = "Protocols/Default/Variables/%s/Statistics" % target_col
partition_rows = model.getParameter(stats_path).getSubEntries(["", "WeightTotal"])

df = pd.DataFrame(list(partition_rows), columns=["Partition", "Type", "Observations"])
df = df.drop("Type", axis=1)
df['Observations'] = df['Observations'].astype(int)
observation_share = df['Observations'] / df['Observations'].sum()
df['In %'] = observation_share.round(4)*100
# Each entry: indicator name, parameter path template, message template.
# Ki is read from the Validation statistics, Kr from the Estimation ones.
indicator_reports = (
    ('Ki',
     "Protocols/Default/Variables/rr_%s/Statistics/Validation/Targets/%s/%s",
     "Predictive Power (KI) is {:0.2f}%"),
    ('Kr',
     "Protocols/Default/Variables/rr_%s/Statistics/Estimation/Targets/%s/%s",
     "Prediction Confidence (KR) is {:0.2f}%"),
)
for indicator, path_template, message in indicator_reports:
    d_path = path_template % (target_col, target_col, indicator)
    d_object = model.getParameter(d_path)
    # The stored value is scaled by 100 to print it as a percentage.
    d_value = float(d_object.getNameValue().value) * 100
    print(message.format(d_value))
import matplotlib.pyplot as plt

# Read the "Contrib" value of every entry under MaxCoefficients for the target.
d_path = ("Protocols/Default/Transforms/Kxen.RobustRegression/Results/%s/MaxCoefficients" % target_col)
d_param = model.getParameter(d_path)
# Pass a one-element list, like every other getSubEntries call in this
# script. The previous ("Contrib") was a bare string: parentheses without a
# trailing comma do not create a tuple.
d_values = d_param.getSubEntries(["Contrib"])
v_sorted = sorted(d_values, key=lambda x: float(x[1]), reverse=True)

# One row per variable, largest contribution first, plus a running total.
df = pd.DataFrame(v_sorted, columns=["Variable", "Contribution"])
df['Contribution'] = df['Contribution'].astype(float)
# The cumulative sum is taken on the unrounded values, before scaling.
df['Cumulative'] = df['Contribution'].cumsum()
df['Contribution'] = df['Contribution'].round(4)*100
df['Cumulative'] = df['Cumulative'].round(4)*100

# Keep only variables whose rounded contribution is non-zero.
non_zero = df['Contribution'] != 0
dfs = df[non_zero].sort_values(by=['Contribution'], ascending=False)

# Re-sort ascending for the horizontal bar chart.
c_title = "Contributions to %s" % target_col
dfs = dfs.sort_values(by=['Contribution'], ascending=True)
dfs.plot(kind='barh', x='Variable', y='Contribution', title=c_title, legend=False, fontsize=12)
def plot_profit_curve(model,
                      target_variable="",
                      target_key_variable="",
                      num_points=20,
                      data_set="",
                      based_on_frequency=True,
                      use_weight=True,
                      use_groups=True,
                      *,
                      variable_name,
                      curve_type):
    """Fetch a curve from the model's "Default" protocol and plot it.

    The curve comes from ``getProfitCurve2``; its first row is used as the
    column header and the remaining rows as data. The "Random", "Wizard" and
    "Validation" columns are drawn against the "Frequency" column. Relies on
    the module-level ``pd`` (pandas) and ``plt`` (matplotlib.pyplot).

    Note that the axis labels and the title are hard-coded for a ROC curve,
    whatever ``curve_type`` is passed.

    Args:
        model: Object providing ``getProtocolFromName``.
        target_variable: Forwarded unchanged to ``getProfitCurve2``.
        target_key_variable: Forwarded unchanged to ``getProfitCurve2``.
        num_points: Forwarded unchanged to ``getProfitCurve2``.
        data_set: Forwarded unchanged to ``getProfitCurve2``.
        based_on_frequency: Forwarded unchanged to ``getProfitCurve2``.
        use_weight: Forwarded unchanged to ``getProfitCurve2``.
        use_groups: Forwarded unchanged to ``getProfitCurve2``.
        variable_name: Keyword-only. Variable whose curve is requested; a
            leading "rr_" is removed for the plot title.
        curve_type: Keyword-only. Curve type constant forwarded to
            ``getProfitCurve2`` (the script passes ``aalib.Kxen_roc``).

    Returns:
        None. The figure is created through the pyplot state machine.
    """
    # variable_name and curve_type were used below and passed by the caller
    # but were absent from the signature; they are added as keyword-only
    # parameters so the existing positional order is untouched.
    proto = model.getProtocolFromName("Default")
    curve = proto.getProfitCurve2(variable_name, target_variable,
                                  target_key_variable, num_points,
                                  curve_type, data_set, based_on_frequency,
                                  use_weight, use_groups)
    pd_curve = pd.DataFrame(list(curve[1:]), columns=curve[0])

    plt.figure(figsize=(14, 8))
    mygreen = '#00ff00'
    frequency = pd_curve['Frequency']

    # Shade the area between the random baseline and each of the two curves.
    plt.fill_between(frequency, pd_curve['Random'], pd_curve['Wizard'], color=mygreen, alpha=0.1)
    plt.fill_between(frequency, pd_curve['Random'], pd_curve['Validation'], color="blue", alpha=0.1)

    pl_rnd, = plt.plot(frequency, pd_curve['Random'], color='red', label='Random', lw=1)
    pl_wiz, = plt.plot(frequency, pd_curve['Wizard'], color=mygreen, label='Wizard', lw=1)
    pl_est, = plt.plot(frequency, pd_curve['Validation'], color='blue', label='Model', lw=1)

    plt.grid(color='b', alpha=0.9, linestyle='dashed', linewidth=0.2)
    plt.xlabel('False Positive Rate', fontsize=14)
    plt.ylabel('True Positive Rate', fontsize=14)

    # Remove the "rr_" prefix only. str.lstrip('rr_') strips any leading run
    # of the characters 'r' and '_', so "rr_race" would have become "ace".
    prefix = 'rr_'
    if variable_name.startswith(prefix):
        display_name = variable_name[len(prefix):]
    else:
        display_name = variable_name
    plt.title('ROC Curve for ' + display_name, fontsize=16, fontweight='bold')

    plt.xticks(frequency, frequency, rotation=30, fontsize=12)
    plt.legend(handles=[pl_rnd, pl_wiz, pl_est], loc=4, fontsize=16)
# Plot the ROC-type curve for the "rr_"-prefixed variable of the target.
roc_variable = "rr_" + target_col
plot_profit_curve(model, variable_name=roc_variable, curve_type=aalib.Kxen_roc)
# Read the threshold value (the regression's LowerBound parameter).
threshold_param = model.getParameter(
    "Protocols/Default/Transforms/Kxen.RobustRegression/Parameters/LowerBound")
d_value = float(threshold_param.getNameValue().value)

# Fetch the correlated variable pairs together with their coefficient and
# order them by coefficient, highest first.
correlations_path = "Protocols/Default/Transforms/Kxen.RobustRegression/Results/%s/Correlations" % target_col
pair_rows = model.getParameter(correlations_path).getSubEntries(["", "Var1", "Var2"])
v_sorted = sorted(pair_rows, key=lambda pair: float(pair[1]), reverse=True)

# Print one line per pair under a bold heading.
heading = "Correlations above %s" % (d_value)
print('\x1b[1m' + heading + '\x1b[0m')
# Coefficients are converted up front so a bad value fails before any pair
# is printed.
printable_pairs = [(round(float(pair[1]), 3), pair[2], pair[3]) for pair in v_sorted]
for coefficient, first_var, second_var in printable_pairs:
    print("{:>40} : {:0.3}".format(first_var + ' - ' + second_var, coefficient))
# Weight, frequency and normal profit of one predictor, first per category
# and then per group. `df` ends up holding the per-group table.
partition = 'Estimation'
predictor = 'capital-gain'

# Sub-path of the normal-profit value, relative to each category/group entry.
profit_path = ("Targets/%s/NormalProfit") % target_col

breakdowns = (
    ("Category",
     ("Protocols/Default/Variables/%s/Statistics/%s/Categories") % (predictor, partition)),
    ("Group",
     ("Protocols/Default/Variables/%s/Statistics/%s/Targets/%s/Groups") % (predictor, partition, target_col)),
)
for label, stats_path in breakdowns:
    d_param = model.getParameter(stats_path)
    d_values = d_param.getSubEntries(["", "Weight", "Frequency", profit_path])
    df = pd.DataFrame(list(d_values), columns=[label, "Type", "Observations", "In %", "Normal Profit"])
    df = df.drop("Type", axis=1)
    df['Observations'] = df['Observations'].astype(int)
    frequency = df['In %'].astype(float)
    df['In %'] = frequency.round(4)*100
    df['Normal Profit'] = df['Normal Profit'].astype(float).round(3)
# Cross-tabulate one predictor against the two target values, first per
# category and then per group, and draw one bar chart per target value.
predictor = 'occupation'

# The names of the first two category entries of the target are used as the
# two target values.
target_categories_path = "Protocols/Default/Variables/%s/Statistics/%s/Categories" % (target_col, partition)
target_categories = model.getParameter(target_categories_path).getSubEntries(["", "Code"])
target_v1 = target_categories[0][0]
target_v2 = target_categories[1][0]
target_values = (target_v1, target_v2)

# Sub-paths of the per-target-value weights, relative to each entry.
d_path = ("Targets/%s/TargetCategories/%s/Weight") % (target_col, target_v1)
d_path2 = ("Targets/%s/TargetCategories/%s/Weight") % (target_col, target_v2)

breakdowns = (
    ("Category",
     ("Protocols/Default/Variables/%s/Statistics/%s/Categories") % (predictor, partition),
     "Observations by Category for %s"),
    ("Group",
     ("Protocols/Default/Variables/%s/Statistics/%s/Targets/%s/Groups") % (predictor, partition, target_col),
     "Observations by Group for %s"),
)
for label, stats_path, heading in breakdowns:
    d_param = model.getParameter(stats_path)
    d_values = d_param.getSubEntries(["", d_path, d_path2])
    df = pd.DataFrame(list(d_values), columns=[label, "Type", target_v1, target_v2])
    df = df.drop("Type", axis=1)

    # Counts per target value, their sum, and each value's share of the sum.
    for target_value in target_values:
        df[target_value] = df[target_value].astype(int)
    df['Total'] = df[target_v1] + df[target_v2]
    for target_value in target_values:
        df[target_value + ' (in %)'] = (df[target_value] / df['Total']).round(4)*100

    text = heading % (predictor)
    print('\x1b[31;1m' + text + '\x1b[0m')

    for target_value in target_values:
        c_title = "%s = %s" % (target_col, target_value)
        df.plot(kind='barh', x=label, y=target_value, title=c_title,
                alpha=0.6, width=1.0, edgecolor='grey', legend=False, fontsize=12)
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
User | Count |
11 | |
11 | |
10 | |
10 | |
9 | |
9 | |
7 | |
6 | |
6 | |
6 |