from hana_ml import dataframe as hd
# Open the HANA connection identified by the 'MLMDA_KEY' user key.
conn = hd.ConnectionContext(userkey='MLMDA_KEY')

# Training set: census rows, minus every marital-status class that has fewer
# than 1500 rows (the sub-query lists those rare classes), ordered by id.
sql_cmd = """
select * from apl_samples.census
where "marital-status" not in (
select "marital-status" from apl_samples.census
group by "marital-status" having count(*) < 1500 )
order by "id"
"""
hdf_train = hd.DataFrame(conn, sql_cmd)

# Preview: fetch the first five rows client-side and render them without the index.
train_preview = hdf_train.head(5).collect()
train_preview.style.hide(axis='index')

# Number of rows in the training set.
hdf_train.shape[0]
# Roles of the columns in the training set.
col_key = 'id'
col_target = 'marital-status'

# Work on a copy of the column list: `hdf_train.columns` may hand back the
# DataFrame's own list, and removing items from that object in place would
# silently alter the DataFrame's column metadata.
col_predictors = list(hdf_train.columns)

# The key and the target are not predictors; 'education-num' is left out too.
# list.remove still raises ValueError when a name is absent, so a typo in a
# column name fails loudly instead of being ignored.
for excluded_col in (col_key, col_target, 'education-num'):
    col_predictors.remove(excluded_col)

# Number of candidate predictors.
len(col_predictors)
from hana_ml.algorithms.apl.gradient_boosting_classification import GradientBoostingClassifier
# Create the APL gradient boosting classifier and turn on automatic variable
# selection, capped by the 'max number of final variables' setting.
apl_model = GradientBoostingClassifier()
selection_params = {
    'variable_auto_selection': True,
    'variable_selection_max_nb_of_final_variables': '6',
}
apl_model.set_params(**selection_params)

# Train on the census data using the key, target and predictor columns.
apl_model.fit(
    hdf_train,
    label=col_target,
    key=col_key,
    features=col_predictors,
)
# Target statistics from the debrief report, restricted to the rows whose
# "Partition" column equals 'Estimation'.
my_filter = "\"Partition\" = 'Estimation'"
target_stats = apl_model.get_debrief_report('MultiClassTarget_Statistics')
df = target_stats.filter(my_filter).collect()

# Drop the two columns that are not wanted in the displayed table.
df.drop(columns=['Oid', 'Target Key'], inplace=True)

# Render: percentage with two decimals, weight as a whole number, no index.
format_dict = {'% Weight': '{:,.2f}%', 'Weight': '{:,.0f}'}
df.style.format(format_dict).hide(axis='index')
from hana_ml.model_storage import ModelStorage
# Name the trained model, then store it in the USER_APL schema.
apl_model.name = 'My Multiclass Model'
model_storage = ModelStorage(connection_context=conn, schema='USER_APL')

# 'replace' overwrites a previously saved model instead of failing.
model_storage.save_model(model=apl_model, if_exists='replace')

# Show what the storage now contains.
model_storage.list_models()
# NOTE(review): imports and connection are repeated here, presumably so this
# part can run on its own in a fresh session - confirm before deduplicating.
from hana_ml import dataframe as hd
from hana_ml.model_storage import ModelStorage

conn = hd.ConnectionContext(userkey='MLMDA_KEY')

# Reload the model that was saved under this name in the USER_APL schema.
model_storage = ModelStorage(connection_context=conn, schema='USER_APL')
apl_model = model_storage.load_model(name='My Multiclass Model')

# Display the model information.
apl_model.get_model_info()
from hana_ml.visualizers.unified_report import UnifiedReport
# Build and display the unified report for the loaded model.
model_report = UnifiedReport(apl_model)
model_report.build().display()

# List the excluded variables together with the reason for their exclusion.
exclusion_report = apl_model.get_debrief_report('ClassificationRegression_VariablesExclusion')
df = exclusion_report.collect()[['Variable', 'Reason For Exclusion']]
df.style.hide(axis='index')
# Apply set: the census rows with id 550 to 554 (bounds included), ordered by id.
sql_cmd = 'select * from apl_samples.census where "id" between 550 and 554 order by "id"'
hdf_apply = hd.DataFrame(conn, sql_cmd)

# Fetch the rows client-side and render them without the index.
apply_rows = hdf_apply.collect()
apply_rows.style.hide(axis='index')
# Apply settings for this run: output the decision and the top three reason
# codes; predicted value, probability and reason strength outputs are off.
reason_code_settings = {
    'APL/ApplyExtraMode': 'Advanced Apply Settings',
    'APL/ApplyPredictedValue': 'false',
    'APL/ApplyProbability': 'false',
    'APL/ApplyDecision': 'true',
    'APL/ApplyReasonCode/TopCount': '3',
    'APL/ApplyReasonCode/ShowStrengthValue': 'false',
    'APL/ApplyReasonCode/ShowStrengthIndicator': 'false',
}
apl_model.set_params(extra_applyout_settings=reason_code_settings)

df = apl_model.predict(hdf_apply).collect()

# Friendlier headers: id, actual, prediction, then a name/value pair for each
# of the three reasons. The assignment expects exactly nine output columns.
reason_headers = [
    f'Reason {rank} {part}'
    for rank in (1, 2, 3)
    for part in ('Name', 'Value')
]
df.columns = ['Id', 'Actual', 'Prediction'] + reason_headers
df.style.hide(axis='index')
# Apply settings for this run: output the predicted values and the decision;
# the probability output stays off.
score_settings = {
    'APL/ApplyExtraMode': 'Advanced Apply Settings',
    'APL/ApplyPredictedValue': 'true',
    'APL/ApplyProbability': 'false',
    'APL/ApplyDecision': 'true',
}
apl_model.set_params(extra_applyout_settings=score_settings)

df = apl_model.predict(hdf_apply).collect()

# Friendlier headers for the actual and predicted label columns.
header_map = {'TRUE_LABEL': 'Actual', 'PREDICTED': 'Prediction'}
df.rename(columns=header_map, inplace=True)

# Strip the score prefix from the column names so only the remainder is shown.
score_prefix = "gb_score_marital-status_"
df.columns = [header.replace(score_prefix, "") for header in df.columns]
df.style.hide(axis='index')
# Export the model's apply code as JSON text.
apl_scoring_equation = apl_model.export_apply_code(code_type='JSON')

# Write it to disk. The context manager closes the file even if the write
# raises, and the explicit UTF-8 encoding keeps the output independent of the
# platform's default encoding.
with open("apl_model.json", "w", encoding="utf-8") as text_file:
    text_file.write(apl_scoring_equation)
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
User | Count |
---|---|
17 | |
11 | |
11 | |
11 | |
11 | |
8 | |
6 | |
5 | |
5 | |
5 |