
Disclaimer
This tutorial is intended as a guide for the creation of demo/test data only. The sample script provided is not intended for use in a production system.
Purpose
The main purpose of this document is to show you how demo or test data can be provided for SAP hybris Marketing Data Management 1511 and up.
This tutorial explains one way of collecting tweets from Twitter based on a search term so that they can be used in the Sentiment Engagement workset of SAP hybris Marketing Data Management. A Python script for harvesting is executed from SAP HANA Studio (alternatively, you can use an Eclipse IDE such as Aptana Studio). The script inserts the collected tweets directly into interactions using an OData service. If you run the script with the default settings, it will harvest 100 posts in one run for a given search term. However, you can change the script settings to retrieve more data per run.
To run the script, you will need to make a few customizing and configuration settings in order to use the PyDev plug-in in SAP HANA studio.
Make sure that the following prerequisites are met before you start out:
Install SAP HANA studio
Please check SAP help file https://help.sap.com/hana/SAP_HANA_Studio_Installation_Update_Guide_en.pdf for more information about SAP HANA studio installation.
Tested with SAP HANA studio 64-bit (minimum version 1.0.7200) and SAP HANA client 64-bit on Microsoft Windows 7, 64-bit edition.
To import the tweets using the OData service, the SAP hybris Marketing user needs the role SAP_CEI_ECOMMERCE_INTEGRATION.
Carry out the following steps:
Alternatively, you can save the Python script with the .py extension in a file location of your choice and import this file into your project.
You have now set up harvesting.
REFERENCES:
https://help.sap.com/hana/SAP_HANA_Studio_Installation_Update_Guide_en.pdf
https://dev.twitter.com/docs/using-search
http://scn.sap.com/community/developer-center/hana/blog/2012/06/08/sap-hana-and-python-yes-sir
#Python Script starts here
import urllib2;
import urllib;
import sys;
import json;
import time;
import binascii;
import copy;
import calendar;
#===============================================================================
# CUSTOM PARAMETERS:
# Replace the 'XXX' placeholders below before running the script.
#===============================================================================
# 1. Target System (SAP hybris Marketing)
username = 'XXX' # hybris Marketing user (needs role SAP_CEI_ECOMMERCE_INTEGRATION)
password = 'XXX' # password for your user
cuan_url = "XXX" # root URL of the CUAN_IMPORT OData service
# EXAMPLE: cuan_url = "https://my.system:8080/sap/opu/odata/sap/CUAN_IMPORT_SRV"
# 2. Twitter OAuth Tokens
# https://apps.twitter.com/app/new (register your application to get access tokens)
c_key = 'XXX' # CONSUMER_KEY
c_sec = 'XXX' # CONSUMER_SECRET
# 3. Proxy settings (leave empty for a direct connection)
proxy = ''
# EXAMPLE: proxy = 'http://proxy:8080'
# proxy = ''
# 4. Additional settings
language = "en" # Tweet language (ISO 639-1 language code)
socialmediachannel = 'TW' # Social media channel name like 'TW'.
# You can enter maximum 3 characters;
# this has to be in sync with SCI customizing.
#===============================================================================
# PREDEFINED DATA STRUCTURES:
#===============================================================================
# Template for a CUAN_IMPORT Contact entry; deep-copied once per distinct
# tweet author before being filled in.
contact_prefab = dict(
    Id="",
    FullName="",
    Timestamp="",
    Facets=[],
    ImageURI="",
)
# Template for a Contact facet: the author's id on the configured social
# media channel.
facet_prefab = dict(
    Id="",
    IdOrigin=socialmediachannel,
)
# Template for a CUAN_IMPORT Interaction; deep-copied once per harvested tweet.
interaction_prefab = dict(
    CommunicationMedium=socialmediachannel,
    ContactId="",
    ContactIdOrigin=socialmediachannel,
    ContentData="",
    ContentTitle="",
    InteractionType="SOCIAL_POSTING",
    Timestamp="",
    SourceObjectId="",
    SourceObjectType="",
    SourceSystemId="",
    SourceSystemType="",
    Tags=[],
)
# Template for an Interaction tag carrying the search term the tweet was
# harvested for.
tag_prefab = dict(
    TagType="SearchTerm",
    Tag="",
)
# Template for the top-level CUAN_IMPORT payload (ImportHeaders entity):
# one header record holding all Interactions and Contacts of a run.
payload_prefab = dict(
    Id="",
    Timestamp="",
    UserName="USER",
    SourceSystemType="EXT",
    SourceSystemId="PYTHON",
    Interactions=[],
    Contacts=[],
)
#===============================================================================
# UTILITY FUNCTIONS:
#===============================================================================
# Generate the Bearer Token for use in the Authorization Header when making
# calls to the Twitter API. This is done via Application-only authentication,
# see 'https://dev.twitter.com/oauth/application-only' for more details
def generateTwitterHeader():
api_url = "https://api.twitter.com/oauth2/token";
# step 1. generate bearer token credential
bearer_token_credential = "%s:%s" % (c_key, c_sec);
# step 2. generate Base64 encoded credential
base64_bearer_token_credential = binascii.b2a_base64(bearer_token_credential)[:-1];
# step 3. connect
bearer_token = "";
if proxy is not None and proxy != '':
handler = urllib2.ProxyHandler({'http': proxy, 'https': proxy});
try:
opener = urllib2.build_opener(handler);
opener.addheaders = [ ('Content-Type', "application/x-www-form-urlencoded;charset=UTF-8"),
('Authorization', "Basic %s" % base64_bearer_token_credential)];
data = opener.open(api_url, data="grant_type=client_credentials").read();
# step 5. parse json string
json_data = json.loads(data, encoding="utf-8");
bearer_token = json_data["access_token"];
except:
print "[ERROR]\n%s" % ("\n".join("%s" % info for info in sys.exc_info()));
return None;
return "Bearer %s" % bearer_token;
# Percent-encode a value for use in a URL query string; only '~' is kept
# literal (RFC 3986 unreserved), everything else non-alphanumeric is escaped.
def escapeParameter(text):
    coerced = str(text)
    return urllib.quote(coerced, safe="~")
# Collect every "set-cookie" header from a urllib2 response info object and
# join their leading name=value pairs (attributes such as Path/Secure are
# dropped) into one string usable as a Cookie request header.
def extractCookies(info):
    cookie_parts = []
    for raw_header in info.getallmatchingheaders("set-cookie"):
        # "Set-Cookie: name=value; attr; ..." -> keep only "name=value"
        header_value = raw_header.split(": ", 1)[1]
        cookie_parts.append(header_value.split(";", 1)[0])
    return "; ".join(cookie_parts)
# Extract the value of the 'x-csrf-token' header from a urllib2 response
# info object. The raw header line is "x-csrf-token: <value>\r\n"; we strip
# the name prefix and the trailing CRLF.
def extractCSRFToken(info):
    header_line = info.getfirstmatchingheader("x-csrf-token")[0]
    token = header_line.split(": ", 1)[1]
    return token[:-2]
# Convert the timestamp returned by Twitter to a CUAN_IMPORT friendly
# timestamp. Twitter format example: "Mon Sep 24 03:35:21 +0000 2012" (UTC).
def twitterToCUANTimestamp(string):
    parsed = time.strptime(string, "%a %b %d %H:%M:%S +0000 %Y")
    epoch_millis = int(calendar.timegm(parsed) * 1000)
    return intToCUANTimestamp(epoch_millis)
# Wrap a given epoch-milliseconds value (can be an Integer) in the OData
# /Date(...)/ notation expected by the import service.
def intToCUANTimestamp(t):
    return "/Date(%s)/" % t
#===============================================================================
# MAIN PROGRAM LOGIC:
#===============================================================================
# Prompt the user for a non-empty search query; entering "stop" exits.
# CLEANUP: removed the dead trailing 'if not resp: continue' (the loop
# re-iterates on empty input anyway) and the unused 'apiurl' variable
# (generateTwitterHeader defines its own token endpoint).
while True:
    resp = raw_input("Please enter your search term:\n")
    if resp == "":
        resp = raw_input('Enter your search term or "stop" to exit:\n')
    if resp == 'stop':
        sys.exit()
    if resp:
        break
search_term = resp
# Twitter Search API (v1.1) endpoint
search_url = "https://api.twitter.com/1.1/search/tweets.json"
# Build and execute the Twitter search request: up to 100 recent tweets in
# the configured language matching the search term.
params = [("q", search_term), ("count", "100"), ("lang", language), ("result_type", "recent")]
header = generateTwitterHeader()
# Route through the proxy when one is configured, otherwise connect directly
if proxy:
    handler = urllib2.ProxyHandler({'http': proxy, 'https': proxy})
else:
    handler = urllib2.BaseHandler()
opener = urllib2.build_opener(handler)
opener.addheaders = [('Authorization', header)]
query_string = "&".join("%s=%s" % (name, escapeParameter(value)) for (name, value) in params)
http_url = "%s?%s" % (search_url, query_string)
# Call the API and convert the returned JSON document into Python objects
data = opener.open(http_url, data=None).read()
json_data_tweets = json.loads(data)
# Construct the payload for the CUAN_IMPORT service: one Interaction per
# tweet and one Contact per distinct author.
# CLEANUP: removed the no-op 'socialmediachannel = socialmediachannel' and
# the unused locals 'from_user_lang' and 'userProfileLink'.
replication_created_at = intToCUANTimestamp(int(time.time()))
payload = copy.deepcopy(payload_prefab)
payload["Timestamp"] = replication_created_at
contact_ids = []   # Twitter user ids already emitted as Contacts
user_count = 0
tweet_count = 0
for tweet in json_data_tweets["statuses"]:
    # Extract information from the tweet and its embedded user object
    tweet_id = tweet['id']
    id_str = tweet['id_str']
    user = tweet['user']
    from_user_id = user['id']
    from_screen_name = user['screen_name']
    from_user_name = user['name']
    # NOTE(review): status URLs conventionally use the screen name; the
    # numeric-user-id form relies on Twitter redirecting to the right page.
    socialPostLink = 'https://twitter.com/%s/status/%s' % (from_user_id, id_str)
    profile_image_url = user['profile_image_url']
    created_at = tweet['created_at']
    if created_at is None:
        continue  # cannot build an interaction without a timestamp
    created_at = twitterToCUANTimestamp(created_at)
    user_created_at = twitterToCUANTimestamp(user['created_at'])
    # Strip line breaks; skip tweets whose text is empty afterwards
    text = tweet['text'].encode('utf-8').replace('\n', '')
    if len(text) == 0:
        continue
    # Create a contact for every author (only once per author) and add a facet
    if from_user_id not in contact_ids:
        contact_ids.append(from_user_id)
        contact = copy.deepcopy(contact_prefab)
        contact["Id"] = str(from_user_id)
        contact["FullName"] = from_user_name
        contact["Timestamp"] = user_created_at
        contact["ImageURI"] = profile_image_url
        facet = copy.deepcopy(facet_prefab)
        facet["Id"] = from_screen_name
        facet["IdOrigin"] = socialmediachannel
        contact["Facets"] = [facet]
        payload["Contacts"].append(contact)
        user_count += 1
    # Create an interaction for the tweet
    interaction = copy.deepcopy(interaction_prefab)
    interaction["ContactId"] = from_screen_name
    interaction["ContentData"] = text
    interaction["Timestamp"] = created_at
    interaction["SourceObjectId"] = str(tweet_id)
    interaction["SourceDataUrl"] = socialPostLink
    # Tag the interaction with the search term it was harvested for
    tag = copy.deepcopy(tag_prefab)
    tag["Tag"] = search_term
    interaction["Tags"].append(tag)
    tweet_count += 1
    payload["Interactions"].append(interaction)
# Convert the Payload to JSON
json_payload = json.dumps(payload);
# Create HTTP Basic Auth Header
cuan_user_creds = binascii.b2a_base64(username + ":" + password)[:-1];
user_auth = "Basic %s" % cuan_user_creds;
# Fetch a CSRF Token and store the Cookies of the response (for later use)
opener = urllib2.build_opener();
opener.addheaders = [('x-csrf-token', "fetch"), ("Authorization", user_auth)];
response = opener.open(cuan_url);
csrf_token = extractCSRFToken(response.info());
cookies = extractCookies(response.info());
# Create a POST request and execute it. The Request will contain the payload,
# the previously fetched CSRF Token and the Cookies
opener.addheaders = [('x-csrf-token', csrf_token), ("Authorization", user_auth), ("Cookie", cookies)]; #, ("Cookie", cookies)
import_headers_url = cuan_url + ("/" if cuan_url[-1] != "/" else "") + "ImportHeaders";
data = json.dumps(payload);
req = urllib2.Request(import_headers_url, data, headers = {
"Content-Type": "application/json",
"Content-Length": len(data)
});
response = None;
try:
response = opener.open(req);
except urllib2.HTTPError as e:
# Something went wrong. Display information to the user and exit
error_message = e.read()
print "An Error occurred:";
print error_message;
sys.exit();
# Success
print "Imported %d tweets by %d users" % (tweet_count, user_count);
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
User | Count |
---|---|
3 | |
2 | |
2 | |
2 | |
1 | |
1 | |
1 | |
1 | |
1 | |
1 |