
import urllib2
import base64
import zlib
import threading
from threading import Lock
import sys
import ssl
import json
from datetime import datetime
import calendar
import dbapi
from wsgiref.handlers import format_date_time
from time import mktime
# --- Stream reader settings ------------------------------------------------
CHUNKSIZE = 4*1024    # bytes requested from the HTTP response per read
GNIPKEEPALIVE = 30    # seconds; the urlopen timeout is this value plus one
NEWLINE = '\r\n'      # separator between records in the decompressed stream

# --- GNIP endpoint and credentials (fill in before running) ----------------
URL = ''
username_gnip = ''
password_gnip = ''


def _basic_auth_header(username, password):
    """Return the value of an HTTP Basic ``Authorization`` header.

    ``base64.b64encode`` is used rather than ``base64.encodestring`` because
    the latter appends a trailing newline (and wraps long output), which ends
    up inside the header value.  The result is a native ``str`` on both
    Python 2 and Python 3.
    """
    token = '%s:%s' % (username, password)
    if not isinstance(token, bytes):
        token = token.encode('utf-8')
    encoded = base64.b64encode(token)
    if not isinstance(encoded, str):
        encoded = encoded.decode('ascii')
    return 'Basic %s' % encoded


HEADERS = {
    'Accept': 'application/json',
    'Connection': 'Keep-Alive',
    'Accept-Encoding': 'gzip',
    'Authorization': _basic_auth_header(username_gnip, password_gnip),
}

# --- HANA connection and target tables (fill in before running) ------------
server = ''
port = 0              # placeholder: the original left this blank; set the real port
username_hana = ''
password_hana = ''
schema = ''
client = ''
socialmediachannel = ''

# Locks shared by all worker threads: one guards record processing/stdout,
# the other guards writes to stderr.
print_lock = Lock()
err_lock = Lock()
class procEntry(threading.Thread):
    """Worker thread that stores one batch of stream records in the database.

    ``buf`` holds zero or more JSON activity records separated by ``NEWLINE``.
    For every record whose ``verb`` is ``post`` or ``share`` the thread prints
    a tab-separated summary of the author and upserts one row into
    ``<schema>.SOCIALUSERINFO`` and one into ``<schema>.SOCIALDATA``.
    Records with any other verb are skipped.
    """

    # ``postedTime`` layout once its last five characters are dropped
    # (presumably a ".000Z" suffix -- confirm against the feed), and the
    # compact layout written to the CREATEDAT / CREATIONDATETIME_UTC columns.
    _POSTED_FORMAT = "%Y-%m-%dT%H:%M:%S"
    _COMPACT_FORMAT = "%Y%m%d%H%M%S"

    def __init__(self, buf):
        """Remember the raw batch text; processing happens in ``run``."""
        threading.Thread.__init__(self)
        self.buf = buf

    def unicodeToAscii(self, word):
        """Encode ``word`` as ASCII, silently dropping non-ASCII characters."""
        return word.encode('ascii', 'ignore')

    def run(self):
        """Process every non-empty record of the batch.

        A record that is not valid JSON, or that lacks a field this class
        reads, is reported on stderr and skipped so that the remaining
        records of the batch are still processed.
        """
        for rec in (piece.strip() for piece in self.buf.split(NEWLINE)):
            if not rec:
                continue
            try:
                jrec = json.loads(rec)
                # NOTE(review): the lock is held for the whole record,
                # including the database round trips, so workers are
                # serialized.  Kept as in the original; narrow it if the
                # intent was only to guard stdout.
                with print_lock:
                    self._store(jrec)
            except ValueError as e:
                with err_lock:
                    sys.stderr.write("Error processing JSON: %s (%s)\n" % (str(e), rec))
            except (KeyError, IndexError, TypeError) as e:
                with err_lock:
                    sys.stderr.write("Skipping record with unexpected structure: %r (%s)\n" % (e, rec))

    def _store(self, jrec):
        """Extract the fields of one parsed record and upsert both rows."""
        to_ascii = self.unicodeToAscii

        # A 'share' nests the shared activity one level deeper than a 'post'.
        # Any other verb carries nothing to store, so stop before touching
        # the database instead of continuing with undefined values.
        verb = to_ascii(jrec['verb'])
        if verb == 'post':
            payload = jrec['object']
        elif verb == 'share':
            payload = jrec['object']['object']
        else:
            return

        # SOCIALUSERINFO details
        actor = jrec['actor']
        social_user = to_ascii(actor['id'].split(':')[2])
        profile_link = to_ascii(actor['link'])
        account = to_ascii(actor['preferredUsername'])
        friends_count = actor['friendsCount']
        followers_count = actor['followersCount']
        posted_time = to_ascii(jrec['postedTime'])
        display_name = to_ascii(actor['displayName'])
        image = to_ascii(actor['image'])

        # SOCIALDATA details
        social_post = to_ascii(jrec['id'].split(':')[2])
        post_link = jrec['link']
        search_term = to_ascii(jrec['gnip']['matching_rules'][0]['value'])
        data_uuid = to_ascii(payload['id'].split(':')[2])
        post_text = to_ascii(payload['summary'])

        # Three renderings of time: "now" (compact), the post time (compact),
        # and the post time as an HTTP date with ' GMT' replaced by ' +0000'.
        created_at = datetime.utcnow().strftime(self._COMPACT_FORMAT)
        posted = datetime.strptime(posted_time[:-5], self._POSTED_FORMAT)
        posted_compact = posted.strftime(self._COMPACT_FORMAT)
        posted_http = format_date_time(calendar.timegm(posted.timetuple()))[:-4] + ' +0000'

        print('\t'.join([
            client, socialmediachannel, social_user, account,
            str(friends_count), str(followers_count), posted_time,
            display_name, display_name.upper(), profile_link, image,
        ]))

        user_sql = ('upsert ' + schema + '.SOCIALUSERINFO(CLIENT, SOCIALMEDIACHANNEL, '
                    'SOCIALUSER, SOCIALUSERPROFILELINK, SOCIALUSERACCOUNT, '
                    'NUMBEROFSOCIALUSERCONTACTS, SOCIALUSERINFLUENCESCOREVALUE, '
                    'CREATIONDATETIME, SOCIALUSERNAME, SOCIALUSERNAME_UC, '
                    'SOCIALUSERIMAGELINK, CREATEDAT) values '
                    '(?,?,?,?,?,?,?,?,?,?,?,?) with primary key')
        post_sql = ('upsert ' + schema + '.SOCIALDATA(CLIENT, SOCIALDATAUUID, SOCIALPOST, '
                    'SOCIALMEDIACHANNEL, CREATEDBYUSER, CREATIONDATETIME, '
                    'SOCIALPOSTLINK, CREATIONUSERNAME, SOCIALPOSTSEARCHTERMTEXT, '
                    'SOCIALPOSTTEXT, CREATEDAT, CREATIONDATETIME_UTC) '
                    'VALUES(?,?,?,?,?,?,?,?,?,?,?,?) WITH PRIMARY KEY')

        # One connection per record, as in the original, but now released
        # even when an upsert fails.
        hdb_target = dbapi.connect(server, port, username_hana, password_hana)
        try:
            cursor_target = hdb_target.cursor()
            try:
                cursor_target.execute(user_sql, (
                    client, socialmediachannel, social_user, profile_link,
                    account, friends_count, followers_count, posted_http,
                    display_name, display_name.upper(), image, created_at))
                hdb_target.commit()
                cursor_target.execute(post_sql, (
                    client, data_uuid, social_post, socialmediachannel,
                    social_user, posted_http, post_link, display_name,
                    search_term, post_text, created_at, posted_compact))
                hdb_target.commit()
            finally:
                cursor_target.close()
        finally:
            hdb_target.close()
def getStream():
    """Open the gzip-compressed stream at ``URL`` and dispatch its records.

    The response is read ``CHUNKSIZE`` bytes at a time and decompressed
    incrementally.  Everything up to the last ``NEWLINE`` seen so far is
    handed to a new ``procEntry`` thread; the trailing partial record is
    kept and prepended to the next chunk.  Returns when the server closes
    the stream.  Errors from ``urllib2`` / ``ssl`` / ``zlib`` propagate to
    the caller.
    """
    proxy = urllib2.ProxyHandler({'http': 'http://proxy:8080', 'https': 'https://proxy:8080'})
    opener = urllib2.build_opener(proxy)
    urllib2.install_opener(opener)
    req = urllib2.Request(URL, headers=HEADERS)
    response = urllib2.urlopen(req, timeout=(1 + GNIPKEEPALIVE))
    try:
        # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
        decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
        remainder = ''
        while True:
            chunk = response.read(CHUNKSIZE)
            # End of stream is an empty *read*; the decompressor may
            # legitimately produce no output for a non-empty chunk.
            if not chunk:
                return
            data = remainder + decompressor.decompress(chunk)
            if NEWLINE not in data:
                # No complete record yet: keep buffering instead of failing
                # to unpack the result of rsplit().
                remainder = data
                continue
            records, remainder = data.rsplit(NEWLINE, 1)
            # Skip batches that contain nothing but separators.
            if records.strip():
                procEntry(records).start()
    finally:
        response.close()
if __name__ == "__main__":
    # Imported here because the module-level imports bring in only
    # ``mktime`` from the time module.
    import time

    RECONNECT_DELAY = 1  # seconds to wait after a failed connection

    print('Started...')
    # Reconnect forever: getStream() returns when the server closes the
    # stream and raises when the connection fails or a read times out.
    while True:
        try:
            getStream()
        except (ssl.SSLError, urllib2.URLError) as e:
            with err_lock:
                sys.stderr.write("Connection failed: %s\n" % (str(e)))
            # Pause briefly so an unreachable endpoint is not retried in a
            # tight loop.
            time.sleep(RECONNECT_DELAY)
2. Run the script from your editor
3. Check the results in the database tables SOCIALDATA and SOCIALUSERINFO.
Other blog posts on connecting Social Channels:
Twitter connector to harvest tweets into Social Intelligence tables using Python script
http://scn.sap.com/docs/DOC-53824
Historical data harvesting from GNIP using Python scripts
Demo Social and Sentiment data generation using Python script
(If you find any mistakes or have any questions about this blog post, please leave a comment.)
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
User | Count |
---|---|
5 | |
3 | |
3 | |
2 | |
2 | |
1 | |
1 | |
1 | |
1 | |
1 |