
!pip install numpy
!pip install faker
!pip install pandas
!pip install hana_ml
import pandas as pd
from faker import Faker
import numpy as np
# One generator instance is shared by the examples that follow.
fake = Faker()

# Five random first names, one per line.
print(*(fake.first_name() for _ in range(5)), sep='\n')
Faker generates new random data every time it is called.
# Name generators, including gender-specific variants.
print(f'Male first names: {fake.first_name_male()}')
print(f'Female first names: {fake.first_name_female()}')
print(f'Last names: {fake.last_name()}')
print(f'Full names: {fake.name()}')

# Prefixes and suffixes (gender-specific versions also exist, e.g. prefix_female()).
print(f'Prefix: {fake.prefix()}')
print(f'Suffix: {fake.suffix()}')

# Email address generators.
print(f'Company emails: {fake.ascii_company_email()}')
print(f'Safe emails: {fake.ascii_safe_email()}')
print(f'Free emails: {fake.ascii_free_email()}')
print(f'ASCII Emails: {fake.ascii_email()}')
print(f'Emails: {fake.email()}')
Faker can easily generate realistic-looking PII. For more options, see https://faker.readthedocs.io/en/master/providers.html
# Company names and their legal-form suffixes.
print(f'Company name: {fake.company()}')
print(f'Company suffix: {fake.company_suffix()}')

# Individual address components.
print(f'Street address: {fake.street_address()}')
print(f'Bldg #: {fake.building_number()}')
print(f'City: {fake.city()}')
print(f'Country: {fake.country()}')
print(f'Postcode: {fake.postcode()}')

# A complete address in a single call.
print(f'Full address: {fake.address()}')

# Marketing-style text: catch phrases and mottos.
print(f'Catch phrase: {fake.catch_phrase()}')
print(f'Motto: {fake.bs()}')

# bothify fills '#' with a random number and '?' with a random letter;
# letters= limits which letters may be used.
print(fake.bothify(text='PROD-??-##', letters='ABCDE'))
print(fake.bothify(text='iPhone-#'))

# Random True/False value.
print(fake.boolean())
# Same, with the percentage chance of True given explicitly.
print(fake.boolean(25))
import numpy as np
industry = [
    'Automotive',
    'Health Care',
    'Manufacturing',
    'High Tech',
    'Retail',
]

# One probability per entry in `industry`; the values must sum to 1.0.
weights = [0.6, 0.2, 0.1, 0.07, 0.03]

# Weighted draw: p= supplies the probability of each category.
print(np.random.choice(a=industry, p=weights))

# Unweighted draw: every element is equally likely.
print(np.random.choice(a=industry))

# Normal distribution: loc is the mean, scale is the standard deviation.
print(np.random.normal(loc=1000, scale=100))

# The same draw, rounded.
print(round(np.random.normal(loc=1000, scale=100)))

# Random integer between 0 and 4.
print(np.random.randint(low=0, high=5))
# Random dates from progressively narrower windows, rendered as MM-DD-YYYY
# through the date objects' format spec.
print(f"{fake.date_this_century():%m-%d-%Y}")
print(f"{fake.date_this_decade():%m-%d-%Y}")
print(f"{fake.date_this_year():%m-%d-%Y}")
print(f"{fake.date_this_month():%m-%d-%Y}")

# Random time of day.
print(fake.time())
import pandas as pd

# Start and end of the window used to generate a random date.
my_start = pd.to_datetime('01-01-2021')
my_end = pd.to_datetime('12-31-2021')
print(f'Random date between {my_start} & {my_end}')

# Bare expression: the formatted date is computed but not printed.
format(fake.date_between_dates(date_start=my_start, date_end=my_end), '%m-%d-%Y')

# Individual date parts, one per line.
print(
    fake.year(),
    fake.month(),
    fake.day_of_month(),
    fake.day_of_week(),
    fake.month_name(),
    sep='\n',
)

# Dates relative to today, using Faker's offset strings.
print(fake.past_date(start_date='-1y'))
print(fake.future_date(end_date='+1d'))
from faker import Faker
import numpy as np
import pandas as pd
industry = ['Automotive', 'Health Care', 'Manufacturing', 'High Tech', 'Retail']
fake = Faker()


def create_data(x):
    """Generate ``x`` fake customer records keyed by row number.

    Args:
        x: Number of records to generate.

    Returns:
        A dict mapping each row number ``0 .. x - 1`` to a dict holding the
        generated field values for that record.
    """
    # The entries of a dict display are evaluated top to bottom, so the
    # generators are called in the order the keys are listed.
    return {
        row: {
            'name': fake.name(),
            'job': fake.job(),
            'birthdate': fake.date_of_birth(minimum_age=18, maximum_age=65),
            'email': fake.company_email(),
            'company': fake.company(),
            # Picked through Faker's own element chooser.
            'industry': fake.random_element(industry),
            'city': fake.city(),
            'state': fake.state(),
            'zipcode': fake.postcode(),
            'netNew': fake.boolean(chance_of_getting_true=65),
            # Normal draw (mean 1000, standard deviation 200), rounded.
            'sales_rounded': round(np.random.normal(1000, 200)),
            # A second, independent draw kept as a float.
            'sales_decimal': np.random.normal(1000, 200),
            'priority': fake.random_digit(),
            # Picked through NumPy instead of Faker.
            'industry2': np.random.choice(industry),
        }
        for row in range(0, x)
    }
# Build the frame row-wise (one dict per row) so each column gets its own
# inferred dtype. Going through DataFrame(...).transpose() instead turns every
# column into dtype `object`, because each pre-transpose column mixes strings,
# numbers and booleans.
df = pd.DataFrame.from_dict(create_data(5), orient='index')
df.head(5)
# Rebind `fake` to one locale at a time and print a name from each.
for locale_code in ('en-US', 'ja-JP', 'ru_RU', 'it_IT', 'de_DE', 'pt_BR'):
    fake = Faker(locale_code)
    print(fake.name())

# Instantiate Faker with multiple locales
fake = Faker(['en_US', 'de_DE', 'pt_BR', 'ja_JP', 'zh-CN'])
from faker import Faker
import numpy as np
import pandas as pd
industry = ['Automotive', 'Health Care', 'Manufacturing', 'High Tech', 'Retail']

# Instantiate Faker with multiple locales
fake = Faker(['en_US', 'de_DE', 'pt_BR', 'ja_JP', 'zh-CN'])


def create_data(x):
    """Generate ``x`` fake customer records keyed by row number.

    Args:
        x: Number of records to generate.

    Returns:
        A dict mapping each row number ``0 .. x - 1`` to a dict holding the
        generated field values for that record.
    """
    # The entries of a dict display are evaluated top to bottom, so the
    # generators are called in the order the keys are listed.
    return {
        row: {
            'name': fake.name(),
            'job': fake.job(),
            'birthdate': fake.date_of_birth(minimum_age=18, maximum_age=65),
            'email': fake.company_email(),
            'company': fake.company(),
            # Picked through Faker's own element chooser.
            'industry': fake.random_element(industry),
            'city': fake.city(),
            'state': fake.state(),
            'zipcode': fake.postcode(),
            'netNew': fake.boolean(chance_of_getting_true=65),
            # Normal draw (mean 1000, standard deviation 200), rounded.
            'sales_rounded': round(np.random.normal(1000, 200)),
            # A second, independent draw kept as a float.
            'sales_decimal': np.random.normal(1000, 200),
            'priority': fake.random_digit(),
            # Picked through NumPy instead of Faker.
            'industry2': np.random.choice(industry),
        }
        for row in range(0, x)
    }
# Build the frame row-wise (one dict per row) so each column gets its own
# inferred dtype. Going through DataFrame(...).transpose() instead turns every
# column into dtype `object`, because each pre-transpose column mixes strings,
# numbers and booleans.
df = pd.DataFrame.from_dict(create_data(1000), orient='index')
df.head(10)
# Create connection to HANA Cloud
import hana_ml.dataframe as dataframe

# Instantiate connection object. Every value in angle brackets is a
# placeholder: replace it with your own tenant address and credentials.
conn = dataframe.ConnectionContext(
    address='<Your HANA tenant info.hanacloud.ondemand.com>',
    port=443,
    user='<USERNAME>',
    password="<PASSWORD>",
    encrypt='true',
    # NOTE(review): certificate validation is switched off here; confirm that
    # is acceptable for the target environment before reusing this snippet.
    sslValidateCertificate='false',
)

# Display HANA version to test connection
print('HANA version: ' + conn.hana_version())
# Fetch the APL version to confirm PAL/APL are enabled.
from hana_ml.algorithms.apl import apl_base

v = apl_base.get_apl_version(conn)
v.head(5)

# Upload the pandas dataframe to HANA Cloud as table 'FAKER'.
# NOTE(review): force=True presumably replaces an existing table of the same
# name; confirm against the hana_ml documentation.
dataframe.create_dataframe_from_pandas(
    connection_context=conn,
    pandas_df=df,
    table_name='FAKER',
    force=True,
)
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
User | Count |
---|---|
13 | |
11 | |
11 | |
10 | |
10 | |
9 | |
8 | |
8 | |
7 | |
6 |