Skip to content

Commit 19a8fd5

Browse files
authored
Merge pull request #35 from RaInta/master
Added automatic email sending script
2 parents c26bef8 + b236f14 commit 19a8fd5

File tree

3 files changed

+294
-0
lines changed

3 files changed

+294
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
from mimesis import Person, Address, Business, Payment, Text
2+
3+
from scipy.stats import pareto
4+
import pandas as pd
5+
import numpy as np
6+
7+
import sqlite3
8+
import os
9+
10+
# Note: we don't ever store user passwords as clear text!!!
11+
# To emulate salting and hashing the user passwords:
12+
import hashlib
13+
import uuid
14+
15+
# A dedicated password-hashing package, such as passlib,
16+
# should really be used instead, but that is out of scope
17+
# for this script. E.g.:
18+
# import passlib
19+
20+
# Fix NumPy's global seed so that the NumPy-driven draws in this script repeat
np.random.seed(42)

# One mimesis provider instance per category of synthetic data
person, address, business, payment, text = (
    Person(), Address(), Business(), Payment(), Text()
)
27+
28+
##################################################
29+
### Define a couple of convenience functions:
30+
##################################################
31+
32+
33+
def hashed_passwd(passwd, salt=None):
    """Return the SHA-512 hex digest of ``passwd`` followed by a salt.

    This only emulates salting and hashing for synthetic data. It must
    *never* be used in a production setting; use a dedicated package such
    as passlib when passwords really need to be stored securely.

    Parameters
    ----------
    passwd : str
        The clear-text password.
    salt : str, optional
        Salt appended to the password before hashing. When omitted, a fresh
        random salt (``uuid.uuid4().hex``) is generated; that salt is not
        returned, so the resulting hash cannot be re-derived later. Pass an
        explicit salt when a reproducible digest is needed.

    Returns
    -------
    str
        Hexadecimal SHA-512 digest of the UTF-8 encoded password + salt.
    """
    if salt is None:
        salt = uuid.uuid4().hex
    salted = passwd.encode('utf-8') + salt.encode('utf-8')
    return hashlib.sha512(salted).hexdigest()
42+
43+
44+
def account_balance(shape=1.161):
    """Draw one account balance from a Pareto distribution.

    Balances are expected to be distributed like other income-type
    quantities. The default exponent is chosen to replicate the
    80-20 rule.

    Parameters
    ----------
    shape : float, optional
        Shape (power exponent) passed to ``scipy.stats.pareto.rvs``.

    Returns
    -------
    float
        A single random variate.
    """
    return pareto.rvs(shape)
50+
51+
52+
def generate_sales(df, age='age', account_balance='account_balance',
                   marketing_level='marketing_level', min_age=25,
                   max_age=35, noise_ampl=10):
    """Generate synthetic sales figures, one per row of ``df``.

    The result is the sum of:

    * ``0.01 * |age - 30| ** 2.5`` -- a curved age term centred on 30,
    * ``age`` itself,
    * ``50 * marketing_level * gate`` -- the marketing campaign, which only
      counts inside the targeted age bracket,
    * ``2 * account_balance``,
    * ``noise_ampl`` times Gaussian noise (mean 0.01, std. dev. 1.7).

    ``gate`` is 1 for ages strictly between ``min_age`` and ``max_age``,
    0.5 exactly at either bound and 0 elsewhere.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    age, account_balance, marketing_level : str
        Names of the columns of ``df`` holding those quantities.
    min_age, max_age : number
        Bounds of the age bracket targeted by the marketing campaign.
    noise_ampl : number
        Multiplier applied to the noise; 0 gives a noise-free result.

    Returns
    -------
    pandas.Series
        Sales values, aligned with ``df``.
    """
    ages = df[age]
    noise = noise_ampl * np.random.normal(0.01, 1.7, len(df))
    # Difference of two step functions: a window over [min_age, max_age]
    gated_age = (np.heaviside(ages - min_age, 0.5)
                 - np.heaviside(ages - max_age, 0.5))
    age_curve = 0.01 * np.abs(ages - 30) ** 2.5
    campaign = 50 * df[marketing_level] * gated_age
    return age_curve + ages + campaign + 2 * df[account_balance] + noise
61+
62+
63+
##################################################
64+
65+
##################################################
66+
### Generate a DataFrame of user information
67+
##################################################
68+
# Generate 10,000 rows of the following:
69+
# user_id, first_name, last_name, gender, email, password_hashed,
70+
# address, age, credit_card_num, credit_card_exp, security_answer,
71+
# account_balance, marketing_level
72+
73+
# Column names, in the same order as the values built for each row below
_user_columns = ["user_id", "first_name", "last_name",
                 "gender", "email", "password_hashed", "address",
                 "age", "credit_card_num", "credit_card_exp",
                 "security_answer", "account_balance",
                 "marketing_level"]

# One synthetic customer per row; marketing_level is an integer from 1 to 10
user_df = pd.DataFrame(
    [[x, person.name(), person.surname(), person.gender(),
      person.email(), hashed_passwd(person.password()),
      address.address(), person.age(),
      payment.credit_card_number(),
      payment.credit_card_expiration_date(), text.word(),
      account_balance(), np.random.randint(1, 11)]
     for x in range(10000)],
    columns=_user_columns)

# Generate sales from the noisy model, shift so the minimum is zero,
# then scale down by 40
_raw_sales = generate_sales(user_df)
user_df['sales'] = (_raw_sales - _raw_sales.min()) / 40

print("Summary statistics on numerical data:")
print(user_df.describe())
94+
95+
##################################################
96+
97+
##################################################
98+
### Scuff the data up a bit!
99+
##################################################
100+
# We'll 'disappear' 10% of some columns, and create
101+
# some dupes
102+
103+
104+
def makeDataMissing(df, col_name, frac=0.1):
    """Return a copy of column ``col_name`` with a random fraction missing.

    Rows are picked with the ``sample`` method of the DataFrame, and the
    values of ``col_name`` at the sampled index labels are replaced by the
    missing-value marker. ``df`` itself is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    col_name : str
        Name of the column to degrade.
    frac : float, optional
        Fraction of the rows of ``df`` to mark as missing.

    Returns
    -------
    pandas.Series
        New Series with the sampled entries set to missing.
    """
    rnd_Idx = df.sample(frac=frac).index
    column = df[col_name]
    # mask() builds a new Series rather than assigning NaN in place, so the
    # result may take whatever dtype is needed to hold missing values.
    return column.mask(column.index.isin(rnd_Idx))
115+
116+
117+
def makeDupes(df, frac=0.1):
    """Return ``df`` with a random fraction of its own rows appended.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; it is not modified.
    frac : float, optional
        Fraction of the rows of ``df`` to duplicate.

    Returns
    -------
    pandas.DataFrame
        The original rows followed by the sampled duplicates. The
        duplicates keep their original index labels.
    """
    rnd_Idx = df.sample(frac=frac).index
    # DataFrame.append was removed in pandas 2.0; concat is the replacement
    return pd.concat([df, df.loc[rnd_Idx, :]])
122+
123+
# Ten percent of customers weren't comfortable with volunteering their
# gender; others couldn't be bothered with the address.
for _col in ('gender', 'address'):
    user_df[_col] = makeDataMissing(user_df, _col)
128+
129+
# We'll apply duplicates later.
130+
131+
##################################################
132+
133+
##################################################
134+
### Perform some Exploratory Data Analysis
135+
##################################################
136+
137+
# Notebook-style inspection calls; their results are not used here
user_df.sample(5)

user_df.describe()

# The author's run reported a median balance of 1.8 against a mean of 5.3:
# the generated distribution is heavily skewed.

# It was designed around the famous "80-20 rule": the top twenty percent
# own 80% of the balances. To test it, take the 80th percentile:
_balances = user_df["account_balance"]
critical80 = np.quantile(_balances, 0.8)
## 4.013269256450965 (value recorded by the author)

# Total held by accounts above that threshold, and the overall total
the_few = _balances[_balances > critical80].sum()
tot_balance = _balances.sum()

the_few/tot_balance
## 0.7298469832819879 (value recorded by the author)
# So here, the top 20% 'only' have 73% of the account balance
158+
159+
# Histogram of the base-10 logarithm of the (Pareto-distributed) balances
_log_balance = np.log10(user_df['account_balance'])
user_df['log_account_balance'] = _log_balance
user_df['log_account_balance'].hist(bins=20)

# Some limitations of mimesis:
# if you want realistic distributions of certain numerical variables
# then you should simulate populations yourself. E.g.:
user_df["age"].plot.kde()

# The way ages are generated is not exactly a sample of any real population!
# That would depend on the underlying demographic dynamics.

# Pairwise views of the numerical columns
_numeric_cols = ['age', 'account_balance', 'marketing_level', 'sales']

from pandas.plotting import scatter_matrix
scatter_matrix(user_df[_numeric_cols])

import seaborn as sns

sns.pairplot(user_df[_numeric_cols], hue='marketing_level')
179+
180+
181+
##################################################
182+
183+
184+
##################################################
185+
### Export data to SQL, Excel and print summary
186+
##################################################
187+
188+
_summary = ("Account balance for top 20% of users: {} \nFraction of total "
            "balance owned by top 20%: {}%\n")
print(_summary.format(critical80, 100*the_few/tot_balance))

# Generate user info, along with 10% dupes. Only the account-related
# columns are kept (no marketing_level or sales).
_account_cols = ['user_id', 'first_name', 'last_name', 'email',
                 'password_hashed', 'gender', 'address', 'age',
                 'credit_card_num', 'credit_card_exp',
                 'security_answer', 'account_balance']
main_user_df = makeDupes(user_df[_account_cols])
197+
198+
199+
def df_sql_write(df, file_name="test.sql", table_name="test_table"):
    """Write a pandas DataFrame to a fresh sqlite3 database file.

    Any existing file at ``file_name`` is deleted first, so the database
    ends up containing only ``table_name``. Adapted from the pandas
    documentation on connecting to databases:
    https://pandas.pydata.org/pandas-docs/stable/io.html#reading-tables

    Parameters
    ----------
    df : pandas.DataFrame
        Data to store; its index is not written.
    file_name : str, optional
        Path of the sqlite3 database file to (re)create.
    table_name : str, optional
        Name of the table to create.
    """
    # Remove a previous database, if any, without a separate existence check
    try:
        os.remove(file_name)
    except FileNotFoundError:
        pass
    sql_db = sqlite3.connect(file_name)
    try:
        df.to_sql(name=table_name, con=sql_db, index=False)
    finally:
        # Close the connection even when the write fails
        sql_db.close()
209+
210+
211+
# Write out user info to SQL database (in random order)
212+
# Shuffle the user records, then store them in the SQL database
_shuffled_users = main_user_df.sample(frac=1.0)
df_sql_write(_shuffled_users, 'user_data.sql', table_name='user_accounts')

# Campaign data (also shuffled) goes to an Excel spreadsheet
_campaign_cols = ['user_id', 'marketing_level', 'sales']
campaign_df = user_df[_campaign_cols].sample(frac=1.0)

campaign_df.to_excel('advertising_campaign.xlsx', index=False)
218+
219+
## Extract DB thus:
220+
#with sqlite3.connect('user_data.sql') as cnx:
221+
# df1 = pd.read_sql_query("SELECT * FROM user_accounts", cnx)
222+
#
223+
#cnx.close()

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,5 @@ This is a collection of short Python scripts to solve and automate tasks and sim
4141
21 | [**IntentionTextDiscover**](https://github.com/fnplus/Python-scripts-collection/tree/master/IntentionTextDiscover) | Given a text, determines its intention class based on Bayes' theorem. | None |
4242
22 | [**E-Certificate Writer**](https://github.com/fnplus/Python-scripts-collection/tree/master/E-Certificate-Writer) | Useful for writing names of participants on E-Certificates using Python3. Returns the certificates in PDF format. | img2pdf==0.3.3, numpy==1.17.2, pandas==0.25.1, Pillow==6.2.0, python-dateutil==2.8.0, pytz==2019.3, six==1.12.0
4343
23 | [**YouTubeDownloader**](https://github.com/fnplus/Python-scripts-collection/tree/master/YouTubeDownloader) | You can download YouTube videos with the URLs provided. | pytube==9.5.3 |
44+
24 | [**SendEmail**](https://github.com/fnplus/Python-scripts-collection/tree/master/SendEmail) | Send email using Python. Prompts the user for their email address and sends to address input. Handles user's password securely using `getpass`. | None |
45+
25 | [**GenerateSyntheticCustomerDatabase**](https://github.com/fnplus/Python-scripts-collection/tree/master/GenerateSyntheticCustomerDatabase) | Generate a mock (synthetic) dataset of arbitrary length with 'customers'. This includes their names, credit card details and hashed passwords. This is all synthetic, so no data breach here! | mimesis, scipy, pandas, numpy |

SendEmail/SendEmail.py

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import smtplib
2+
from email.mime.multipart import MIMEMultipart
3+
from email.mime.text import MIMEText
4+
5+
import getpass
6+
7+
email_user = input("Email address to send from:\n").strip()

# The domain is whatever follows the last "@". Compare it in lower case so
# that e.g. "Outlook.com" is recognised too.
_local_part, _at_sign, email_domain = email_user.rpartition("@")
email_domain = email_domain.lower()
if not _at_sign or not email_domain:
    # Stop with a readable message instead of an IndexError
    raise SystemExit("Expected an address of the form user@domain, got: "
                     + repr(email_user))

if email_domain in ("outlook.com", "hotmail.com"):
    smtpserver = "smtp.live.com:587"  # Kludge for hotmail/outlook addresses
else:
    # Guess for SMTP server: "smtp." + the domain, on port 587
    smtpserver = "smtp." + email_domain + ":587"
15+
16+
import html

to_addr = input("Address to send to:\n")

subject = input("What is your email subject?\n")

body_text = input("What message did you want to email? (currently this is only one line)\n\n")

# HTML version of the message: a fixed heading followed by the user's text
body_html = """\
<html>
<head></head>
<body>
<h1>I sent this email automatically, using Python!</h1>
<p>
"""

# Escape the text so that characters such as "<" and "&" are shown
# literally instead of being interpreted as markup
body_html += html.escape(body_text)

body_html += """</p>
</body>
</html>
"""
36+
37+
def send_mail(to_addr, subject="Test email",
              body_text="Test message",
              body_html="Test message",
              from_addr=None, email_user=None,
              email_passwd=None,
              smtpserver="smtp.live.com:587"):
    """Send an email in MIME multi-part form (plain-text and HTML).

    For example:
        send_mail(to_addr, subject, body_text=body_text,
                  body_html=body_html, from_addr="me@example.com")

    Parameters
    ----------
    to_addr : str
        Recipient address.
    subject : str, optional
        Subject line.
    body_text, body_html : str, optional
        Plain-text and HTML versions of the message body.
    from_addr : str, optional
        Address placed in the ``From`` header and used as the envelope
        sender. Defaults to ``email_user``.
    email_user : str, optional
        Login name for the SMTP server. Defaults to ``from_addr``.
    email_passwd : str, optional
        Password for the SMTP login. When omitted, it is requested
        interactively with ``getpass.getpass()``.
    smtpserver : str, optional
        SMTP server, given as "host:port".

    Returns
    -------
    dict
        The value returned by ``smtplib.SMTP.sendmail`` (refused
        recipients; empty when every recipient was accepted).

    Raises
    ------
    ValueError
        If neither ``from_addr`` nor ``email_user`` is given.
    """
    # Defaults are resolved at call time: module-level names such as the
    # password do not exist yet when this function is defined.
    if from_addr is None:
        from_addr = email_user
    if email_user is None:
        email_user = from_addr
    if not email_user:
        raise ValueError("send_mail needs from_addr and/or email_user")
    if email_passwd is None:
        email_passwd = getpass.getpass()

    # Construct the message header
    message = MIMEMultipart('alternative')
    message['From'] = from_addr
    message['To'] = to_addr
    message['Subject'] = subject

    # Append the body text
    message.attach(MIMEText(body_text, 'plain'))
    message.attach(MIMEText(body_html, 'html'))

    # Connect to the SMTP server; the with-block closes the session even
    # when login or sending fails
    with smtplib.SMTP(smtpserver) as server:
        server.starttls()
        server.login(email_user, email_passwd)
        return server.sendmail(from_addr, to_addr, message.as_string())
65+
66+
67+
# Ask for the password without echoing it to the terminal
email_passwd = getpass.getpass()

# The sending address doubles as the SMTP login name
send_mail(to_addr,
          subject=subject,
          body_text=body_text,
          body_html=body_html,
          from_addr=email_user,
          email_user=email_user,
          email_passwd=email_passwd,
          smtpserver=smtpserver)

0 commit comments

Comments
 (0)