-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpandasTesting.py
57 lines (41 loc) · 1.27 KB
/
pandasTesting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import pandas
import numpy
# Load the churn CSV. The path is machine-specific (absolute Windows path).
data = pandas.read_csv(r"C:\Users\David\OneDrive\Documents\DevOps\ML\Churn_Modeling.csv")
print(data.shape)

# Drop the identifier columns, then summarise what is left.
data.drop(['RowNumber','CustomerId','Surname'], axis = 1, inplace = True)
summary = data.describe()

# Bind exploratory results to names so they are not computed and thrown away
# when the file runs as a script.
missing_per_column = data.isnull().sum()
exited = data.Exited == 1
exited_age_count = data.Age[exited].count()

# Per-country counts: `*t` is the number of rows for the country, `*l` is the
# number of those rows with Exited == 1.  Both conditions are combined into a
# single mask with `&` instead of chaining two boolean selections, which only
# worked because pandas silently re-aligned the second mask.
#Germany
in_germany = data.Geography == 'Germany'
gt = data.Geography[in_germany].count()
gl = data.Geography[in_germany & exited].count()
#Spain
in_spain = data.Geography == 'Spain'
st = data.Geography[in_spain].count()
sl = data.Geography[in_spain & exited].count()
#France
in_france = data.Geography == 'France'
ft = data.Geography[in_france].count()
fl = data.Geography[in_france & exited].count()

# Replace missing ages with the column mean.  Assign the result back to the
# frame: an in-place call on `data.Age` is a chained assignment that may act
# on a temporary copy and leave `data` unchanged.
data['Age'] = data['Age'].fillna(data['Age'].mean())
age_variance = data['Age'].var()
#################
#dataViz
# The alias must be `plt`: every plotting call below goes through that name.
import matplotlib.pyplot as plt

# Sample x on [10, 20) with a step of 0.1.  The original call passed
# `0, 1` (step 0, dtype 1), which is a typo for `0.1` and raises.
x = numpy.arange(10, 20, 0.1)
y = numpy.sin(x)*4+10
z = 2*x+0.5*y

# Line plot of both curves on one figure.
plt.figure(figsize = (12,5))
plt.plot(x,y,'r',label='x vs y')
plt.plot(x,z,'b',label='x vs z')
plt.title("This is my graph")
plt.xlabel("value of x")
plt.ylabel("value of y")
# Without a legend the `label=` arguments above are never rendered.
plt.legend()
plt.show()

# Scatter of the same x/y samples; displayed by the final show() call.
plt.figure(figsize=(12,5))
plt.scatter(x,y)

# Pie chart of row counts per country.  The counts are computed per label so
# that each wedge is guaranteed to match its label, whatever order unique()
# returns the countries in.
countries = data.Geography.unique()
t = [(data.Geography == country).sum() for country in countries]
# Rows with Exited == 1 per country, in the same order as `countries`.
l = [((data.Geography == country) & (data.Exited == 1)).sum() for country in countries]
plt.figure(figsize = (12,8))
plt.pie(t,labels=countries)
plt.show()