python数据分析pandas基础
from pandas import Series,DataFrame import pandas as pd
In [4]: obj = Series([1, -2, 3, -4]) obj
Out[4]: 0 1 1 -2 2 3 3 -4 dtype: int64
In [5]: obj2 = Series([1, -2, 3, -4], index=["a", "b", "c", "d"]) obj2
Out[5]: a 1 b -2 c 3 d -4 dtype: int64
In [6]: obj2.values
Out[6]: array([ 1, -2, 3, -4], dtype=int64)
In [7]: obj2.index
Out[7]: Index(["a", "b", "c", "d"], dtype="object")
In [8]: obj2["b"]
Out[8]: -2
In [10]: obj2["c"] = 23 obj2[["c", "d"]]
Out[10]: c 23 d -4 dtype: int64
In [11]: obj2
Out[11]: a 1 b -2 c 23 d -4 dtype: int64
In [12]: obj2[obj2 < 0 ]
Out[12]: b -2 d -4 dtype: int64
In [13]: obj2 * 2
Out[13]: a 2 b -4 c 46 d -8 dtype: int64
In [16]: import numpy as np
In [18]: np.abs(obj2)
Out[18]: a 1 b 2 c 23 d 4 dtype: int64
In [20]: data = { "张三":92, "李四":78, "王五":68, "小明":82 }
In [21]: obj3 = Series(data) obj3
Out[21]: 小明 82 张三 92 李四 78 王五 68 dtype: int64
In [22]: names = ["张三", "李四", "王五", "小明"] obj4 = Series(data, index=names) obj4
Out[22]: 张三 92 李四 78 王五 68 小明 82 dtype: int64
In [23]: obj4.name = "math" obj4.index.name = "students"
In [24]: obj4
Out[24]: students 张三 92 李四 78 王五 68 小明 82 Name: math, dtype: int64
DataFrame
In [1]: import numpy as np from pandas import Series,DataFrame import pandas as pd
In [2]: data = { "name":["张三", "李四", "王五", "小明"], "sex":["female", "female", "male", "male"], "year":[2001, 2001, 2003, 2002], "city":["北京", "上海", "广州", "北京"] } df = DataFrame(data) df
Out[2]:
city
name
sex
year
0
北京
张三
female
2001
1
上海
李四
female
2001
2
广州
王五
male
2003
3
北京
小明
male
2002
In [3]: df = DataFrame(data, columns=["name", "sex", "year", "city"]) df
Out[3]:
name
sex
year
city
0
张三
female
2001
北京
1
李四
female
2001
上海
2
王五
male
2003
广州
3
小明
male
2002
北京
In [4]: df = DataFrame(data, columns=["name", "sex", "year", "city"],index=["a", "b", "c", "d"]) df
Out[4]:
name
sex
year
city
a
张三
female
2001
北京
b
李四
female
2001
上海
c
王五
male
2003
广州
d
小明
male
2002
北京
In [5]: df.index
Out[5]: Index(["a", "b", "c", "d"], dtype="object")
In [6]: df.columns
Out[6]: Index(["name", "sex", "year", "city"], dtype="object")
In [7]: data2 = { "sex":{"张三":"female","李四":"female","王五":"male"}, "city":{"张三":"北京","李四":"上海","王五":"广州"} } df2 = DataFrame(data2) df2
Out[7]:
city
sex
张三
北京
female
李四
上海
female
王五
广州
male
In [8]: df.index.name = "id" df.columns.name = "std_info"
In [9]: df
Out[9]:
std_info
name
sex
year
city
id
a
张三
female
2001
北京
b
李四
female
2001
上海
c
王五
male
2003
广州
d
小明
male
2002
北京
In [10]: obj = Series([1, -2, 3, -4], index=["a", "b", "c", "d"]) obj
Out[10]: a 1 b -2 c 3 d -4 dtype: int64
In [11]: obj.index
Out[11]: Index(["a", "b", "c", "d"], dtype="object")
In [12]: df.index
Out[12]: Index(["a", "b", "c", "d"], dtype="object", name="id")
In [13]: df.columns
Out[13]: Index(["name", "sex", "year", "city"], dtype="object", name="std_info")
In [14]: index = obj.index index[1] = "f" --------------------------------------------------------------------------- TypeError Traceback (most recent call last) in () 1 index = obj.index ----> 2 index[1] = "f" F:Anacondaenvsdata-analysislibsite-packagespandascoreindexesbase.py in __setitem__(self, key, value) 1668 1669 def __setitem__(self, key, value): -> 1670 raise TypeError("Index does not support mutable operations") 1671 1672 def __getitem__(self, key): TypeError: Index does not support mutable operations
In [15]: df
Out[15]:
std_info
name
sex
year
city
id
a
张三
female
2001
北京
b
李四
female
2001
上海
c
王五
male
2003
广州
d
小明
male
2002
北京
In [16]: "sex" in df.columns
Out[16]: True
In [17]: "f" in df.index
Out[17]: False
In [20]: obj = Series([1, -2, 3, -4], index=["b", "a", "c", "d"]) obj
Out[20]: b 1 a -2 c 3 d -4 dtype: int64
In [21]: obj2 = obj.reindex(["a", "b", "c", "d", "e"]) obj2
Out[21]: a -2.0 b 1.0 c 3.0 d -4.0 e NaN dtype: float64
In [27]: obj = Series([1, -2, 3, -4], index=[0,2,3,5]) obj
Out[27]: 0 1 2 -2 3 3 5 -4 dtype: int64
In [28]: obj2 = obj.reindex(range(6),method="ffill") obj2
Out[28]: 0 1 1 1 2 -2 3 3 4 3 5 -4 dtype: int64
In [29]: df = DataFrame(np.arange(9).reshape(3,3),index=["a","c","d"],columns=["name","id","sex"]) df
Out[29]:
name
id
sex
a
0
1
2
c
3
4
5
d
6
7
8
In [30]: df2 = df.reindex(["a", "b", "c", "d"]) df2
Out[30]:
name
id
sex
a
0.0
1.0
2.0
b
NaN
NaN
NaN
c
3.0
4.0
5.0
d
6.0
7.0
8.0
In [31]: df3 = df.reindex(columns=["name", "year", "id"], fill_value=0) df3
Out[31]:
name
year
id
a
0
0
1
c
3
0
4
d
6
0
7
In [49]: data = { "name":["张三", "李四", "王五", "小明"], "grade":[68, 78, 63, 92] } df = DataFrame(data) df
Out[49]:
grade
name
0
68
张三
1
78
李四
2
63
王五
3
92
小明
In [50]: df2 = df.sort_values(by="grade") df2
Out[50]:
grade
name
2
63
王五
0
68
张三
1
78
李四
3
92
小明
In [51]: df3 = df2.reset_index() df3
Out[51]:
index
grade
name
0
2
63
王五
1
0
68
张三
2
1
78
李四
3
3
92
小明
In [52]: df4 = df2.reset_index(drop=True) df4
Out[52]:
grade
name
0
63
王五
1
68
张三
2
78
李四
3
92
小明
In [45]: data = { "name":["张三", "李四", "王五", "小明"], "sex":["female", "female", "male", "male"], "year":[2001, 2001, 2003, 2002], "city":["北京", "上海", "广州", "北京"] } df = DataFrame(data) df
Out[45]:
city
name
sex
year
0
北京
张三
female
2001
1
上海
李四
female
2001
2
广州
王五
male
2003
3
北京
小明
male
2002
In [47]: df2 = df.set_index("name") df2
Out[47]:
city
sex
year
name
张三
北京
female
2001
李四
上海
female
2001
王五
广州
male
2003
小明
北京
male
2002
In [48]: df3 = df2.reset_index() df3
Out[48]:
name
city
sex
year
0
张三
北京
female
2001
1
李四
上海
female
2001
2
王五
广州
male
2003
3
小明
北京
male
2002
索引和选取
In [1]: import numpy as np from pandas import Series,DataFrame import pandas as pd
In [3]: obj = Series([1, -2, 3, -4], index=["a", "b", "c", "d"]) obj
Out[3]: a 1 b -2 c 3 d -4 dtype: int64
# In [4]
# Fetch the second element by position. `obj` is labelled "a".."d", so a bare
# integer key (`obj[1]`) relies on the deprecated positional fallback of
# Series.__getitem__; `.iloc` states the positional intent explicitly.
obj.iloc[1]
Out[4]: -2
In [5]: obj["b"]
Out[5]: -2
In [6]: obj[["a","c"]]
Out[6]: a 1 c 3 dtype: int64
In [7]: obj[0:2]
Out[7]: a 1 b -2 dtype: int64
In [8]: obj["a":"c"]
Out[8]: a 1 b -2 c 3 dtype: int64
In [53]: data = { "name":["张三", "李四", "王五", "小明"], "sex":["female", "female", "male", "male"], "year":[2001, 2001, 2003, 2002], "city":["北京", "上海", "广州", "北京"] } df = DataFrame(data) df
Out[53]:
city
name
sex
year
0
北京
张三
female
2001
1
上海
李四
female
2001
2
广州
王五
male
2003
3
北京
小明
male
2002
In [17]: df["city"]
Out[17]: 0 北京 1 上海 2 广州 3 北京 Name: city, dtype: object
In [18]: df.name
Out[18]: 0 张三 1 李四 2 王五 3 小明 Name: name, dtype: object
In [20]: df[["city","sex"]]
Out[20]:
city
sex
0
北京
female
1
上海
female
2
广州
male
3
北京
male
In [26]: df2 = df.set_index("name") df2
Out[26]:
city
sex
year
name
张三
北京
female
2001
李四
上海
female
2001
王五
广州
male
2003
小明
北京
male
2002
In [27]: df2[0:2]
Out[27]:
city
sex
year
name
张三
北京
female
2001
李四
上海
female
2001
In [28]: df2["李四":"王五"]
Out[28]:
city
sex
year
name
李四
上海
female
2001
王五
广州
male
2003
In [29]: df2
Out[29]:
city
sex
year
name
张三
北京
female
2001
李四
上海
female
2001
王五
广州
male
2003
小明
北京
male
2002
In [31]: df2.loc["张三"]
Out[31]: city 北京 sex female year 2001 Name: 张三, dtype: object
In [33]: df2.loc[["张三","王五"]]
Out[33]:
city
sex
year
name
张三
北京
female
2001
王五
广州
male
2003
In [35]: df2.iloc[1]
Out[35]: city 上海 sex female year 2001 Name: 李四, dtype: object
In [36]: df2.iloc[[1,3]]
Out[36]:
city
sex
year
name
李四
上海
female
2001
小明
北京
male
2002
# In [41]
# Rows chosen by label, columns chosen by position (the first two columns).
# The mixed label/position indexer `.ix` was removed in pandas 1.0, so the
# positional column slice is converted to labels via `df2.columns[0:2]` and
# the whole selection goes through `.loc`.
df2.loc[["张三", "王五"], df2.columns[0:2]]
Out[41]:
city
sex
name
张三
北京
female
王五
广州
male
In [75]: pd.set_option("mode.chained_assignment",None)
# In [43]
# Select columns: keep every row, pick the "sex" and "year" columns by label.
# `.ix` was removed in pandas 1.0; label-based selection is `.loc`.
df2.loc[:, ["sex", "year"]]
Out[43]:
sex
year
name
张三
female
2001
李四
female
2001
王五
male
2003
小明
male
2002
# In [44]
# Select rows: take the rows at positions 1 and 3, keeping every column.
# `df2` is indexed by name, so integer keys are positions; `.ix` was removed
# in pandas 1.0 and position-based selection is `.iloc`.
df2.iloc[[1, 3], :]
Out[44]:
city
sex
year
name
李四
上海
female
2001
小明
北京
male
2002
In [45]: df2["sex"] == "female"
Out[45]: name 张三 True 李四 True 王五 False 小明 False Name: sex, dtype: bool
In [46]: df2[df2["sex"] == "female"]
Out[46]:
city
sex
year
name
张三
北京
female
2001
李四
上海
female
2001
In [48]: df2[(df2["sex"] == "female") & (df2["city"] == "北京")]
Out[48]:
city
sex
year
name
张三
北京
female
2001
行和列的操作
In [54]: df
Out[54]:
city
name
sex
year
0
北京
张三
female
2001
1
上海
李四
female
2001
2
广州
王五
male
2003
3
北京
小明
male
2002
In [57]: new_data = { "city":"武汉", "name":"小李", "sex":"male", "year":2002 }
# In [59]
# Append one record (the `new_data` dict) as a new row.
# `DataFrame.append` was removed in pandas 2.0; the replacement is to wrap the
# record in a one-row frame and concatenate. `ignore_index=True` discards the
# existing row labels and renumbers the result 0..n-1.
df = pd.concat([df, pd.DataFrame([new_data])], ignore_index=True)
df
Out[59]:
city
name
sex
year
0
北京
张三
female
2001
1
上海
李四
female
2001
2
广州
王五
male
2003
3
北京
小明
male
2002
4
武汉
小李
male
2002
In [60]: df["class"] = 2018 df
Out[60]:
city
name
sex
year
class
0
北京
张三
female
2001
2018
1
上海
李四
female
2001
2018
2
广州
王五
male
2003
2018
3
北京
小明
male
2002
2018
4
武汉
小李
male
2002
2018
In [61]: df["math"] = [92,78,58,69,82] df
Out[61]:
city
name
sex
year
class
math
0
北京
张三
female
2001
2018
92
1
上海
李四
female
2001
2018
78
2
广州
王五
male
2003
2018
58
3
北京
小明
male
2002
2018
69
4
武汉
小李
male
2002
2018
82
In [63]: new_df = df.drop(2) #删除行 new_df
Out[63]:
city
name
sex
year
class
math
0
北京
张三
female
2001
2018
92
1
上海
李四
female
2001
2018
78
3
北京
小明
male
2002
2018
69
4
武汉
小李
male
2002
2018
82
In [64]: new_df = new_df.drop("class",axis=1) #删除列 new_df
Out[64]:
city
name
sex
year
math
0
北京
张三
female
2001
92
1
上海
李四
female
2001
78
3
北京
小明
male
2002
69
4
武汉
小李
male
2002
82
In [65]: new_df.rename(index={3:2,4:3},columns={"math":"Math"},inplace=True) #inplace可在原数据上修改 new_df
Out[65]:
city
name
sex
year
Math
0
北京
张三
female
2001
92
1
上海
李四
female
2001
78
2
北京
小明
male
2002
69
3
武汉
小李
male
2002
82
In [67]: obj1 = Series([3.2,5.3,-4.4,-3.7],index=["a","c","g","f"]) obj1
Out[67]: a 3.2 c 5.3 g -4.4 f -3.7 dtype: float64
In [68]: obj2 = Series([5.0,-2,4.4,3.4],index=["a","b","c","d"]) obj2
Out[68]: a 5.0 b -2.0 c 4.4 d 3.4 dtype: float64
In [69]: obj1 + obj2
Out[69]: a 8.2 b NaN c 9.7 d NaN f NaN g NaN dtype: float64
In [70]: df1 = DataFrame(np.arange(9).reshape(3,3),columns=["a","b","c"], index=["apple","tea","banana"]) df1
Out[70]:
a
b
c
apple
0
1
2
tea
3
4
5
banana
6
7
8
In [71]: df2 = DataFrame(np.arange(9).reshape(3,3),columns=["a","b","d"], index=["apple","tea","coco"]) df2
Out[71]:
a
b
d
apple
0
1
2
tea
3
4
5
coco
6
7
8
In [72]: df1 + df2
Out[72]:
a
b
c
d
apple
0.0
2.0
NaN
NaN
banana
NaN
NaN
NaN
NaN
coco
NaN
NaN
NaN
NaN
tea
6.0
8.0
NaN
NaN
In [73]: df1
Out[73]:
a
b
c
apple
0
1
2
tea
3
4
5
banana
6
7
8
# In [76]
# Pull the "apple" row out of df1 as a Series (its index is df1's column labels).
# `.ix` was removed in pandas 1.0; a row label lookup is `.loc`.
s = df1.loc["apple"]
s
Out[76]: a 0 b 1 c 2 Name: apple, dtype: int32
In [77]: df1 - s
Out[77]:
a
b
c
apple
0
0
0
tea
3
3
3
banana
6
6
6
In [78]: data = { "fruit":["apple", "orange", "grape", "banana"], "price":["25元", "42元", "35元", "14元"] } df1 = DataFrame(data) df1
Out[78]:
fruit
price
0
apple
25元
1
orange
42元
2
grape
35元
3
banana
14元
# In [79]
def f(x):
    """Return the part of the string `x` that precedes the first "元"."""
    head, _sep, _tail = x.partition("元")
    return head


# Apply f to every entry of the "price" column and store the result back.
stripped_prices = df1["price"].map(f)
df1["price"] = stripped_prices
df1
Out[79]:
fruit
price
0
apple
25
1
orange
42
2
grape
35
3
banana
14
In [80]: df2 = DataFrame(np.random.randn(3,3),columns=["a","b","c"],index=["app","win","mac"]) df2
Out[80]:
a
b
c
app
1.507962
-2.140018
0.053571
win
0.729671
0.207060
0.397773
mac
-0.191497
-0.765726
-0.266327
In [81]: f = lambda x:x.max()-x.min() df2.apply(f)
Out[81]: a 1.699460 b 2.347079 c 0.664100 dtype: float64
In [82]: df2
Out[82]:
a
b
c
app
1.507962
-2.140018
0.053571
win
0.729671
0.207060
0.397773
mac
-0.191497
-0.765726
-0.266327
In [84]: df2.applymap(lambda x:"%.2f"%x)
Out[84]:
a
b
c
app
1.51
-2.14
0.05
win
0.73
0.21
0.40
mac
-0.19
-0.77
-0.27
In [86]: obj1 = Series([-2,3,2,1],index=["b","a","d","c"]) obj1
Out[86]: b -2 a 3 d 2 c 1 dtype: int64
In [87]: obj1.sort_index() #升序
Out[87]: a 3 b -2 c 1 d 2 dtype: int64
In [88]: obj1.sort_index(ascending=False) #降序
Out[88]: d 2 c 1 b -2 a 3 dtype: int64
In [91]: obj1.sort_values()
Out[91]: b -2 c 1 d 2 a 3 dtype: int64
In [92]: df2
Out[92]:
a
b
c
app
1.507962
-2.140018
0.053571
win
0.729671
0.207060
0.397773
mac
-0.191497
-0.765726
-0.266327
In [93]: df2.sort_values(by="b")
Out[93]:
a
b
c
app
1.507962
-2.140018
0.053571
mac
-0.191497
-0.765726
-0.266327
win
0.729671
0.207060
0.397773
In [2]: df = DataFrame(np.random.randn(9).reshape(3,3),columns=["a","b","c"]) df
Out[2]:
a
b
c
0
0.660215
-1.137716
-0.302954
1
1.496589
-0.768645
-2.091506
2
0.170316
-2.682284
-0.041099
In [3]: df.sum()
Out[3]: a 2.327120 b -4.588645 c -2.435558 dtype: float64
In [4]: df.sum(axis=1)
Out[4]: 0 -0.780455 1 -1.363562 2 -2.553067 dtype: float64
In [5]: data = { "name":["张三", "李四", "王五", "小明"], "sex":["female", "female", "male", "male"], "math":[78, 79, 83, 92], "city":["北京", "上海", "广州", "北京"] } df = DataFrame(data) df
Out[5]:
city
math
name
sex
0
北京
78
张三
female
1
上海
79
李四
female
2
广州
83
王五
male
3
北京
92
小明
male
In [6]: df.describe()
Out[6]:
math
count
4.000000
mean
83.000000
std
6.377042
min
78.000000
25%
78.750000
50%
81.000000
75%
85.250000
max
92.000000
In [7]: obj = Series(["a","b","a","c","b"]) obj
Out[7]: 0 a 1 b 2 a 3 c 4 b dtype: object
In [8]: obj.unique()
Out[8]: array(["a", "b", "c"], dtype=object)
In [9]: obj.value_counts()
Out[9]: a 2 b 2 c 1 dtype: int64
In [11]: obj = Series(np.random.randn(9), index=[["one","one","one","two","two","two","three","three","three"], ["a","b","c","a","b","c","a","b","c"]]) obj
Out[11]: one a 0.697195 b -0.887408 c 0.451851 two a 0.390779 b -2.058070 c 0.760594 three a -0.305534 b -0.720491 c -0.259225 dtype: float64
In [12]: obj.index
Out[12]: MultiIndex(levels=[["one", "three", "two"], ["a", "b", "c"]], labels=[[0, 0, 0, 2, 2, 2, 1, 1, 1], [0, 1, 2, 0, 1, 2, 0, 1, 2]])
In [13]: obj["two"]
Out[13]: a 0.390779 b -2.058070 c 0.760594 dtype: float64
In [15]: obj[:,"a"] #内层选取
Out[15]: one 0.697195 two 0.390779 three -0.305534 dtype: float64
In [16]: df = DataFrame(np.arange(16).reshape(4,4), index=[["one","one","two","two"],["a","b","a","b"]], columns=[["apple","apple","orange","orange"],["red","green","red","green"]]) df
Out[16]:
apple
orange
red
green
red
green
one
a
0
1
2
3
b
4
5
6
7
two
a
8
9
10
11
b
12
13
14
15
In [17]: df["apple"]
Out[17]:
red
green
one
a
0
1
b
4
5
two
a
8
9
b
12
13
In [18]: df.swaplevel(0,1)
Out[18]:
apple
orange
red
green
red
green
a
one
0
1
2
3
b
one
4
5
6
7
a
two
8
9
10
11
b
two
12
13
14
15
# In [19]
# Sum the rows within each value of the outer row-index level.
# The `level=` keyword of DataFrame.sum was removed in pandas 2.0; grouping on
# the index level and summing is the supported spelling.
df.groupby(level=0).sum()
Out[19]:
apple
orange
red
green
red
green
one
4
6
8
10
two
20
22
24
26
# In [20]
# Sum the columns within each value of the inner column-index level.
# `DataFrame.sum(level=..., axis=1)` was removed in pandas 2.0, and
# `groupby(axis=1)` is itself deprecated, so the frame is transposed, grouped
# on what is now a row level, summed, and transposed back.
df.T.groupby(level=1).sum().T
Out[20]:
green
red
one
a
4
2
b
12
10
two
a
20
18
b
28
26
pandas数据可视化
In [6]: import numpy as np from pandas import Series,DataFrame import pandas as pd import matplotlib as mpl import matplotlib.pyplot as plt #导入matplotlib库 %matplotlib inline #魔法函数
In [7]: s = Series(np.random.normal(size=10)) s
Out[7]: 0 -0.468142 1 -1.408927 2 -0.182548 3 -0.043023 4 0.121437 5 0.539194 6 0.011423 7 -0.938207 8 1.589460 9 0.460753 dtype: float64
In [8]: s.plot()
Out[8]:
In [10]: df = DataFrame({"normal": np.random.normal(size=100), "gamma": np.random.gamma(1, size=100), "poisson": np.random.poisson(size=100)}) df.cumsum()
Out[10]:
gamma
normal
poisson
0
1.804045
1.788000
0.0
1
1.835715
0.089426
0.0
2
3.850210
0.870177
0.0
3
6.082898
0.902761
0.0
4
8.837446
0.959945
1.0
5
9.307126
1.658268
3.0
6
9.518029
3.118419
6.0
7
9.758011
3.861418
6.0
8
10.481856
3.405625
6.0
9
12.405202
4.892910
7.0
10
13.086167
4.776206
7.0
11
13.457807
3.217277
8.0
12
13.574663
1.821368
9.0
13
13.695523
2.829581
10.0
14
13.819044
3.015490
11.0
15
15.801080
2.629254
13.0
16
17.043867
2.052196
14.0
17
17.089774
3.687834
15.0
18
17.499338
2.635491
16.0
19
18.257891
2.636466
18.0
20
19.101743
2.272298
19.0
21
24.158020
-0.113947
20.0
22
25.112218
-0.594266
23.0
23
25.986628
-1.326405
23.0
24
28.383365
-1.349211
23.0
25
28.753694
-1.527589
23.0
26
28.908734
-1.312111
25.0
27
30.607696
0.228251
26.0
28
31.081009
1.067429
27.0
29
31.330353
1.098605
28.0
...
...
...
...
70
72.302929
14.123995
66.0
71
72.794689
14.860449
67.0
72
73.629651
14.828726
67.0
73
74.610837
14.168664
68.0
74
78.773897
13.334949
70.0
75
80.916582
13.722037
71.0
76
81.994526
14.717187
72.0
77
83.927355
13.784763
72.0
78
86.004903
13.343261
75.0
79
86.609627
12.151334
75.0
80
87.199249
13.345584
77.0
81
87.213180
12.311815
77.0
82
87.553190
13.864232
77.0
83
89.157662
14.439016
78.0
84
89.213456
14.401503
80.0
85
89.471336
15.838362
81.0
86
89.552332
14.406933
81.0
87
91.565291
14.520602
82.0
88
94.179919
12.017739
82.0
89
95.075841
13.279973
83.0
90
95.192719
13.089789
83.0
91
96.148316
12.268122
84.0
92
97.146898
11.830559
84.0
93
97.456375
13.035484
86.0
94
99.877122
11.966609
87.0
95
103.015620
12.313341
88.0
96
103.116648
12.715195
88.0
97
103.490265
12.168645
89.0
98
103.925893
11.502630
89.0
99
105.008619
11.193637
89.0
100 rows × 3 columns
In [11]: df.cumsum().plot()
Out[11]:
In [12]: data = { "name":["张三", "李四", "王五", "小明", "Peter"], "sex":["female", "female", "male", "male","male"], "year":[2001, 2001, 2003, 2002, 2002], "city":["北京", "上海", "广州", "北京", "北京"] } df = DataFrame(data) df
Out[12]:
city
name
sex
year
0
北京
张三
female
2001
1
上海
李四
female
2001
2
广州
王五
male
2003
3
北京
小明
male
2002
4
北京
Peter
male
2002
In [14]: df["sex"].value_counts()
Out[14]: male 3 female 2 Name: sex, dtype: int64
In [16]: df["sex"].value_counts().plot(kind="bar")
Out[16]:
In [18]: df2 = DataFrame(np.random.randint(0,100,size=(3,3)), index=("one","two","three"), columns = ["A","B","C"]) df2
Out[18]:
A
B
C
one
29
5
88
two
35
42
43
three
87
85
76
In [19]: df2.plot(kind="barh")
Out[19]:
In [20]: df2.plot(kind="barh",stacked=True,alpha=0.5)
Out[20]:
In [28]: s = Series(np.random.normal(size=100)) s.hist(bins=20,grid=False)
Out[28]:
In [29]: s.plot(kind="kde")
Out[29]:
In [31]: df3 = DataFrame(np.arange(10),columns=["X"]) df3["Y"] = 2 * df3["X"] + 5 df3
Out[31]:
X
Y
0
0
5
1
1
7
2
2
9
3
3
11
4
4
13
5
5
15
6
6
17
7
7
19
8
8
21
9
9
23
In [34]: df3.plot(kind="scatter",x="X",y="Y")
Out[34]:
In [51]: import numpy as np from pandas import Series,DataFrame import pandas as pd import seaborn as sns #导入seaborn库
In [52]: tips=sns.load_dataset("tips") tips.head()
Out[52]:
total_bill
tip
sex
smoker
day
time
size
0
16.99
1.01
Female
No
Sun
Dinner
2
1
10.34
1.66
Male
No
Sun
Dinner
3
2
21.01
3.50
Male
No
Sun
Dinner
3
3
23.68
3.31
Male
No
Sun
Dinner
2
4
24.59
3.61
Female
No
Sun
Dinner
4
In [54]: tips.shape
Out[54]: (244, 7)
In [55]: tips.describe()
Out[55]:
total_bill
tip
size
count
244.000000
244.000000
244.000000
mean
19.785943
2.998279
2.569672
std
8.902412
1.383638
0.951100
min
3.070000
1.000000
1.000000
25%
13.347500
2.000000
2.000000
50%
17.795000
2.900000
2.000000
75%
24.127500
3.562500
3.000000
max
50.810000
10.000000
6.000000
In [56]: tips.info() RangeIndex: 244 entries, 0 to 243 Data columns (total 7 columns): total_bill 244 non-null float64 tip 244 non-null float64 sex 244 non-null category smoker 244 non-null category day 244 non-null category time 244 non-null category size 244 non-null int64 dtypes: category(4), float64(2), int64(1) memory usage: 7.2 KB
In [57]: tips.plot(kind="scatter",x="total_bill",y="tip")
Out[57]:
In [62]: male_tip = tips[tips["sex"] == "Male"]["tip"].mean() male_tip
Out[62]: 3.0896178343949052
In [63]: female_tip = tips[tips["sex"] == "Female"]["tip"].mean() female_tip
Out[63]: 2.833448275862069
In [66]: s = Series([male_tip,female_tip],index=["male","female"]) s
Out[66]: male 3.089618 female 2.833448 dtype: float64
In [67]: s.plot(kind="bar")
Out[67]:
In [68]: tips["day"].unique()
Out[68]: [Sun, Sat, Thur, Fri] Categories (4, object): [Sun, Sat, Thur, Fri]
In [71]: sun_tip = tips[tips["day"] == "Sun"]["tip"].mean() sat_tip = tips[tips["day"] == "Sat"]["tip"].mean() thur_tip = tips[tips["day"] == "Thur"]["tip"].mean() fri_tip = tips[tips["day"] == "Fri"]["tip"].mean()
In [72]: s = Series([thur_tip,fri_tip,sat_tip,sun_tip],index=["Thur","Fri","Sat","Sun"]) s
Out[72]: Thur 2.771452 Fri 2.734737 Sat 2.993103 Sun 3.255132 dtype: float64
In [73]: s.plot(kind="bar")
Out[73]:
In [74]: tips["percent_tip"] = tips["tip"]/(tips["total_bill"]+tips["tip"]) tips.head(10)
Out[74]:
total_bill
tip
sex
smoker
day
time
size
percent_tip
0
16.99
1.01
Female
No
Sun
Dinner
2
0.056111
1
10.34
1.66
Male
No
Sun
Dinner
3
0.138333
2
21.01
3.50
Male
No
Sun
Dinner
3
0.142799
3
23.68
3.31
Male
No
Sun
Dinner
2
0.122638
4
24.59
3.61
Female
No
Sun
Dinner
4
0.128014
5
25.29
4.71
Male
No
Sun
Dinner
4
0.157000
6
8.77
2.00
Male
No
Sun
Dinner
2
0.185701
7
26.88
3.12
Male
No
Sun
Dinner
4
0.104000
8
15.04
1.96
Male
No
Sun
Dinner
2
0.115294
9
14.78
3.23
Male
No
Sun
Dinner
2
0.179345
In [76]: tips["percent_tip"].hist(bins=50)
Out[76]:
生鲜巨变每日优鲜叮咚买菜盒马分道扬镳?此前,在疫情影响下,人们出门购物频次减少,传统买菜模式也受到了一定的冲击。在此背景下,既能够解决人们买菜难题又能够减少人与人接触的生鲜电商,赢得了众多消费者的青睐。而随着大量用户涌
解锁全领域,处理你的麻烦事洛森M2,M3组合工具工具这种东西,喜欢的人会有收藏情结,可以在手中把玩不喜欢的就是一堆冷金属而已,用之即来,挥之即去。好的工具不仅制作精致,每处连接都是恰到好处,堪称艺术品,当然大部分男性都是有那么一
昆明旅行社推出多条旅游路线泰国6日游价格来源昆明日报掌上春城掌上春城讯目前曼谷直飞6日游价格为3680元人。昆明去哪儿旅行社门店工作人员说。1月31日,记者走访多家昆明本土旅行社了解到,近期有不少顾客咨询目前可以出行的国
三亚等热门旅游目的地机票价格回落,错峰出游正当时现代快报讯(记者刘伟娟文摄)2月1日,现代快报记者从多家航空公司及旅行平台获悉,春节假期过后,三亚昆明哈尔滨等热门旅游目的地机票价格回落,吸引了一批错峰游客流。由于寒假余额尚足,学
新春伊始,全国文旅市场迎来开门红新春伊始,全国文化和旅游市场迎来开门红,节后文化旅游市场继续保持热度。国家电影局统计数据显示,截至1月31日18时,中国电影2023年1月的总票房已超过100亿元人民币,创历年来1
中国裁判组将执法国际足联世俱杯揭幕战新华社日内瓦1月31日电国际足联31日在官网宣布,中国裁判员马宁周飞张铖傅明将执法2022摩洛哥世俱杯揭幕战。中国裁判员马宁。新华社记者孟鼎博摄揭幕战将在2月1日举行,对阵双方是埃
抖音超市,正式上线了零售板块,抖音再下一城!最近抖音悄悄在APP上线了抖音超市,用户只需要在搜索栏输入抖音超市即可进入购物页面。图源抖音APP以后在抖音,除了观看各大直播间,也能够直接下单购买各类日常
终于还是入了68键的坑,杜伽FUSION三模机械键盘上手体验熟悉香蕉哥的朋友应该都知道香蕉哥是一个机械键盘爱好者,之前也分享过数十篇的机械键盘使用经验。但是之前玩的比较多的,还是104按键全尺寸的机械键盘,今天要和大家分享的则是迷你小巧易收
胡桃or夜兰胡桃夜兰马上就要复刻了,大家准备是抽胡桃呢,还是夜兰呢?我的胡桃一命了,而且还配了一把专武,胡桃伤害打蒸发伤害真的爆表,我玩原神没冲过钱,肝!!!主要是没钱我想静静我想静静我想静静
女足世界杯揭幕战预计10万人观战,中国女足有望获得更大突破近期,国际足联秘书长萨穆拉表示,2023年女足世界杯开幕式将会吸引10万人观战。在举世瞩目的男足世界杯落下帷幕之后,女足世界杯同样值得足球爱好者期待。相对于男足来说,女足为中国球迷
诺亦腾携VTS虚拟直播套装构筑新武林ampampamp陈鸿宇乐世界科技与诗意相拥虚实与民谣邂逅1月13日,在轴厂(OXYZ3)出品与原创的虚拟音乐江湖新武林中,轴为独乐陈鸿宇打造的虚拟演出众纪元震撼上线,为观众带来一场虚实科技与诗意民谣的精彩碰撞