python数据分析pandas基础
from pandas import Series, DataFrame
import pandas as pd
In [4]: obj = Series([1, 2, 3, 4])
        obj
Out[4]:
0    1
1    2
2    3
3    4
dtype: int64
In [5]: obj2 = Series([1, -2, 3, -4], index=['a', 'b', 'c', 'd'])
        obj2
Out[5]:
a    1
b   -2
c    3
d   -4
dtype: int64
In [6]: obj2.values
Out[6]: array([ 1, -2,  3, -4], dtype=int64)
In [7]: obj2.index
Out[7]: Index(['a', 'b', 'c', 'd'], dtype='object')
In [8]: obj2['b']
Out[8]: -2
In [10]: obj2['c'] = 23
         obj2[['c', 'd']]
Out[10]:
c    23
d    -4
dtype: int64
In [11]: obj2
Out[11]:
a     1
b    -2
c    23
d    -4
dtype: int64
In [12]: obj2[obj2 < 0]
Out[12]:
b   -2
d   -4
dtype: int64
In [13]: obj2 * 2
Out[13]:
a     2
b    -4
c    46
d    -8
dtype: int64
In [16]: import numpy as np
In [18]: np.abs(obj2)
Out[18]:
a     1
b     2
c    23
d     4
dtype: int64
In [20]: data = {'张三': 92, '李四': 78, '王五': 68, '小明': 82}
In [21]: obj3 = Series(data)
         obj3
Out[21]:
小明    82
张三    92
李四    78
王五    68
dtype: int64
In [22]: names = ['张三', '李四', '王五', '小明']
         obj4 = Series(data, index=names)
         obj4
Out[22]:
张三    92
李四    78
王五    68
小明    82
dtype: int64
In [23]: obj4.name = 'math'
         obj4.index.name = 'students'
In [24]: obj4
Out[24]:
students
张三    92
李四    78
王五    68
小明    82
Name: math, dtype: int64

dataframe
In [1]: import numpy as np
        from pandas import Series, DataFrame
        import pandas as pd
In [2]: data = {'name': ['张三', '李四', '王五', '小明'],
                'sex': ['female', 'female', 'male', 'male'],
                'year': [2001, 2001, 2003, 2002],
                'city': ['北京', '上海', '广州', '北京']}
        df = DataFrame(data)
        df
Out[2]:
   city  name  sex     year
0  北京  张三  female  2001
1  上海  李四  female  2001
2  广州  王五  male    2003
3  北京  小明  male    2002
In [3]: df = DataFrame(data, columns=['name', 'sex', 'year', 'city'])
        df
Out〔3〕:
name
sex
year
city
0hr张三
female
2001hr北京
1hr李四
female
2001hr上海
2hr王五
male
2003hr广州
3hr小明
male
2002hr北京
In [4]: df = DataFrame(data, columns=['name', 'sex', 'year', 'city'], index=['a', 'b', 'c', 'd'])
        df
Out〔4〕:
name
sex
year
city
a
张三
female
2001hr北京
b
李四
female
2001hr上海
c
王五
male
2003hr广州
d
小明
male
2002hr北京
In [5]: df.index
Out[5]: Index(['a', 'b', 'c', 'd'], dtype='object')
In [6]: df.columns
Out[6]: Index(['name', 'sex', 'year', 'city'], dtype='object')
In [7]: data2 = {'sex': {'张三': 'female', '李四': 'female', '王五': 'male'},
                 'city': {'张三': '北京', '李四': '上海', '王五': '广州'}}
        df2 = DataFrame(data2)
        df2
Out〔7〕:
city
sex
张三
北京
female
李四
上海
female
王五
广州
male
In [8]: df.index.name = 'id'
        df.columns.name = 'stdinfo'
In〔9〕:df
Out〔9〕:
stdinfo
name
sex
year
city
id
a
张三
female
2001hr北京
b
李四
female
2001hr上海
c
王五
male
2003hr广州
d
小明
male
2002hr北京
In [10]: obj = Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
         obj
Out[10]:
a    1
b    2
c    3
d    4
dtype: int64
In [11]: obj.index
Out[11]: Index(['a', 'b', 'c', 'd'], dtype='object')
In [12]: df.index
Out[12]: Index(['a', 'b', 'c', 'd'], dtype='object', name='id')
In [13]: df.columns
Out[13]: Index(['name', 'sex', 'year', 'city'], dtype='object', name='stdinfo')
In [14]: index = obj.index
         index[1] = 'f'
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-14-4f995da5e969> in <module>()
      1 index = obj.index
----> 2 index[1] = 'f'

F:\Anaconda\envs\dataanalysis\lib\site-packages\pandas\core\indexes\base.py in __setitem__(self, key, value)
   1668
   1669     def __setitem__(self, key, value):
-> 1670         raise TypeError("Index does not support mutable operations")
   1671
   1672     def __getitem__(self, key):

TypeError: Index does not support mutable operations
In〔15〕:df
Out〔15〕:
stdinfo
name
sex
year
city
id
a
张三
female
2001hr北京
b
李四
female
2001hr上海
c
王五
male
2003hr广州
d
小明
male
2002hr北京
In [16]: 'sex' in df.columns
Out[16]: True
In [17]: 'f' in df.index
Out[17]: False
In [20]: obj = Series([1, 2, 3, 4], index=['b', 'a', 'c', 'd'])
         obj
Out[20]:
b    1
a    2
c    3
d    4
dtype: int64
In [21]: obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
         obj2
Out[21]:
a    2.0
b    1.0
c    3.0
d    4.0
e    NaN
dtype: float64
In [27]: obj = Series([1, 2, 3, 4], index=[0, 2, 3, 5])
         obj
Out[27]:
0    1
2    2
3    3
5    4
dtype: int64
In [28]: obj2 = obj.reindex(range(6), method='ffill')
         obj2
Out[28]:
0    1
1    1
2    2
3    3
4    3
5    4
dtype: int64
In [29]: df = DataFrame(np.arange(9).reshape(3, 3), index=['a', 'c', 'd'], columns=['name', 'id', 'sex'])
         df
Out[29]:
   name  id  sex
a     0   1    2
c     3   4    5
d     6   7    8
In [30]: df2 = df.reindex(['a', 'b', 'c', 'd'])
         df2
Out〔30〕:
name
id
sex
a
0。0
1。0
2。0
b
NaN
NaN
NaN
c
3。0
4。0
5。0
d
6。0
7。0
8。0
In [31]: df3 = df.reindex(columns=['name', 'year', 'id'], fill_value=0)
         df3
Out[31]:
   name  year  id
a     0     0   1
c     3     0   4
d     6     0   7
In [49]: data = {'name': ['张三', '李四', '王五', '小明'],
                 'grade': [68, 78, 63, 92]}
         df = DataFrame(data)
         df
Out〔49〕:
grade
name
0hr68hr张三
1hr78hr李四
2hr63hr王五
3hr92hr小明
In [50]: df2 = df.sort_values(by='grade')
         df2
Out〔50〕:
grade
name
2hr63hr王五
0hr68hr张三
1hr78hr李四
3hr92hr小明
In [51]: df3 = df2.reset_index()
         df3
Out〔51〕:
index
grade
name
0hr2hr63hr王五
1hr0hr68hr张三
2hr1hr78hr李四
3hr3hr92hr小明
In [52]: df4 = df2.reset_index(drop=True)
         df4
Out〔52〕:
grade
name
0hr63hr王五
1hr68hr张三
2hr78hr李四
3hr92hr小明
In〔45〕:data{name:〔张三,李四,王五,小明〕,sex:〔female,female,male,male〕,year:〔2001,2001,2003,2002〕,city:〔北京,上海,广州,北京〕}dfDataFrame(data)df
Out〔45〕:
city
name
sex
year
0hr北京
张三
female
2001hr1hr上海
李四
female
2001hr2hr广州
王五
male
2003hr3hr北京
小明
male
2002
In [47]: df2 = df.set_index('name')
         df2
Out〔47〕:
city
sex
year
name
张三
北京
female
2001hr李四
上海
female
2001hr王五
广州
male
2003hr小明
北京
male
2002
In [48]: df3 = df2.reset_index()
         df3
Out〔48〕:
name
city
sex
year
0hr张三
北京
female
2001hr1hr李四
上海
female
2001hr2hr王五
广州
male
2003hr3hr小明
北京
male
2002

索引和选取
In [1]: import numpy as np
        from pandas import Series, DataFrame
        import pandas as pd
In [3]: obj = Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
        obj
Out[3]:
a    1
b    2
c    3
d    4
dtype: int64
In [4]: obj[1]
Out[4]: 2
In [5]: obj['b']
Out[5]: 2
In [6]: obj[['a', 'c']]
Out[6]:
a    1
c    3
dtype: int64
In [7]: obj[0:2]
Out[7]:
a    1
b    2
dtype: int64
In [8]: obj['a':'c']
Out[8]:
a    1
b    2
c    3
dtype: int64
In〔53〕:data{name:〔张三,李四,王五,小明〕,sex:〔female,female,male,male〕,year:〔2001,2001,2003,2002〕,city:〔北京,上海,广州,北京〕}dfDataFrame(data)df
Out〔53〕:
city
name
sex
year
0hr北京
张三
female
2001hr1hr上海
李四
female
2001hr2hr广州
王五
male
2003hr3hr北京
小明
male
2002
In [17]: df['city']
Out[17]:
0    北京
1    上海
2    广州
3    北京
Name: city, dtype: object
In [18]: df.name
Out[18]:
0    张三
1    李四
2    王五
3    小明
Name: name, dtype: object
In [20]: df[['city', 'sex']]
Out〔20〕:
city
sex
0hr北京
female
1hr上海
female
2hr广州
male
3hr北京
male
In [26]: df2 = df.set_index('name')
         df2
Out〔26〕:
city
sex
year
name
张三
北京
female
2001hr李四
上海
female
2001hr王五
广州
male
2003hr小明
北京
male
2002
In [27]: df2[0:2]
Out〔27〕:
city
sex
year
name
张三
北京
female
2001hr李四
上海
female
2001
In [28]: df2['李四':'王五']
Out〔28〕:
city
sex
year
name
李四
上海
female
2001hr王五
广州
male
2003hrIn〔29〕:df2
Out〔29〕:
city
sex
year
name
张三
北京
female
2001hr李四
上海
female
2001hr王五
广州
male
2003hr小明
北京
male
2002
In [31]: df2.loc['张三']
Out[31]:
city    北京
sex     female
year    2001
Name: 张三, dtype: object
In [33]: df2.loc[['张三', '王五']]
Out〔33〕:
city
sex
year
name
张三
北京
female
2001hr王五
广州
male
2003
In [35]: df2.iloc[1]
Out[35]:
city    上海
sex     female
year    2001
Name: 李四, dtype: object
In [36]: df2.iloc[[1, 3]]
Out〔36〕:
city
sex
year
name
李四
上海
female
2001hr小明
北京
male
2002
In [41]: df2.ix[['张三', '王五'], 0:2]
Out〔41〕:
city
sex
name
张三
北京
female
王五
广州
male
In [75]: pd.set_option('mode.chained_assignment', None)
In [43]: df2.ix[:, ['sex', 'year']]  # 获取列
Out〔43〕:
sex
year
name
张三
female
2001hr李四
female
2001hr王五
male
2003hr小明
male
2002
In [44]: df2.ix[[1, 3], :]  # 获取行
Out〔44〕:
city
sex
year
name
李四
上海
female
2001hr小明
北京
male
2002
In [45]: df2['sex'] == 'female'
Out[45]:
name
张三     True
李四     True
王五    False
小明    False
Name: sex, dtype: bool
In [46]: df2[df2['sex'] == 'female']
Out〔46〕:
city
sex
year
name
张三
北京
female
2001hr李四
上海
female
2001
In [48]: df2[(df2['sex'] == 'female') & (df2['city'] == '北京')]
Out〔48〕:
city
sex
year
name
张三
北京
female
2001

行和列的操作
In〔54〕:df
Out〔54〕:
city
name
sex
year
0hr北京
张三
female
2001hr1hr上海
李四
female
2001hr2hr广州
王五
male
2003hr3hr北京
小明
male
2002
In [57]: newdata = {'city': '武汉', 'name': '小李', 'sex': 'male', 'year': 2002}
In [59]: df = df.append(newdata, ignore_index=True)  # 忽略索引值
         df
Out〔59〕:
city
name
sex
year
0hr北京
张三
female
2001hr1hr上海
李四
female
2001hr2hr广州
王五
male
2003hr3hr北京
小明
male
2002hr4hr武汉
小李
male
2002
In [60]: df['class'] = 2018
         df
Out〔60〕:
city
name
sex
year
class
0hr北京
张三
female
2001hr2018hr1hr上海
李四
female
2001hr2018hr2hr广州
王五
male
2003hr2018hr3hr北京
小明
male
2002hr2018hr4hr武汉
小李
male
2002    2018
In [61]: df['math'] = [92, 78, 58, 69, 82]
         df
Out〔61〕:
city
name
sex
year
class
math
0hr北京
张三
female
2001hr2018hr92hr1hr上海
李四
female
2001hr2018hr78hr2hr广州
王五
male
2003hr2018hr58hr3hr北京
小明
male
2002hr2018hr69hr4hr武汉
小李
male
2002    2018    82
In [63]: newdf = df.drop(2)  # 删除行
         newdf
Out〔63〕:
city
name
sex
year
class
math
0hr北京
张三
female
2001hr2018hr92hr1hr上海
李四
female
2001hr2018hr78hr3hr北京
小明
male
2002hr2018hr69hr4hr武汉
小李
male
2002    2018    82
In [64]: newdf = newdf.drop('class', axis=1)  # 删除列
         newdf
Out〔64〕:
city
name
sex
year
math
0hr北京
张三
female
2001hr92hr1hr上海
李四
female
2001hr78hr3hr北京
小明
male
2002hr69hr4hr武汉
小李
male
2002    82
In [65]: newdf.rename(index={3: 2, 4: 3}, columns={'math': 'Math'}, inplace=True)  # inplace可在原数据上修改
         newdf
Out〔65〕:
city
name
sex
year
Math
0hr北京
张三
female
2001hr92hr1hr上海
李四
female
2001hr78hr2hr北京
小明
male
2002hr69hr3hr武汉
小李
male
2002    82
In [67]: obj1 = Series([3.2, 5.3, 4.4, 3.7], index=['a', 'c', 'g', 'f'])
         obj1
Out[67]:
a    3.2
c    5.3
g    4.4
f    3.7
dtype: float64
In [68]: obj2 = Series([5.0, 2, 4.4, 3.4], index=['a', 'b', 'c', 'd'])
         obj2
Out[68]:
a    5.0
b    2.0
c    4.4
d    3.4
dtype: float64
In [69]: obj1 + obj2
Out[69]:
a    8.2
b    NaN
c    9.7
d    NaN
f    NaN
g    NaN
dtype: float64
In [70]: df1 = DataFrame(np.arange(9).reshape(3, 3), columns=['a', 'b', 'c'], index=['apple', 'tea', 'banana'])
         df1
Out[70]:
        a  b  c
apple   0  1  2
tea     3  4  5
banana  6  7  8
In [71]: df2 = DataFrame(np.arange(9).reshape(3, 3), columns=['a', 'b', 'd'], index=['apple', 'tea', 'coco'])
         df2
Out[71]:
       a  b  d
apple  0  1  2
tea    3  4  5
coco   6  7  8
In [72]: df1 + df2
Out〔72〕:
a
b
c
d
apple
0。0
2。0
NaN
NaN
banana
NaN
NaN
NaN
NaN
coco
NaN
NaN
NaN
NaN
tea
6。0
8。0
NaN
NaN
In〔73〕:df1
Out[73]:
        a  b  c
apple   0  1  2
tea     3  4  5
banana  6  7  8
In [76]: s = df1.ix['apple']
         s
Out[76]:
a    0
b    1
c    2
Name: apple, dtype: int32
In [77]: df1 - s
Out[77]:
        a  b  c
apple   0  0  0
tea     3  3  3
banana  6  6  6
In [78]: data = {'fruit': ['apple', 'orange', 'grape', 'banana'],
                 'price': ['25元', '42元', '35元', '14元']}
         df1 = DataFrame(data)
         df1
Out〔78〕:
fruit
price
0hrapple
25元
1hrorange
42元
2hrgrape
35元
3hrbanana
14元
In [79]: def f(x):
             return x.split('元')[0]
         df1['price'] = df1['price'].map(f)
         df1
Out[79]:
   fruit   price
0  apple   25
1  orange  42
2  grape   35
3  banana  14
In [80]: df2 = DataFrame(np.random.randn(3, 3), columns=['a', 'b', 'c'], index=['app', 'win', 'mac'])
         df2
Out〔80〕:
a
b
c
app
1。507962
2。140018
0。053571
win
0。729671
0。207060
0。397773
mac
0。191497
0。765726
0。266327
In [81]: f = lambda x: x.max() - x.min()
         df2.apply(f)
Out[81]:
a    1.699460
b    2.347079
c    0.664100
dtype: float64
In〔82〕:df2
Out〔82〕:
a
b
c
app
1。507962
2。140018
0。053571
win
0。729671
0。207060
0。397773
mac
0。191497
0。765726
0。266327
In [84]: df2.applymap(lambda x: '%.2f' % x)
Out〔84〕:
a
b
c
app
1。51
2。14
0。05
win
0。73
0。21
0。40
mac
0。19
0。77
0。27
In [86]: obj1 = Series([-2, 3, 2, 1], index=['b', 'a', 'd', 'c'])
         obj1
Out[86]:
b   -2
a    3
d    2
c    1
dtype: int64
In [87]: obj1.sort_index()  # 升序
Out[87]:
a    3
b   -2
c    1
d    2
dtype: int64
In [88]: obj1.sort_index(ascending=False)  # 降序
Out[88]:
d    2
c    1
b   -2
a    3
dtype: int64
In [91]: obj1.sort_values()
Out[91]:
b   -2
c    1
d    2
a    3
dtype: int64
In〔92〕:df2
Out〔92〕:
a
b
c
app
1。507962
2。140018
0。053571
win
0。729671
0。207060
0。397773
mac
0。191497
0。765726
0。266327
In [93]: df2.sort_values(by='b')
Out〔93〕:
a
b
c
app
1。507962
2。140018
0。053571
mac
0。191497
0。765726
0。266327
win
0。729671
0。207060
0。397773
In [2]: df = DataFrame(np.random.randn(9).reshape(3, 3), columns=['a', 'b', 'c'])
        df
Out〔2〕:
a
b
c
0hr0。660215
1。137716
0。302954
1hr1。496589
0。768645
2。091506
2hr0。170316
2。682284
0。041099
In〔3〕:df。sum()
Out〔3〕:a2。327120b4。588645c2。435558dtype:float64
In [4]: df.sum(axis=1)
Out〔4〕:00。78045511。36356222。553067dtype:float64
In [5]: data = {'name': ['张三', '李四', '王五', '小明'],
                'sex': ['female', 'female', 'male', 'male'],
                'math': [78, 79, 83, 92],
                'city': ['北京', '上海', '广州', '北京']}
        df = DataFrame(data)
        df
Out〔5〕:
city
math
name
sex
0hr北京
78hr张三
female
1hr上海
79hr李四
female
2hr广州
83hr王五
male
3hr北京
92hr小明
male
In〔6〕:df。describe()
Out[6]:
            math
count   4.000000
mean   83.000000
std     6.377042
min    78.000000
25%    78.750000
50%    81.000000
75%    85.250000
max    92.000000
In [7]: obj = Series(['a', 'b', 'a', 'c', 'b'])
        obj
Out[7]:
0    a
1    b
2    a
3    c
4    b
dtype: object
In [8]: obj.unique()
Out[8]: array(['a', 'b', 'c'], dtype=object)
In [9]: obj.value_counts()
Out[9]:
a    2
b    2
c    1
dtype: int64
In [11]: obj = Series(np.random.randn(9),
                      index=[['one', 'one', 'one', 'two', 'two', 'two', 'three', 'three', 'three'],
                             ['a', 'b', 'c', 'a', 'b', 'c', 'a', 'b', 'c']])
         obj
Out[11]:
one    a    0.697195
       b    0.887408
       c    0.451851
two    a    0.390779
       b    2.058070
       c    0.760594
three  a    0.305534
       b    0.720491
       c    0.259225
dtype: float64
In [12]: obj.index
Out[12]: MultiIndex(levels=[['one', 'three', 'two'], ['a', 'b', 'c']],
                    labels=[[0, 0, 0, 2, 2, 2, 1, 1, 1], [0, 1, 2, 0, 1, 2, 0, 1, 2]])
In [13]: obj['two']
Out[13]:
a    0.390779
b    2.058070
c    0.760594
dtype: float64
In [15]: obj[:, 'a']  # 内层选取
Out[15]:
one      0.697195
two      0.390779
three    0.305534
dtype: float64
In [16]: df = DataFrame(np.arange(16).reshape(4, 4),
                        index=[['one', 'one', 'two', 'two'], ['a', 'b', 'a', 'b']],
                        columns=[['apple', 'apple', 'orange', 'orange'], ['red', 'green', 'red', 'green']])
         df
Out[16]:
       apple        orange
       red  green   red  green
one a    0      1     2      3
    b    4      5     6      7
two a    8      9    10     11
    b   12     13    14     15
In [17]: df['apple']
Out[17]:
       red  green
one a    0      1
    b    4      5
two a    8      9
    b   12     13
In [18]: df.swaplevel(0, 1)
Out[18]:
       apple        orange
       red  green   red  green
a one    0      1     2      3
b one    4      5     6      7
a two    8      9    10     11
b two   12     13    14     15
In [19]: df.sum(level=0)
Out[19]:
     apple        orange
     red  green   red  green
one    4      6     8     10
two   20     22    24     26
In [20]: df.sum(level=1, axis=1)
Out[20]:
       green  red
one a      4    2
    b     12   10
two a     20   18
    b     28   26

pandas数据可视化
In [6]: import numpy as np
        from pandas import Series, DataFrame
        import pandas as pd
        import matplotlib as mpl
        import matplotlib.pyplot as plt  # 导入matplotlib库
        # 魔法函数
        %matplotlib inline
In [7]: s = Series(np.random.normal(size=10))
        s
Out[7]:
0    0.468142
1    1.408927
2    0.182548
3    0.043023
4    0.121437
5    0.539194
6    0.011423
7    0.938207
8    1.589460
9    0.460753
dtype: float64
In [8]: s.plot()
Out[8]: <matplotlib.axes._subplots.AxesSubplot at 0xafc5390>
In [10]: df = DataFrame({'normal': np.random.normal(size=100),
                         'gamma': np.random.gamma(1, size=100),
                         'poisson': np.random.poisson(size=100)})
         df.cumsum()
Out〔10〕:
gamma
normal
poisson
0hr1。804045
1。788000
0。0
1hr1。835715
0。089426
0。0
2hr3。850210
0。870177
0。0
3hr6。082898
0。902761
0。0
4hr8。837446
0。959945
1。0
5hr9。307126
1。658268
3。0
6hr9。518029
3。118419
6。0
7hr9。758011
3。861418
6。0
8hr10。481856
3。405625
6。0
9hr12。405202
4。892910
7。0
10hr13。086167
4。776206
7。0
11hr13。457807
3。217277
8。0
12hr13。574663
1。821368
9。0
13hr13。695523
2。829581
10。0
14hr13。819044
3。015490
11。0
15hr15。801080
2。629254
13。0
16hr17。043867
2。052196
14。0
17hr17。089774
3。687834
15。0
18hr17。499338
2。635491
16。0
19hr18。257891
2。636466
18。0
20hr19。101743
2。272298
19。0
21hr24。158020
0。113947
20。0
22hr25。112218
0。594266
23。0
23hr25。986628
1。326405
23。0
24hr28。383365
1。349211
23。0
25hr28。753694
1。527589
23。0
26hr28。908734
1。312111
25。0
27hr30。607696
0。228251
26。0
28hr31。081009
1。067429
27。0
29hr31。330353
1。098605
28。0
。。。
。。。
。。。
。。。
70hr72。302929
14。123995
66。0
71hr72。794689
14。860449
67。0
72hr73。629651
14。828726
67。0
73hr74。610837
14。168664
68。0
74hr78。773897
13。334949
70。0
75hr80。916582
13。722037
71。0
76hr81。994526
14。717187
72。0
77hr83。927355
13。784763
72。0
78hr86。004903
13。343261
75。0
79hr86。609627
12。151334
75。0
80hr87。199249
13。345584
77。0
81hr87。213180
12。311815
77。0
82hr87。553190
13。864232
77。0
83hr89。157662
14。439016
78。0
84hr89。213456
14。401503
80。0
85hr89。471336
15。838362
81。0
86hr89。552332
14。406933
81。0
87hr91。565291
14。520602
82。0
88hr94。179919
12。017739
82。0
89hr95。075841
13。279973
83。0
90hr95。192719
13。089789
83。0
91hr96。148316
12。268122
84。0
92hr97。146898
11。830559
84。0
93hr97。456375
13。035484
86。0
94hr99。877122
11。966609
87。0
95hr103。015620
12。313341
88。0
96hr103。116648
12。715195
88。0
97hr103。490265
12。168645
89。0
98hr103。925893
11。502630
89。0
99hr105。008619
11。193637
89。0
100 rows × 3 columns
In [11]: df.cumsum().plot()
Out[11]: <matplotlib.axes._subplots.AxesSubplot at 0xaef4c18>
In [12]: data = {'name': ['张三', '李四', '王五', '小明', 'Peter'],
                 'sex': ['female', 'female', 'male', 'male', 'male'],
                 'year': [2001, 2001, 2003, 2002, 2002],
                 'city': ['北京', '上海', '广州', '北京', '北京']}
         df = DataFrame(data)
         df
Out〔12〕:
city
name
sex
year
0hr北京
张三
female
2001hr1hr上海
李四
female
2001hr2hr广州
王五
male
2003hr3hr北京
小明
male
2002hr4hr北京
Peter
male
2002
In [14]: df['sex'].value_counts()
Out[14]:
male      3
female    2
Name: sex, dtype: int64
In [16]: df['sex'].value_counts().plot(kind='bar')
Out[16]: <matplotlib.axes._subplots.AxesSubplot at 0xaf1ac50>
In [18]: df2 = DataFrame(np.random.randint(0, 100, size=(3, 3)), index=('one', 'two', 'three'), columns=['A', 'B', 'C'])
         df2
Out[18]:
        A   B   C
one    29   5  88
two    35  42  43
three  87  85  76
In [19]: df2.plot(kind='barh')
Out[19]: <matplotlib.axes._subplots.AxesSubplot at 0xb5b53c8>
In [20]: df2.plot(kind='barh', stacked=True, alpha=0.5)
Out[20]: <matplotlib.axes._subplots.AxesSubplot at 0xd576cf8>
In [28]: s = Series(np.random.normal(size=100))
         s.hist(bins=20, grid=False)
Out[28]: <matplotlib.axes._subplots.AxesSubplot at 0xcf9f5c0>
In [29]: s.plot(kind='kde')
Out[29]: <matplotlib.axes._subplots.AxesSubplot at 0xd266710>
In [31]: df3 = DataFrame(np.arange(10), columns=['X'])
         df3['Y'] = 2 * df3['X'] + 5
         df3
Out[31]:
   X   Y
0  0   5
1  1   7
2  2   9
3  3  11
4  4  13
5  5  15
6  6  17
7  7  19
8  8  21
9  9  23
In [34]: df3.plot(kind='scatter', x='X', y='Y')
Out[34]: <matplotlib.axes._subplots.AxesSubplot at 0xb1f98d0>
In [51]: import numpy as np
         from pandas import Series, DataFrame
         import pandas as pd
         import seaborn as sns  # 导入seaborn库
In [52]: tips = sns.load_dataset('tips')
         tips.head()
Out〔52〕:
totalbill
tip
sex
smoker
day
time
size
0hr16。99
1。01
Female
No
Sun
Dinner
2hr1hr10。34
1。66
Male
No
Sun
Dinner
3hr2hr21。01
3。50
Male
No
Sun
Dinner
3hr3hr23。68
3。31
Male
No
Sun
Dinner
2hr4hr24。59
3。61
Female
No
Sun
Dinner
4
In [54]: tips.shape
Out[54]: (244, 7)
In [55]: tips.describe()
Out[55]:
       total_bill         tip        size
count  244.000000  244.000000  244.000000
mean    19.785943    2.998279    2.569672
std      8.902412    1.383638    0.951100
min      3.070000    1.000000    1.000000
25%     13.347500    2.000000    2.000000
50%     17.795000    2.900000    2.000000
75%     24.127500    3.562500    3.000000
max     50.810000   10.000000    6.000000
In [56]: tips.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
total_bill    244 non-null float64
tip           244 non-null float64
sex           244 non-null category
smoker        244 non-null category
day           244 non-null category
time          244 non-null category
size          244 non-null int64
dtypes: category(4), float64(2), int64(1)
memory usage: 7.2 KB
In [57]: tips.plot(kind='scatter', x='total_bill', y='tip')
Out[57]: <matplotlib.axes._subplots.AxesSubplot at 0xe034828>
In [62]: maletip = tips[tips['sex'] == 'Male']['tip'].mean()
         maletip
Out[62]: 3.0896178343949052
In [63]: femaletip = tips[tips['sex'] == 'Female']['tip'].mean()
         femaletip
Out[63]: 2.833448275862069
In [66]: s = Series([maletip, femaletip], index=['male', 'female'])
         s
Out[66]:
male      3.089618
female    2.833448
dtype: float64
In [67]: s.plot(kind='bar')
Out[67]: <matplotlib.axes._subplots.AxesSubplot at 0xddd27f0>
In [68]: tips['day'].unique()
Out[68]:
[Sun, Sat, Thur, Fri]
Categories (4, object): [Sun, Sat, Thur, Fri]
In [71]: suntip = tips[tips['day'] == 'Sun']['tip'].mean()
         sattip = tips[tips['day'] == 'Sat']['tip'].mean()
         thurtip = tips[tips['day'] == 'Thur']['tip'].mean()
         fritip = tips[tips['day'] == 'Fri']['tip'].mean()
In [72]: s = Series([thurtip, fritip, sattip, suntip], index=['Thur', 'Fri', 'Sat', 'Sun'])
         s
Out[72]:
Thur    2.771452
Fri     2.734737
Sat     2.993103
Sun     3.255132
dtype: float64
In [73]: s.plot(kind='bar')
Out[73]: <matplotlib.axes._subplots.AxesSubplot at 0xdefe5c0>
In [74]: tips['percenttip'] = tips['tip'] / (tips['total_bill'] + tips['tip'])
         tips.head(10)
Out〔74〕:
totalbill
tip
sex
smoker
day
time
size
percenttip
0hr16。99
1。01
Female
No
Sun
Dinner
2hr0。056111
1hr10。34
1。66
Male
No
Sun
Dinner
3hr0。138333
2hr21。01
3。50
Male
No
Sun
Dinner
3hr0。142799
3hr23。68
3。31
Male
No
Sun
Dinner
2hr0。122638
4hr24。59
3。61
Female
No
Sun
Dinner
4hr0。128014
5hr25。29
4。71
Male
No
Sun
Dinner
4hr0。157000
6hr8。77
2。00
Male
No
Sun
Dinner
2hr0。185701
7hr26。88
3。12
Male
No
Sun
Dinner
4hr0。104000
8hr15。04
1。96
Male
No
Sun
Dinner
2hr0。115294
9hr14。78
3。23
Male
No
Sun
Dinner
2hr0。179345
In [76]: tips['percenttip'].hist(bins=50)
Out[76]: <matplotlib.axes._subplots.AxesSubplot at 0xe264710>