3.3 数据取值与选择

3.3 数据取值与选择

本文源码请见我的GitHub

3.3.1 Series数据选择方法

1
2
3
4
5
#1.将Series看作字典
import pandas as pd
import numpy as np
data = pd.Series([0.25,0.3,0.4,0.5], index = ['a', 'b', 'c', 'd'])
data
a    0.25
b    0.30
c    0.40
d    0.50
dtype: float64
1
data['b']
0.3
1
'a' in data
True
1
'z' in data
False
1
data.keys()
Index(['a', 'b', 'c', 'd'], dtype='object')
1
data.index
Index(['a', 'b', 'c', 'd'], dtype='object')
1
2
#可以使用字典语法调整数据
data['e'] = 1.25
1
data
a    0.25
b    0.30
c    0.40
d    0.50
e    1.25
dtype: float64
1
#2.Series看作一维数组
1
data['a':'c']
a    0.25
b    0.30
c    0.40
dtype: float64
1
data[0:2]
a    0.25
b    0.30
dtype: float64
1
data[data > 0.3]
c    0.40
d    0.50
e    1.25
dtype: float64
1
data[['a', 'e']]
a    0.25
e    1.25
dtype: float64
1
#3.索引器 loc iloc ix
1
2
data = pd.Series(['a','b','c','d'], index= [1,3, 4, 5])
data
1    a
3    b
4    c
5    d
dtype: object
1
2
3
#loc 显式索引

data.loc[1]#显式索引
'a'
1
2
3
# iloc 隐式索引, 切片时遵循左闭右开区间

data.iloc[1]
'b'

3.3.2 DataFrame数据选择方法

1
#1.DataFrame看作字典
1
2
3
4
5
6
7
8
9
10
11
12
13
14
population_dict = {'California' : 1548,
'Texas': 1131,
'New York': 1922,
'Florida': 1955,
'Illinois':1288}
population = pd.Series(population_dict)
area_dict = {'California' : 61548,
'Texas': 51131,
'New York': 19422,
'Florida': 31955,
'Illinois':21288}
area = pd.Series(area_dict)#将字典传入
state = pd.DataFrame({'population' : population, 'area' : area })
state
population area
California 1548 61548
Texas 1131 51131
New York 1922 19422
Florida 1955 31955
Illinois 1288 21288
1
state['area']
California    61548
Texas         51131
New York      19422
Florida       31955
Illinois      21288
Name: area, dtype: int64
1
state.area
California    61548
Texas         51131
New York      19422
Florida       31955
Illinois      21288
Name: area, dtype: int64
1
#取列 data.columnName 等价于 data['columnName'], 但是如果列名和df的方法名相同(或列名不是合法的标识符)就不可以用.索引
1
#2.DataFrame 看作二维数组
1
state.values
array([[ 1548, 61548],
       [ 1131, 51131],
       [ 1922, 19422],
       [ 1955, 31955],
       [ 1288, 21288]], dtype=int64)
1
state.T
California Texas New York Florida Illinois
population 1548 1131 1922 1955 1288
area 61548 51131 19422 31955 21288
1
state['area']
California    61548
Texas         51131
New York      19422
Florida       31955
Illinois      21288
Name: area, dtype: int64
1
state.iloc[:3,:2] #隐式索引
population area
California 1548 61548
Texas 1131 51131
New York 1922 19422
1
state.loc[:'Florida', :'area'] #显式索引
population area
California 1548 61548
Texas 1131 51131
New York 1922 19422
Florida 1955 31955
1
#3.Other methods
1
state['Florida' : 'Illinois']
population area
Florida 1955 31955
Illinois 1288 21288