Pandas刷题

Pandas刷题

Leecode刷题

176.第二高的薪水

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import pandas as pd

def second_highest_salary(employee: pd.DataFrame) -> pd.DataFrame:
# 1. 删除所有重复的薪水.
employee = employee.drop_duplicates(["salary"])

# 2. 如果少于 2 个不同的薪水,返回 `np.NaN`。
if len(employee["salary"].unique()) < 2:
return pd.DataFrame({"SecondHighestSalary": [np.NaN]})

# 3. 把表格按 `salary` 降序排序。
employee = employee.sort_values(by="salary", ascending=False)

# 4. 删除 `id` 列。
employee.drop("id", axis=1, inplace=True)

# 5. 重命名 `salary` 列。
employee.rename({"salary": "SecondHighestSalary"}, axis=1, inplace=True)

# 6, 7. 返回第 2 高的薪水
return employee.head(2).tail(1)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
import pandas as pd

def second_highest_salary(employee: pd.DataFrame) -> pd.DataFrame:
# 按照工资降序排序
employee = employee.sort_values(by='salary', ascending=False)

# 去除重复的工资
employee = employee.drop_duplicates(subset=['salary'])

# 选择第N高的工资,如果不存在则返回null
if len(employee) >= 2:
return pd.DataFrame({'SecondHighestSalary': [int(employee.iloc[1]['salary'])]})
else:
return pd.DataFrame({'SecondHighestSalary': [None]})

177.第N高薪水

1
2
3
4
5
6
7
8
9
10
11
import pandas as pd

def nth_highest_salary(employee: pd.DataFrame, N: int) -> pd.DataFrame:

dist = employee.drop_duplicates(subset='salary')
dist['rnk'] = dist['salary'].rank(method='dense', ascending=False)
ans = dist[dist.rnk == N][['salary']]
if not len(ans):
return pd.DataFrame({f'getNthHighestSalary({N})': [None]})
ans = ans.rename(columns={'salary':f'getNthHighestSalary({N})'})
return ans

178.分数排名

1
2
3
4
5
import pandas as pd

def order_scores(scores: pd.DataFrame) -> pd.DataFrame:
scores['rank'] = scores['score'].rank(method='dense', ascending=False)
return scores[['score', 'rank']].sort_values('score', ascending=False)

180.连续出现的数字

1
2
3
4
5
6
7
8
9
10
11
12
def consecutive_numbers(logs: pd.DataFrame) -> pd.DataFrame:
###类似sql的窗口函数 先将先窗口内排序
logs['rank']=logs.groupby('num')['id'].rank(method='first')
##用id-rank 得到一个标签,同样的标签在同一个num下出现三次说明连续
logs['is_']=logs['id']-logs['rank']
##分组count
result_pre1=logs.groupby(by=['is_','num'])['id'].count().reset_index()
result=result_pre1.loc[result_pre1['id']>=3,['num']]
result.columns=['ConsecutiveNums']
result.drop_duplicates(inplace=True)

return result

182.查找重复的电子邮箱

1
2
3
4
5
6
import pandas as pd

def duplicate_emails(person: pd.DataFrame) -> pd.DataFrame:
person["duplicate"] = person["email"].duplicated(keep="first")
df = person[person["duplicate"]].drop_duplicates(subset=["email"], keep="first")
return df[["email"]]
1
2
3
4
5
import pandas as pd

def duplicate_emails(person: pd.DataFrame) -> pd.DataFrame:
df=person[person['email'].duplicated()]['email'].unique()
return pd.DataFrame({'Email':df})
1
2
3
4
5
6
7
8
9
10
import pandas as pd

def duplicate_emails(person: pd.DataFrame) -> pd.DataFrame:
res = person['email'].value_counts()
res = res[res>1]
res = res.index
print(res)
# res.columns= ['email','value']
# res = res[res['value']>1]
return pd.DataFrame(res)