From d7094bb0c6ef0673c27553deb66dc3405e2a9aa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=89=8D=E7=AB=AF=E8=8F=9C13?= <38740836+fyhhub@users.noreply.github.com> Date: Thu, 7 Nov 2024 21:04:27 +0800 Subject: [PATCH] feat: changes --- .../1.python\345\237\272\347\241\200.md" | 83 ++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git "a/src/python/1.python\345\237\272\347\241\200.md" "b/src/python/1.python\345\237\272\347\241\200.md" index c016c0b78..c94f19a60 100644 --- "a/src/python/1.python\345\237\272\347\241\200.md" +++ "b/src/python/1.python\345\237\272\347\241\200.md" @@ -1,3 +1,84 @@ -# Python基础 +# Pandas使用 +```py +pip install pandas +``` + +## 数据读取 + +### 1. 读取表格 +```py +# -*- coding: utf-8 -*- +df = pd.read_csv(encoding='utf-8', filepath_or_buffer='xxx') + + +df = pd.read_excel('example.xlsx', usecols=['A', 'B']) +``` + +### 2.获取表头 +```py +df.columns +``` + +### 3.获取某一列去重后的值 +```py +df['column_name'].unique() +``` + +## 数据操作 + +### 1.填充空数据 +```py +df['pageUrl'].fillna('') +``` + + +### 2.修改某列的所有数据 +```py +df['pageUrl'] = df['pageUrl'].map(lambda x: 'xxxx' + str(x)) +``` + +### 3.新建表格 +```py +data = { + 'name': ['Alice', 'Bob', 'Charlie', 'David'], + 'age': [23, 30, 45, 25], + 'salary': [70000, 80000, 120000, 95000] +} + +# 创建 DataFrame +df = pd.DataFrame(data) +``` + + + +## 数据筛选 + +### 1.筛选缺失的值 +```py +df['年龄'].isna() +``` + +### 2.筛选出不包含特定值的行 +```py +df[~df['姓名'].isin(['张三'])] +``` + +### 3.筛选日期 +```py +import datetime + +# 将 'timestamp' 列转换为日期时间格式 +df['timestamp'] = df['timestamp'].apply(lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S')) + +# 提取 '年月日' 并去重 +unique_days = df['timestamp'].dt.date.unique() +``` + +## 数据计算 + +### 1.根据某种条件,对某一列求和 +```py +df[df['关联需求类型'] == '业务需求']['实际工时'].sum() +``` \ No newline at end of file