题库表格字符串转DataFrame主函数table.py175. 组合两个表182. 查找重复的电子邮箱197. 上升的温度511. 游戏玩法分析 I577. 员工奖金584. 寻找用户推荐人595. 大的国家610. 判断三角形1280. 学生们参加各科测试的次数1517. 查找拥有有效邮箱的用户1731. 每位经理的下属员工数量2877. 从表中创建 DataFrame2878. 获取 DataFrame 的大小2879. 显示前三行2880. 数据选取2881. 创建新列2882. 删去重复的行2883. 删去丢失的数据2884. 修改列2885. 重命名列2886. 改变数据类型2887. 填充缺失值2888. 重塑数据:连结2889. 数据重塑:透视2890. 重塑数据:融合拓展(中等)176. 第二高的薪水180. 连续出现的数字拓展(困难)185. 部门工资前三高的所有员工262. 行程和用户601. 体育馆的人流量
ximport pandas as pd
import table
def test(df1, df2, df3):
return pd.DataFrame()
print(test(table.df1, table.df2, table.df3))
xxxxxxxxxx
import pandas as pd
from io import StringIO
table1_str = """
+----+----+----+
| x | y | z |
+----+----+----+
| 13 | 15 | 30 |
| 10 | 20 | 15 |
+----+----+----+
"""
table2_str = """
+-------+-------+
| empId | bonus |
+-------+-------+
| 2 | 500 |
| 4 | 2000 |
+-------+-------+
"""
table3_str = """
+------------+--------------+
| student_id | subject_name |
+------------+--------------+
| 1 | Math |
| 1 | Physics |
| 1 | Programming |
| 2 | Programming |
| 1 | Physics |
| 1 | Math |
| 13 | Math |
| 13 | Programming |
| 13 | Physics |
| 2 | Math |
| 1 | Math |
+------------+--------------+
"""
def str_to_df(table_str):
line_list = table_str.strip().split("\n")
line_list = [
line.strip("|").replace("|", ",").replace(" ", "")
for line in line_list
if not line.startswith("+")
]
str_obj = StringIO("\n".join(line_list))
return pd.read_csv(str_obj)
df1, df2, df3 = str_to_df(table1_str), str_to_df(table2_str), str_to_df(table3_str)
xxxxxxxxxx
import pandas as pd
def combine_two_tables(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
record2_dict = {record2['personId']: record2 for record2 in df2.to_dict('records')}
records = [
{**record1, **record2_dict.get(record1['personId'], {})}
for record1 in df1.to_dict('records')
]
return pd.DataFrame(records, columns=['firstName', 'lastName', 'city', 'state'])
xxxxxxxxxx
import pandas as pd
def duplicate_emails(df1: pd.DataFrame) -> pd.DataFrame:
email_dict = {}
for record in df1.to_dict('records'):
email_dict[record['email']] = email_dict.get(record['email'], 0) + 1
email_list = [email for email, count in email_dict.items() if count > 1]
return pd.DataFrame({'Email': email_list})
xxxxxxxxxx
import pandas as pd
def rising_temperature(df1: pd.DataFrame) -> pd.DataFrame:
df1['recordDate'] = pd.to_datetime(df1['recordDate'])
records = df1.to_dict('records')
id_list = []
temp_dict = {record['recordDate']: record['temperature'] for record in records}
for record in records:
yesterday = record['recordDate'] - pd.Timedelta(days=1)
if record['temperature'] > temp_dict.get(yesterday, record['temperature']):
id_list.append(record['id'])
return pd.DataFrame({'id': id_list})
xxxxxxxxxx
import pandas as pd
def game_analysis(df1: pd.DataFrame) -> pd.DataFrame:
df1['event_date'] = pd.to_datetime(df1['event_date'])
login_dict = {}
for record in df1.to_dict('records'):
if record['event_date'] <= login_dict.get(record['player_id'], record['event_date']):
login_dict[record['player_id']] = record['event_date']
return pd.DataFrame(login_dict.items(), columns=['player_id', 'first_login'])
xxxxxxxxxx
import pandas as pd
def employee_bonus(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
bonus_dict = {record2['empId']: record2['bonus'] for record2 in df2.to_dict('records')}
records = [
(record1['name'], bonus_dict.get(record1['empId']))
for record1 in df1.to_dict('records')
if bonus_dict.get(record1['empId'], 0) < 1000
]
return pd.DataFrame(records, columns=['name', 'bonus'])
xxxxxxxxxx
import pandas as pd
def find_customer_referee(df1: pd.DataFrame) -> pd.DataFrame:
name_list = [
record['name']
for record in df1.to_dict('records')
if record.get('referee_id') != 2
]
return pd.DataFrame({'name': name_list})
xxxxxxxxxx
import pandas as pd
def big_countries(df1: pd.DataFrame) -> pd.DataFrame:
records = [
record
for record in df1.to_dict('records')
if record.get('area') >= 3000000 or record.get('population') >= 25000000
]
return pd.DataFrame(records, columns=['name', 'population', 'area'])
xxxxxxxxxx
import pandas as pd
def triangle_judgement(df1: pd.DataFrame) -> pd.DataFrame:
records = df1.to_dict('records')
for record in records:
x, y, z = record.values()
if x + y > z and x + z > y and y + z > x:
record['triangle'] = 'Yes'
else:
record['triangle'] = 'No'
return pd.DataFrame(records)
xxxxxxxxxx
import pandas as pd
def students_and_examinations(df1: pd.DataFrame, df2: pd.DataFrame, df3: pd.DataFrame) -> pd.DataFrame:
df1 = df1.sort_values(by='student_id')
df2 = df2.sort_values(by='subject_name')
records2 = df2.to_dict('records')
count_dict = {}
for record3 in df3.to_dict('records'):
key = (record3['student_id'], record3['subject_name'])
count_dict[key] = count_dict.get(key, 0) + 1
tuple_list = []
for record1 in df1.to_dict('records'):
for record2 in records2:
attended_exams = count_dict.get((record1['student_id'], record2['subject_name']), 0)
tuple_list.append((record1['student_id'], record1['student_name'], record2['subject_name'], attended_exams))
return pd.DataFrame(tuple_list, columns=['student_id', 'student_name', 'subject_name', 'attended_exams'])
xxxxxxxxxx
import pandas as pd
def valid_emails(df1: pd.DataFrame) -> pd.DataFrame:
pattern = r"[a-zA-Z][a-zA-Z0-9_/.-]*@leetcode\.com$"
records = [record for record in df1.to_dict('records') if re.match(pattern, record['mail'])]
return pd.DataFrame(records, columns=['user_id', 'name', 'mail'])
xxxxxxxxxx
import pandas as pd
def count_employees(df1: pd.DataFrame) -> pd.DataFrame:
df1 = df1.sort_values(by='employee_id')
records = df1.to_dict('records')
report_dict = {}
for record in records:
if not pd.isna(record['reports_to']):
count, total_age = report_dict.get(record['reports_to'], (0, 0))
report_dict[record['reports_to']] = (count + 1, total_age + record['age'])
records = [record for record in records if report_dict.get(record['employee_id']) is not None]
for record in records:
count, total_age = report_dict.get(record['employee_id'])
record['reports_count'] = count
record['average_age'] = int(total_age/count + 0.5)
return pd.DataFrame(records, columns=['employee_id', 'name', 'reports_count', 'average_age'])
xxxxxxxxxx
import pandas as pd
def createDataframe(student_data: List[List[int]]) -> pd.DataFrame:
return pd.DataFrame(student_data, columns=['student_id', 'age'])
xxxxxxxxxx
import pandas as pd
def getDataframeSize(df1: pd.DataFrame) -> List[int]:
records = df1.to_dict('records')
return [len(records), len(records[0])]
xxxxxxxxxx
import pandas as pd
def selectFirstRows(df1: pd.DataFrame) -> pd.DataFrame:
return df1[:3]
xxxxxxxxxx
import pandas as pd
def selectData(df1: pd.DataFrame) -> pd.DataFrame:
records = [record for record in df1.to_dict('records') if record['student_id'] == 101]
return pd.DataFrame(records, columns=['name', 'age'])
xxxxxxxxxx
import pandas as pd
def createBonusColumn(df1: pd.DataFrame) -> pd.DataFrame:
df1['bonus'] = df1['salary']*2
return df1
xxxxxxxxxx
import pandas as pd
def createBonusColumn(df1: pd.DataFrame) -> pd.DataFrame:
records = [{**record, 'bonus': record['salary']*2} for record in df1.to_dict('records')]
return pd.DataFrame(records)
xxxxxxxxxx
import pandas as pd
def dropDuplicateEmails(df1: pd.DataFrame) -> pd.DataFrame:
record_dict = {}
for record in df1.to_dict('records'):
if record_dict.get(record['email']) is None:
record_dict[record['email']] = record
return pd.DataFrame(record_dict.values())
xxxxxxxxxx
import pandas as pd
def dropMissingData(df1: pd.DataFrame) -> pd.DataFrame:
return df1[pd.notna(df1['name'])]
xxxxxxxxxx
import pandas as pd
def dropMissingData(df1: pd.DataFrame) -> pd.DataFrame:
records = [record for record in df1.to_dict('records') if pd.notna(record['name'])]
return pd.DataFrame(records)
xxxxxxxxxx
import pandas as pd
def modifySalaryColumn(df1: pd.DataFrame) -> pd.DataFrame:
df1['salary'] = df1['salary']*2
return df1
xxxxxxxxxx
import pandas as pd
def modifySalaryColumn(df1: pd.DataFrame) -> pd.DataFrame:
records = [{**record, 'salary': record['salary']*2} for record in df1.to_dict('records')]
return pd.DataFrame(records)
xxxxxxxxxx
import pandas as pd
def renameColumns(df1: pd.DataFrame) -> pd.DataFrame:
df1.columns = ['student_id', 'first_name', 'last_name', 'age_in_years']
return df1
xxxxxxxxxx
import pandas as pd
def renameColumns(df1: pd.DataFrame) -> pd.DataFrame:
return pd.DataFrame(df1.values, columns=['student_id', 'first_name', 'last_name', 'age_in_years'])
xxxxxxxxxx
import pandas as pd
def changeDatatype(df: pd.DataFrame) -> pd.DataFrame:
df['grade'] = [int(num) for num in df['grade'].tolist()]
return df
xxxxxxxxxx
import pandas as pd
def fillMissingValues(df1: pd.DataFrame) -> pd.DataFrame:
df1['quantity'] = [0 if pd.isna(q) else q for q in df1['quantity'].tolist()]
return df1
xxxxxxxxxx
import pandas as pd
def concatenateTables(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
records1 = df1.to_dict('records')
records2 = df2.to_dict('records')
return pd.DataFrame(records1 + records2)
xxxxxxxxxx
import pandas as pd
def pivotTable(df1: pd.DataFrame) -> pd.DataFrame:
temp_dict = {(record['city'], record['month']): record['temperature'] for record in df1.to_dict('records')}
cities, months = sorted(df1['city'].unique()), sorted(df1['month'].unique())
records = [
{
'month': month,
**{city: temp_dict.get((city, month), None) for city in cities}
}
for month in months
]
return pd.DataFrame(records).set_index('month')
xxxxxxxxxx
import pandas as pd
def meltTable(df1: pd.DataFrame) -> pd.DataFrame:
tuple_list = []
for record in df1.to_dict('records'):
product = record['product']
del record['product']
for quarter, sales in record.items():
tuple_list.append((product, quarter, sales))
products = df1['product'].tolist()
tuple_list = sorted(tuple_list, key=lambda x: (x[1], products.index(x[0])))
return pd.DataFrame(tuple_list, columns=['product', 'quarter', 'sales'])
xxxxxxxxxx
import pandas as pd
def second_highest_salary(df1: pd.DataFrame) -> pd.DataFrame:
salary = sorted(df1['salary'].unique().tolist(), reverse=True) + [None, None]
return pd.DataFrame([{'SecondHighestSalary': salary[1]}])
xxxxxxxxxx
import pandas as pd
def consecutive_numbers(df1: pd.DataFrame) -> pd.DataFrame:
df1 = df1.sort_values(by='id')
records = df1.to_dict('records')
count_dict = {}
for i, record in enumerate(records):
if record['id'] - 1 == records[i - 1]['id'] and record['num'] == records[i - 1]['num']:
count_dict[record['num']] = count_dict.get(record['num'], 0) + 1
elif count_dict.get(record['num'], 0) < 3:
count_dict[record['num']] = 1
num_list = [num for num, count in count_dict.items() if count >= 3]
return pd.DataFrame({'ConsecutiveNums': num_list})
xxxxxxxxxx
import pandas as pd
def top_three_salaries(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
records1 = df1.to_dict('records')
department_dict = {record2['id']: (record2['name'], 0, 0, 0) for record2 in df2.to_dict('records')}
for record1 in records1:
department, first, second, third = department_dict[record1['departmentId']]
if record1['salary'] > first:
first, second, third = record1['salary'], first, second
elif second < record1['salary'] < first:
second, third = record1['salary'], second
elif third < record1['salary'] < second:
third = record1['salary']
department_dict[record1['departmentId']] = (department, first, second, third)
records = []
for record1 in records1:
department, first, second, third = department_dict[record1['departmentId']]
if first >= record1['salary'] >= third:
records.append((department, record1['name'], record1['salary']))
return pd.DataFrame(records, columns=['Department', 'Employee', 'Salary'])
xxxxxxxxxx
import pandas as pd
def trips_and_users(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
df1['request_at'] = pd.to_datetime(df1['request_at'])
records1 = df1.to_dict('records')
banned_dict = {record2['users_id']: record2['banned'] for record2 in df2.to_dict('records')}
records1 = [
record1
for record1 in records1
if banned_dict[record1['client_id']] != 'Yes' and
banned_dict[record1['driver_id']] != 'Yes' and
pd.to_datetime('2013-10-01') <= record1['request_at'] <= pd.to_datetime('2013-10-03')
]
cancel_dict = {}
for record1 in records1:
total, cancel = cancel_dict.get(record1['request_at'], (0, 0))
if record1['status'] != 'completed':
cancel += 1
cancel_dict[record1['request_at']] = (total + 1, cancel)
tuple_list = []
for request_at, (total, cancel) in cancel_dict.items():
tuple_list.append((request_at, int((cancel/total + 0.005)*100) / 100))
return pd.DataFrame(tuple_list, columns=['Day', 'Cancellation Rate'])
xxxxxxxxxx
import pandas as pd
def human_traffic(df1: pd.DataFrame) -> pd.DataFrame:
records1 = [record1 for record1 in df1.to_dict('records') if record1['people'] >= 100]
tuple_list = []
start, end = 0, 0
for i, record1 in enumerate(records1):
if i == 0:
continue
elif record1['id'] == records1[end]['id'] + 1:
end = i
if i == len(records1) - 1:
tuple_list.append((start, end))
else:
tuple_list.append((start, end))
start, end = i, i
records = []
for start, end in tuple_list:
if end - start >= 2:
records = records + records1[start:end + 1]
return pd.DataFrame(records, columns=['id', 'visit_date', 'people'])