上节课完成了对单个班级成绩的爬取,本次任务为爬取多个班级的期中期末成绩。

并存储到excel和数据库中。

为,下一次课的成绩分析做预备。

数据爬取

爬取多个班级

class_list=['22301','22302','22303','22304','22305','22306']

for class_name in class_list:
  data = {
    "active": "initStuScoreInfo",
    "sAuth": "@05@O@09@O@82@",
    "EXAMTYPE": "qm",
    "XNXQBM": "2023101",
    "BJBH": class_name
    }  

  res_json=fetch_data(url,headers,cookies,data)
  if res_json!=None:
      print(Processing_data(res_json))

对Processing_data进行修改,使其输出增加班级名称

'''
description: 这个是对获取的数据进行处理
param {*} res_json 请求的网页的json数据,class_name班级号
return {*} 班级,学生姓名,语文,数学,英语
'''
def Processing_data(class_name,res_json):
    subject_codes={
        "语文":None,
        "数学":None,
        "英语":None
    }
    stu_list=[]
    for item in res_json[0]["examSubData"]:
        if item['subName'] in subject_codes:
            subject_codes[item['subName']]=item['code']
    # print(subject_codes)
    for stu in res_json[0]["listData"]:
        row_data=[class_name,stu['XM']]
        for xueke in subject_codes.values():
            row_data.append(stu[xueke])
        stu_list.append(row_data)
    return stu_list

爬取期中和期末成绩

class_list=['22301','22302','22303','22304','22305','22306']
qzqm_list=['qz','qm']

for class_name in class_list:
  for qzqm in qzqm_list:
    data = {
      "active": "initStuScoreInfo",
      "sAuth": "@05@O@09@O@82@",
      "EXAMTYPE": qzqm,
      "XNXQBM": "2023101",
      "BJBH": class_name
      }  

    res_json=fetch_data(url,headers,cookies,data)
    if res_json!=None:
        print(Processing_data(class_name,res_json))

对Processing_data进行修改,使其输出时整合期中期末成绩

'''
description: 这个是对获取的数据进行处理
param {*} res_json 请求的网页的json数据
return {*} 输出一个list,期中每个元素也是一个list,包含学生姓名,语文,数学,英语
'''
def Processing_data(class_name,qzqm,res_json):
    subject_codes={
        "语文":None,
        "数学":None,
        "英语":None
    }
    stu_list=[]
    for item in res_json[0]["examSubData"]:
        if item['subName'] in subject_codes:
            subject_codes[item['subName']]=item['code']
    # print(subject_codes)
    for stu in res_json[0]["listData"]:
        if qzqm =="qz":
            row_data=[class_name,"期中",stu['XM']]
        else:
            row_data=[class_name,"期末",stu['XM']]
        for xueke in subject_codes.values():
            row_data.append(stu[xueke])
        stu_list.append(row_data)
    return stu_list

OK 截止此处,基本能规范输出各班的期中期末语数英成绩了

接下来,需要存入数据库中

数据入库

步骤1 环境准备,略

参见之前笔记

注意这次的表结构不同

班级姓名期中语文期中数学期中英语期末语文期末数学期末英语
字符字符intintintintintint

表结构设计

CREATE TABLE student_scores (
    id INT AUTO_INCREMENT PRIMARY KEY,
    class VARCHAR(50),
    name VARCHAR(50),
    mid_term_chinese INT,
    mid_term_math INT,
    mid_term_english INT,
    final_term_chinese INT,
    final_term_math INT,
    final_term_english INT
);

步骤2 数据库连接框架

import pymysql
'''
description: 连接数据库
return {*} 返回connection和cursor
'''
def get_database_connection():
    connection = pymysql.connect(host='localhost',
                                 user='username',
                                 password='password',
                                 db='dbname',
                                 charset='utf8mb4',
                                 cursorclass=pymysql.cursors.DictCursor)
    return connection, connection.cursor()
'''
description: 执行sql的插入语句
return {*}无返回
'''
def exe_sql(sql, data):
    try:
        cursor.execute(sql, data)
        connection.commit()
        print("执行了:", data)
    except pymysql.MySQLError as e:
        print(f"出错: {e}")

try:
    connection, cursor = get_database_connection()
except pymysql.MySQLError as e:
    print(f"连接数据库时出错: {e}")

# 在此处定义您的 SQL 查询和数据
sql = "INSERT INTO 表名 (列1, 列2) VALUES (%s, %s)"
test_item = ["数值1", "数值2"]

# 执行 SQL 查询
exe_sql(sql, test_item)

connection.close()

步骤3 写入数据

for stu_data in stu_data_list:
    if stu_data[1] =="期中":
        # 在此处定义您的 SQL 查询和数据
        sql = "INSERT INTO student_scores (class, name,mid_term_chinese,mid_term_math,mid_term_english ) VALUES (%s,%s,%s,%s, %s)"
        test_item = [stu_data[0], stu_data[2], stu_data[3], stu_data[4], stu_data[5]]
        #new_list = stu_data[:1] + stu_data[2:]
        # 执行 SQL 查询
        exe_sql(sql, test_item)
    elif stu_data[1] =="期末":
        # 在此处定义您的 SQL 查询和数据
        sql =  "UPDATE student_scores SET final_term_chinese = %s, final_term_math = %s, final_term_english = %s WHERE class = %s AND name = %s"
        test_item = [stu_data[3], stu_data[4], stu_data[5], stu_data[0], stu_data[2]]
        # 执行 SQL 查询
        exe_sql(sql, test_item)

最终结果