Published on

基于 ExcelJS 的数据规范化导入

Authors
  • avatar
    Name
    Shelton Ma
    Twitter

1. Excel文件导入

使用 ExcelJS 实现 Excel 文件的规范化导入流程。包括文件格式校验、工作表和表头结构验证,将数据行解析为 JSON 格式并映射为业务对象,结合 Zod 进行字段级数据验证,并支持重复数据校验,确保导入数据的完整性与规范性。

1. 接受文件上传

export async function upload(
  req: express.Request,
  res: express.Response
): Promise<void> {
  if (!req.file) {
    throw new Error("文件不能为空");
  }
  const file: Express.Multer.File = req.file;
  const result = await dataParse(req.body, file);
  res.success(result);
}

2. 解析文件

  1. 读取文件并解析数据

    /**
    * 此处使用Multer上传文件, 处理过程包括
    * 1. 验证文件格式/工作表/表头是否满足
    * 2. 行数据转成json格式
    * 3. json数据转成对象
    * 4. 使用zod验证
    * 5. 数据重复校验
    * @param file file path or file object
    * @returns 
    */
    import ExcelJS from "exceljs";
    
    export const dataParse = async (
      { dryRun = "true" }:parseData,
      file: Express.Multer.File | string
    ): Promise<{ data: any; message: string }> => {
      const workbook = new ExcelJS.Workbook();
    
      // 支持文件路径和文件对象 
      if (typeof file === "string") {
        workbook.xlsx.readFile(file);
      } else if (file.buffer) {
        await workbook.xlsx.load(file.buffer);
      } else {
        throw new Error(
          "不支持的文件类型,请传入路径字符串或包含 buffer 的上传文件对象"
        );
      }
    
      // 默认解析第一数据表
      const worksheet = workbook.getWorksheet(1);
      if (!worksheet) {
        throw new Error(
          `数据有误需要确认, 请确保文件格式为xlsx, 如果是xls, 使用excel转换格式后重新导入`
        );
      }
    
      const excelHeaders = [
        "姓名",
        "性别",
        "出生年月",
        "部门",
      ];
      // 验证表头是否满足需求
      validateHeaders(worksheet, excelHeaders);
      // 数据转json
      const rowData = excelRowToJson(worksheet, excelHeaders);
    
      // json数据转对象
      const users = rowData.map((row) => {
        
        return {
          name: row["姓名"],
          gender: row["性别"],
          birth: row["出生年月"],
          department: row["部门"],
          createdAt: new Date(),
          updatedAt: new Date(),
        };
      });
    
      // 数据格式校验
      const { valid: uploadedData, errors } = validateRows(
        users,
        userSchema
      );
      if (errors.length > 0) {
        throw new Error(
          `数据有误需要确认, 请检查以下行数据: ${errors
            .map(
              (error) =>
                `${error.index + 2}行 异常: ${JSON.stringify(
                  error.result,
                  null,
                  2
                )}`
            )
            .join(", ")}`
        );
      }
      if (!uploadedData || uploadedData.length === 0) {
        throw new Error(`有效数据为空, 请检查数据是否符合要求`);
      }
    
      // 数据重复校验
      const { selfDuplicates, dbDuplicates, validData } =
        await validateUserDuplicates(uploadedData);
    
      if (selfDuplicates.length > 0) {
        throw new Error(
          `Excel传入数据有重复, 需要按照 姓名-性别-部门-出生日期 进行去重`
        );
      }
      if (dbDuplicates.length > 0) {
        throw new Error(
          `Excel传入数据与数据库已有数据重复, 需要按照 姓名-性别-部门-出生日期 进行去重, 如有疑问联系管理员, 重复数据: ${JSON.stringify(
            dbDuplicates.map((row) => {
              return {
                "姓名": row.name,
                "性别": row.gender,
                "出生日期": row.birth,
                "部门": row.department,
              };
            })
          )}`
        );
      }
    
      // 最终上传数据为 validData
      const dryRunStr = parseBoolean(dryRun);
      if (dryRunStr) {
        // 空跑, 可以返回给用户, 展示即将上传的数据格式
        console.log("数据试跑通过, 如果确认, 将导入至数据库");
        return {
          data: validData,
          message: "数据试跑通过, 如果确认, 将导入至数据库",
        };
      } else {
        // 当用户点击确认, 数据将上传至数据库
        const result = await User.create(validData);
        return { data: result, message: "上传成功" };
      }
    };
    
    
  2. 验证表头是否符合要求

    import ExcelJS from "exceljs";
    
    /**
    *
    * @param worksheet ExcelJS.Worksheet
    * @param requiredHeaders 需要验证的表头数组
    * @returns {void}
    * @throws {Error} 如果表头缺失或格式不正确
    * @description 验证 Excel 表头是否包含必要的字段
    */
    export const validateHeaders = (
      worksheet: ExcelJS.Worksheet,
      requiredHeaders: string[]
    ): void => {
      const headerRow = worksheet.getRow(1);
      if (!headerRow || !headerRow.values) {
        throw new Error("Excel 表头为空或第 1 行数据缺失");
      }
    
      const headers = Array.isArray(headerRow.values)
        ? headerRow.values.filter(Boolean)
        : [];
      const missingHeaders = requiredHeaders.filter(
        (header) => !headers.includes(header)
      );
      if (missingHeaders.length > 0) {
        throw new Error(
          `Excel 表头缺失:[ ${missingHeaders.join(
            ", "
          )} ], 请确保 Excel 表头包含以下字段: ${requiredHeaders.join(", ")}`
        );
      }
    };
    
  3. row转json

    // /../utils/excel.ts
    /**
     *
    * @param worksheet ExcelJS.Worksheet
    * @param headers 表头数组
    * @description 将 Excel 行数据转换为 JSON 对象
    * @returns {Record<string, any>[]} JSON 对象数组
    */
    export const excelRowToJson = (
      worksheet: ExcelJS.Worksheet,
      headers: string[]
    ) => {
      const jsonData: Record<string, any>[] = [];
      worksheet.eachRow((row, rowNumber) => {
        if (rowNumber === 1) {
          // 跳过表头
          return;
        }
        const rowData: Record<string, any> = {};
        headers.forEach((header, index) => {
          const cellValue = row.getCell(index + 1).value;
          rowData[header] = cellValue;
        });
        jsonData.push(rowData);
      });
      return jsonData;
    };
    
  4. schema校验

    import { SafeParseError, z, ZodTypeAny } from "zod";
    
    export const userSchema = z.object({
      name: z.string(), 
      gender: z.string(),
      birth: z.string(),
      department: z.string(),
    });
    
    /**
    * 使用ZodSchema验证多个对象, 可用于批量上传数据的验证
    *
    * @param rows - The array of objects to validate.
    * @param schema - The Zod schema to validate against.
    * @returns An object containing valid data and errors.
    */
    export function validateRows<TSchema extends ZodTypeAny>(
      rows: any[],
      schema: TSchema
    ): {
      valid: z.infer<TSchema>[];
      errors: { index: number; result: SafeParseError<z.infer<TSchema>> }[];
    } {
      const valid: z.infer<TSchema>[] = [];
      const errors: { index: number; result: SafeParseError<z.infer<TSchema>> }[] =
        [];
    
      rows.forEach((row, index) => {
        const result = schema.safeParse(row);
        if (result.success) {
          const data = result.data;
          valid.push(data);
        } else {
          errors.push({
            index,
            result,
          });
        }
      });
    
      return { valid, errors };
    }
    
  5. 数据重复性校验: excel数据表去重/数据库比对

    
    import User, { IUserModel } from "src/app/models/user";
    import { userData } from "../validators/user";
    
    /**
    * 验证上传数据是否有重复, 比对自身/数据库
    * @param uploadedData
    * @returns {
    *  selfDuplicates: userData[]; // 自身重复
    *  dbDuplicates: IUserModel[]; // 数据库重复
    *  validData: userData[]; // 可导入数据
    * }
    */
    export async function validateUserDuplicates(
      uploadedData: userData[]
    ): Promise<{
      selfDuplicates: userData[];
      dbDuplicates: IUserModel[];
      validData: userData[];
    }> {
      // 自身重复
      const seen = new Map<string, number>();
      const selfDuplicates: userData[] = [];
      const uniqueData: userData[] = [];
    
      for (const item of uploadedData) {
        const key = `${item.name}-${item.birth}-${item.department}-${item.gender}`;
        seen.set(key, (seen.get(key) || 0) + 1);
      }
    
      for (const item of uploadedData) {
        const key = `${item.name}-${item.birth}-${item.department}-${item.gender}`;
        if (seen.get(key)! > 1) {
          selfDuplicates.push(item);
        } else {
          uniqueData.push(item);
        }
      }
    
      // 与数据库做重复验证
      const conditions = uploadedData.map((item) => ({
        name: item.name,
        birth: item.birth,
        department: item.department,
        gender: item.gender,
      }));
      const dbDuplicates = await User.find({
        $or: conditions,
      });
    
      // 返回可导入数据
      const validData = uniqueData.filter((item) => {
        return !dbDuplicates.some((dbItem) => {
          return (
            dbItem.name === item.name &&
            dbItem.birth === item.birth &&
            dbItem.department === item.department &&
            dbItem.gender === item.gender
          );
        });
      });
    
      return { selfDuplicates, dbDuplicates, validData };
    }
    
  6. validateUserDuplicates的范型版本

    type KeyGenerator<T> = (item: T) => string;
    
    interface DuplicateValidationResult<T, U> {
      selfDuplicates: T[];
      dbDuplicates: U[];
      validData: T[];
    }
    
    export async function validateDuplicates<T, U>(
      uploadedData: T[],
      dbQueryFn: (items: T[]) => Promise<U[]>,
      isDuplicate: (dbItem: U, item: T) => boolean,
      keyGen: KeyGenerator<T>
    ): Promise<DuplicateValidationResult<T, U>> {
      // 自身重复检查
      const seen = new Map<string, number>();
      const selfDuplicates: T[] = [];
      const uniqueData: T[] = [];
    
      for (const item of uploadedData) {
        const key = keyGen(item);
        seen.set(key, (seen.get(key) || 0) + 1);
      }
    
      for (const item of uploadedData) {
        const key = keyGen(item);
        if (seen.get(key)! > 1) {
          selfDuplicates.push(item);
        } else {
          uniqueData.push(item);
        }
      }
    
      // 与数据库重复检查
      const dbDuplicates = await dbQueryFn(uploadedData);
    
      // 过滤可导入数据
      const validData = uniqueData.filter((item) => {
        return !dbDuplicates.some((dbItem) => isDuplicate(dbItem, item));
      });
    
      return { selfDuplicates, dbDuplicates, validData };
    }
    
    // 调用
    import User, { IUserModel } from "src/app/models/user";
    import { userData } from "../validators/user";
    
    export async function validateUserDuplicates(uploadedData: userData[]) {
      return validateDuplicates<userData, IUserModel>(
        uploadedData,
        async (items) => {
          const conditions = items.map((item) => ({
            name: item.name,
            birth: item.birth,
            department: item.department,
            gender: item.gender,
          }));
          return User.find({ $or: conditions });
        },
        (dbItem, item) =>
          dbItem.name === item.name &&
          dbItem.birth === item.birth &&
          dbItem.department === item.department &&
          dbItem.gender === item.gender,
        (item) => `${item.name}-${item.birth}-${item.department}-${item.gender}`
      );
    }