- Published on
基于 ExcelJS 的数据规范化导入
- Authors
- Name
- Shelton Ma
1. Excel文件导入
使用 ExcelJS 实现 Excel 文件的规范化导入流程。包括文件格式校验、工作表和表头结构验证,将数据行解析为 JSON 格式并映射为业务对象,结合 Zod 进行字段级数据验证,并支持重复数据校验,确保导入数据的完整性与规范性。
1. 接受文件上传
export async function upload(
req: express.Request,
res: express.Response
): Promise<void> {
if (!req.file) {
throw new Error("文件不能为空");
}
const file: Express.Multer.File = req.file;
const result = await dataParse(req.body, file);
res.success(result);
}
2. 解析文件
读取文件并解析数据
/** * 此处使用Multer上传文件, 处理过程包括 * 1. 验证文件格式/工作表/表头是否满足 * 2. 行数据转成json格式 * 3. json数据转成对象 * 4. 使用zod验证 * 5. 数据重复校验 * @param file file path or file object * @returns */ import ExcelJS from "exceljs"; export const dataParse = async ( { dryRun = "true" }:parseData, file: Express.Multer.File | string ): Promise<{ data: any; message: string }> => { const workbook = new ExcelJS.Workbook(); // 支持文件路径和文件对象 if (typeof file === "string") { workbook.xlsx.readFile(file); } else if (file.buffer) { await workbook.xlsx.load(file.buffer); } else { throw new Error( "不支持的文件类型,请传入路径字符串或包含 buffer 的上传文件对象" ); } // 默认解析第一数据表 const worksheet = workbook.getWorksheet(1); if (!worksheet) { throw new Error( `数据有误需要确认, 请确保文件格式为xlsx, 如果是xls, 使用excel转换格式后重新导入` ); } const excelHeaders = [ "姓名", "性别", "出生年月", "部门", ]; // 验证表头是否满足需求 validateHeaders(worksheet, excelHeaders); // 数据转json const rowData = excelRowToJson(worksheet, excelHeaders); // json数据转对象 const users = rowData.map((row) => { return { name: row["姓名"], gender: row["性别"], birth: row["出生年月"], department: row["部门"], createdAt: new Date(), updatedAt: new Date(), }; }); // 数据格式校验 const { valid: uploadedData, errors } = validateRows( users, userSchema ); if (errors.length > 0) { throw new Error( `数据有误需要确认, 请检查以下行数据: ${errors .map( (error) => `第${error.index + 2}行 异常: ${JSON.stringify( error.result, null, 2 )}` ) .join(", ")}` ); } if (!uploadedData || uploadedData.length === 0) { throw new Error(`有效数据为空, 请检查数据是否符合要求`); } // 数据重复校验 const { selfDuplicates, dbDuplicates, validData } = await validateUserDuplicates(uploadedData); if (selfDuplicates.length > 0) { throw new Error( `Excel传入数据有重复, 需要按照 姓名-性别-部门-出生日期 进行去重` ); } if (dbDuplicates.length > 0) { throw new Error( `Excel传入数据与数据库已有数据重复, 需要按照 姓名-性别-部门-出生日期 进行去重, 如有疑问联系管理员, 重复数据: ${JSON.stringify( dbDuplicates.map((row) => { return { "姓名": row.name, "性别": row.gender, "出生日期": row.birth, "部门": row.department, }; }) )}` ); } // 最终上传数据为 validData const dryRunStr = parseBoolean(dryRun); if (dryRunStr) { // 空跑, 可以返回给用户, 展示即将上传的数据格式 console.log("数据试跑通过, 如果确认, 将导入至数据库"); return { data: validData, message: "数据试跑通过, 如果确认, 将导入至数据库", }; } else { // 当用户点击确认, 数据将上传至数据库 const result = await User.create(validData); return { data: result, message: "上传成功" }; } };
验证表头是否符合要求
import ExcelJS from "exceljs"; /** * * @param worksheet ExcelJS.Worksheet * @param requiredHeaders 需要验证的表头数组 * @returns {void} * @throws {Error} 如果表头缺失或格式不正确 * @description 验证 Excel 表头是否包含必要的字段 */ export const validateHeaders = ( worksheet: ExcelJS.Worksheet, requiredHeaders: string[] ): void => { const headerRow = worksheet.getRow(1); if (!headerRow || !headerRow.values) { throw new Error("Excel 表头为空或第 1 行数据缺失"); } const headers = Array.isArray(headerRow.values) ? headerRow.values.filter(Boolean) : []; const missingHeaders = requiredHeaders.filter( (header) => !headers.includes(header) ); if (missingHeaders.length > 0) { throw new Error( `Excel 表头缺失:[ ${missingHeaders.join( ", " )} ], 请确保 Excel 表头包含以下字段: ${requiredHeaders.join(", ")}` ); } };
row转json
// /../utils/excel.ts /** * * @param worksheet ExcelJS.Worksheet * @param headers 表头数组 * @description 将 Excel 行数据转换为 JSON 对象 * @returns {Record<string, any>[]} JSON 对象数组 */ export const excelRowToJson = ( worksheet: ExcelJS.Worksheet, headers: string[] ) => { const jsonData: Record<string, any>[] = []; worksheet.eachRow((row, rowNumber) => { if (rowNumber === 1) { // 跳过表头 return; } const rowData: Record<string, any> = {}; headers.forEach((header, index) => { const cellValue = row.getCell(index + 1).value; rowData[header] = cellValue; }); jsonData.push(rowData); }); return jsonData; };
schema校验
import { SafeParseError, z, ZodTypeAny } from "zod"; export const userSchema = z.object({ name: z.string(), gender: z.string(), birth: z.string(), department: z.string(), }); /** * 使用ZodSchema验证多个对象, 可用于批量上传数据的验证 * * @param rows - The array of objects to validate. * @param schema - The Zod schema to validate against. * @returns An object containing valid data and errors. */ export function validateRows<TSchema extends ZodTypeAny>( rows: any[], schema: TSchema ): { valid: z.infer<TSchema>[]; errors: { index: number; result: SafeParseError<z.infer<TSchema>> }[]; } { const valid: z.infer<TSchema>[] = []; const errors: { index: number; result: SafeParseError<z.infer<TSchema>> }[] = []; rows.forEach((row, index) => { const result = schema.safeParse(row); if (result.success) { const data = result.data; valid.push(data); } else { errors.push({ index, result, }); } }); return { valid, errors }; }
数据重复性校验: excel数据表去重/数据库比对
import User, { IUserModel } from "src/app/models/user"; import { userData } from "../validators/user"; /** * 验证上传数据是否有重复, 比对自身/数据库 * @param uploadedData * @returns { * selfDuplicates: userData[]; // 自身重复 * dbDuplicates: IUserModel[]; // 数据库重复 * validData: userData[]; // 可导入数据 * } */ export async function validateUserDuplicates( uploadedData: userData[] ): Promise<{ selfDuplicates: userData[]; dbDuplicates: IUserModel[]; validData: userData[]; }> { // 自身重复 const seen = new Map<string, number>(); const selfDuplicates: userData[] = []; const uniqueData: userData[] = []; for (const item of uploadedData) { const key = `${item.name}-${item.birth}-${item.department}-${item.gender}`; seen.set(key, (seen.get(key) || 0) + 1); } for (const item of uploadedData) { const key = `${item.name}-${item.birth}-${item.department}-${item.gender}`; if (seen.get(key)! > 1) { selfDuplicates.push(item); } else { uniqueData.push(item); } } // 与数据库做重复验证 const conditions = uploadedData.map((item) => ({ name: item.name, birth: item.birth, department: item.department, gender: item.gender, })); const dbDuplicates = await User.find({ $or: conditions, }); // 返回可导入数据 const validData = uniqueData.filter((item) => { return !dbDuplicates.some((dbItem) => { return ( dbItem.name === item.name && dbItem.birth === item.birth && dbItem.department === item.department && dbItem.gender === item.gender ); }); }); return { selfDuplicates, dbDuplicates, validData }; }
validateUserDuplicates的范型版本
type KeyGenerator<T> = (item: T) => string; interface DuplicateValidationResult<T, U> { selfDuplicates: T[]; dbDuplicates: U[]; validData: T[]; } export async function validateDuplicates<T, U>( uploadedData: T[], dbQueryFn: (items: T[]) => Promise<U[]>, isDuplicate: (dbItem: U, item: T) => boolean, keyGen: KeyGenerator<T> ): Promise<DuplicateValidationResult<T, U>> { // 自身重复检查 const seen = new Map<string, number>(); const selfDuplicates: T[] = []; const uniqueData: T[] = []; for (const item of uploadedData) { const key = keyGen(item); seen.set(key, (seen.get(key) || 0) + 1); } for (const item of uploadedData) { const key = keyGen(item); if (seen.get(key)! > 1) { selfDuplicates.push(item); } else { uniqueData.push(item); } } // 与数据库重复检查 const dbDuplicates = await dbQueryFn(uploadedData); // 过滤可导入数据 const validData = uniqueData.filter((item) => { return !dbDuplicates.some((dbItem) => isDuplicate(dbItem, item)); }); return { selfDuplicates, dbDuplicates, validData }; } // 调用 import User, { IUserModel } from "src/app/models/user"; import { userData } from "../validators/user"; export async function validateUserDuplicates(uploadedData: userData[]) { return validateDuplicates<userData, IUserModel>( uploadedData, async (items) => { const conditions = items.map((item) => ({ name: item.name, birth: item.birth, department: item.department, gender: item.gender, })); return User.find({ $or: conditions }); }, (dbItem, item) => dbItem.name === item.name && dbItem.birth === item.birth && dbItem.department === item.department && dbItem.gender === item.gender, (item) => `${item.name}-${item.birth}-${item.department}-${item.gender}` ); }