单线程从Excel导入100w数据到MySQL

从Excel导入100万数据到MySQL,首先是分批读取Excel中的100w数据,这一点EasyExcel有自己的解决方案,这里我用的是每批20w,其次就是往DB里插入,怎么去插入,当然不能一条一条的循环,应该批量插入这20w条数据,同样也不能使用MyBatis的批量插入,因为效率也低。

我们应该使用分批读取 + JDBC批处理 + 手动事务控制去插入,这样效率比较快。


前言

这里我使用的是单线程,多线程当然也可以实现,但是多线程容易造成OOM,需要合理的设置线程大小和JVM参数,这里就使用单线程测试下批量操作即可。多线程的话可以自己单独去实现一下,但要注意使用的正确性。

实战

创建SpringBoot项目

POM文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.7.8</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<groupId>com.itjing</groupId>
<artifactId>excel-batch-deal</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>excel-batch-deal</name>
<description>excel-batch-deal</description>
<properties>
<java.version>1.8</java.version>
</properties>
<dependencies>

<!-- easyexcel -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>3.0.5</version>
</dependency>

<!-- mybatis和mysql驱动 -->
<dependency>
<groupId>org.mybatis.spring.boot</groupId>
<artifactId>mybatis-spring-boot-starter</artifactId>
<version>2.2.2</version>
</dependency>
<dependency>
<groupId>com.mysql</groupId>
<artifactId>mysql-connector-j</artifactId>
<scope>runtime</scope>
</dependency>

<!-- hutool -->
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.8.9</version>
</dependency>

<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>

<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<excludes>
<exclude>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>

</project>

YAML配置文件

注意jdbc批处理需要在url加上rewriteBatchedStatements=true

1
2
3
4
5
6
7
8
9
10
11
12
13
server:
port: 22222
spring:
application:
name: excel-batch-deal
datasource:
driver-class-name: com.mysql.cj.jdbc.Driver
url: jdbc:mysql://127.0.0.1:3306/stream_query?useUnicode=true&characterEncoding=UTF-8&autoReconnect=true&useSSL=false&serverTimezone=GMT%2B8&allowPublicKeyRetrieval=true&nullCatalogMeansCurrent=true&allowMultiQueries=true&rewriteBatchedStatements=true
username: root
password: root

mybatis:
mapper-locations: classpath:mapper/**/*.xml

Excel文件对应的实体

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
package com.itjing.excel.vo;

import com.alibaba.excel.annotation.ExcelProperty;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;

/**
* @Description: 用户导出VO
* @Author: lijing
* @CreateTime: 2023-01-23 15:31
*/
@Data
@AllArgsConstructor
@NoArgsConstructor
public class UserVO {

@ExcelProperty("用户名")
private String name;

@ExcelProperty("年龄")
private Integer age;

@ExcelProperty("邮箱")
private String email;

@ExcelProperty("电话")
private String phone;

}

用户导入监听器

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
package com.itjing.excel.listener;

import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.AnalysisEventListener;
import com.itjing.excel.service.UserService;
import com.itjing.excel.vo.UserVO;
import lombok.extern.slf4j.Slf4j;

import java.util.ArrayList;
import java.util.List;

/**
* @Description: 用户导入监听器
* @Author: lijing
* @CreateTime: 2023-01-24 12:06
*/
@Slf4j
public class UserListener extends AnalysisEventListener<UserVO> {

/**
* 分批导入,批量值
*/
private static final int BATCH_COUNT = 200000;

private List<UserVO> cachedDataList = new ArrayList<>();

private UserService userService;

public List<UserVO> getCachedDataList() {
return cachedDataList;
}

public UserListener(UserService userService) {
this.userService = userService;
}

@Override
public void invoke(UserVO data, AnalysisContext context) {
cachedDataList.add(data);
// size是否达到批量值,这里其实就是分批,当数据达到批量值的时候执行一次插入
if (cachedDataList.size() >= BATCH_COUNT) {
try {
saveData();
} catch (Exception e) {
log.error("数据解析异常!" + e.getMessage());
}
cachedDataList.clear();
}
}

/**
* Excel中所有数据解析完毕会调用此方法
*
* @param context
*/
@Override
public void doAfterAllAnalysed(AnalysisContext context) {
try {
saveData();
cachedDataList.clear();
log.info("所有数据解析完成!");
} catch (Exception e) {
log.error("数据解析异常!" + e.getMessage());
e.printStackTrace();
}
}


/**
* 保存数据到DB
*/
private void saveData() {
userService.importDBFromExcel(cachedDataList);
}

}

用户业务层

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
package com.itjing.excel.service;

import com.itjing.excel.dto.UserDTO;
import com.itjing.excel.vo.UserVO;

import java.util.List;

/**
* @Description: 用户业务接口
* @Author: lijing
* @CreateTime: 2023-01-23 15:39
*/
public interface UserService {

/**
* 保存数据到DB
* @param cachedDataList
*/
void importDBFromExcel(List<UserVO> cachedDataList);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
package com.itjing.excel.service.impl;

import cn.hutool.db.DbUtil;
import com.itjing.excel.dao.UserMapper;
import com.itjing.excel.dto.UserDTO;
import com.itjing.excel.service.UserService;
import com.itjing.excel.vo.UserVO;
import lombok.extern.slf4j.Slf4j;
import org.apache.ibatis.cursor.Cursor;
import org.apache.ibatis.session.ResultContext;
import org.apache.ibatis.session.ResultHandler;
import org.apache.ibatis.session.SqlSession;
import org.mybatis.spring.SqlSessionTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StopWatch;

import javax.sql.DataSource;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
* @Description:
* @Author: lijing
* @CreateTime: 2023-01-23 15:40
*/
@Service
@Slf4j
public class UserServiceImpl implements UserService {

@Autowired
private DataSource dataSource;

/**
* 保存数据到DB
*
* @param cachedDataList
*/
@Override
public void importDBFromExcel(List<UserVO> cachedDataList) {
if (!CollectionUtils.isEmpty(cachedDataList)) {
// JDBC批处理+事务操作完成对数据的插入
Connection conn = null;
PreparedStatement ps = null;
StopWatch sw = new StopWatch();
sw.start();
try {
conn = dataSource.getConnection();
// 控制事务不自动提交
conn.setAutoCommit(false);
String sql = "insert into user (name,age,email,phone) values (?,?,?,?)";
ps = conn.prepareStatement(sql);
// 循环结果集:这里循环不支持"烂布袋"表达式
for (int i = 0; i < cachedDataList.size(); i++) {
UserVO item = cachedDataList.get(i);
ps.setString(1, item.getName());
ps.setInt(2, item.getAge());
ps.setString(3, item.getEmail());
ps.setString(4, item.getPhone());
// 将一组参数添加到此 PreparedStatement 对象的批处理命令中
ps.addBatch();
//if (i % 50000 == 0) {
// ps.executeBatch();
// ps.clearBatch();
//}
}
// 执行批处理
ps.executeBatch();
ps.clearBatch();
// 手动提交事务
conn.commit();
} catch (Exception e) {
e.printStackTrace();
} finally {
// 关连接
DbUtil.close(conn, ps);
}
sw.stop();
long totalTimeMillis = sw.getTotalTimeMillis();
log.info("数据插入共耗时:" + totalTimeMillis + "毫秒");
}
}
}

用户控制器

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
package com.itjing.excel.controller;

import com.alibaba.excel.EasyExcel;
import com.itjing.excel.dto.UserDTO;
import com.itjing.excel.listener.UserListener;
import com.itjing.excel.service.UserService;
import com.itjing.excel.util.ExcelUtil;
import com.itjing.excel.vo.UserVO;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.util.StopWatch;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.util.List;

/**
* @Description:
* @Author: lijing
* @CreateTime: 2023-01-23 15:35
*/
@RestController
@RequestMapping("/user")
@Slf4j
public class UserController {

/**
* 从excel导入100万数据到mysql
*/
@GetMapping("/importExcel")
public void importExcel() {
String fileName = "C:\\Users\\Administrator\\Desktop\\用户表.xlsx";
// 记录开始读取Excel时间,也是导入程序开始时间
StopWatch sw = new StopWatch();
sw.start();
// 读取所有Sheet的数据,每次读完一个Sheet就会调用这个方法
EasyExcel.read(fileName, UserVO.class, new UserListener(userService)).doReadAll();
sw.stop();
log.info("------结束读取耗时" + (sw.getTotalTimeMillis()) + "ms------");
}
}

本人电脑测试导入100w条数据大概1分钟,这和电脑配置也有一定的关系。

点击查看

本文标题:单线程从Excel导入100w数据到MySQL

文章作者:LiJing

发布时间:2023年01月24日 - 14:00:42

最后更新:2023年06月03日 - 09:57:02

原始链接:https://blog-next.xiaojingge.com/posts/752882170.html

许可协议: 署名-非商业性使用-禁止演绎 4.0 国际 转载请保留原文链接及作者。

-------------------本文结束 感谢您的阅读-------------------