Gave SQLite a try: hundred-million-row data, tens of thousands of concurrent requests, and it seems to hold up fine?
- Single table: 130 million rows, 100 GB
- Transactions per second: 46,000 random reads, 10,000 random writes
- Memory footprint: 16 MB (the Python process, including SQLite's page cache)
- Test hardware: a six- or seven-year-old thin-and-light laptop with a low-end SATA SSD
- Test workload: simulate 500 devices, each saving 200 floats per second, for three days straight (quick sanity-check arithmetic below)
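A quick sanity check on those headline numbers (a rough sketch: it only counts the raw blob payload and ignores SQLite page and rowid overhead):

```python
# Rough check of the figures above; assumes 4-byte floats, ignores page overhead.
devices, floats_per_row, seconds = 500, 200, 3 * 86400
rows = devices * seconds              # one row per device per second, for three days
blob_bytes = floats_per_row * 4       # struct.pack('200f', ...) -> 800 bytes per row
print(f'{rows:,} rows, ~{rows * blob_bytes / 1e9:.0f} GB of raw payload')
# -> 129,600,000 rows, ~104 GB, in line with the 130 M rows / 100 GB above
```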
## How to use the script
- Random write test
```shell
# Starting from the last saved timestamp (or the start of the year if the DB is empty),
# advance one second at a time and write 200 floats for each of the 500 devices,
# until 10,000 rows have been written.
$ python3 test.py -w -d <devices, default 500> -n <rows to write, default 10000>
```
- Random read test
```shell
# Pick one of the 500 devices at random, then a random point in time within its data,
# and read that row; repeat until 10,000 rows have been read.
$ python3 test.py -r -d <devices, default 500> -n <rows to read, default 10000>
# Run for at most 10 seconds
$ timeout -s INT 10 python3 test.py -r
# Eight processes testing at the same time
$ seq 8 | xargs -P 8 -I{} python3 test.py -r
```
## Test script code
```python
import time
import apsw
import random
import struct
import argparse
import itertools
from datetime import datetime, timezone

DEFAULT_DEVICES = 500
DEFAULT_RECORDS = 10000
SQLITE_MAX_WAL_PAGES = 10000
DB_PATH = '/path/to/database.db'
DEFAULT_START_TIME = int(datetime.strptime('2024-01-01 00:00:00', '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc).timestamp())

count = 0
db: apsw.Connection
devices = DEFAULT_DEVICES
records = DEFAULT_RECORDS
dev_time_range: list[range] = []


def parse_args():
    parser = argparse.ArgumentParser(description="SQLite read/write test for multi-sensor data")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-r', action='store_true', help="random read")
    group.add_argument('-w', action='store_true', help="random write")
    parser.add_argument('-d', type=int, default=DEFAULT_DEVICES, help=f"number of devices (default {DEFAULT_DEVICES})")
    parser.add_argument('-n', type=int, default=DEFAULT_RECORDS, help=f"number of rows to test (default {DEFAULT_RECORDS})")
    global devices, records
    args = parser.parse_args()
    devices = args.d
    records = args.n
    return args


# Once random writes have accumulated enough WAL pages, force a checkpoint
# so they are flushed to the main database file and the WAL file is reset.
def sqlite3_wal_hook(db: apsw.Connection, name: str, pages: int):
    if pages > SQLITE_MAX_WAL_PAGES:
        db.wal_checkpoint(mode=apsw.SQLITE_CHECKPOINT_RESTART)
    return apsw.SQLITE_OK


def init_db():
    global db
    db = apsw.Connection(DB_PATH)
    db.execute('PRAGMA journal_mode = WAL')
    db.execute('PRAGMA busy_timeout = 5000')
    db.execute('PRAGMA synchronous = NORMAL')
    db.setwalhook(sqlite3_wal_hook)
    # The rowid packs the device id into the high 32 bits and the unix
    # timestamp into the low 32 bits; dev_id and created are generated columns.
    db.execute('''
        CREATE TABLE IF NOT EXISTS device_data (
            id INTEGER PRIMARY KEY,
            dev_id AS (id >> 32),
            created AS (id & 0xFFFFFFFF),
            data BLOB
        )
    ''')


# Get the time range of data already stored for each device.
def get_dev_time_range():
    # FORMAT/REPLACE/json_each below is only a trick to generate the integer
    # sequence 0 .. dev_count-1 without a recursive CTE.
    rows = db.execute('''
        SELECT dev_min.created, dev_max.created + 1
        FROM (SELECT (max(id) >> 32) + 1 dev_count FROM device_data)
        JOIN json_each(REPLACE(FORMAT('[%*.*s]', dev_count, dev_count, '0'), ' ', '0,')) dev
        JOIN device_data dev_min ON
            dev_min.id = (SELECT min(id) FROM device_data WHERE id >= dev.key << 32)
        JOIN device_data dev_max ON
            dev_max.id = (SELECT max(id) FROM device_data WHERE id <= dev.key << 32 | 0xFFFFFFFF)
    ''').fetchall()
    dev_time_range.extend(list(itertools.starmap(range, rows))[:devices])
    # Devices with no data yet get an empty range starting at DEFAULT_START_TIME.
    dev_time_range.extend([range(DEFAULT_START_TIME, DEFAULT_START_TIME)] * max(devices - len(rows), 0))


def test_read():
    global count
    items = list(enumerate(dev_time_range))
    weights = list(itertools.accumulate(map(lambda i: i.stop - i.start, dev_time_range)))
    while count < records:
        # Pick a device at random, weighted by how much data it holds,
        # then pick a random timestamp within its time range.
        dev, time_range = random.choices(items, cum_weights=weights)[0]
        db.execute('''
            SELECT data
            FROM device_data
            WHERE id = ? << 32 | ?
        ''', (dev, random.choice(time_range))).fetchone()
        count += 1


def test_write():
    global count
    start_time = min(dev_time_range, key=lambda i: i.stop).stop
    for ts in itertools.count(start_time):
        for dev in range(devices):
            if count >= records:
                return
            elif ts in dev_time_range[dev]:
                continue
            floats = [random.random() for i in range(200)]
            data = struct.pack('200f', *floats)
            db.execute('BEGIN IMMEDIATE')
            db.execute('''
                INSERT INTO device_data (id, data)
                VALUES (? << 32 | ?, ?)
            ''', (dev, ts, data))
            db.execute('COMMIT')
            count += 1


def test(is_read: bool):
    init_db()
    get_dev_time_range()
    start_time = time.time()
    try:
        test_read() if is_read else test_write()
    except KeyboardInterrupt:
        pass
    finally:
        duration = time.time() - start_time
        action = 'read' if is_read else 'wrote'
        print(f'Randomly {action} {count:6d} rows in {duration:6.2f} s, {count / duration:8.2f} rows/s on average')


if __name__ == '__main__':
    args = parse_args()
    test(args.r)
```
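The piece doing the heavy lifting is the schema: device id and unix timestamp are packed into the 64-bit `INTEGER PRIMARY KEY` (i.e. the rowid), so a point read is a single B-tree probe and a per-device time window is one contiguous rowid range, with no secondary index at all. Below is a minimal sketch of querying that packed key outside the test script; it assumes the same schema and database path, and `make_id` is just an illustrative helper, not part of the script above.

```python
import struct
import apsw
from datetime import datetime, timezone

def make_id(dev: int, ts: int) -> int:
    # device id in the high 32 bits, unix timestamp in the low 32 bits
    return dev << 32 | ts

db = apsw.Connection('/path/to/database.db')

# Point read: a single probe on the INTEGER PRIMARY KEY (the rowid itself)
row = db.execute('SELECT data FROM device_data WHERE id = ?',
                 (make_id(3, 1704067200),)).fetchone()
if row:
    floats = struct.unpack('200f', row[0])   # 800-byte blob back into 200 floats

# Per-device time window: one contiguous rowid range, no extra index needed
start = int(datetime(2024, 1, 1, tzinfo=timezone.utc).timestamp())
for created, data in db.execute(
        'SELECT created, data FROM device_data WHERE id BETWEEN ? AND ?',
        (make_id(3, start), make_id(3, start + 3600))):
    pass  # one hour of samples for device 3
```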
## File-structure analysis of the 130-million-row, 100 GB database
https://i.imgur.com/yL0rJ7E.png
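The screenshot itself isn't reproduced here. As a rough way to pull similar high-level figures straight out of the file (same database assumed; `sqlite3_analyzer` gives a far more detailed breakdown):

```python
import apsw

db = apsw.Connection('/path/to/database.db')
page_size = db.execute('PRAGMA page_size').fetchone()[0]
page_count = db.execute('PRAGMA page_count').fetchone()[0]
freelist = db.execute('PRAGMA freelist_count').fetchone()[0]
rows = db.execute('SELECT count(*) FROM device_data').fetchone()[0]   # full scan, slow on 130 M rows
print(f'{rows:,} rows, {page_count * page_size / 1e9:.1f} GB file, {freelist:,} free pages')
```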