近期要统一个人数据到NAS端管理,顺手把小米笔记的数据迁移到了为知笔记中。本文记录了整个过程以及踩过的坑。
数据下载
MIUI笔记APP是没有导入导出功能的,而且没有笔记历史版本的功能,所以我直接选择从小米云中下载已经同步了的便签数据。
首先打开小米云 https://i.mi.com/ ,登录,找到云笔记

按下F12,找到加载全部笔记、加载笔记详情、加载历史版本和历史版本详情的API,总结了如下文档:
获取全部笔记
- API:https://i.mi.com/note/full/page
输出样例:
{
"result": "ok",
"retriable": false,
"code": 0,
"data": {
"entries": [
{
"snippet": "xxx",
"modifyDate": 笔记修改时间,
"colorId": 0,
"subject": "",
"alertDate": 0,
"type": "note",
"folderId": "xxx",
"setting": {
"themeId": 0,
"stickyTime": 0,
"version": 0
},
"deleteTime": 0,
"alertTag": 0,
"id": "xxxxx", # 这里是笔记ID
"tag": "xxx",
"createDate": 笔记创建时间,
"status": "normal",
"extraInfo": "{\"title\":\"\",\"note_content_type\":\"common\",\"mind_content\":\"\",\"mind_content_plain_text\":\"\"}"
},
...
]
}
}
获取单个笔记详情
- API:https://i.mi.com/note/note/笔记ID/
输出样例:
{
"result": "ok",
"retriable": false,
"code": 0,
"data": {
"entry": {
"snippet": "xxx",
"modifyDate": 笔记修改时间,
"colorId": 0,
"subject": "",
"alertDate": 0,
"type": "note",
"folderId": "xxx",
"content": "这里是笔记内容",
"setting": {
"themeId": 0,
"stickyTime": 置顶时间,
"version": 0
},
"deleteTime": 0,
"alertTag": 0,
"id": "xxx",
"tag": "xxx",
"createDate": 笔记创建时间,
"status": "normal",
"extraInfo": "{\"note_content_type\":\"common\",\"mind_content_plain_text\":\"\",\"title\":\"\",\"mind_content\":\"\"}"
}
},
"description": "成功",
}
获取单个笔记的历史版本
- API:https://i.mi.com/note/full/history/times?id=笔记ID
输出样例:
{
"result": "ok",
"retriable": false,
"code": 0,
"data": {
"tvList": [
{
"updateTime": 更新时间,
"version": 这里是版本ID
},
...
]
},
"description": "成功",
}
获取某一版本内容
- API:https://i.mi.com/note/full/history?id=笔记ID&version=版本ID
输出样例:
{
"result": "ok",
"retriable": false,
"code": 0,
"data": {
"entry": {
"modifyDate": 修改时间,
"colorId": 0,
"alertDate": 0,
"type": "note",
"folderId": 0,
"content": "这里是该版本笔记内容",
"setting": {
"themeId": 0,
"stickyTime": 0,
"version": 0
},
"deleteTime": 0,
"alertTag": 0,
"id": "笔记ID",
"tag": "笔记标签",
"createDate": 笔记创建时间,
"status": "normal",
"extraInfo": "{\"title\":\"\",\"note_content_type\":\"common\",\"mind_content\":\"\",\"mind_content_plain_text\":\"\"}"
}
},
"description": "成功"
}
根据这个API文档,不难写出一个爬虫,把所有笔记都爬下来
import os
import json
import requests
import time
# Global cookies sent with every request; you must supply them yourself
# (fill in each "xxx" value).
cookies = {
    'uLocale': 'zh_CN',
    'iplocale': 'zh_CN',
    'userId': 'xxx',
    'i.mi.com_isvalid_servicetoken': 'true',
    'i.mi.com_ph': 'xxx',
    'i.mi.com_istrudev': 'true',
    'serviceToken': 'xxx',
    'i.mi.com_slh': 'xxx',
}

# Request headers imitating the Chrome browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
    "Accept": "application/json",
    "Content-Type": "application/json",
}
def get_current_timestamp():
    """Return the current Unix time in milliseconds as an int."""
    seconds = time.time()
    return int(seconds * 1000)
def get_all_notes(timeout=30):
    """Fetch the note list from ``https://i.mi.com/note/full/page``.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON response (a dict).

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx HTTP status.

    NOTE(review): a single request with ``limit=200`` is made; whether the
    endpoint needs further paging for larger accounts is not handled here —
    confirm against an account with more than 200 notes.
    """
    response = requests.get(
        'https://i.mi.com/note/full/page',
        params={'ts': get_current_timestamp(), 'limit': 200},
        headers=headers,
        cookies=cookies,
        timeout=timeout,
    )
    # Surface HTTP errors (e.g. expired cookies) instead of failing later
    # while decoding a non-JSON error page.
    response.raise_for_status()
    return response.json()
def get_note_content(note_id, timeout=30):
    """Fetch the full detail of one note from ``/note/note/<note_id>/``.

    Args:
        note_id: The note ID as returned in the note list.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON response (a dict).

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx HTTP status.
    """
    response = requests.get(
        f'https://i.mi.com/note/note/{note_id}/',
        params={'ts': get_current_timestamp()},
        headers=headers,
        cookies=cookies,
        timeout=timeout,
    )
    # Surface HTTP errors instead of failing later while decoding the body.
    response.raise_for_status()
    return response.json()
def get_note_history(note_id, timeout=30):
    """Fetch the list of history versions of one note.

    Calls ``/note/full/history/times`` with the note ID.

    Args:
        note_id: The note ID as returned in the note list.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON response (a dict).

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx HTTP status.
    """
    response = requests.get(
        'https://i.mi.com/note/full/history/times',
        params={'ts': get_current_timestamp(), 'id': note_id},
        headers=headers,
        cookies=cookies,
        timeout=timeout,
    )
    # Surface HTTP errors instead of failing later while decoding the body.
    response.raise_for_status()
    return response.json()
def get_note_version(note_id, version_id, timeout=30):
    """Fetch the content of one specific history version of a note.

    Calls ``/note/full/history`` with the note ID and the version ID.

    Args:
        note_id: The note ID as returned in the note list.
        version_id: The ``version`` value of an entry in the history list.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON response (a dict).

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx HTTP status.
    """
    response = requests.get(
        'https://i.mi.com/note/full/history',
        params={
            'ts': get_current_timestamp(),
            'id': note_id,
            'version': version_id,
        },
        headers=headers,
        cookies=cookies,
        timeout=timeout,
    )
    # Surface HTTP errors instead of failing later while decoding the body.
    response.raise_for_status()
    return response.json()
def save_note(note_data, note_folder):
    """Write a note's latest content and its raw API response to disk.

    Creates *note_folder* if necessary, then writes two UTF-8 files in it:
    ``index.html`` holding ``note_data['data']['entry']['content']`` and
    ``info`` holding the whole *note_data* as indented JSON.
    """
    os.makedirs(note_folder, exist_ok=True)

    html_path = os.path.join(note_folder, 'index.html')
    with open(html_path, 'w', encoding='utf-8') as html_file:
        html_file.write(note_data['data']['entry']['content'])

    info_path = os.path.join(note_folder, 'info')
    with open(info_path, 'w', encoding='utf-8') as info_file:
        json.dump(note_data, info_file, ensure_ascii=False, indent=4)
def save_note_versions(note_id, versions, note_folder):
    """Download every history version of a note and store it in *note_folder*.

    For each item of *versions* (dicts with ``version`` and ``updateTime``
    keys) the version is fetched with ``get_note_version`` and written as
    two UTF-8 files: ``index.html_<updateTime>`` with the version content and
    ``info_<updateTime>`` with the raw JSON response.

    A version whose response carries no ``data.entry.content`` is reported
    and skipped, so one bad version does not abort the whole export.

    Args:
        note_id: ID of the note the versions belong to.
        versions: Iterable of version descriptors.
        note_folder: Directory that receives the files; created if missing.
    """
    # Do not rely on save_note() having created the folder beforehand.
    os.makedirs(note_folder, exist_ok=True)

    for version in versions:
        version_id = version['version']
        timestamp = version['updateTime']
        version_data = get_note_version(note_id, version_id)

        entry = (version_data.get('data') or {}).get('entry') or {}
        content = entry.get('content')
        if content is None:
            print(f'笔记ID {note_id} 的历史版本 {version_id} 没有内容,已跳过。')
            continue

        # Content of this version.
        content_path = os.path.join(note_folder, f'index.html_{timestamp}')
        with open(content_path, 'w', encoding='utf-8') as f:
            f.write(content)

        # Raw JSON response of this version.
        info_path = os.path.join(note_folder, f'info_{timestamp}')
        with open(info_path, 'w', encoding='utf-8') as f:
            json.dump(version_data, f, ensure_ascii=False, indent=4)
def main():
    """Export every note and its history versions into ``./notes/<note_id>/``.

    The note list is fetched once. Each note is then downloaded and saved
    with ``save_note``, followed by its history versions when the history
    response contains a ``tvList``. A failure while handling one note is
    reported and the export continues with the next note.
    """
    notes_data = get_all_notes()
    print(notes_data)

    # Previously a failed list request ended the script without any message.
    if notes_data.get('result') != 'ok':
        print(f"获取笔记列表失败:{notes_data.get('description', notes_data.get('result'))}")
        return

    for note in notes_data['data']['entries']:
        note_id = note['id']
        print(f'正在处理笔记ID: {note_id}')
        try:
            # Latest content.
            note_content_data = get_note_content(note_id)
            note_folder = f'./notes/{note_id}'
            save_note(note_content_data, note_folder)

            # History versions.
            note_history_data = get_note_history(note_id)
            history = note_history_data.get('data') or {}
            if note_history_data.get('result') == 'ok' and 'tvList' in history:
                save_note_versions(note_id, history['tvList'], note_folder)
            else:
                print(f'笔记ID {note_id} 没有历史版本。')
        except Exception as e:
            # Top-level boundary: report and continue, the same way the
            # import script treats a failing note.
            print(f'处理笔记 {note_id} 时出错:{e}')


if __name__ == '__main__':
    main()
此时我们得到了一个notes文件夹,里面每个子文件夹都是一个笔记的全部版本:index.html为最新版本,index.html_xxx为历史版本(xxx为该版本的更新时间戳),info为最新版本的原始json数据,info_xxx为对应历史版本的原始json数据
为知笔记容器分析
数据有了,要想导入到为知笔记中,还得分析一下它的工作原理。
首先进入为知笔记的容器中
docker exec -it iznote /bin/bash
发现里面有个MySQL服务,还有一个nodejs应用。在为知笔记中,每个用户都有一个UUID,在/wiz/storage/data_root/document2目录下可以看到所有用户的UUID(注意其中也包含系统用户),笔记数据和历史版本数据就保存在这个目录下
在里面看看为知的源码,发现/wiz/app/entrypoint.sh中写有容器内部MySQL的密码

为了在外部访问这个MySQL服务器,我们先添加个root@%的权限。
mysql -uroot -pxxxxxx
-- Allow root to log in from any host so the server can be reached from outside the container.
-- The user and host parts must be quoted separately: 'root@%' as one quoted string
-- names a user literally called "root@%" instead of root at host %.
-- NOTE(review): GRANT ... IDENTIFIED BY is MySQL 5.7-and-earlier syntax; on 8.0 use
-- CREATE USER / ALTER USER followed by GRANT — confirm the server version in the container.
use mysql;
grant all privileges on *.* to 'root'@'%' identified by '密码' with grant option;
flush privileges;
接着重新创建容器,把3306映射出来,即可外部访问了。
通过对MySQL里面的数据表的分析,发现所有笔记主要保存到wizksent.wiz_document表内:
-- wiz_document: one row per note (schema as found in the wizksent database).
-- GUID and MD5 columns are raw binary(16) values, so they are unreadable when
-- selected directly (display them with e.g. HEX()).
-- A note is identified by the unique pair (DOCUMENT_GUID, KB_GUID);
-- DOCUMENT_TITLE and BODY_TEXT are covered by the full-text index ft_index.
CREATE TABLE `wiz_document` (
`ID` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
`DOCUMENT_GUID` binary(16) NOT NULL COMMENT '笔记GUID',
`KB_GUID` binary(16) NOT NULL COMMENT '知识库GUID',
`VERSION` bigint(20) NOT NULL DEFAULT '-1' COMMENT '版本号',
`DOCUMENT_DATA_MD5` binary(16) DEFAULT NULL COMMENT '笔记数据md5',
`DT_DATA_MODIFIED` datetime DEFAULT NULL COMMENT '笔记数据修改时间',
`DOCUMENT_INFO_MD5` binary(16) DEFAULT NULL COMMENT '笔记基本信息md5',
`DT_INFO_MODIFIED` datetime DEFAULT NULL COMMENT '笔记基本信息修改时间',
`DOCUMENT_DATA_SIZE` int(11) NOT NULL COMMENT '笔记数据大小',
`DOCUMENT_TITLE` varchar(255) DEFAULT NULL COMMENT '笔记标题',
`DOCUMENT_CATEGORY` varchar(260) DEFAULT NULL COMMENT '笔记目录',
`DOCUMENT_OWNER` varchar(150) DEFAULT NULL COMMENT '笔记所有者,用作标识普通editor能否删除和修改',
`DOCUMENT_OWNER_GUID` binary(16) DEFAULT NULL COMMENT '文档作者的USERGUID',
`DOCUMENT_ICON_INDEX` int(11) DEFAULT NULL COMMENT '笔记ICON索引',
`DOCUMENT_PROTECT` tinyint(4) DEFAULT NULL COMMENT '笔记是否加密',
`DOCUMENT_READ_COUNT` int(11) DEFAULT NULL COMMENT '笔记阅读次数',
`DOCUMENT_ATTACHMENT_COUNT` int(11) DEFAULT NULL COMMENT '笔记附件数',
`DOCUMENT_TYPE` varchar(16) DEFAULT NULL COMMENT '笔记类型',
`DOCUMENT_FILE_TYPE` varchar(16) DEFAULT NULL COMMENT '笔记的文件类型',
`DT_CREATED` datetime NOT NULL COMMENT '创建日期',
`DT_ACCESSED` datetime DEFAULT NULL COMMENT '笔记最后访问日期',
`GPS_LATITUDE` float(10,6) DEFAULT NULL COMMENT '纬度',
`GPS_LONGITUDE` float(10,6) DEFAULT NULL COMMENT '经度',
`DOCUMENT_URL` varchar(2048) DEFAULT NULL COMMENT '笔记 URL',
`STYLE_GUID` binary(16) DEFAULT NULL COMMENT '样式GUID',
`DOCUMENT_PARAM_MD5` binary(16) DEFAULT NULL COMMENT '笔记参数md5',
`DT_PARAM_MODIFIED` datetime DEFAULT NULL COMMENT '笔记参数修改时间',
`DOCUMENT_SEO` varchar(100) DEFAULT NULL COMMENT '博客发布插件使用',
`DOCUMENT_AUTHOR` varchar(32) DEFAULT NULL COMMENT '助手插件设置的 author',
`DOCUMENT_KEYWORDS` varchar(300) DEFAULT NULL COMMENT '助手插件设置的 keywords',
`DOCUMENT_ABSTRACT_TEXT` varchar(255) DEFAULT NULL,
`IS_DOCUMENT_ABSTRACT_IMAGE` tinyint(4) NOT NULL DEFAULT '0',
`BODY_TEXT` longtext,
`DOCUMENT_COVER_IMAGE` tinyint(4) DEFAULT NULL,
`MARKERS` varchar(64) DEFAULT NULL,
PRIMARY KEY (`ID`),
UNIQUE KEY `uniq_doc_kb` (`DOCUMENT_GUID`,`KB_GUID`),
KEY `kb_version` (`KB_GUID`,`VERSION`),
KEY `index_kbguid_category` (`KB_GUID`,`DOCUMENT_CATEGORY`(191)),
KEY `idx_kb_datamodified` (`KB_GUID`,`DT_DATA_MODIFIED`),
FULLTEXT KEY `ft_index` (`DOCUMENT_TITLE`,`BODY_TEXT`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
其中DOCUMENT_GUID为笔记的GUID,KB_GUID为用户知识库的GUID,这两个字段在MySQL中都以二进制(binary(16))保存,所以直接查看就是乱码。
这么看来思路就非常清晰了,我们要先把笔记转成对应的格式,放在以DOCUMENT_GUID命名的文件夹下,再把所有文件夹放在/wiz/storage/data_root/document2/KB_GUID下面,最后在wiz_document表中为每篇笔记插入一条对应的记录,应该就能实现导入了!
数据导入
通过上面的分析,写了个脚本直接导入:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import json
import re
import random
import pymysql
import datetime
def generate_guid():
    """Return a 32-character hex GUID: the prefix ``ff03`` plus 28 random hex digits.

    The fixed prefix marks everything created by this import. If the import
    goes wrong, the imported data can be rolled back with ``rm -rf ff03*``
    without touching other notes.
    """
    hex_digits = '0123456789abcdef'
    random_tail = random.choices(hex_digits, k=28)
    return "ff03" + ''.join(random_tail)
def replace_newlines(content):
    """Return *content* with every line feed character replaced by ``<br>``."""
    pieces = content.split("\n")
    return "<br>".join(pieces)
def remove_html(text):
    """Strip HTML tags and line breaks from *text*.

    Every ``<...>`` span is removed by a regular expression first, then all
    line feed and carriage return characters are dropped.
    """
    without_tags = re.sub(r'<[^>]*>', '', text)
    # One pass that deletes both kinds of line-break characters.
    return without_tags.translate({ord("\n"): None, ord("\r"): None})
def format_guid(guid_str):
    """Format a dash-less 32-character GUID string in the 8-4-4-4-12 layout.

    Raises:
        ValueError: If *guid_str* is not exactly 32 characters long.
    """
    if len(guid_str) != 32:
        raise ValueError("GUID字符串长度必须为32个字符")
    # Consecutive pairs of these offsets delimit the five dash-separated groups.
    boundaries = (0, 8, 12, 16, 20, 32)
    return '-'.join(
        guid_str[start:end] for start, end in zip(boundaries, boundaries[1:])
    )
# Editor/client identity written into every generated note.info file and
# encoded in the file names of history versions.
_EDITOR_GUID = "00000000-0000-0000-0000-000000000000"
_CLIENT_TYPE = "web"
_CLIENT_VERSION = "4.0"


def _format_ms(ms):
    """Convert a millisecond Unix timestamp to a local 'YYYY-MM-DD HH:MM:SS' string."""
    return datetime.datetime.fromtimestamp(ms / 1000).strftime("%Y-%m-%d %H:%M:%S")


def _convert_html(src_path, dst_path):
    """Copy an exported note file to dst_path with line feeds turned into <br>."""
    with open(src_path, "r", encoding="utf-8") as f:
        text = f.read()
    with open(dst_path, "w", encoding="utf-8") as f:
        f.write(replace_newlines(text))


def _write_note_info(path, version):
    """Write the note.info JSON document describing one version of a note."""
    info = {
        "versionInfo": {
            "version": version,
            "editorGuid": _EDITOR_GUID,
            "clientType": _CLIENT_TYPE,
            "clientVersion": _CLIENT_VERSION,
        },
        "resources": [],
    }
    with open(path, "w", encoding="utf-8") as f:
        json.dump(info, f, ensure_ascii=False)


def process_note(note_folder, output_base, db_conn,
                 kb_guid="00000000000000000000000000000000",
                 category="/小米便签导入/",
                 owner="admin@wiz.cn"):
    """Convert one exported note into WizNote's on-disk layout and register it in MySQL.

    Reads ``info``, ``index.html`` and every ``index.html_<timestamp>`` file
    from *note_folder*. It writes the converted files into a new folder
    ``<output_base>/<dashed GUID>/``, inserts one row into ``wiz_document``
    and finally writes a ``success`` marker file into the output folder.

    Args:
        note_folder: Folder of one exported note.
        output_base: Directory that receives one sub-folder per note.
        db_conn: Open database connection providing ``cursor()``,
            ``commit()`` and ``rollback()``.
        kb_guid: GUID of the target knowledge base as hex, with or without
            dashes. The default is a placeholder and must be replaced with
            the real KB_GUID.
        category: Value stored in ``DOCUMENT_CATEGORY``.
        owner: Value stored in ``DOCUMENT_OWNER``.

    Raises:
        ValueError: If the note lacks ``createDate``/``modifyDate``, if
            *kb_guid* is not valid hex, or if a history file name does not
            end in a numeric timestamp.
        Exception: Any database error is re-raised after a rollback.
    """
    # Read the raw API response saved by the export step.
    info_path = os.path.join(note_folder, "info")
    with open(info_path, "r", encoding="utf-8") as f:
        entry = json.load(f)["data"]["entry"]

    # ``or ""`` also covers keys that are present but null in the JSON.
    snippet = entry.get("snippet") or ""
    content = entry.get("content") or ""
    subject = (entry.get("subject") or "").strip()
    create_ms = entry.get("createDate")
    modify_ms = entry.get("modifyDate")
    if create_ms is None or modify_ms is None:
        raise ValueError("笔记缺少 createDate 或 modifyDate,无法导入")

    # Millisecond timestamps -> date strings for the datetime columns.
    dt_create = _format_ms(create_ms)
    dt_modify = _format_ms(modify_ms)
    # Untitled notes use their creation time as the title.
    if not subject:
        subject = dt_create

    # Validate the KB GUID before anything is written to disk.
    kb_guid_bin = bytes.fromhex(kb_guid.replace("-", ""))
    note_guid = generate_guid()
    note_guid_bin = bytes.fromhex(note_guid)

    output_folder = os.path.join(output_base, format_guid(note_guid))
    os.makedirs(output_folder, exist_ok=True)

    # ------------------
    # Latest version
    # ------------------
    _convert_html(
        os.path.join(note_folder, "index.html"),
        os.path.join(output_folder, "index.html"),
    )
    # The latest version is identified by the note's modification timestamp.
    _write_note_info(os.path.join(output_folder, "note.info"), modify_ms)

    # --------------------
    # History versions
    # --------------------
    prefix = "index.html_"
    for file_name in sorted(os.listdir(note_folder)):
        if not file_name.startswith(prefix):
            continue
        # Exported history files are named index.html_<timestamp>.
        timestamp = file_name[len(prefix):]
        # Parse first so a malformed name fails before any file is written.
        version = int(timestamp)
        # Target names: <base>_00<timestamp>_<editor GUID>_<client type>_<client version>
        suffix = f"00{timestamp}_{_EDITOR_GUID}_{_CLIENT_TYPE}_{_CLIENT_VERSION}"
        _convert_html(
            os.path.join(note_folder, file_name),
            os.path.join(output_folder, f"index.html_{suffix}"),
        )
        _write_note_info(os.path.join(output_folder, f"note.info_{suffix}"), version)

    # -----------------------
    # Register the note in MySQL
    # -----------------------
    version_val = -1
    # NOTE(review): fixed placeholder, not the real size of the written data —
    # confirm WizNote does not rely on this value.
    document_data_size = 100
    document_protect = 0
    document_read_count = 0
    document_attachment_count = 0
    document_type = ""
    # NOTE(review): presumably the Unix epoch expressed in UTC+8 — confirm.
    dt_param_modified = "1970-01-01 08:00:00"
    document_icon_index = -1

    # Plain-text abstract and body with tags and line breaks removed.
    document_abstract_text = remove_html(snippet)
    body_text = remove_html(content)

    sql = """
        INSERT INTO wiz_document
        (DOCUMENT_GUID, KB_GUID, VERSION, DT_DATA_MODIFIED, DT_INFO_MODIFIED, DOCUMENT_DATA_SIZE, DOCUMENT_TITLE, DOCUMENT_CATEGORY, DOCUMENT_OWNER, DT_CREATED, DT_ACCESSED, DOCUMENT_ABSTRACT_TEXT, BODY_TEXT, DOCUMENT_PROTECT, DOCUMENT_READ_COUNT, DOCUMENT_ATTACHMENT_COUNT, DOCUMENT_TYPE, DT_PARAM_MODIFIED, DOCUMENT_ICON_INDEX)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    params = (
        note_guid_bin,
        kb_guid_bin,
        version_val,
        dt_modify,  # DT_DATA_MODIFIED
        dt_modify,  # DT_INFO_MODIFIED
        document_data_size,
        subject[:255],  # DOCUMENT_TITLE is varchar(255)
        category,
        owner,
        dt_create,  # DT_CREATED
        dt_modify,  # DT_ACCESSED
        document_abstract_text[:254],
        body_text,
        document_protect,
        document_read_count,
        document_attachment_count,
        document_type,
        dt_param_modified,
        document_icon_index,
    )
    try:
        with db_conn.cursor() as cursor:
            cursor.execute(sql, params)
        db_conn.commit()
    except Exception:
        # Leave the connection clean for the next note, then report upward.
        db_conn.rollback()
        raise

    # -----------------------
    # Marker written only after the database row is committed
    # -----------------------
    with open(os.path.join(output_folder, "success"), "w", encoding="utf-8") as f:
        f.write("success")
def main():
    """Import every exported note under ``notes/`` into WizNote.

    Each sub-folder of ``notes`` is treated as one note and handed to
    ``process_note``; converted files go to ``output``. A failure on one
    note is printed and the loop continues with the next note. The database
    connection is closed even if the loop itself raises (for example when
    ``notes`` does not exist).
    """
    # Root folders of the exported notes and of the converted output.
    notes_base = "notes"
    output_base = "output"
    os.makedirs(output_base, exist_ok=True)

    # MySQL connection; adjust the parameters to your environment.
    db_conn = pymysql.connect(
        host="172.17.0.2",
        user="root",
        port=3306,
        password="password",
        database="wizksent",
        charset="utf8mb4"
    )
    try:
        # Every sub-folder of notes_base is one note.
        for note_dir in os.listdir(notes_base):
            note_path = os.path.join(notes_base, note_dir)
            if not os.path.isdir(note_path):
                continue
            try:
                process_note(note_path, output_base, db_conn)
                print(f"已处理笔记:{note_dir}")
            except Exception as e:
                # Top-level boundary: report and move on to the next note.
                print(f"处理笔记 {note_dir} 时出错:{e}")
    finally:
        db_conn.close()


if __name__ == "__main__":
    main()