feat: 添加修复upload_path脚本
This commit is contained in:
67
scripts/fix_upload_path.py
Normal file
67
scripts/fix_upload_path.py
Normal file
@@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
修复旧数据的 upload_path 字段
|
||||
通过 file_hash 匹配 uploads 目录中的 PDF 文件
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import os
|
||||
import hashlib
|
||||
|
||||
# SQLite database file opened by main(); a relative path, so it is resolved
# against the current working directory.
DB_PATH = 'instance/pdf_translate.db'
# Directory that main() walks recursively looking for uploaded PDF files;
# also relative to the current working directory.
UPLOADS_DIR = 'uploads'
|
||||
|
||||
def compute_file_hash(filepath, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *filepath*.

    The file is read in binary mode in blocks of *chunk_size* bytes so that
    large files are never loaded into memory all at once.

    Args:
        filepath: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration.

    Returns:
        The MD5 digest as a lowercase hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    hasher = hashlib.md5()
    with open(filepath, 'rb') as f:
        # read() returns b'' at end of file, which ends the loop.
        while chunk := f.read(chunk_size):
            hasher.update(chunk)
    return hasher.hexdigest()
|
||||
|
||||
def _find_pdf_files(uploads_dir):
    """Return the paths of all PDF files found recursively under *uploads_dir*."""
    # Compare the extension case-insensitively so names like "SCAN.PDF" are
    # picked up too; matching is done by content hash, so this is safe.
    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(uploads_dir)
        for name in names
        if name.lower().endswith('.pdf')
    ]


def _index_pdfs_by_hash(pdf_files):
    """Map each file hash to the first path in *pdf_files* with that hash.

    Files that cannot be read are reported and skipped.
    """
    paths_by_hash = {}
    for pdf_path in pdf_files:
        try:
            pdf_hash = compute_file_hash(pdf_path)
        except OSError as e:
            print(f"处理 {pdf_path} 失败: {e}")
            continue
        # setdefault keeps the first path seen when several files share a
        # hash, i.e. the first match in walk order wins.
        paths_by_hash.setdefault(pdf_hash, pdf_path)
    return paths_by_hash


def main():
    """Fill in missing ``upload_path`` values in the ``translations`` table.

    Selects every row whose ``upload_path`` is NULL, hashes each PDF under
    ``UPLOADS_DIR`` once, and sets ``upload_path`` to the file whose hash
    equals the row's ``file_hash``. Each successful update is committed
    immediately, so rows already fixed are kept if a later step fails.

    Raises:
        sqlite3.Error: If a database operation fails; the connection is
            closed before the error propagates.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()

        # Fetch all translation records that have no upload_path yet.
        cursor.execute('SELECT id, file_hash, original_filename, output_path FROM translations WHERE upload_path IS NULL')
        records = cursor.fetchall()

        print(f"找到 {len(records)} 条需要修复的记录")

        if not records:
            print("无需修复")
            return

        # Collect every uploaded PDF file.
        pdf_files = _find_pdf_files(UPLOADS_DIR)
        print(f"找到 {len(pdf_files)} 个PDF文件")

        # Hash every file exactly once instead of once per record.
        paths_by_hash = _index_pdfs_by_hash(pdf_files)

        # Match records to files by hash.
        fixed_count = 0
        for record_id, file_hash, _filename, _output_path in records:
            pdf_path = paths_by_hash.get(file_hash)
            if pdf_path is None:
                continue
            print(f"ID {record_id}: 找到匹配 {pdf_path}")
            cursor.execute('UPDATE translations SET upload_path = ? WHERE id = ?', (pdf_path, record_id))
            conn.commit()
            fixed_count += 1
    finally:
        # Release the connection on every path, including errors.
        conn.close()

    print(f"修复完成,共修复 {fixed_count} 条记录")
|
||||
|
||||
# Script entry point: run the repair only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user