67 lines
1.9 KiB
Python
67 lines
1.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
修复旧数据的 upload_path 字段
|
|
通过 file_hash 匹配 uploads 目录中的 PDF 文件
|
|
"""
|
|
|
|
import sqlite3
|
|
import os
|
|
import hashlib
|
|
|
|
# SQLite database file that holds the `translations` table
# (relative path, resolved against the current working directory).
DB_PATH = 'instance/pdf_translate.db'

# Root directory that is walked recursively to find uploaded PDF files
# (relative path, resolved against the current working directory).
UPLOADS_DIR = 'uploads'
|
|
|
|
def compute_file_hash(filepath, chunk_size=1024 * 1024):
    """Return the hexadecimal MD5 digest of the file at *filepath*.

    The file is opened in binary mode and fed to the hasher in fixed-size
    chunks, so a large file is never held in memory all at once.

    Args:
        filepath: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration.

    Returns:
        The MD5 digest as a hex string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    hasher = hashlib.md5()
    with open(filepath, 'rb') as f:
        # iter() with a sentinel stops as soon as read() returns b'' (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b''):
            hasher.update(chunk)
    return hasher.hexdigest()
|
|
|
|
def _find_pdf_files(uploads_dir):
    """Return the paths of all files ending in ``.pdf`` beneath *uploads_dir*."""
    pdf_files = []
    for root, _dirs, files in os.walk(uploads_dir):
        for name in files:
            if name.endswith('.pdf'):
                pdf_files.append(os.path.join(root, name))
    return pdf_files


def _index_pdfs_by_hash(pdf_files):
    """Map each file's MD5 digest to the first path in *pdf_files* having it.

    Files that cannot be read are reported on stdout and skipped.
    """
    hash_to_path = {}
    for pdf_path in pdf_files:
        try:
            pdf_hash = compute_file_hash(pdf_path)
        except OSError as e:
            print(f"处理 {pdf_path} 失败: {e}")
            continue
        # setdefault keeps the first file seen for a digest, which matches
        # a first-match linear scan over pdf_files.
        hash_to_path.setdefault(pdf_hash, pdf_path)
    return hash_to_path


def main():
    """Fill in ``upload_path`` for translation records where it is NULL.

    Selects every row of ``translations`` whose ``upload_path`` is NULL,
    hashes each PDF under ``UPLOADS_DIR`` exactly once, and sets
    ``upload_path`` to the first PDF whose MD5 digest equals the row's
    ``file_hash``. Rows without a matching file are left unchanged.
    Progress and the number of repaired rows are printed to stdout.
    """
    conn = sqlite3.connect(DB_PATH)
    fixed_count = 0
    try:
        cursor = conn.cursor()

        # Fetch all translation records whose upload_path is empty.
        cursor.execute('SELECT id, file_hash, original_filename, output_path FROM translations WHERE upload_path IS NULL')
        records = cursor.fetchall()

        print(f"找到 {len(records)} 条需要修复的记录")

        if not records:
            print("无需修复")
            return

        # Collect every uploaded PDF file.
        pdf_files = _find_pdf_files(UPLOADS_DIR)
        print(f"找到 {len(pdf_files)} 个PDF文件")

        # Hash each file once up front instead of once per record.
        hash_to_path = _index_pdfs_by_hash(pdf_files)

        for record_id, file_hash, _filename, _output_path in records:
            pdf_path = hash_to_path.get(file_hash)
            if pdf_path is None:
                continue

            print(f"ID {record_id}: 找到匹配 {pdf_path}")
            try:
                cursor.execute('UPDATE translations SET upload_path = ? WHERE id = ?', (pdf_path, record_id))
            except sqlite3.Error as e:
                # Best effort: report the failure and carry on with the
                # remaining records.
                print(f"处理 {pdf_path} 失败: {e}")
                continue
            fixed_count += 1

        # One commit for the whole batch of updates.
        conn.commit()
    finally:
        # Close the connection even if a query or the file scan raised.
        conn.close()

    print(f"修复完成,共修复 {fixed_count} 条记录")
|
|
|
|
if __name__ == '__main__':
    # Run the repair only when executed as a script, not when imported.
    main()