- 实现基于 Laravel 11 和 Filament 3.X 的文档管理系统 - 添加用户认证和分组管理功能 - 实现文档上传、分类和权限控制 - 集成 Word 文档自动转换为 Markdown - 集成 Meilisearch 全文搜索引擎 - 实现文档在线预览功能 - 添加安全日志和审计功能 - 完整的简体中文界面 - 包含完整的项目文档和部署指南 技术栈: - Laravel 11.x - Filament 3.X - Meilisearch 1.5+ - Pandoc 文档转换 - Redis 队列系统 - Pest PHP 测试框架
372 lines
10 KiB
PHP
372 lines
10 KiB
PHP
<?php
|
||
|
||
namespace App\Services;
|
||
|
||
use App\Models\Document;
|
||
use Illuminate\Support\Facades\Log;
|
||
use Illuminate\Support\Facades\Process;
|
||
use Illuminate\Support\Facades\Storage;
|
||
use Illuminate\Support\Str;
|
||
|
||
/**
 * Document conversion service.
 *
 * Converts stored Word documents to Markdown by running Pandoc, saves the
 * resulting Markdown (plus any extracted media) on the "markdown" disk, and
 * records the conversion outcome on the Document model.
 */
class DocumentConversionService
{
    /**
     * Conversion driver name (only "pandoc" is handled).
     *
     * @var string
     */
    protected string $driver;

    /**
     * Path (or command name) of the Pandoc executable.
     *
     * @var string
     */
    protected string $pandocPath;

    /**
     * Conversion timeout in seconds.
     *
     * @var int
     */
    protected int $timeout;

    /**
     * Default number of characters kept in a Markdown preview.
     *
     * @var int
     */
    protected int $previewLength;

    /**
     * Load conversion settings from the "documents" configuration.
     */
    public function __construct()
    {
        $this->driver = config('documents.conversion.driver', 'pandoc');
        $this->pandocPath = config('documents.conversion.pandoc_path', 'pandoc');
        $this->timeout = config('documents.conversion.timeout', 300);
        $this->previewLength = config('documents.markdown.preview_length', 500);
    }

    /**
     * Convert a Word document to Markdown using the configured driver.
     *
     * On success the temporary working directory is left in place and its
     * path is returned as "tempDir"; this method does not remove it.
     *
     * @param Document $document
     * @return array{markdown: string, mediaDir: string|null, tempDir: string}
     * @throws \Exception When the driver is unsupported or the conversion fails.
     */
    public function convertToMarkdown(Document $document): array
    {
        if ($this->driver === 'pandoc') {
            return $this->convertWithPandoc($document);
        }

        throw new \Exception("不支持的转换驱动: {$this->driver}");
    }

    /**
     * Convert a document with Pandoc inside a fresh temporary directory.
     *
     * Pandoc runs with the temporary directory as its working directory so
     * that "--extract-media=." writes embedded media below it. If anything
     * fails after the directory is created, the directory is removed before
     * the error is rethrown.
     *
     * @param Document $document
     * @return array{markdown: string, mediaDir: string|null, tempDir: string}
     * @throws \Exception When the source file is missing, the temporary directory
     *                    cannot be created, Pandoc fails, or the output is unreadable.
     */
    protected function convertWithPandoc(Document $document): array
    {
        // Resolve the absolute path of the stored source file.
        $documentPath = Storage::disk('local')->path($document->file_path);

        if (!file_exists($documentPath)) {
            throw new \Exception("文档文件不存在: {$documentPath}");
        }

        // Use an unpredictable directory name and fail loudly if it cannot be
        // created (including when the path unexpectedly already exists).
        $tempDir = sys_get_temp_dir() . '/pandoc_' . bin2hex(random_bytes(8));

        if (!mkdir($tempDir, 0755, true)) {
            throw new \Exception("无法创建临时目录: {$tempDir}");
        }

        $tempOutputPath = $tempDir . '/output.md';

        try {
            // Array form: arguments are passed to the process without a shell.
            $result = Process::timeout($this->timeout)
                ->path($tempDir)
                ->run([
                    $this->pandocPath,
                    $documentPath,
                    '-f', $this->getInputFormat($document->mime_type),
                    '-t', 'markdown',
                    '-o', $tempOutputPath,
                    '--wrap=none',       // do not hard-wrap lines
                    '--extract-media=.', // extract media into the working directory
                ]);

            if (!$result->successful()) {
                throw new \Exception("Pandoc 转换失败: {$result->errorOutput()}");
            }

            if (!file_exists($tempOutputPath)) {
                throw new \Exception("转换后的 Markdown 文件不存在");
            }

            $markdown = file_get_contents($tempOutputPath);

            if ($markdown === false) {
                throw new \Exception("无法读取转换后的 Markdown 文件");
            }

            // glob() may return false on error; treat that as "no media".
            $mediaDir = $tempDir . '/media';
            $mediaEntries = is_dir($mediaDir) ? (glob($mediaDir . '/*') ?: []) : [];

            return [
                'markdown' => $markdown,
                'mediaDir' => $mediaEntries !== [] ? $mediaDir : null,
                'tempDir' => $tempDir,
            ];
        } catch (\Throwable $e) {
            // Catch \Throwable (not just \Exception) so that \Error instances,
            // e.g. a TypeError, do not leak the temporary directory.
            $this->deleteDirectory($tempDir);
            throw $e;
        }
    }

    /**
     * Recursively delete a file or directory.
     *
     * Symbolic links are removed themselves and never followed, so content
     * outside the given tree is not touched.
     *
     * @param string $dir Path to remove; a missing path is a no-op.
     * @return void
     */
    protected function deleteDirectory(string $dir): void
    {
        // Check links first: is_dir()/file_exists() would follow them.
        if (is_link($dir)) {
            unlink($dir);
            return;
        }

        if (!file_exists($dir)) {
            return;
        }

        if (!is_dir($dir)) {
            unlink($dir);
            return;
        }

        $entries = scandir($dir);

        if ($entries === false) {
            // Directory could not be listed; nothing more can be done here.
            return;
        }

        foreach (array_diff($entries, ['.', '..']) as $entry) {
            $this->deleteDirectory($dir . '/' . $entry);
        }

        rmdir($dir);
    }

    /**
     * Map a MIME type to the value passed to Pandoc's "-f" option.
     *
     * Unknown MIME types fall back to "docx".
     *
     * NOTE(review): Pandoc's documented input formats include "docx" but not
     * "doc"; a legacy "application/msword" upload will most likely make Pandoc
     * exit with an unknown-format error. Confirm and consider rejecting such
     * files earlier or pre-converting them.
     *
     * @param string $mimeType
     * @return string
     */
    protected function getInputFormat(string $mimeType): string
    {
        return match ($mimeType) {
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => 'docx',
            'application/msword' => 'doc',
            default => 'docx',
        };
    }

    /**
     * Store Markdown content, and any extracted media, on the "markdown" disk.
     *
     * Media files are stored first, in a "media" directory next to the
     * Markdown file.
     *
     * @param Document $document
     * @param string $markdown
     * @param string|null $mediaDir Temporary directory holding extracted media, if any.
     * @return string Path of the stored Markdown file, relative to the "markdown" disk.
     * @throws \Exception When the Markdown file cannot be written.
     */
    public function saveMarkdownToFile(Document $document, string $markdown, ?string $mediaDir = null): string
    {
        $path = $this->generateMarkdownPath($document);
        $directory = dirname($path);

        if ($mediaDir && is_dir($mediaDir)) {
            $this->saveMediaFiles($mediaDir, $directory);
        }

        $saved = Storage::disk('markdown')->put($path, $markdown);

        if (!$saved) {
            throw new \Exception("无法保存 Markdown 文件");
        }

        return $path;
    }

    /**
     * Copy the regular files directly inside $sourceDir to "{$targetDir}/media/"
     * on the "markdown" disk.
     *
     * This is best-effort: a file that cannot be read or written is logged as
     * a warning and skipped; it does not abort the remaining files.
     *
     * @param string $sourceDir Source media directory.
     * @param string $targetDir Target directory relative to the "markdown" disk.
     * @return void
     */
    protected function saveMediaFiles(string $sourceDir, string $targetDir): void
    {
        // glob() returns false on error; iterate over nothing in that case.
        foreach (glob($sourceDir . '/*') ?: [] as $file) {
            if (!is_file($file)) {
                continue;
            }

            $filename = basename($file);
            $targetPath = $targetDir . '/media/' . $filename;

            $content = file_get_contents($file);

            if ($content === false || !Storage::disk('markdown')->put($targetPath, $content)) {
                Log::warning('媒体文件保存失败', [
                    'filename' => $filename,
                    'path' => $targetPath,
                ]);
                continue;
            }

            Log::info('媒体文件已保存', [
                'filename' => $filename,
                'path' => $targetPath,
            ]);
        }
    }

    /**
     * Build the storage path for a document's Markdown file.
     *
     * A fresh UUID is generated on every call and used both as the directory
     * name and as the file name, giving "{uuid}/{uuid}.md", optionally
     * prefixed with "Y/m/d" taken from the document's creation date (or the
     * current time when that is not set).
     *
     * @param Document $document
     * @return string
     */
    protected function generateMarkdownPath(Document $document): string
    {
        $organizeByDate = config('documents.storage.organize_by_date', true);

        $uuid = Str::uuid()->toString();

        if ($organizeByDate) {
            $date = $document->created_at ?? now();
            $directory = $date->format('Y/m/d') . '/' . $uuid;
        } else {
            $directory = $uuid;
        }

        $filename = $uuid . '.md';

        return "{$directory}/{$filename}";
    }

    /**
     * Build a short single-line preview of Markdown content.
     *
     * Runs of whitespace are collapsed to one space, the result is trimmed,
     * and text longer than the limit is cut to $length characters with "..."
     * appended.
     *
     * @param string $markdown
     * @param int|null $length Maximum number of characters; defaults to the configured preview length.
     * @return string
     */
    public function getMarkdownPreview(string $markdown, ?int $length = null): string
    {
        $length = $length ?? $this->previewLength;

        // The "u" modifier makes the match operate on UTF-8 characters rather
        // than raw bytes, so multi-byte text cannot be corrupted.
        $cleaned = preg_replace('/\s+/u', ' ', $markdown);

        if ($cleaned === null) {
            // Input is not valid UTF-8 (or PCRE failed): fall back to a
            // byte-wise match, and to the raw text as a last resort.
            $cleaned = preg_replace('/\s+/', ' ', $markdown) ?? $markdown;
        }

        $cleaned = trim($cleaned);

        if (mb_strlen($cleaned) <= $length) {
            return $cleaned;
        }

        return mb_substr($cleaned, 0, $length) . '...';
    }

    /**
     * Record a successful conversion on the document.
     *
     * Reads the stored Markdown to build the preview; if it cannot be read,
     * a warning is logged and an empty preview is stored.
     *
     * @param Document $document
     * @param string $markdownPath Path relative to the "markdown" disk.
     * @return void
     */
    public function updateDocumentMarkdown(Document $document, string $markdownPath): void
    {
        $markdown = Storage::disk('markdown')->get($markdownPath);

        // The filesystem adapter signals an unreadable file with null (older
        // versions used false); accept only an actual string as content.
        if (!is_string($markdown)) {
            Log::warning('无法读取 Markdown 文件以生成预览', [
                'document_id' => $document->id,
                'markdown_path' => $markdownPath,
            ]);
            $preview = '';
        } else {
            $preview = $this->getMarkdownPreview($markdown);
        }

        $document->update([
            'markdown_path' => $markdownPath,
            'markdown_preview' => $preview,
            'conversion_status' => 'completed',
            'conversion_error' => null,
        ]);
    }

    /**
     * Log a conversion failure and mark the document as failed.
     *
     * @param Document $document
     * @param \Throwable $exception Any throwable is accepted, including \Error.
     * @return void
     */
    public function handleConversionFailure(Document $document, \Throwable $exception): void
    {
        Log::error('文档转换失败', [
            'document_id' => $document->id,
            'document_title' => $document->title,
            'error' => $exception->getMessage(),
            'trace' => $exception->getTraceAsString(),
        ]);

        $document->update([
            'conversion_status' => 'failed',
            'conversion_error' => $exception->getMessage(),
        ]);
    }

    /**
     * Mark the document as processing and dispatch the conversion job.
     *
     * @param Document $document
     * @return void
     */
    public function queueConversion(Document $document): void
    {
        $document->update([
            'conversion_status' => 'processing',
            'conversion_error' => null,
        ]);

        $queue = config('documents.conversion.queue', 'documents');
        \App\Jobs\ConvertDocumentToMarkdown::dispatch($document)->onQueue($queue);
    }
}
|
||
|