152 lines
4.2 KiB
PHP
152 lines
4.2 KiB
PHP
<?php
|
||
|
||
namespace App\Services;
|
||
|
||
use App\Models\Document;
|
||
use Illuminate\Support\Facades\Log;
|
||
use Illuminate\Support\Facades\Storage;
|
||
use Illuminate\Support\Str;
|
||
use Paperdoc\Support\DocumentManager;
|
||
|
||
/**
 * Document conversion service.
 *
 * Uses paperdoc-lib to convert documents (DOCX/PPTX/XLSX/PDF) to Markdown,
 * writes the result to the "markdown" storage disk and maintains the
 * conversion-related attributes on the Document model.
 */
class DocumentConversionService
{
    /**
     * Default maximum number of characters kept in a Markdown preview.
     */
    protected int $previewLength;

    /**
     * Read the default preview length from configuration (500 when unset).
     */
    public function __construct()
    {
        $this->previewLength = config('documents.markdown.preview_length', 500);
    }

    /**
     * Convert a document to Markdown.
     *
     * @param Document $document Document whose file_path is resolved on the "local" disk.
     *
     * @return array{markdown: string} The rendered Markdown under the "markdown" key
     *                                 (assumes renderAs() yields a string — confirm against paperdoc-lib).
     *
     * @throws \Exception When the source file is missing or the rendered Markdown is blank.
     */
    public function convertToMarkdown(Document $document): array
    {
        $documentPath = Storage::disk('local')->path($document->file_path);

        // is_file() rather than file_exists(): a directory (for example the disk
        // root when file_path is empty) must not be handed to the parser.
        if (!is_file($documentPath)) {
            throw new \Exception("文档文件不存在: {$documentPath}");
        }

        // The document is opened with the 'ocr' option set to false; presumably
        // image-only (scanned) files then produce no text, which is reported below.
        $doc = DocumentManager::open($documentPath, ['ocr' => false]);
        $markdown = DocumentManager::renderAs($doc, 'md');

        // Compare against '' instead of using empty(): empty('0') is true and
        // would reject a document whose entire content is "0".
        if (trim($markdown) === '') {
            throw new \Exception('文档转换后内容为空,可能是扫描件或不支持的内容格式');
        }

        return ['markdown' => $markdown];
    }

    /**
     * Save Markdown content to the "markdown" storage disk.
     *
     * @param Document $document Document the Markdown belongs to (used to build the path).
     * @param string   $markdown Markdown content to store.
     *
     * @return string Path of the stored file, relative to the "markdown" disk.
     *
     * @throws \Exception When the disk reports that the write failed.
     */
    public function saveMarkdownToFile(Document $document, string $markdown): string
    {
        $path = $this->generateMarkdownPath($document);

        if (!Storage::disk('markdown')->put($path, $markdown)) {
            throw new \Exception('无法保存 Markdown 文件');
        }

        return $path;
    }

    /**
     * Build the storage path for a document's Markdown file.
     *
     * A fresh UUID is used both as the directory name and as the file name.
     * When "documents.storage.organize_by_date" is enabled (the default), the
     * directory is prefixed with the document's creation date as Y/m/d,
     * falling back to the current time when created_at is null.
     *
     * @return string "<Y/m/d>/<uuid>/<uuid>.md" or "<uuid>/<uuid>.md".
     */
    protected function generateMarkdownPath(Document $document): string
    {
        $uuid = Str::uuid()->toString();

        if (!config('documents.storage.organize_by_date', true)) {
            return "{$uuid}/{$uuid}.md";
        }

        $date = $document->created_at ?? now();

        return $date->format('Y/m/d') . "/{$uuid}/{$uuid}.md";
    }

    /**
     * Build a single-line preview of Markdown content (first N characters).
     *
     * Runs of whitespace are collapsed to one space and the result is trimmed.
     * Text longer than the limit is cut at the limit and suffixed with "...".
     *
     * @param string   $markdown Markdown content.
     * @param int|null $length   Maximum number of characters; null uses the configured
     *                           default. Negative values are treated as 0.
     *
     * @return string The preview text.
     */
    public function getMarkdownPreview(string $markdown, ?int $length = null): string
    {
        // A negative limit would make mb_substr() cut from the end of the text.
        $length = max(0, $length ?? $this->previewLength);

        // The "u" modifier makes \s operate on UTF-8 characters, so Unicode
        // whitespace (e.g. the ideographic space U+3000) is collapsed as well.
        // preg_replace() returns null on failure (such as invalid UTF-8 under
        // "u"), so fall back to byte mode and finally to the untouched text.
        $cleaned = preg_replace('/\s+/u', ' ', $markdown)
            ?? preg_replace('/\s+/', ' ', $markdown)
            ?? $markdown;
        $cleaned = trim($cleaned);

        if (mb_strlen($cleaned) <= $length) {
            return $cleaned;
        }

        return mb_substr($cleaned, 0, $length) . '...';
    }

    /**
     * Record a successful conversion on the document.
     *
     * Reads the stored Markdown back from the "markdown" disk to generate the
     * preview, then sets markdown_path, markdown_preview, conversion_status
     * ("completed") and clears conversion_error. If the file cannot be read,
     * a warning is logged and the preview is stored as an empty string.
     *
     * @param Document $document     Document to update.
     * @param string   $markdownPath Path of the Markdown file on the "markdown" disk.
     */
    public function updateDocumentMarkdown(Document $document, string $markdownPath): void
    {
        $markdown = Storage::disk('markdown')->get($markdownPath);

        // An unreadable file is not necessarily reported as false: current
        // Laravel versions return null. Anything that is not a string must
        // not reach getMarkdownPreview(), which only accepts strings.
        if (!is_string($markdown)) {
            Log::warning('无法读取 Markdown 文件以生成预览', [
                'document_id' => $document->id,
                'markdown_path' => $markdownPath,
            ]);
            $preview = '';
        } else {
            $preview = $this->getMarkdownPreview($markdown);
        }

        $document->update([
            'markdown_path' => $markdownPath,
            'markdown_preview' => $preview,
            'conversion_status' => 'completed',
            'conversion_error' => null,
        ]);
    }

    /**
     * Handle a failed conversion.
     *
     * Logs the failure with document details and the stack trace, then marks
     * the document as "failed" and stores the error message on it.
     *
     * @param Document   $document  Document whose conversion failed.
     * @param \Throwable $exception Cause of the failure. Any Throwable is accepted so
     *                              that engine errors (e.g. TypeError) can be recorded too.
     */
    public function handleConversionFailure(Document $document, \Throwable $exception): void
    {
        Log::error('文档转换失败', [
            'document_id' => $document->id,
            'document_title' => $document->title,
            'file_name' => $document->file_name,
            'error' => $exception->getMessage(),
            'trace' => $exception->getTraceAsString(),
        ]);

        $document->update([
            'conversion_status' => 'failed',
            'conversion_error' => $exception->getMessage(),
        ]);
    }

    /**
     * Queue a conversion job for the document.
     *
     * Marks the document as "processing", clears any previous error, and
     * dispatches ConvertDocumentToMarkdown on the queue named by
     * "documents.conversion.queue" ("documents" when unset).
     *
     * @param Document $document Document to convert.
     */
    public function queueConversion(Document $document): void
    {
        $document->update([
            'conversion_status' => 'processing',
            'conversion_error' => null,
        ]);

        $queue = config('documents.conversion.queue', 'documents');
        \App\Jobs\ConvertDocumentToMarkdown::dispatch($document)->onQueue($queue);
    }
}
|