fix: 修复文档转换与预览链路中的图片、文件名和错误处理问题

This commit is contained in:
2026-04-24 10:40:29 +08:00
parent 37dd58eff0
commit e935afddfe
16 changed files with 451 additions and 69 deletions

View File

@@ -6,6 +6,8 @@ use App\Models\Document;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Storage;
use Illuminate\Support\Str;
use Paperdoc\Contracts\DocumentInterface;
use Paperdoc\Document\Image;
use Paperdoc\Support\DocumentManager;
/**
@@ -23,9 +25,13 @@ class DocumentConversionService
/**
* 将文档转换为 Markdown
*
* @return array{markdown: string, media_files: array<string, string>}
*/
public function convertToMarkdown(Document $document): array
{
$this->ensureConversionDependenciesAvailable();
$documentPath = Storage::disk('local')->path($document->file_path);
if (!file_exists($documentPath)) {
@@ -39,24 +45,96 @@ class DocumentConversionService
throw new \Exception('文档转换后内容为空,可能是扫描件或不支持的内容格式');
}
return ['markdown' => $markdown];
return [
'markdown' => $markdown,
'media_files' => $this->extractMarkdownMediaFiles($doc),
];
}
/**
 * Guard that the Paperdoc DocumentManager class can be loaded.
 *
 * @throws \RuntimeException when the class is not available
 */
protected function ensureConversionDependenciesAvailable(): void
{
    if (class_exists(DocumentManager::class)) {
        return;
    }

    $message = '文档转换依赖未安装paperdoc-dev/paperdoc-lib。请执行 composer install 后重试。';

    throw new \RuntimeException($message);
}
/**
* Markdown 内容保存到存储
*
* @param array<string, string> $mediaFiles
*/
public function saveMarkdownToFile(Document $document, string $markdown): string
public function saveMarkdownToFile(Document $document, string $markdown, array $mediaFiles = []): string
{
$path = $this->generateMarkdownPath($document);
$saved = Storage::disk('markdown')->put($path, $markdown);
if (!$saved) {
throw new \Exception("无法保存 Markdown 文件");
throw new \Exception('无法保存 Markdown 文件');
}
$this->storeMarkdownMediaFiles(dirname($path), $mediaFiles);
return $path;
}
/**
 * Backfill image assets referenced by an already converted Markdown document.
 *
 * Scans the stored Markdown for image references under `media/`, checks which
 * of them are absent from the `markdown` disk, and re-extracts only those from
 * the original source file. Returns without touching the source file when the
 * document has no Markdown or every referenced asset is already present.
 *
 * @throws \RuntimeException when assets are missing and the conversion library is not installed
 * @throws \Exception when assets are missing and the source file no longer exists
 */
public function ensureMarkdownMediaAssets(Document $document): void
{
    if (empty($document->markdown_path)) {
        return;
    }

    $markdown = $document->getMarkdownContent();
    if (empty($markdown)) {
        return;
    }

    // Capture the target of every image whose path is `media/...` or `./media/...`.
    if (!preg_match_all('/!\[[^\]]*]\(((?:\.\/)?media\/[^)]+)\)/', $markdown, $matches)) {
        return;
    }

    $documentDir = dirname($document->markdown_path);
    $missingRefs = [];

    // An image referenced several times only needs one existence check.
    foreach (array_unique($matches[1]) as $ref) {
        $relativePath = $this->normalizeMarkdownMediaPath($ref);
        if ($relativePath === null) {
            continue;
        }

        if (!Storage::disk('markdown')->exists($documentDir . '/' . $relativePath)) {
            $missingRefs[] = $relativePath;
        }
    }

    if ($missingRefs === []) {
        return;
    }

    // The conversion library is only needed from here on; checking it earlier
    // would make this method throw for documents whose assets are all present.
    $this->ensureConversionDependenciesAvailable();

    $documentPath = Storage::disk('local')->path($document->file_path);
    if (!file_exists($documentPath)) {
        throw new \Exception("文档文件不存在: {$documentPath}");
    }

    // Re-open the source with the `ocr` option off and keep only the assets
    // that were found to be missing above.
    $doc = DocumentManager::open($documentPath, ['ocr' => false]);
    $mediaFiles = array_intersect_key(
        $this->extractMarkdownMediaFiles($doc),
        array_flip($missingRefs)
    );

    $this->storeMarkdownMediaFiles($documentDir, $mediaFiles);
}
/**
* 生成 Markdown 文件路径
*/
@@ -103,16 +181,17 @@ class DocumentConversionService
'document_id' => $document->id,
'markdown_path' => $markdownPath,
]);
$preview = '';
} else {
$preview = $this->getMarkdownPreview($markdown);
$this->getMarkdownPreview($markdown);
}
$document->update([
'markdown_path' => $markdownPath,
'conversion_status' => 'completed',
'conversion_error' => null,
]);
Document::withoutSyncingToSearch(function () use ($document, $markdownPath): void {
$document->update([
'markdown_path' => $markdownPath,
'conversion_status' => 'completed',
'conversion_error' => null,
]);
});
}
/**
@@ -128,10 +207,12 @@ class DocumentConversionService
'trace' => $exception->getTraceAsString(),
]);
$document->update([
'conversion_status' => 'failed',
'conversion_error' => $exception->getMessage(),
]);
Document::withoutSyncingToSearch(function () use ($document, $exception): void {
$document->update([
'conversion_status' => 'failed',
'conversion_error' => $exception->getMessage(),
]);
});
}
/**
@@ -139,12 +220,113 @@ class DocumentConversionService
*/
public function queueConversion(Document $document): void
{
$document->update([
'conversion_status' => 'processing',
'conversion_error' => null,
]);
Document::withoutSyncingToSearch(function () use ($document): void {
$document->update([
'conversion_status' => 'processing',
'conversion_error' => null,
]);
});
$queue = config('documents.conversion.queue', 'documents');
\App\Jobs\ConvertDocumentToMarkdown::dispatch($document)->onQueue($queue);
}
/**
 * Collect the binary contents of every image element that carries data.
 *
 * Images whose source normalises to a `media/...` path keep that path as key;
 * all others receive a generated `media/image-N.ext` name, numbered from 1 in
 * encounter order.
 *
 * @return array<string, string> relative media path => image contents
 */
protected function extractMarkdownMediaFiles(DocumentInterface $document): array
{
    $assets = [];
    $nextGeneratedNumber = 1;

    foreach ($document->getSections() as $section) {
        foreach ($section->getElements() as $node) {
            if ($node instanceof Image && $node->hasData()) {
                $key = $this->normalizeMarkdownMediaPath($node->getSrc());

                if ($key === null) {
                    // No usable source path: fall back to a sequential name.
                    $extension = $this->guessImageExtension($node);
                    $key = 'media/image-' . $nextGeneratedNumber . '.' . $extension;
                    $nextGeneratedNumber++;
                }

                $assets[$key] = $node->getData();
            }
        }
    }

    return $assets;
}
/**
 * Write extracted media files below a document directory on the `markdown` disk.
 *
 * A file whose `put()` call reports failure is logged and skipped so that the
 * remaining files are still attempted.
 *
 * @param string $documentDir directory of the Markdown file on the `markdown` disk
 * @param array<string, string> $mediaFiles relative path => file contents
 */
protected function storeMarkdownMediaFiles(string $documentDir, array $mediaFiles): void
{
    $disk = Storage::disk('markdown');

    foreach ($mediaFiles as $relativePath => $contents) {
        // PHP turns numeric-looking array keys into integers, so cast before ltrim().
        $targetPath = $documentDir . '/' . ltrim((string) $relativePath, '/');
        $targetDirectory = dirname($targetPath);

        if ($targetDirectory !== '.' && !$disk->exists($targetDirectory)) {
            $disk->makeDirectory($targetDirectory);
        }

        if (!$disk->put($targetPath, $contents)) {
            // Previously the result was ignored, leaving broken images with no trace.
            Log::warning('无法保存 Markdown 媒体文件', [
                'path' => $targetPath,
            ]);
        }
    }
}
/**
 * Normalise an image reference into a `media/...` path relative to the Markdown file.
 *
 * Accepts forms such as `./media/a.png`, `media/a.png`, `/media/a.png` and
 * `.\media\a.png`. Empty and `.` segments are dropped.
 *
 * @return string|null the normalised path, or null when the reference is
 *                     empty, a URL or data URI, outside `media/`, names the
 *                     `media` directory itself, or contains a `..` segment
 */
protected function normalizeMarkdownMediaPath(string $path): ?string
{
    $path = trim($path);

    if ($path === '' || str_contains($path, '://') || str_starts_with($path, 'data:')) {
        return null;
    }

    // Unify separators first so that `.\media\x` loses its `./` prefix below.
    $path = str_replace('\\', '/', $path);
    $path = preg_replace('/^\.?\//', '', $path) ?? $path;
    $path = ltrim($path, '/');

    if (!str_starts_with($path, 'media/')) {
        return null;
    }

    $segments = array_values(array_filter(
        explode('/', $path),
        static fn (string $segment): bool => $segment !== '' && $segment !== '.'
    ));

    // A lone `media` segment is the directory, not a file inside it.
    if (count($segments) < 2) {
        return null;
    }

    // Refuse anything that could climb out of the media directory.
    if (in_array('..', $segments, true)) {
        return null;
    }

    return implode('/', $segments);
}
/**
 * Choose a file extension for an extracted image.
 *
 * A recognised MIME type maps to a fixed extension; otherwise the extension
 * of the image's source path is used, or `bin` when that is empty.
 */
protected function guessImageExtension(Image $image): string
{
    $extensionsByMimeType = [
        'image/jpeg' => 'jpg',
        'image/png' => 'png',
        'image/gif' => 'gif',
        'image/webp' => 'webp',
        'image/bmp' => 'bmp',
        'image/tiff' => 'tiff',
        'image/svg+xml' => 'svg',
    ];

    $mimeType = $image->getMimeType();

    foreach ($extensionsByMimeType as $knownType => $extension) {
        if ($mimeType === $knownType) {
            return $extension;
        }
    }

    $fromSource = pathinfo($image->getSrc(), PATHINFO_EXTENSION);

    return $fromSource ?: 'bin';
}
}

View File

@@ -3,7 +3,6 @@
namespace App\Services;
use App\Models\Document;
use Illuminate\Support\Facades\Storage;
class DocumentPreviewService
{
@@ -11,8 +10,6 @@ class DocumentPreviewService
* 将文档的 Markdown 内容转换为 HTML 用于预览
* 统一用于 Filament 后台内联预览和独立预览页面
*
* @param Document $document
* @return string HTML 内容
* @throws \Exception
*/
public function convertToHtml(Document $document): string
@@ -23,8 +20,6 @@ class DocumentPreviewService
/**
* Markdown 转换为 HTML
*
* @param Document $document
* @return string HTML 内容
* @throws \Exception
*/
public function convertMarkdownToHtml(Document $document): string
@@ -35,30 +30,57 @@ class DocumentPreviewService
throw new \Exception('Markdown 内容为空');
}
// 获取 Markdown 文件的目录
$markdownDir = dirname($document->markdown_path);
app(DocumentConversionService::class)->ensureMarkdownMediaAssets($document);
// 修复图片路径:将 ./media/ 替换为 /markdown/{dir}/media/
$markdownContent = preg_replace_callback(
'/\(\.\/media\/([^)]+)\)/',
function ($matches) use ($markdownDir) {
$filename = $matches[1];
return '(/markdown/' . $markdownDir . '/media/' . $filename . ')';
},
$markdownContent
);
$markdownContent = $this->stripPreviewFrontMatter($markdownContent);
$markdownContent = $this->rewriteMarkdownMediaPaths($document, $markdownContent);
// 使用 MarkdownRenderService 转换为 HTML
$renderService = app(MarkdownRenderService::class);
return $renderService->render($markdownContent);
}
/**
 * Remove a leading `---` front-matter block from Markdown before previewing.
 *
 * The block is removed only when it contains an `author:` or `source_file:`
 * key at the start of a line; any other leading block is left in place.
 * The closing fence must be a line consisting of `---` (optionally followed
 * by spaces or tabs), so `----` or `---text` does not end the block.
 *
 * @return string the Markdown without the block, or the input unchanged
 */
protected function stripPreviewFrontMatter(string $markdownContent): string
{
    $pattern = '/\A---\R(?P<frontmatter>.*?\R)---[ \t]*(?:\R+|\z)/s';

    if (preg_match($pattern, $markdownContent, $matches) !== 1) {
        return $markdownContent;
    }

    if (preg_match('/^(author|source_file):/m', $matches['frontmatter']) !== 1) {
        return $markdownContent;
    }

    // The match is anchored at offset 0, so cutting its byte length off the
    // front removes exactly the block without running a second regex.
    return substr($markdownContent, strlen($matches[0]));
}
/**
 * Point `media/...` image references at the `/markdown-media/` URL prefix.
 *
 * Each `![alt](media/x)` or `![alt](./media/x)` is rewritten to
 * `/markdown-media/{document dir}/media/x`, with every path segment
 * percent-encoded. References containing a `..` segment are left as written.
 *
 * @return string the rewritten Markdown, or the input unchanged if the regex engine fails
 */
protected function rewriteMarkdownMediaPaths(Document $document, string $markdownContent): string
{
    $documentDir = dirname($document->markdown_path);

    $rewritten = preg_replace_callback(
        '/!\[(?<alt>[^\]]*)]\((?<path>(?:\.\/)?media\/[^)]+)\)/',
        static function (array $matches) use ($documentDir): string {
            $relativePath = trim($matches['path'] ?? '');
            $relativePath = preg_replace('/^\.?\//', '', $relativePath) ?? $relativePath;
            $relativePath = ltrim(str_replace('\\', '/', $relativePath), '/');

            // Drop empty and `.` segments; dirname() yields `.` for a path
            // without a directory and that must not leak into the URL.
            $segments = array_filter(
                explode('/', $documentDir . '/' . $relativePath),
                static fn (string $segment): bool => $segment !== '' && $segment !== '.'
            );

            // Never emit a URL that climbs out of the document directory.
            if (in_array('..', $segments, true)) {
                return $matches[0];
            }

            $url = '/markdown-media/' . implode('/', array_map('rawurlencode', $segments));

            return sprintf('![%s](%s)', $matches['alt'] ?? '', $url);
        },
        $markdownContent
    );

    // preg_replace_callback() returns null on a PCRE failure; keep the original
    // text instead of casting that to an empty preview.
    return $rewritten ?? $markdownContent;
}
/**
* 检查文档是否可以预览
*
* @param Document $document
* @return bool
*/
public function canPreview(Document $document): bool
{

View File

@@ -33,7 +33,7 @@ class DocumentService
return Storage::disk('local')->download(
$document->file_path,
$document->file_name
$document->display_file_name
);
}