fix: 修复文档转换与预览链路中的图片、文件名和错误处理问题

This commit is contained in:
2026-04-24 10:40:29 +08:00
parent 37dd58eff0
commit e935afddfe
16 changed files with 451 additions and 69 deletions

View File

@@ -79,6 +79,7 @@ class DocumentResource extends Resource
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
]))
->maxSize(51200) // 50MB
->storeFileNamesIn('file_name')
->disk('local')
->directory('documents/' . date('Y/m/d'))
->visibility('private')

View File

@@ -20,7 +20,7 @@ class CreateDocument extends CreateRecord
$filePath = $data['file'];
$data['file_path'] = $filePath;
$data['file_name'] = basename($filePath);
$data['file_name'] = $data['file_name'] ?? basename($filePath);
$data['file_size'] = Storage::disk('local')->size($filePath);
$data['mime_type'] = Storage::disk('local')->mimeType($filePath);

View File

@@ -51,7 +51,7 @@ class EditDocument extends EditRecord
}
$data['file_path'] = $currentFile;
$data['file_name'] = basename($currentFile);
$data['file_name'] = $data['file_name'] ?? basename($currentFile);
$data['file_size'] = Storage::disk('local')->size($currentFile);
$data['mime_type'] = Storage::disk('local')->mimeType($currentFile);

View File

@@ -120,7 +120,7 @@ class ViewDocument extends ViewRecord
TextEntry::make('uploader.name')
->label('上传者'),
TextEntry::make('file_name')
TextEntry::make('display_file_name')
->label('文件名'),
TextEntry::make('file_size')

View File

@@ -46,7 +46,8 @@ class ConvertDocumentToMarkdown implements ShouldQueue
$markdownPath = $conversionService->saveMarkdownToFile(
$this->document,
$result['markdown']
$result['markdown'],
$result['media_files'] ?? []
);
$conversionService->updateDocumentMarkdown($this->document, $markdownPath);
@@ -56,36 +57,46 @@ class ConvertDocumentToMarkdown implements ShouldQueue
'document_title' => $this->document->title,
'markdown_path' => $markdownPath,
]);
} catch (\Exception $e) {
} catch (\Throwable $e) {
$exception = $this->normalizeException($e);
Log::error('文档转换失败', [
'document_id' => $this->document->id,
'document_title' => $this->document->title,
'file_name' => $this->document->file_name,
'attempt' => $this->attempts(),
'error' => $e->getMessage(),
'error' => $exception->getMessage(),
]);
if ($this->attempts() >= $this->tries) {
$conversionService->handleConversionFailure($this->document, $e);
$conversionService->handleConversionFailure($this->document, $exception);
}
throw $e;
throw $exception;
}
}
/**
 * Failure hook of the job: logs the failure and forwards it to the conversion
 * service so it can be recorded on the document.
 *
 * NOTE(review): this diff hunk appears to contain both the removed and the added
 * handleConversionFailure() call; confirm that only the call receiving
 * $normalized remains in the final file.
 *
 * @param \Throwable $exception the throwable reported for the failed job
 */
public function failed(\Throwable $exception): void
{
// Coerce any non-Exception Throwable into an \Exception before logging/forwarding.
$normalized = $this->normalizeException($exception);
Log::error('文档转换任务最终失败', [
'document_id' => $this->document->id,
'document_title' => $this->document->title,
'file_name' => $this->document->file_name,
'error' => $exception->getMessage(),
'error' => $normalized->getMessage(),
]);
// Resolved from the container because no service instance is passed to this method.
$conversionService = app(DocumentConversionService::class);
$conversionService->handleConversionFailure(
$this->document,
$exception instanceof \Exception ? $exception : new \Exception($exception->getMessage())
);
$conversionService->handleConversionFailure($this->document, $normalized);
}
/**
 * Coerce any Throwable into an \Exception instance.
 *
 * Exceptions are returned untouched. Any other Throwable (for example an
 * \Error) is wrapped in a \RuntimeException that carries the same message,
 * code 0, and the original throwable as its "previous" link.
 */
protected function normalizeException(\Throwable $throwable): \Exception
{
    return $throwable instanceof \Exception
        ? $throwable
        : new \RuntimeException($throwable->getMessage(), 0, $throwable);
}
}

View File

@@ -7,6 +7,7 @@ use Illuminate\Database\Eloquent\Model;
use Illuminate\Database\Eloquent\Relations\BelongsTo;
use Illuminate\Database\Eloquent\Relations\HasMany;
use Illuminate\Support\Facades\Storage;
use Illuminate\Support\Str;
use Laravel\Scout\Searchable;
class Document extends Model
@@ -114,4 +115,33 @@ class Document extends Model
{
return !empty($this->markdown_path) && $this->conversion_status === 'completed';
}
/**
 * Accessor for `display_file_name`: the file name used for display and download.
 *
 * Returns the stored `file_name` unless it is empty or looks like a generated
 * storage name (see looksLikeGeneratedStorageName()). In that case the name is
 * rebuilt as "<title>.<extension>", where the title is stripped of characters
 * that are invalid in file names and defaults to "document" when nothing is left.
 */
public function getDisplayFileNameAttribute(): string
{
    $fileName = trim((string) $this->file_name);

    if ($fileName !== '' && ! $this->looksLikeGeneratedStorageName($fileName)) {
        return $fileName;
    }

    // Cast file_path: when file_name is empty and file_path is null, passing null
    // to pathinfo() is deprecated since PHP 8.1 and a TypeError under strict types.
    $extensionSource = $fileName !== '' ? $fileName : (string) $this->file_path;
    $extension = pathinfo($extensionSource, PATHINFO_EXTENSION);

    $title = trim((string) $this->title);
    // Replace runs of characters that are not allowed in file names with a dash.
    // preg_replace() returns null on failure (e.g. invalid UTF-8 with /u), hence the fallback.
    $title = preg_replace('/[<>:"\/\\\\|?*\x00-\x1F]+/u', '-', $title) ?? '';
    $title = trim($title, " .-\t\n\r\0\x0B");
    $title = $title !== '' ? $title : 'document';

    return $extension !== '' ? "{$title}.{$extension}" : $title;
}
/**
 * Tell whether a file name's base name (without extension) is a UUID or a
 * 26-character string made only of the characters accepted by the pattern below.
 */
protected function looksLikeGeneratedStorageName(string $fileName): bool
{
    $stem = pathinfo($fileName, PATHINFO_FILENAME);

    if (Str::isUuid($stem)) {
        return true;
    }

    return preg_match('/^[0-9A-HJKMNP-TV-Z]{26}$/i', $stem) === 1;
}
}

View File

@@ -6,6 +6,8 @@ use App\Models\Document;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Storage;
use Illuminate\Support\Str;
use Paperdoc\Contracts\DocumentInterface;
use Paperdoc\Document\Image;
use Paperdoc\Support\DocumentManager;
/**
@@ -23,9 +25,13 @@ class DocumentConversionService
/**
* 将文档转换为 Markdown
*
* @return array{markdown: string, media_files: array<string, string>}
*/
public function convertToMarkdown(Document $document): array
{
$this->ensureConversionDependenciesAvailable();
$documentPath = Storage::disk('local')->path($document->file_path);
if (!file_exists($documentPath)) {
@@ -39,24 +45,96 @@ class DocumentConversionService
throw new \Exception('文档转换后内容为空,可能是扫描件或不支持的内容格式');
}
return ['markdown' => $markdown];
return [
'markdown' => $markdown,
'media_files' => $this->extractMarkdownMediaFiles($doc),
];
}
/**
 * Guard that the DocumentManager class used for conversion can be loaded.
 *
 * @throws \RuntimeException when the class does not exist
 */
protected function ensureConversionDependenciesAvailable(): void
{
    if (class_exists(DocumentManager::class)) {
        return;
    }

    throw new \RuntimeException(
        '文档转换依赖未安装paperdoc-dev/paperdoc-lib。请执行 composer install 后重试。'
    );
}
/**
 * Write the Markdown content to the "markdown" disk and store the accompanying
 * media files next to it.
 *
 * The target path comes from generateMarkdownPath(); media files are written
 * relative to the directory of that path via storeMarkdownMediaFiles().
 *
 * @param array<string, string> $mediaFiles relative path => file contents
 * @return string the path of the stored Markdown file on the "markdown" disk
 * @throws \Exception when the Markdown file could not be written
 */
public function saveMarkdownToFile(Document $document, string $markdown): string
public function saveMarkdownToFile(Document $document, string $markdown, array $mediaFiles = []): string
{
$path = $this->generateMarkdownPath($document);
$saved = Storage::disk('markdown')->put($path, $markdown);
if (!$saved) {
throw new \Exception("无法保存 Markdown 文件");
throw new \Exception('无法保存 Markdown 文件');
}
// Media is only written after the Markdown file itself was stored successfully.
$this->storeMarkdownMediaFiles(dirname($path), $mediaFiles);
return $path;
}
/**
 * Restore image assets that an existing Markdown document references but that
 * are missing from the "markdown" disk.
 *
 * Scans the Markdown for image links pointing into `media/`, checks which of
 * them do not exist next to the Markdown file, and, only if something is
 * missing, re-opens the source document to extract and store those files.
 * Returns silently when there is no Markdown, no media reference, or nothing
 * is missing.
 *
 * @throws \RuntimeException when media is missing and the conversion library is unavailable
 * @throws \Exception when media is missing and the source document file does not exist
 */
public function ensureMarkdownMediaAssets(Document $document): void
{
    if (empty($document->markdown_path)) {
        return;
    }

    $markdown = $document->getMarkdownContent();
    if (empty($markdown)) {
        return;
    }

    if (!preg_match_all('/!\[[^\]]*]\(((?:\.\/)?media\/[^)]+)\)/', $markdown, $matches)) {
        return;
    }

    $documentDir = dirname($document->markdown_path);

    // Keyed by relative path so repeated references to one image are checked once
    // and the set can be used directly with array_intersect_key() below.
    $missingRefs = [];
    foreach ($matches[1] as $ref) {
        $relativePath = $this->normalizeMarkdownMediaPath($ref);
        if ($relativePath === null || isset($missingRefs[$relativePath])) {
            continue;
        }

        if (!Storage::disk('markdown')->exists($documentDir . '/' . $relativePath)) {
            $missingRefs[$relativePath] = true;
        }
    }

    if ($missingRefs === []) {
        return;
    }

    // The conversion library is only needed from here on. Checking it this late
    // keeps callers working for documents whose media is already complete even
    // when the library is not installed.
    $this->ensureConversionDependenciesAvailable();

    $documentPath = Storage::disk('local')->path($document->file_path);
    if (!file_exists($documentPath)) {
        throw new \Exception("文档文件不存在: {$documentPath}");
    }

    $doc = DocumentManager::open($documentPath, ['ocr' => false]);

    // Store only the extracted files that are actually missing.
    $mediaFiles = array_intersect_key(
        $this->extractMarkdownMediaFiles($doc),
        $missingRefs
    );

    $this->storeMarkdownMediaFiles($documentDir, $mediaFiles);
}
/**
* 生成 Markdown 文件路径
*/
@@ -103,16 +181,17 @@ class DocumentConversionService
'document_id' => $document->id,
'markdown_path' => $markdownPath,
]);
$preview = '';
} else {
$preview = $this->getMarkdownPreview($markdown);
$this->getMarkdownPreview($markdown);
}
$document->update([
'markdown_path' => $markdownPath,
'conversion_status' => 'completed',
'conversion_error' => null,
]);
Document::withoutSyncingToSearch(function () use ($document, $markdownPath): void {
$document->update([
'markdown_path' => $markdownPath,
'conversion_status' => 'completed',
'conversion_error' => null,
]);
});
}
/**
@@ -128,10 +207,12 @@ class DocumentConversionService
'trace' => $exception->getTraceAsString(),
]);
$document->update([
'conversion_status' => 'failed',
'conversion_error' => $exception->getMessage(),
]);
Document::withoutSyncingToSearch(function () use ($document, $exception): void {
$document->update([
'conversion_status' => 'failed',
'conversion_error' => $exception->getMessage(),
]);
});
}
/**
@@ -139,12 +220,113 @@ class DocumentConversionService
*/
public function queueConversion(Document $document): void
{
// Mark the document as being processed and clear any previous conversion error.
$document->update([
'conversion_status' => 'processing',
'conversion_error' => null,
]);
// Same status update, wrapped in withoutSyncingToSearch() so it runs without search syncing.
Document::withoutSyncingToSearch(function () use ($document): void {
$document->update([
'conversion_status' => 'processing',
'conversion_error' => null,
]);
});
// Queue name is configurable; falls back to "documents" when the config key is unset.
$queue = config('documents.conversion.queue', 'documents');
\App\Jobs\ConvertDocumentToMarkdown::dispatch($document)->onQueue($queue);
}
/**
 * Collect the data of every image element in the document, keyed by the
 * relative media path it should be stored under.
 *
 * Elements that are not Image instances or carry no data are skipped. When an
 * image's source cannot be normalized to a `media/...` path, a sequential
 * fallback name `media/image-<n>.<ext>` is used instead. A later image with
 * the same key replaces an earlier one.
 *
 * @return array<string, string>
 */
protected function extractMarkdownMediaFiles(DocumentInterface $document): array
{
    $collected = [];
    $nextFallback = 1;

    foreach ($document->getSections() as $section) {
        foreach ($section->getElements() as $element) {
            $isUsableImage = $element instanceof Image && $element->hasData();
            if (!$isUsableImage) {
                continue;
            }

            // The fallback counter only advances when the source path is unusable.
            $target = $this->normalizeMarkdownMediaPath($element->getSrc())
                ?? sprintf(
                    'media/image-%d.%s',
                    $nextFallback++,
                    $this->guessImageExtension($element)
                );

            $collected[$target] = $element->getData();
        }
    }

    return $collected;
}
/**
 * Write media files to the "markdown" disk below the given document directory.
 *
 * A file that cannot be written is logged as a warning and does not stop the
 * remaining files from being stored.
 *
 * @param string $documentDir directory of the Markdown file on the "markdown" disk
 * @param array<string, string> $mediaFiles relative path => file contents
 */
protected function storeMarkdownMediaFiles(string $documentDir, array $mediaFiles): void
{
    $disk = Storage::disk('markdown');

    foreach ($mediaFiles as $relativePath => $contents) {
        // PHP turns integer-like array keys into ints; cast before string handling.
        $targetPath = $documentDir . '/' . ltrim((string) $relativePath, '/');
        $targetDirectory = dirname($targetPath);

        if ($targetDirectory !== '.' && !$disk->exists($targetDirectory)) {
            $disk->makeDirectory($targetDirectory);
        }

        // put() reports failure through its return value; surface it instead of
        // silently leaving a broken image reference behind.
        if (!$disk->put($targetPath, $contents)) {
            Log::warning('Markdown 图片资源保存失败', [
                'target_path' => $targetPath,
            ]);
        }
    }
}
/**
 * Normalize an image reference to a safe relative path below `media/`.
 *
 * Accepts forms such as `media/a.png`, `./media/a.png`, `/media/a.png` and
 * their backslash variants. Empty and `.` segments are dropped.
 *
 * @return string|null the normalized `media/...` path, or null when the
 *                     reference is empty, a URL or data URI, lies outside
 *                     `media/`, names no file below `media/`, or contains a
 *                     `..` segment
 */
protected function normalizeMarkdownMediaPath(string $path): ?string
{
    $path = trim($path);

    if ($path === '' || str_contains($path, '://') || str_starts_with($path, 'data:')) {
        return null;
    }

    // Unify separators first so that ".\media\a.png" also gets its leading "./" stripped.
    $path = str_replace('\\', '/', $path);
    $path = preg_replace('/^\.?\//', '', $path) ?? $path;
    $path = ltrim($path, '/');

    if (!str_starts_with($path, 'media/')) {
        return null;
    }

    $segments = array_values(array_filter(
        explode('/', $path),
        static fn (string $segment): bool => $segment !== '' && $segment !== '.'
    ));

    // Fewer than two segments means only the "media" directory itself was named.
    if (count($segments) < 2 || in_array('..', $segments, true)) {
        return null;
    }

    return implode('/', $segments);
}
/**
 * Pick a file extension for an image that has no usable `media/...` path.
 *
 * Known MIME types map to a fixed extension. Otherwise the extension is taken
 * from the path component of the image source and is only accepted when it is
 * purely alphanumeric, so query strings or other URL residue never end up in a
 * stored file name. Falls back to "bin".
 */
protected function guessImageExtension(Image $image): string
{
    $fromMime = match (strtolower((string) $image->getMimeType())) {
        'image/jpeg' => 'jpg',
        'image/png' => 'png',
        'image/gif' => 'gif',
        'image/webp' => 'webp',
        'image/bmp' => 'bmp',
        'image/tiff' => 'tiff',
        'image/svg+xml' => 'svg',
        default => null,
    };

    if ($fromMime !== null) {
        return $fromMime;
    }

    // The source may be a URL such as ".../a.png?v=1"; look only at its path part.
    // parse_url() returns false/null for malformed input or a missing path.
    $srcPath = parse_url((string) $image->getSrc(), PHP_URL_PATH);
    $extension = is_string($srcPath)
        ? strtolower(pathinfo($srcPath, PATHINFO_EXTENSION))
        : '';

    return preg_match('/\A[a-z0-9]{1,10}\z/', $extension) === 1 ? $extension : 'bin';
}
}

View File

@@ -3,7 +3,6 @@
namespace App\Services;
use App\Models\Document;
use Illuminate\Support\Facades\Storage;
class DocumentPreviewService
{
@@ -11,8 +10,6 @@ class DocumentPreviewService
* 将文档的 Markdown 内容转换为 HTML 用于预览
* 统一用于 Filament 后台内联预览和独立预览页面
*
* @param Document $document
* @return string HTML 内容
* @throws \Exception
*/
public function convertToHtml(Document $document): string
@@ -23,8 +20,6 @@ class DocumentPreviewService
/**
* Markdown 转换为 HTML
*
* @param Document $document
* @return string HTML 内容
* @throws \Exception
*/
public function convertMarkdownToHtml(Document $document): string
@@ -35,30 +30,57 @@ class DocumentPreviewService
throw new \Exception('Markdown 内容为空');
}
// 获取 Markdown 文件的目录
$markdownDir = dirname($document->markdown_path);
app(DocumentConversionService::class)->ensureMarkdownMediaAssets($document);
// 修复图片路径:将 ./media/ 替换为 /markdown/{dir}/media/
$markdownContent = preg_replace_callback(
'/\(\.\/media\/([^)]+)\)/',
function ($matches) use ($markdownDir) {
$filename = $matches[1];
return '(/markdown/' . $markdownDir . '/media/' . $filename . ')';
},
$markdownContent
);
$markdownContent = $this->stripPreviewFrontMatter($markdownContent);
$markdownContent = $this->rewriteMarkdownMediaPaths($document, $markdownContent);
// 使用 MarkdownRenderService 转换为 HTML
$renderService = app(MarkdownRenderService::class);
return $renderService->render($markdownContent);
}
/**
 * Remove a leading `---` front-matter block from the Markdown, but only when
 * that block contains an `author:` or `source_file:` line. Any other content,
 * including front matter without those keys, is returned unchanged.
 */
protected function stripPreviewFrontMatter(string $markdownContent): string
{
    $leadingBlock = [];
    $found = preg_match('/\A---\R(?P<frontmatter>.*?\R)---\R*/s', $markdownContent, $leadingBlock);

    $isInternal = $found
        && preg_match('/^(author|source_file):/m', $leadingBlock['frontmatter'] ?? '');

    if (!$isInternal) {
        return $markdownContent;
    }

    return (string) preg_replace('/\A---\R.*?\R---\R*/s', '', $markdownContent, 1);
}
/**
 * Rewrite Markdown image links that point into `media/` so they target the
 * `/markdown-media/...` URL, prefixed with the directory of the document's
 * Markdown file. Every path segment is URL-encoded; the alt text is kept.
 */
protected function rewriteMarkdownMediaPaths(Document $document, string $markdownContent): string
{
    $baseDir = dirname($document->markdown_path);

    $toRouteUrl = static function (array $matches) use ($baseDir): string {
        $mediaPath = trim($matches['path'] ?? '');
        $mediaPath = preg_replace('/^\.?\//', '', $mediaPath) ?? $mediaPath;
        $mediaPath = str_replace('\\', '/', $mediaPath);
        $mediaPath = ltrim($mediaPath, '/');

        // Encode each non-empty segment separately so the slashes stay intact.
        $encoded = [];
        foreach (explode('/', $baseDir . '/' . $mediaPath) as $segment) {
            if ($segment !== '') {
                $encoded[] = rawurlencode($segment);
            }
        }

        return sprintf(
            '![%s](%s)',
            $matches['alt'] ?? '',
            '/markdown-media/' . implode('/', $encoded)
        );
    };

    return (string) preg_replace_callback(
        '/!\[(?<alt>[^\]]*)]\((?<path>(?:\.\/)?media\/[^)]+)\)/',
        $toRouteUrl,
        $markdownContent
    );
}
/**
* 检查文档是否可以预览
*
* @param Document $document
* @return bool
*/
public function canPreview(Document $document): bool
{

View File

@@ -33,7 +33,7 @@ class DocumentService
return Storage::disk('local')->download(
$document->file_path,
$document->file_name
$document->display_file_name
);
}

View File

@@ -12,7 +12,7 @@
</h3>
<div class="mt-2 text-sm text-danger-700 dark:text-danger-300 space-y-1">
<p><strong>文档:</strong>{{ $document->title }}</p>
<p><strong>文件名:</strong>{{ $document->file_name }}</p>
<p><strong>文件名:</strong>{{ $document->display_file_name }}</p>
<p><strong>失败时间:</strong>{{ $document->updated_at->format('Y年m月d日 H:i:s') }}</p>
</div>
</div>

View File

@@ -33,7 +33,7 @@
文档内容预览
</h3>
<span class="text-xs text-gray-500 dark:text-gray-400">
{{ $document->file_name }}
{{ $document->display_file_name }}
</span>
</div>
</div>

View File

@@ -12,7 +12,7 @@
</h3>
<div class="mt-3 text-sm text-danger-700 dark:text-danger-300 space-y-1">
<p><strong>文档:</strong>{{ $document->title }}</p>
<p><strong>文件名:</strong>{{ $document->file_name }}</p>
<p><strong>文件名:</strong>{{ $document->display_file_name }}</p>
<p><strong>失败时间:</strong>{{ $document->updated_at->format('Y年m月d日 H:i:s') }}</p>
</div>
</div>

View File

@@ -57,7 +57,7 @@
文档内容预览
</h3>
<span class="text-xs text-gray-500 dark:text-gray-400">
{{ $document->file_name }}
{{ $document->display_file_name }}
</span>
</div>
</div>

View File

@@ -98,18 +98,24 @@ Route::middleware(['auth'])->group(function () {
});
// Serve media files stored on the "markdown" disk (authentication required).
// Path format: /markdown/{path}/media/{filename}
// where path can be: 2025/12/04/{uuid} or {uuid}
Route::middleware(['auth'])->get('/markdown/{path}/media/{filename}', function ($path, $filename) {
// Build the full path
$fullPath = $path . '/media/' . $filename;
// Path format: /markdown-media/{path}
Route::middleware(['auth'])->get('/markdown-media/{path}', function ($path) {
$path = trim((string) $path, '/');
if (!Storage::disk('markdown')->exists($fullPath)) {
// NOTE(review): the '../' substring test does not reject a trailing ".." segment
// (e.g. "a/..") or backslash-separated traversal; a per-segment check would be
// stricter. Confirm how the disk adapter treats such paths.
if ($path === '' || str_contains($path, '../')) {
abort(404);
}
$file = Storage::disk('markdown')->get($fullPath);
$mimeType = Storage::disk('markdown')->mimeType($fullPath);
if (!Storage::disk('markdown')->exists($path)) {
abort(404);
}
// Only files whose detected MIME type starts with "image/" are served; anything else is a 404.
$mimeType = Storage::disk('markdown')->mimeType($path);
if (!is_string($mimeType) || !str_starts_with($mimeType, 'image/')) {
abort(404);
}
$file = Storage::disk('markdown')->get($path);
return response($file, 200)->header('Content-Type', $mimeType);
})->where('path', '.*')->where('filename', '[^/]+')->name('markdown.media');
})->where('path', '.*')->name('markdown.media');

View File

@@ -0,0 +1,101 @@
<?php
namespace Tests\Feature;
use App\Models\Document;
use App\Services\DocumentConversionService;
use App\Services\DocumentPreviewService;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Illuminate\Support\Facades\Storage;
use Tests\TestCase;
/**
 * Feature tests for Markdown preview formatting, display file names and the
 * persistence of media assets alongside stored Markdown.
 */
class DocumentPreviewFormattingTest extends TestCase
{
    use RefreshDatabase;

    /**
     * Fake the "markdown" disk and switch Scout to the null driver for every test.
     */
    protected function setUp(): void
    {
        parent::setUp();

        Storage::fake('markdown');
        config(['scout.driver' => 'null']);
    }

    /**
     * Front matter containing author/source_file keys must not show up in the preview HTML.
     */
    public function test_markdown_preview_strips_internal_front_matter(): void
    {
        $document = Document::factory()->converted()->create([
            'title' => '技术文档',
            'markdown_path' => '2026/04/24/test/test.md',
        ]);

        Storage::disk('markdown')->put($document->markdown_path, <<<'MD'
            ---
            author: 利爪然死肥宅
            source_file: /tmp/demo.docx
            ---
            # 正文标题
            这是正文内容。
            MD);

        $rendered = $this->renderPreview($document);

        self::assertStringContainsString('正文标题', $rendered);
        self::assertStringContainsString('这是正文内容。', $rendered);
        self::assertStringNotContainsString('author:', $rendered);
        self::assertStringNotContainsString('source_file:', $rendered);
    }

    /**
     * Relative media links must be rewritten to the /markdown-media/ URL.
     */
    public function test_markdown_preview_rewrites_media_links_to_authenticated_route(): void
    {
        $document = Document::factory()->converted()->create([
            'title' => '技术文档',
            'markdown_path' => '2026/04/24/test-links/test.md',
        ]);

        $disk = Storage::disk('markdown');
        $disk->put($document->markdown_path, <<<'MD'
            # 图片示例
            ![示意图](media/image2.png)
            MD);
        // The referenced image already exists, so nothing has to be re-extracted.
        $disk->put('2026/04/24/test-links/media/image2.png', 'fake-image-binary');

        $rendered = $this->renderPreview($document);

        self::assertStringContainsString(
            '/markdown-media/2026/04/24/test-links/media/image2.png',
            $rendered
        );
    }

    /**
     * A stored file name that looks generated falls back to "<title>.<extension>".
     */
    public function test_generated_storage_name_falls_back_to_title_for_display(): void
    {
        $document = Document::factory()->create([
            'title' => '技术文档',
            'file_name' => '01KPW4SQJTT5X15QPZ412WGSFM.docx',
            'file_path' => 'documents/2026/04/23/01KPW4SQJTT5X15QPZ412WGSFM.docx',
        ]);

        self::assertSame('技术文档.docx', $document->display_file_name);
    }

    /**
     * Media files handed to saveMarkdownToFile() are written next to the Markdown file.
     */
    public function test_save_markdown_to_file_persists_media_assets(): void
    {
        $document = Document::factory()->create([
            'title' => '图片文档',
        ]);

        $assets = [
            'media/image2.png' => 'image-binary',
            'media/nested/image3.png' => 'nested-image-binary',
        ];

        $storedPath = app(DocumentConversionService::class)->saveMarkdownToFile(
            $document,
            "# 图片文档\n\n![示意图](media/image2.png)\n",
            $assets
        );

        $baseDir = dirname($storedPath);
        $expectedFiles = [
            $storedPath,
            $baseDir . '/media/image2.png',
            $baseDir . '/media/nested/image3.png',
        ];

        foreach ($expectedFiles as $expectedFile) {
            Storage::disk('markdown')->assertExists($expectedFile);
        }
    }

    /**
     * Render the document's Markdown through the preview service.
     */
    private function renderPreview(Document $document): string
    {
        return app(DocumentPreviewService::class)->convertMarkdownToHtml($document);
    }
}

View File

@@ -141,6 +141,35 @@ class SwooleQueueCompatibilityTest extends TestCase
$job->handle($conversionService);
}
/**
 * A non-Exception Throwable raised by the conversion service must surface
 * from the job as a RuntimeException carrying the same message.
 *
 * @test
 */
public function test_queue_job_wraps_throwables_from_conversion_service()
{
    $uploader = User::factory()->create();
    $document = Document::factory()->create([
        'uploaded_by' => $uploader->id,
        'title' => 'Throwable 测试文档',
        'file_path' => 'throwable-test.docx',
    ]);

    // The mocked service fails with an \Error, i.e. a Throwable that is not an \Exception.
    $failingService = $this->createMock(DocumentConversionService::class);
    $failingService
        ->expects($this->once())
        ->method('convertToMarkdown')
        ->willThrowException(new \Error('底层转换错误'));
    $this->app->instance(DocumentConversionService::class, $failingService);

    $this->expectException(\RuntimeException::class);
    $this->expectExceptionMessage('底层转换错误');

    (new ConvertDocumentToMarkdown($document))->handle($failingService);
}
/**
* 测试队列任务的重试机制
*