refactor: 重构知识库文件上传和处理, 支持 pdf
This commit is contained in:
@@ -40,13 +40,13 @@ class DocumentResource extends Resource
|
||||
public static function getEloquentQuery(): Builder
{
    // Start from the resource's default query and narrow it with the
    // accessibleBy scope so users only see documents they are permitted to view.
    $documents = parent::getEloquentQuery();
    $currentUser = auth()->user();

    // With no authenticated user the scope is not applied; the base query is returned unchanged.
    if (! $currentUser) {
        return $documents;
    }

    $documents->accessibleBy($currentUser);

    return $documents;
}
|
||||
|
||||
@@ -60,27 +60,32 @@ class DocumentResource extends Resource
|
||||
->maxLength(255)
|
||||
->placeholder('请输入文档标题')
|
||||
->columnSpanFull(),
|
||||
|
||||
|
||||
Forms\Components\Textarea::make('description')
|
||||
->label('文档描述')
|
||||
->rows(3)
|
||||
->maxLength(65535)
|
||||
->placeholder('请输入文档描述(可选)')
|
||||
->columnSpanFull(),
|
||||
|
||||
|
||||
Forms\Components\FileUpload::make('file')
|
||||
->label('文档文件')
|
||||
->required()
|
||||
->acceptedFileTypes(['application/msword', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'])
|
||||
->acceptedFileTypes(config('documents.supported_formats.mime_types', [
|
||||
'application/msword',
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'application/pdf',
|
||||
'application/vnd.ms-powerpoint',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
]))
|
||||
->maxSize(51200) // 50MB
|
||||
->disk('local')
|
||||
->directory('documents/' . date('Y/m/d'))
|
||||
->visibility('private')
|
||||
->downloadable()
|
||||
->preserveFilenames() // 保留原始文件名
|
||||
->helperText('仅支持 .doc 和 .docx 格式,最大 50MB')
|
||||
->helperText('支持 .docx/.pptx/.xlsx/.pdf 格式,最大 50MB')
|
||||
->columnSpanFull(),
|
||||
|
||||
|
||||
Forms\Components\Select::make('type')
|
||||
->label('文档类型')
|
||||
->required()
|
||||
@@ -90,18 +95,19 @@ class DocumentResource extends Resource
|
||||
])
|
||||
->default('global')
|
||||
->reactive()
|
||||
->afterStateUpdated(fn ($state, callable $set) =>
|
||||
->afterStateUpdated(
|
||||
fn($state, callable $set) =>
|
||||
$state === 'global' ? $set('group_id', null) : null
|
||||
)
|
||||
->helperText('全局知识库所有用户可见,专用知识库仅指定分组可见'),
|
||||
|
||||
|
||||
Forms\Components\Select::make('group_id')
|
||||
->label('所属分组')
|
||||
->relationship('group', 'name')
|
||||
->searchable()
|
||||
->preload()
|
||||
->required(fn (Forms\Get $get): bool => $get('type') === 'dedicated')
|
||||
->visible(fn (Forms\Get $get): bool => $get('type') === 'dedicated')
|
||||
->required(fn(Forms\Get $get): bool => $get('type') === 'dedicated')
|
||||
->visible(fn(Forms\Get $get): bool => $get('type') === 'dedicated')
|
||||
->helperText('专用知识库必须选择所属分组'),
|
||||
]);
|
||||
}
|
||||
@@ -122,51 +128,51 @@ class DocumentResource extends Resource
|
||||
}
|
||||
return null;
|
||||
}),
|
||||
|
||||
|
||||
Tables\Columns\TextColumn::make('type')
|
||||
->label('文档类型')
|
||||
->badge()
|
||||
->color(fn (string $state): string => match ($state) {
|
||||
->color(fn(string $state): string => match ($state) {
|
||||
'global' => 'success',
|
||||
'dedicated' => 'warning',
|
||||
})
|
||||
->formatStateUsing(fn (string $state): string => match ($state) {
|
||||
->formatStateUsing(fn(string $state): string => match ($state) {
|
||||
'global' => '全局知识库',
|
||||
'dedicated' => '专用知识库',
|
||||
default => $state,
|
||||
})
|
||||
->sortable(),
|
||||
|
||||
|
||||
Tables\Columns\TextColumn::make('group.name')
|
||||
->label('所属分组')
|
||||
->searchable()
|
||||
->sortable()
|
||||
->placeholder('—')
|
||||
->toggleable(),
|
||||
|
||||
|
||||
Tables\Columns\TextColumn::make('uploader.name')
|
||||
->label('上传者')
|
||||
->searchable()
|
||||
->sortable()
|
||||
->toggleable(),
|
||||
|
||||
|
||||
Tables\Columns\TextColumn::make('file_size')
|
||||
->label('文件大小')
|
||||
->formatStateUsing(fn ($state): string => self::formatFileSize($state))
|
||||
->formatStateUsing(fn($state): string => self::formatFileSize($state))
|
||||
->sortable()
|
||||
->toggleable(),
|
||||
|
||||
|
||||
Tables\Columns\TextColumn::make('conversion_status')
|
||||
->label('转换状态')
|
||||
->badge()
|
||||
->color(fn (?string $state): string => match ($state) {
|
||||
->color(fn(?string $state): string => match ($state) {
|
||||
'completed' => 'success',
|
||||
'processing' => 'info',
|
||||
'pending' => 'warning',
|
||||
'failed' => 'danger',
|
||||
default => 'gray',
|
||||
})
|
||||
->formatStateUsing(fn (?string $state): string => match ($state) {
|
||||
->formatStateUsing(fn(?string $state): string => match ($state) {
|
||||
'completed' => '已完成',
|
||||
'processing' => '转换中',
|
||||
'pending' => '等待转换',
|
||||
@@ -175,13 +181,13 @@ class DocumentResource extends Resource
|
||||
})
|
||||
->sortable()
|
||||
->toggleable(),
|
||||
|
||||
|
||||
Tables\Columns\TextColumn::make('created_at')
|
||||
->label('上传时间')
|
||||
->dateTime('Y年m月d日 H:i')
|
||||
->sortable()
|
||||
->toggleable(),
|
||||
|
||||
|
||||
Tables\Columns\TextColumn::make('updated_at')
|
||||
->label('更新时间')
|
||||
->dateTime('Y年m月d日 H:i')
|
||||
@@ -196,21 +202,21 @@ class DocumentResource extends Resource
|
||||
'dedicated' => '专用知识库',
|
||||
])
|
||||
->placeholder('全部类型'),
|
||||
|
||||
|
||||
Tables\Filters\SelectFilter::make('group_id')
|
||||
->label('所属分组')
|
||||
->relationship('group', 'name')
|
||||
->searchable()
|
||||
->preload()
|
||||
->placeholder('全部分组'),
|
||||
|
||||
|
||||
Tables\Filters\SelectFilter::make('uploaded_by')
|
||||
->label('上传者')
|
||||
->relationship('uploader', 'name')
|
||||
->searchable()
|
||||
->preload()
|
||||
->placeholder('全部上传者'),
|
||||
|
||||
|
||||
Tables\Filters\SelectFilter::make('conversion_status')
|
||||
->label('转换状态')
|
||||
->options([
|
||||
@@ -226,32 +232,29 @@ class DocumentResource extends Resource
|
||||
->label('重试转换')
|
||||
->icon('heroicon-o-arrow-path')
|
||||
->color('warning')
|
||||
->visible(fn (Document $record): bool =>
|
||||
->visible(
|
||||
fn(Document $record): bool =>
|
||||
in_array($record->conversion_status, ['failed', 'processing', 'pending'])
|
||||
)
|
||||
->requiresConfirmation()
|
||||
->modalHeading('重试文档转换')
|
||||
->modalDescription(fn (Document $record): string =>
|
||||
'确定要重新转换文档 "' . $record->title . '" 吗?' .
|
||||
"\n\n当前状态:" . match($record->conversion_status) {
|
||||
'failed' => '转换失败',
|
||||
'processing' => '转换中(可能卡住)',
|
||||
'pending' => '等待转换',
|
||||
default => $record->conversion_status,
|
||||
} .
|
||||
($record->conversion_error ? "\n\n错误信息:" . $record->conversion_error : '')
|
||||
->modalDescription(
|
||||
fn(Document $record): string =>
|
||||
'确定要重新转换文档 "' . $record->title . '" 吗?' .
|
||||
"\n\n当前状态:" . match ($record->conversion_status) {
|
||||
'failed' => '转换失败',
|
||||
'processing' => '转换中(可能卡住)',
|
||||
'pending' => '等待转换',
|
||||
default => $record->conversion_status,
|
||||
} .
|
||||
($record->conversion_error ? "\n\n错误信息:" . $record->conversion_error : '')
|
||||
)
|
||||
->modalSubmitActionLabel('确认重试')
|
||||
->action(function (Document $record) {
|
||||
try {
|
||||
// 重置转换状态
|
||||
$record->conversion_status = 'pending';
|
||||
$record->conversion_error = null;
|
||||
$record->save();
|
||||
|
||||
// 重新派发转换任务
|
||||
\App\Jobs\ConvertDocumentToMarkdown::dispatch($record);
|
||||
|
||||
app(\App\Services\DocumentConversionService::class)
|
||||
->queueConversion($record);
|
||||
|
||||
\Filament\Notifications\Notification::make()
|
||||
->success()
|
||||
->title('重试成功')
|
||||
@@ -269,11 +272,13 @@ class DocumentResource extends Resource
|
||||
->label('查看错误')
|
||||
->icon('heroicon-o-exclamation-triangle')
|
||||
->color('danger')
|
||||
->visible(fn (Document $record): bool =>
|
||||
->visible(
|
||||
fn(Document $record): bool =>
|
||||
$record->conversion_status === 'failed' && !empty($record->conversion_error)
|
||||
)
|
||||
->modalHeading('转换错误详情')
|
||||
->modalContent(fn (Document $record): \Illuminate\Contracts\View\View =>
|
||||
->modalContent(
|
||||
fn(Document $record): \Illuminate\Contracts\View\View =>
|
||||
view('filament.modals.conversion-error', [
|
||||
'document' => $record,
|
||||
'error' => $record->conversion_error,
|
||||
@@ -285,12 +290,13 @@ class DocumentResource extends Resource
|
||||
->label('预览 Markdown')
|
||||
->icon('heroicon-o-eye')
|
||||
->color('info')
|
||||
->visible(fn (Document $record): bool => $record->conversion_status === 'completed')
|
||||
->url(fn (Document $record): string => route('documents.preview', $record))
|
||||
->visible(fn(Document $record): bool => $record->conversion_status === 'completed')
|
||||
->url(fn(Document $record): string => route('documents.preview', $record))
|
||||
->openUrlInNewTab()
|
||||
->tooltip(fn (Document $record): ?string =>
|
||||
$record->conversion_status !== 'completed'
|
||||
? '文档尚未完成转换'
|
||||
->tooltip(
|
||||
fn(Document $record): ?string =>
|
||||
$record->conversion_status !== 'completed'
|
||||
? '文档尚未完成转换'
|
||||
: null
|
||||
),
|
||||
Tables\Actions\Action::make('download')
|
||||
@@ -300,11 +306,11 @@ class DocumentResource extends Resource
|
||||
->action(function (Document $record) {
|
||||
$documentService = app(\App\Services\DocumentService::class);
|
||||
$user = auth()->user();
|
||||
|
||||
|
||||
try {
|
||||
// 记录下载日志
|
||||
$documentService->logDownload($record, $user);
|
||||
|
||||
|
||||
// 返回文件下载响应
|
||||
return $documentService->downloadDocument($record, $user);
|
||||
} catch (\Exception $e) {
|
||||
@@ -313,7 +319,7 @@ class DocumentResource extends Resource
|
||||
->title('下载失败')
|
||||
->body($e->getMessage())
|
||||
->send();
|
||||
|
||||
|
||||
return null;
|
||||
}
|
||||
}),
|
||||
@@ -341,13 +347,13 @@ class DocumentResource extends Resource
|
||||
if ($bytes === null) {
|
||||
return '—';
|
||||
}
|
||||
|
||||
|
||||
$units = ['B', 'KB', 'MB', 'GB'];
|
||||
$bytes = max($bytes, 0);
|
||||
$pow = floor(($bytes ? log($bytes) : 0) / log(1024));
|
||||
$pow = min($pow, count($units) - 1);
|
||||
$bytes /= (1 << (10 * $pow));
|
||||
|
||||
|
||||
return round($bytes, 2) . ' ' . $units[$pow];
|
||||
}
|
||||
|
||||
|
||||
@@ -3,8 +3,6 @@
|
||||
namespace App\Filament\Resources\DocumentResource\Pages;
|
||||
|
||||
use App\Filament\Resources\DocumentResource;
|
||||
use App\Services\DocumentService;
|
||||
use Filament\Actions;
|
||||
use Filament\Notifications\Notification;
|
||||
use Filament\Resources\Pages\CreateRecord;
|
||||
use Illuminate\Support\Facades\Auth;
|
||||
@@ -16,37 +14,28 @@ class CreateDocument extends CreateRecord
|
||||
|
||||
/**
 * Prepare the submitted form state before the document record is created.
 *
 * Stamps the current user as uploader, clears the group for global documents,
 * and expands the temporary `file` upload field into the stored file columns.
 *
 * @param array $data Form state submitted from the create page.
 * @return array The attributes that will be persisted.
 */
protected function mutateFormDataBeforeCreate(array $data): array
{
    $data['uploaded_by'] = Auth::id();

    // Global documents never belong to a group.
    if ($data['type'] === 'global') {
        $data['group_id'] = null;
    }

    if (isset($data['file'])) {
        $filePath = $data['file'];
        $localDisk = Storage::disk('local');

        $data['file_path'] = $filePath;
        // The upload field uses preserveFilenames(), so the basename is the original file name.
        $data['file_name'] = basename($filePath);
        $data['file_size'] = $localDisk->size($filePath);
        $data['mime_type'] = $localDisk->mimeType($filePath);

        // `file` is only a temporary form field; drop it before persisting.
        unset($data['file']);
    }

    return $data;
}
|
||||
|
||||
protected function afterCreate(): void
{
    // Once the record exists, hand it to the conversion service to be queued for conversion.
    app(\App\Services\DocumentConversionService::class)->queueConversion($this->record);
}
|
||||
|
||||
@@ -6,12 +6,15 @@ use App\Filament\Resources\DocumentResource;
|
||||
use Filament\Actions;
|
||||
use Filament\Notifications\Notification;
|
||||
use Filament\Resources\Pages\EditRecord;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
|
||||
class EditDocument extends EditRecord
|
||||
{
|
||||
protected static string $resource = DocumentResource::class;
|
||||
|
||||
private ?string $previousFilePath = null;
|
||||
|
||||
protected function getHeaderActions(): array
|
||||
{
|
||||
return [
|
||||
@@ -24,60 +27,55 @@ class EditDocument extends EditRecord
|
||||
|
||||
protected function mutateFormDataBeforeFill(array $data): array
{
    $storedPath = $data['file_path'] ?? null;

    // Remember the path the form was filled with so a later save can detect a replaced file.
    $this->previousFilePath = $storedPath;

    // Mirror the stored path into the `file` upload field so the current file is displayed.
    if ($storedPath !== null) {
        $data['file'] = $storedPath;
    }

    return $data;
}
|
||||
|
||||
/**
 * Prepare the submitted form state before the document record is updated.
 *
 * Clears the group for global documents and, when the uploaded file has been
 * replaced, removes the previous source and Markdown files, refreshes the
 * stored file columns and resets the conversion fields so the document is
 * converted again.
 *
 * @param array $data Form state submitted from the edit page.
 * @return array The attributes that will be persisted.
 */
protected function mutateFormDataBeforeSave(array $data): array
{
    // Global documents never belong to a group.
    if ($data['type'] === 'global') {
        $data['group_id'] = null;
    }

    $currentFile = $data['file'] ?? null;

    // previousFilePath is captured while the form is filled, but Livewire does not
    // persist private properties between requests, so it may be null by the time
    // the form is saved. Fall back to the path still stored on the record; without
    // this every save would look like a file replacement and reset the conversion.
    $previousFile = $this->previousFilePath ?? $this->record->file_path;

    if ($currentFile && $currentFile !== $previousFile) {
        $localDisk = Storage::disk('local');

        // Remove the replaced source file.
        if ($previousFile && $localDisk->exists($previousFile)) {
            $localDisk->delete($previousFile);
        }

        // Remove the Markdown generated from the replaced file.
        if ($this->record->markdown_path && Storage::disk('markdown')->exists($this->record->markdown_path)) {
            Storage::disk('markdown')->delete($this->record->markdown_path);
        }

        $data['file_path'] = $currentFile;
        $data['file_name'] = basename($currentFile);
        $data['file_size'] = $localDisk->size($currentFile);
        $data['mime_type'] = $localDisk->mimeType($currentFile);

        // Reset the conversion fields; the `pending` status marks the document for re-conversion.
        $data['conversion_status'] = 'pending';
        $data['markdown_path'] = null;
        $data['markdown_preview'] = null;
        $data['conversion_error'] = null;
    }

    // `file` is only a temporary form field; drop it before persisting.
    unset($data['file']);

    return $data;
}
|
||||
|
||||
protected function afterSave(): void
|
||||
{
|
||||
// 如果文档的转换状态是 pending,说明文件已更新,需要触发重新转换
|
||||
// 刷新模型以获取最新数据库状态
|
||||
$this->record->refresh();
|
||||
|
||||
if ($this->record->conversion_status === 'pending') {
|
||||
$conversionService = app(\App\Services\DocumentConversionService::class);
|
||||
$conversionService->queueConversion($this->record);
|
||||
|
||||
@@ -34,21 +34,15 @@ class ViewDocument extends ViewRecord
|
||||
->modalSubmitActionLabel('确认重试')
|
||||
->action(function () {
|
||||
try {
|
||||
// 重置转换状态
|
||||
$this->record->conversion_status = 'pending';
|
||||
$this->record->conversion_error = null;
|
||||
$this->record->save();
|
||||
|
||||
// 重新派发转换任务
|
||||
\App\Jobs\ConvertDocumentToMarkdown::dispatch($this->record);
|
||||
|
||||
app(\App\Services\DocumentConversionService::class)
|
||||
->queueConversion($this->record);
|
||||
|
||||
Notification::make()
|
||||
->success()
|
||||
->title('重试成功')
|
||||
->body('文档转换任务已重新加入队列,请稍后查看转换结果。')
|
||||
->send();
|
||||
|
||||
// 刷新页面数据
|
||||
|
||||
$this->refreshFormData([
|
||||
'conversion_status',
|
||||
'conversion_error',
|
||||
|
||||
@@ -18,39 +18,12 @@ class ConvertDocumentToMarkdown implements ShouldQueue
|
||||
{
|
||||
use Dispatchable, InteractsWithQueue, Queueable, SerializesModels;
|
||||
|
||||
/**
|
||||
* 任务最大尝试次数
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
public $tries;
|
||||
|
||||
/**
|
||||
* 任务超时时间(秒)
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
public $timeout;
|
||||
|
||||
/**
|
||||
* 重试延迟(秒)
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
public $backoff;
|
||||
|
||||
/**
|
||||
* 文档实例
|
||||
*
|
||||
* @var Document
|
||||
*/
|
||||
protected Document $document;
|
||||
|
||||
/**
|
||||
* 创建新的任务实例
|
||||
*
|
||||
* @param Document $document
|
||||
*/
|
||||
public function __construct(Document $document)
|
||||
{
|
||||
$this->document = $document;
|
||||
@@ -59,120 +32,60 @@ class ConvertDocumentToMarkdown implements ShouldQueue
|
||||
$this->backoff = config('documents.conversion.retry_delay', 60);
|
||||
}
|
||||
|
||||
/**
 * Execute the job: convert the document to Markdown, store the result and
 * record it on the document.
 *
 * Any exception is logged and rethrown so the queue can retry the job; on the
 * last allowed attempt the document is also marked as failed.
 *
 * @param DocumentConversionService $conversionService
 * @return void
 * @throws \Exception
 */
public function handle(DocumentConversionService $conversionService): void
{
    try {
        Log::info('开始转换文档', [
            'document_id' => $this->document->id,
            'document_title' => $this->document->title,
            'file_name' => $this->document->file_name,
            'attempt' => $this->attempts(),
        ]);

        // Only the `markdown` key of the conversion result is used here;
        // the Markdown is saved and recorded exactly once.
        $result = $conversionService->convertToMarkdown($this->document);

        $markdownPath = $conversionService->saveMarkdownToFile(
            $this->document,
            $result['markdown']
        );

        $conversionService->updateDocumentMarkdown($this->document, $markdownPath);

        Log::info('文档转换成功', [
            'document_id' => $this->document->id,
            'document_title' => $this->document->title,
            'markdown_path' => $markdownPath,
        ]);
    } catch (\Exception $e) {
        Log::error('文档转换失败', [
            'document_id' => $this->document->id,
            'document_title' => $this->document->title,
            'file_name' => $this->document->file_name,
            'attempt' => $this->attempts(),
            'error' => $e->getMessage(),
            'trace' => $e->getTraceAsString(),
        ]);

        // On the final attempt, mark the document as failed.
        if ($this->attempts() >= $this->tries) {
            $conversionService->handleConversionFailure($this->document, $e);
        }

        // Rethrow so the queue worker can schedule a retry.
        throw $e;
    }
}
|
||||
|
||||
/**
 * Handle the job's final failure.
 *
 * Logs the failure and makes sure the document is marked as failed.
 *
 * @param \Throwable $exception The error that caused the job to fail.
 * @return void
 */
public function failed(\Throwable $exception): void
{
    Log::error('文档转换任务最终失败', [
        'document_id' => $this->document->id,
        'document_title' => $this->document->title,
        'file_name' => $this->document->file_name,
        'error' => $exception->getMessage(),
    ]);

    // handleConversionFailure() accepts only \Exception, so other throwables
    // are wrapped; the original is chained as `previous` so its cause is kept.
    $failure = $exception instanceof \Exception
        ? $exception
        : new \Exception($exception->getMessage(), 0, $exception);

    app(DocumentConversionService::class)->handleConversionFailure($this->document, $failure);
}
|
||||
|
||||
/**
 * Recursively delete a directory (or a single file).
 *
 * Symbolic links are removed themselves and never followed, so content that
 * lives outside the given path is left untouched.
 *
 * @param string $dir Path of the directory to remove.
 * @return void
 * @throws \RuntimeException When the directory cannot be listed.
 */
protected function deleteDirectory(string $dir): void
{
    // Check for a link first: is_dir()/file_exists() resolve the link target,
    // which would otherwise make us descend into (and empty) the linked directory.
    if (is_link($dir)) {
        unlink($dir);
        return;
    }

    if (!file_exists($dir)) {
        return;
    }

    if (!is_dir($dir)) {
        unlink($dir);
        return;
    }

    $entries = scandir($dir);
    if ($entries === false) {
        throw new \RuntimeException("Unable to list directory: {$dir}");
    }

    // Each entry is handled by the same routine, which copes with files, links and sub-directories.
    foreach (array_diff($entries, ['.', '..']) as $entry) {
        $this->deleteDirectory($dir . '/' . $entry);
    }

    rmdir($dir);
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,214 +4,52 @@ namespace App\Services;
|
||||
|
||||
use App\Models\Document;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
use Illuminate\Support\Facades\Process;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
use Illuminate\Support\Str;
|
||||
use Paperdoc\Support\DocumentManager;
|
||||
|
||||
/**
|
||||
* 文档转换服务
|
||||
* 负责将 Word 文档转换为 Markdown 格式
|
||||
* 使用 paperdoc-lib 将文档(DOCX/PPTX/XLSX/PDF)转换为 Markdown
|
||||
*/
|
||||
class DocumentConversionService
|
||||
{
|
||||
/**
|
||||
* 转换驱动
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected string $driver;
|
||||
|
||||
/**
|
||||
* Pandoc 可执行文件路径
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected string $pandocPath;
|
||||
|
||||
/**
|
||||
* 转换超时时间(秒)
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
protected int $timeout;
|
||||
|
||||
/**
|
||||
* Markdown 预览长度
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
protected int $previewLength;
|
||||
|
||||
/**
|
||||
* 构造函数
|
||||
*/
|
||||
public function __construct()
|
||||
{
|
||||
$this->driver = config('documents.conversion.driver', 'pandoc');
|
||||
$this->pandocPath = config('documents.conversion.pandoc_path', 'pandoc');
|
||||
$this->timeout = config('documents.conversion.timeout', 300);
|
||||
$this->previewLength = config('documents.markdown.preview_length', 500);
|
||||
}
|
||||
|
||||
/**
|
||||
* 将 Word 文档转换为 Markdown
|
||||
*
|
||||
* @param Document $document
|
||||
* @return array 返回 ['markdown' => string, 'mediaDir' => string|null, 'tempDir' => string]
|
||||
* @throws \Exception
|
||||
* 将文档转换为 Markdown
|
||||
*/
|
||||
public function convertToMarkdown(Document $document): array
|
||||
{
|
||||
if ($this->driver === 'pandoc') {
|
||||
return $this->convertWithPandoc($document);
|
||||
}
|
||||
|
||||
throw new \Exception("不支持的转换驱动: {$this->driver}");
|
||||
}
|
||||
|
||||
/**
|
||||
* 使用 Pandoc 转换文档
|
||||
*
|
||||
* @param Document $document
|
||||
* @return array 返回 ['markdown' => string, 'mediaDir' => string|null]
|
||||
* @throws \Exception
|
||||
*/
|
||||
protected function convertWithPandoc(Document $document): array
|
||||
{
|
||||
// 获取文档的完整路径
|
||||
$documentPath = Storage::disk('local')->path($document->file_path);
|
||||
|
||||
if (!file_exists($documentPath)) {
|
||||
throw new \Exception("文档文件不存在: {$documentPath}");
|
||||
}
|
||||
|
||||
// 使用 Laravel 存储系统创建临时工作目录
|
||||
$tempDirName = 'temp/pandoc_' . uniqid();
|
||||
|
||||
// 确保临时目录存在
|
||||
if (!Storage::disk('local')->exists('temp')) {
|
||||
Storage::disk('local')->makeDirectory('temp');
|
||||
$doc = DocumentManager::open($documentPath, ['ocr' => false]);
|
||||
$markdown = DocumentManager::renderAs($doc, 'md');
|
||||
|
||||
if (empty(trim($markdown))) {
|
||||
throw new \Exception('文档转换后内容为空,可能是扫描件或不支持的内容格式');
|
||||
}
|
||||
|
||||
Storage::disk('local')->makeDirectory($tempDirName);
|
||||
$tempDir = Storage::disk('local')->path($tempDirName);
|
||||
|
||||
$tempOutputPath = $tempDir . '/output.md';
|
||||
|
||||
try {
|
||||
// 在临时目录中执行 Pandoc 转换命令
|
||||
$result = Process::timeout($this->timeout)
|
||||
->path($tempDir)
|
||||
->run([
|
||||
$this->pandocPath,
|
||||
$documentPath,
|
||||
'-f', $this->getInputFormat($document->mime_type),
|
||||
'-t', 'markdown',
|
||||
'-o', $tempOutputPath,
|
||||
'--wrap=none', // 不自动换行
|
||||
'--extract-media=.', // 提取媒体文件到当前目录
|
||||
]);
|
||||
|
||||
if (!$result->successful()) {
|
||||
throw new \Exception("Pandoc 转换失败: {$result->errorOutput()}");
|
||||
}
|
||||
|
||||
// 读取转换后的 Markdown 内容
|
||||
if (!file_exists($tempOutputPath)) {
|
||||
throw new \Exception("转换后的 Markdown 文件不存在");
|
||||
}
|
||||
|
||||
$markdown = file_get_contents($tempOutputPath);
|
||||
|
||||
if ($markdown === false) {
|
||||
throw new \Exception("无法读取转换后的 Markdown 文件");
|
||||
}
|
||||
|
||||
// 检查是否有提取的媒体文件
|
||||
$mediaDir = $tempDir . '/media';
|
||||
$hasMedia = is_dir($mediaDir) && count(glob($mediaDir . '/*')) > 0;
|
||||
|
||||
return [
|
||||
'markdown' => $markdown,
|
||||
'mediaDir' => $hasMedia ? $mediaDir : null,
|
||||
'tempDir' => $tempDir,
|
||||
'tempDirName' => $tempDirName, // 添加相对路径名
|
||||
];
|
||||
} catch (\Exception $e) {
|
||||
// 清理临时目录
|
||||
Storage::disk('local')->deleteDirectory($tempDirName);
|
||||
throw $e;
|
||||
}
|
||||
return ['markdown' => $markdown];
|
||||
}
|
||||
|
||||
/**
|
||||
* 递归删除目录
|
||||
*
|
||||
* @param string $dir 目录路径
|
||||
* @return void
|
||||
* 将 Markdown 内容保存到存储
|
||||
*/
|
||||
protected function deleteDirectory(string $dir): void
|
||||
public function saveMarkdownToFile(Document $document, string $markdown): string
|
||||
{
|
||||
if (!file_exists($dir)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!is_dir($dir)) {
|
||||
unlink($dir);
|
||||
return;
|
||||
}
|
||||
|
||||
$files = array_diff(scandir($dir), ['.', '..']);
|
||||
foreach ($files as $file) {
|
||||
$path = $dir . '/' . $file;
|
||||
if (is_dir($path)) {
|
||||
$this->deleteDirectory($path);
|
||||
} else {
|
||||
unlink($path);
|
||||
}
|
||||
}
|
||||
|
||||
rmdir($dir);
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据 MIME 类型获取 Pandoc 输入格式
|
||||
*
|
||||
* @param string $mimeType
|
||||
* @return string
|
||||
*/
|
||||
protected function getInputFormat(string $mimeType): string
|
||||
{
|
||||
return match ($mimeType) {
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document' => 'docx',
|
||||
'application/msword' => 'doc',
|
||||
default => 'docx',
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* 将 Markdown 内容和媒体文件保存到存储
|
||||
*
|
||||
* @param Document $document
|
||||
* @param string $markdown
|
||||
* @param string|null $mediaDir 临时媒体目录路径
|
||||
* @return string 返回 Markdown 文件路径
|
||||
* @throws \Exception
|
||||
*/
|
||||
public function saveMarkdownToFile(Document $document, string $markdown, ?string $mediaDir = null): string
|
||||
{
|
||||
// 生成文件路径
|
||||
$path = $this->generateMarkdownPath($document);
|
||||
$directory = dirname($path);
|
||||
|
||||
// 如果有媒体文件,先保存它们
|
||||
if ($mediaDir && is_dir($mediaDir)) {
|
||||
$this->saveMediaFiles($mediaDir, $directory);
|
||||
}
|
||||
|
||||
// 保存 Markdown 文件
|
||||
$saved = Storage::disk('markdown')->put($path, $markdown);
|
||||
|
||||
if (!$saved) {
|
||||
throw new \Exception("无法保存 Markdown 文件");
|
||||
}
|
||||
@@ -219,83 +57,33 @@ class DocumentConversionService
|
||||
return $path;
|
||||
}
|
||||
|
||||
/**
|
||||
* 保存媒体文件到 storage
|
||||
* 媒体文件保存在文档的 UUID 目录下的 media 子目录中
|
||||
*
|
||||
* @param string $sourceDir 源媒体目录
|
||||
* @param string $targetDir 目标目录(相对于 markdown disk,例如:2025/12/04/{uuid})
|
||||
* @return void
|
||||
*/
|
||||
protected function saveMediaFiles(string $sourceDir, string $targetDir): void
|
||||
{
|
||||
$files = glob($sourceDir . '/*');
|
||||
|
||||
foreach ($files as $file) {
|
||||
if (is_file($file)) {
|
||||
$filename = basename($file);
|
||||
// 保存到文档目录下的 media 子目录
|
||||
$targetPath = $targetDir . '/media/' . $filename;
|
||||
|
||||
// 读取文件内容
|
||||
$content = file_get_contents($file);
|
||||
|
||||
// 保存到 storage
|
||||
Storage::disk('markdown')->put($targetPath, $content);
|
||||
|
||||
Log::info('媒体文件已保存', [
|
||||
'filename' => $filename,
|
||||
'path' => $targetPath,
|
||||
]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Build the storage path for a document's Markdown file.
 *
 * A fresh UUID is used both as the directory name and as the file name, giving
 * `{uuid}/{uuid}.md`, optionally prefixed with a `Y/m/d` date directory.
 *
 * @param Document $document
 * @return string Path relative to the Markdown disk.
 */
protected function generateMarkdownPath(Document $document): string
{
    $uuid = Str::uuid()->toString();

    if (config('documents.storage.organize_by_date', true)) {
        // Organised by date: YYYY/MM/DD/{uuid}/{uuid}.md
        $date = $document->created_at ?? now();
        $directory = $date->format('Y/m/d') . '/' . $uuid;
    } else {
        // Flat layout: {uuid}/{uuid}.md
        $directory = $uuid;
    }

    return "{$directory}/{$uuid}.md";
}
|
||||
|
||||
/**
|
||||
* 获取 Markdown 内容的预览(前 N 个字符)
|
||||
*
|
||||
* @param string $markdown
|
||||
* @param int|null $length
|
||||
* @return string
|
||||
*/
|
||||
public function getMarkdownPreview(string $markdown, ?int $length = null): string
|
||||
{
|
||||
$length = $length ?? $this->previewLength;
|
||||
|
||||
// 移除多余的空白字符
|
||||
$cleaned = preg_replace('/\s+/', ' ', $markdown);
|
||||
$cleaned = trim($cleaned);
|
||||
|
||||
// 截取指定长度
|
||||
if (mb_strlen($cleaned) <= $length) {
|
||||
return $cleaned;
|
||||
}
|
||||
@@ -305,14 +93,9 @@ class DocumentConversionService
|
||||
|
||||
/**
|
||||
* 更新文档的 Markdown 信息
|
||||
*
|
||||
* @param Document $document
|
||||
* @param string $markdownPath
|
||||
* @return void
|
||||
*/
|
||||
public function updateDocumentMarkdown(Document $document, string $markdownPath): void
|
||||
{
|
||||
// 读取 Markdown 内容以生成预览
|
||||
$markdown = Storage::disk('markdown')->get($markdownPath);
|
||||
|
||||
if ($markdown === false) {
|
||||
@@ -325,7 +108,6 @@ class DocumentConversionService
|
||||
$preview = $this->getMarkdownPreview($markdown);
|
||||
}
|
||||
|
||||
// 更新文档记录
|
||||
$document->update([
|
||||
'markdown_path' => $markdownPath,
|
||||
'markdown_preview' => $preview,
|
||||
@@ -336,21 +118,17 @@ class DocumentConversionService
|
||||
|
||||
/**
|
||||
* 处理转换失败
|
||||
*
|
||||
* @param Document $document
|
||||
* @param \Exception $exception
|
||||
* @return void
|
||||
*/
|
||||
public function handleConversionFailure(Document $document, \Exception $exception): void
|
||||
{
|
||||
Log::error('文档转换失败', [
|
||||
'document_id' => $document->id,
|
||||
'document_title' => $document->title,
|
||||
'file_name' => $document->file_name,
|
||||
'error' => $exception->getMessage(),
|
||||
'trace' => $exception->getTraceAsString(),
|
||||
]);
|
||||
|
||||
// 更新文档状态
|
||||
$document->update([
|
||||
'conversion_status' => 'failed',
|
||||
'conversion_error' => $exception->getMessage(),
|
||||
@@ -359,21 +137,15 @@ class DocumentConversionService
|
||||
|
||||
/**
 * Queue a conversion job for the given document.
 *
 * The document is flagged as `processing` and any previous conversion error is
 * cleared before the job is dispatched.
 *
 * @param Document $document
 * @return void
 */
public function queueConversion(Document $document): void
{
    $document->update([
        'conversion_status' => 'processing',
        'conversion_error' => null,
    ]);

    // The target queue is configurable and defaults to `documents`.
    \App\Jobs\ConvertDocumentToMarkdown::dispatch($document)
        ->onQueue(config('documents.conversion.queue', 'documents'));
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,33 +4,25 @@ namespace App\Services;
|
||||
|
||||
use App\Models\Document;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
use PhpOffice\PhpWord\IOFactory;
|
||||
use PhpOffice\PhpWord\Settings;
|
||||
|
||||
class DocumentPreviewService
|
||||
{
|
||||
/**
 * Convert the document's Markdown content to HTML for preview.
 *
 * Used by both the Filament inline preview and the standalone preview page;
 * this is a thin delegate to convertMarkdownToHtml().
 *
 * @param Document $document
 * @return string HTML content
 * @throws \Exception
 */
public function convertToHtml(Document $document): string
{
    return $this->convertMarkdownToHtml($document);
}
|
||||
|
||||
/**
|
||||
* 将 Markdown 转换为 HTML(用于专门的 Markdown 预览页面)
|
||||
*
|
||||
* 将 Markdown 转换为 HTML
|
||||
*
|
||||
* @param Document $document
|
||||
* @return string HTML 内容
|
||||
* @throws \Exception
|
||||
@@ -38,15 +30,15 @@ class DocumentPreviewService
|
||||
public function convertMarkdownToHtml(Document $document): string
|
||||
{
|
||||
$markdownContent = $document->getMarkdownContent();
|
||||
|
||||
|
||||
if (empty($markdownContent)) {
|
||||
throw new \Exception('Markdown 内容为空');
|
||||
}
|
||||
|
||||
// 获取 Markdown 文件的目录(例如:2025/12/04)
|
||||
// 获取 Markdown 文件的目录
|
||||
$markdownDir = dirname($document->markdown_path);
|
||||
|
||||
// 修复图片路径:将 ./media/ 替换为 /markdown/{date}/media/
|
||||
// 修复图片路径:将 ./media/ 替换为 /markdown/{dir}/media/
|
||||
$markdownContent = preg_replace_callback(
|
||||
'/\(\.\/media\/([^)]+)\)/',
|
||||
function ($matches) use ($markdownDir) {
|
||||
@@ -58,250 +50,19 @@ class DocumentPreviewService
|
||||
|
||||
// 使用 MarkdownRenderService 转换为 HTML
|
||||
$renderService = app(MarkdownRenderService::class);
|
||||
$htmlContent = $renderService->render($markdownContent);
|
||||
|
||||
return $htmlContent;
|
||||
}
|
||||
|
||||
/**
|
||||
* 直接从 Word 文档转换为 HTML
|
||||
*
|
||||
* @param Document $document
|
||||
* @return string HTML 内容
|
||||
* @throws \Exception
|
||||
*/
|
||||
protected function convertWordToHtml(Document $document): string
|
||||
{
|
||||
// 检查文件是否存在
|
||||
if (!Storage::disk('local')->exists($document->file_path)) {
|
||||
throw new \Exception('文档文件不存在');
|
||||
}
|
||||
|
||||
// 获取文件的完整路径
|
||||
$filePath = Storage::disk('local')->path($document->file_path);
|
||||
|
||||
// 确保临时目录存在并设置 PHPWord 的临时目录
|
||||
$tempDir = storage_path('app/temp');
|
||||
if (!is_dir($tempDir)) {
|
||||
mkdir($tempDir, 0755, true);
|
||||
}
|
||||
Settings::setTempDir($tempDir);
|
||||
|
||||
// 加载 Word 文档
|
||||
$phpWord = IOFactory::load($filePath);
|
||||
|
||||
// 提取图片并转换为 base64
|
||||
$images = $this->extractImagesFromDocument($phpWord);
|
||||
|
||||
// 创建 HTML Writer
|
||||
$htmlWriter = IOFactory::createWriter($phpWord, 'HTML');
|
||||
|
||||
// 使用 Laravel 存储系统创建临时文件
|
||||
$tempFileName = 'temp/doc_preview_' . uniqid() . '.html';
|
||||
|
||||
// 确保临时目录存在
|
||||
if (!Storage::disk('local')->exists('temp')) {
|
||||
Storage::disk('local')->makeDirectory('temp');
|
||||
}
|
||||
|
||||
$tempHtmlPath = Storage::disk('local')->path($tempFileName);
|
||||
$htmlWriter->save($tempHtmlPath);
|
||||
|
||||
// 读取 HTML 内容
|
||||
$htmlContent = Storage::disk('local')->get($tempFileName);
|
||||
|
||||
// 删除临时文件
|
||||
Storage::disk('local')->delete($tempFileName);
|
||||
|
||||
// 将图片嵌入为 base64
|
||||
$htmlContent = $this->embedImagesInHtml($htmlContent, $images);
|
||||
|
||||
// 清理和美化 HTML
|
||||
$htmlContent = $this->cleanHtml($htmlContent);
|
||||
|
||||
return $htmlContent;
|
||||
}
|
||||
|
||||
/**
|
||||
* 从 Word 文档中提取所有图片
|
||||
*
|
||||
* @param \PhpOffice\PhpWord\PhpWord $phpWord
|
||||
* @return array 图片数组,键为图片索引,值为 base64 编码的图片数据
|
||||
*/
|
||||
protected function extractImagesFromDocument($phpWord): array
|
||||
{
|
||||
$images = [];
|
||||
$imageIndex = 0;
|
||||
|
||||
foreach ($phpWord->getSections() as $section) {
|
||||
foreach ($section->getElements() as $element) {
|
||||
// 处理图片元素
|
||||
if (method_exists($element, 'getElements')) {
|
||||
foreach ($element->getElements() as $childElement) {
|
||||
if ($childElement instanceof \PhpOffice\PhpWord\Element\Image) {
|
||||
$imageSource = $childElement->getSource();
|
||||
if (file_exists($imageSource)) {
|
||||
$imageData = file_get_contents($imageSource);
|
||||
$imageType = $childElement->getImageType();
|
||||
$mimeType = $this->getImageMimeType($imageType);
|
||||
$base64 = base64_encode($imageData);
|
||||
$images[$imageIndex] = "data:{$mimeType};base64,{$base64}";
|
||||
$imageIndex++;
|
||||
}
|
||||
}
|
||||
}
|
||||
} elseif ($element instanceof \PhpOffice\PhpWord\Element\Image) {
|
||||
$imageSource = $element->getSource();
|
||||
if (file_exists($imageSource)) {
|
||||
$imageData = file_get_contents($imageSource);
|
||||
$imageType = $element->getImageType();
|
||||
$mimeType = $this->getImageMimeType($imageType);
|
||||
$base64 = base64_encode($imageData);
|
||||
$images[$imageIndex] = "data:{$mimeType};base64,{$base64}";
|
||||
$imageIndex++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $images;
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据图片类型获取 MIME 类型
|
||||
*
|
||||
* @param string $imageType
|
||||
* @return string
|
||||
*/
|
||||
protected function getImageMimeType(string $imageType): string
|
||||
{
|
||||
$mimeTypes = [
|
||||
'jpg' => 'image/jpeg',
|
||||
'jpeg' => 'image/jpeg',
|
||||
'png' => 'image/png',
|
||||
'gif' => 'image/gif',
|
||||
'bmp' => 'image/bmp',
|
||||
'svg' => 'image/svg+xml',
|
||||
];
|
||||
|
||||
return $mimeTypes[strtolower($imageType)] ?? 'image/jpeg';
|
||||
}
|
||||
|
||||
/**
|
||||
* 将 HTML 中的图片替换为 base64 编码
|
||||
*
|
||||
* @param string $html
|
||||
* @param array $images
|
||||
* @return string
|
||||
*/
|
||||
protected function embedImagesInHtml(string $html, array $images): string
|
||||
{
|
||||
// PHPWord 生成的 HTML 中,图片通常以 <img src="..." /> 的形式存在
|
||||
// 我们需要将这些图片路径替换为 base64 数据
|
||||
|
||||
$imageIndex = 0;
|
||||
$html = preg_replace_callback(
|
||||
'/<img([^>]*?)src=["\']([^"\']+)["\']([^>]*?)>/i',
|
||||
function ($matches) use ($images, &$imageIndex) {
|
||||
$beforeSrc = $matches[1];
|
||||
$src = $matches[2];
|
||||
$afterSrc = $matches[3];
|
||||
|
||||
// 如果已经是 base64 或 http 链接,不处理
|
||||
if (strpos($src, 'data:') === 0 || strpos($src, 'http') === 0) {
|
||||
return $matches[0];
|
||||
}
|
||||
|
||||
// 使用提取的图片数据
|
||||
if (isset($images[$imageIndex])) {
|
||||
$src = $images[$imageIndex];
|
||||
$imageIndex++;
|
||||
}
|
||||
|
||||
return "<img{$beforeSrc}src=\"{$src}\"{$afterSrc}>";
|
||||
},
|
||||
$html
|
||||
);
|
||||
|
||||
return $html;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 清理和美化 HTML 内容
|
||||
*
|
||||
* @param string $html
|
||||
* @return string
|
||||
*/
|
||||
protected function cleanHtml(string $html): string
|
||||
{
|
||||
// 提取 body 内容
|
||||
if (preg_match('/<body[^>]*>(.*?)<\/body>/is', $html, $matches)) {
|
||||
$html = $matches[1];
|
||||
}
|
||||
|
||||
// 添加基本样式
|
||||
$styledHtml = '<div class="document-preview" style="
|
||||
font-family: -apple-system, BlinkMacSystemFont, \'Segoe UI\', Roboto, \'Helvetica Neue\', Arial, sans-serif;
|
||||
line-height: 1.6;
|
||||
color: #333;
|
||||
padding: 20px;
|
||||
background: white;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
|
||||
">';
|
||||
|
||||
$styledHtml .= $html;
|
||||
$styledHtml .= '</div>';
|
||||
|
||||
return $styledHtml;
|
||||
return $renderService->render($markdownContent);
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查文档是否可以预览
|
||||
*
|
||||
*
|
||||
* @param Document $document
|
||||
* @return bool
|
||||
*/
|
||||
public function canPreview(Document $document): bool
|
||||
{
|
||||
// 检查文件扩展名
|
||||
$extension = strtolower(pathinfo($document->file_name, PATHINFO_EXTENSION));
|
||||
|
||||
// 目前支持 .doc 和 .docx
|
||||
return in_array($extension, ['doc', 'docx']);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取文档预览的纯文本内容(用于搜索等)
|
||||
*
|
||||
* @param Document $document
|
||||
* @return string
|
||||
* @throws \Exception
|
||||
*/
|
||||
public function extractText(Document $document): string
|
||||
{
|
||||
try {
|
||||
if (!Storage::disk('local')->exists($document->file_path)) {
|
||||
throw new \Exception('文档文件不存在');
|
||||
}
|
||||
|
||||
$filePath = Storage::disk('local')->path($document->file_path);
|
||||
$phpWord = IOFactory::load($filePath);
|
||||
|
||||
$text = '';
|
||||
foreach ($phpWord->getSections() as $section) {
|
||||
foreach ($section->getElements() as $element) {
|
||||
if (method_exists($element, 'getText')) {
|
||||
$text .= $element->getText() . "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return trim($text);
|
||||
} catch (\Exception $e) {
|
||||
throw new \Exception('文本提取失败:' . $e->getMessage());
|
||||
}
|
||||
return $document->conversion_status === 'completed'
|
||||
&& !empty($document->markdown_path);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,93 +5,22 @@ namespace App\Services;
|
||||
use App\Models\Document;
|
||||
use App\Models\DownloadLog;
|
||||
use App\Models\User;
|
||||
use Illuminate\Http\UploadedFile;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
use Symfony\Component\HttpFoundation\StreamedResponse;
|
||||
|
||||
class DocumentService
|
||||
{
|
||||
/**
|
||||
* 上传文档
|
||||
*
|
||||
* @param UploadedFile $file 上传的文件
|
||||
* @param string $title 文档标题
|
||||
* @param string $type 文档类型 ('global' 或 'dedicated')
|
||||
* @param int|null $groupId 分组 ID (专用文档必填)
|
||||
* @param int $uploaderId 上传者用户 ID
|
||||
* @return Document
|
||||
* @throws \Exception
|
||||
*/
|
||||
public function uploadDocument(
|
||||
UploadedFile $file,
|
||||
string $title,
|
||||
string $type,
|
||||
?int $groupId,
|
||||
int $uploaderId
|
||||
): Document {
|
||||
// 验证文件格式
|
||||
$extension = strtolower($file->getClientOriginalExtension());
|
||||
if (!in_array($extension, ['doc', 'docx'])) {
|
||||
throw new \InvalidArgumentException('文件格式不支持,请上传 Word 文档(.doc 或 .docx)');
|
||||
}
|
||||
|
||||
// 验证专用文档必须有分组
|
||||
if ($type === 'dedicated' && empty($groupId)) {
|
||||
throw new \InvalidArgumentException('专用知识库文档必须指定所属分组');
|
||||
}
|
||||
|
||||
// 使用事务确保一致性
|
||||
return DB::transaction(function () use ($file, $title, $type, $groupId, $uploaderId) {
|
||||
// 获取原始文件名
|
||||
$originalFileName = $file->getClientOriginalName();
|
||||
|
||||
// 生成文件存储路径,使用原始文件名
|
||||
$directory = 'documents/' . date('Y/m/d');
|
||||
$filePath = $file->storeAs($directory, $originalFileName, 'local');
|
||||
|
||||
// 创建数据库记录,设置初始转换状态为 pending
|
||||
$document = Document::create([
|
||||
'title' => $title,
|
||||
'file_path' => $filePath,
|
||||
'file_name' => $originalFileName,
|
||||
'file_size' => $file->getSize(),
|
||||
'mime_type' => $file->getMimeType(),
|
||||
'type' => $type,
|
||||
'group_id' => $groupId,
|
||||
'uploaded_by' => $uploaderId,
|
||||
'description' => '',
|
||||
'conversion_status' => 'pending',
|
||||
]);
|
||||
|
||||
// 文档保存成功后,触发异步转换
|
||||
$conversionService = app(DocumentConversionService::class);
|
||||
$conversionService->queueConversion($document);
|
||||
|
||||
return $document;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* 验证用户是否有权访问指定文档
|
||||
*
|
||||
* @param Document $document 要访问的文档
|
||||
* @param User $user 用户
|
||||
* @return bool
|
||||
*/
|
||||
public function validateDocumentAccess(Document $document, User $user): bool
|
||||
{
|
||||
// 如果是全局文档,所有用户都可以访问
|
||||
if ($document->type === 'global') {
|
||||
return true;
|
||||
}
|
||||
|
||||
// 如果是专用文档,检查用户是否属于该文档的分组
|
||||
if ($document->type === 'dedicated') {
|
||||
// 获取用户所属的所有分组 ID
|
||||
$userGroupIds = $user->groups()->pluck('groups.id')->toArray();
|
||||
|
||||
// 检查文档的分组 ID 是否在用户的分组列表中
|
||||
return in_array($document->group_id, $userGroupIds);
|
||||
}
|
||||
|
||||
@@ -100,25 +29,17 @@ class DocumentService
|
||||
|
||||
/**
|
||||
* 下载文档
|
||||
*
|
||||
* @param Document $document 要下载的文档
|
||||
* @param User $user 用户
|
||||
* @return StreamedResponse
|
||||
* @throws \Exception
|
||||
*/
|
||||
public function downloadDocument(Document $document, User $user): StreamedResponse
|
||||
{
|
||||
// 验证用户权限
|
||||
if (!$this->validateDocumentAccess($document, $user)) {
|
||||
throw new \Exception('您没有权限访问此文档');
|
||||
}
|
||||
|
||||
// 检查文件是否存在
|
||||
if (!Storage::disk('local')->exists($document->file_path)) {
|
||||
throw new \Exception('文档不存在或已被删除');
|
||||
}
|
||||
|
||||
// 返回文件流式响应,使用原始文件名
|
||||
return Storage::disk('local')->download(
|
||||
$document->file_path,
|
||||
$document->file_name
|
||||
@@ -127,11 +48,6 @@ class DocumentService
|
||||
|
||||
/**
|
||||
* 记录文档下载日志
|
||||
*
|
||||
* @param Document $document 被下载的文档
|
||||
* @param User $user 下载的用户
|
||||
* @param string|null $ipAddress IP 地址
|
||||
* @return DownloadLog
|
||||
*/
|
||||
public function logDownload(Document $document, User $user, ?string $ipAddress = null): DownloadLog
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user