refactor: 重构知识库文件上传和处理, 支持 pdf
This commit is contained in:
@@ -4,214 +4,52 @@ namespace App\Services;
|
||||
|
||||
use App\Models\Document;
|
||||
use Illuminate\Support\Facades\Log;
|
||||
use Illuminate\Support\Facades\Process;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
use Illuminate\Support\Str;
|
||||
use Paperdoc\Support\DocumentManager;
|
||||
|
||||
/**
|
||||
* 文档转换服务
|
||||
* 负责将 Word 文档转换为 Markdown 格式
|
||||
* 使用 paperdoc-lib 将文档(DOCX/PPTX/XLSX/PDF)转换为 Markdown
|
||||
*/
|
||||
class DocumentConversionService
|
||||
{
|
||||
/**
|
||||
* 转换驱动
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected string $driver;
|
||||
|
||||
/**
|
||||
* Pandoc 可执行文件路径
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected string $pandocPath;
|
||||
|
||||
/**
|
||||
* 转换超时时间(秒)
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
protected int $timeout;
|
||||
|
||||
/**
|
||||
* Markdown 预览长度
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
protected int $previewLength;
|
||||
|
||||
/**
 * Initialise the service from the `documents` configuration.
 *
 * Every property is populated from config(); the second element of each
 * pair below is the fallback passed to config() for that key.
 */
public function __construct()
{
    // property name => [config key, fallback value]
    $settings = [
        'driver'        => ['documents.conversion.driver', 'pandoc'],
        'pandocPath'    => ['documents.conversion.pandoc_path', 'pandoc'],
        'timeout'       => ['documents.conversion.timeout', 300],
        'previewLength' => ['documents.markdown.preview_length', 500],
    ];

    foreach ($settings as $property => [$key, $fallback]) {
        $this->{$property} = config($key, $fallback);
    }
}
|
||||
|
||||
/**
|
||||
* 将 Word 文档转换为 Markdown
|
||||
*
|
||||
* @param Document $document
|
||||
* @return array 返回 ['markdown' => string, 'mediaDir' => string|null, 'tempDir' => string]
|
||||
* @throws \Exception
|
||||
* 将文档转换为 Markdown
|
||||
*/
|
||||
public function convertToMarkdown(Document $document): array
{
    // Pandoc is the only driver this method dispatches to; any other
    // configured value is rejected with an exception naming the driver.
    if ($this->driver !== 'pandoc') {
        throw new \Exception("不支持的转换驱动: {$this->driver}");
    }

    return $this->convertWithPandoc($document);
}
|
||||
|
||||
/**
|
||||
* 使用 Pandoc 转换文档
|
||||
*
|
||||
* @param Document $document
|
||||
* @return array 返回 ['markdown' => string, 'mediaDir' => string|null]
|
||||
* @throws \Exception
|
||||
*/
|
||||
protected function convertWithPandoc(Document $document): array
|
||||
{
|
||||
// 获取文档的完整路径
|
||||
$documentPath = Storage::disk('local')->path($document->file_path);
|
||||
|
||||
if (!file_exists($documentPath)) {
|
||||
throw new \Exception("文档文件不存在: {$documentPath}");
|
||||
}
|
||||
|
||||
// 使用 Laravel 存储系统创建临时工作目录
|
||||
$tempDirName = 'temp/pandoc_' . uniqid();
|
||||
|
||||
// 确保临时目录存在
|
||||
if (!Storage::disk('local')->exists('temp')) {
|
||||
Storage::disk('local')->makeDirectory('temp');
|
||||
$doc = DocumentManager::open($documentPath, ['ocr' => false]);
|
||||
$markdown = DocumentManager::renderAs($doc, 'md');
|
||||
|
||||
if (empty(trim($markdown))) {
|
||||
throw new \Exception('文档转换后内容为空,可能是扫描件或不支持的内容格式');
|
||||
}
|
||||
|
||||
Storage::disk('local')->makeDirectory($tempDirName);
|
||||
$tempDir = Storage::disk('local')->path($tempDirName);
|
||||
|
||||
$tempOutputPath = $tempDir . '/output.md';
|
||||
|
||||
try {
|
||||
// 在临时目录中执行 Pandoc 转换命令
|
||||
$result = Process::timeout($this->timeout)
|
||||
->path($tempDir)
|
||||
->run([
|
||||
$this->pandocPath,
|
||||
$documentPath,
|
||||
'-f', $this->getInputFormat($document->mime_type),
|
||||
'-t', 'markdown',
|
||||
'-o', $tempOutputPath,
|
||||
'--wrap=none', // 不自动换行
|
||||
'--extract-media=.', // 提取媒体文件到当前目录
|
||||
]);
|
||||
|
||||
if (!$result->successful()) {
|
||||
throw new \Exception("Pandoc 转换失败: {$result->errorOutput()}");
|
||||
}
|
||||
|
||||
// 读取转换后的 Markdown 内容
|
||||
if (!file_exists($tempOutputPath)) {
|
||||
throw new \Exception("转换后的 Markdown 文件不存在");
|
||||
}
|
||||
|
||||
$markdown = file_get_contents($tempOutputPath);
|
||||
|
||||
if ($markdown === false) {
|
||||
throw new \Exception("无法读取转换后的 Markdown 文件");
|
||||
}
|
||||
|
||||
// 检查是否有提取的媒体文件
|
||||
$mediaDir = $tempDir . '/media';
|
||||
$hasMedia = is_dir($mediaDir) && count(glob($mediaDir . '/*')) > 0;
|
||||
|
||||
return [
|
||||
'markdown' => $markdown,
|
||||
'mediaDir' => $hasMedia ? $mediaDir : null,
|
||||
'tempDir' => $tempDir,
|
||||
'tempDirName' => $tempDirName, // 添加相对路径名
|
||||
];
|
||||
} catch (\Exception $e) {
|
||||
// 清理临时目录
|
||||
Storage::disk('local')->deleteDirectory($tempDirName);
|
||||
throw $e;
|
||||
}
|
||||
return ['markdown' => $markdown];
|
||||
}
|
||||
|
||||
/**
|
||||
* 递归删除目录
|
||||
*
|
||||
* @param string $dir 目录路径
|
||||
* @return void
|
||||
* 将 Markdown 内容保存到存储
|
||||
*/
|
||||
protected function deleteDirectory(string $dir): void
|
||||
public function saveMarkdownToFile(Document $document, string $markdown): string
|
||||
{
|
||||
if (!file_exists($dir)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!is_dir($dir)) {
|
||||
unlink($dir);
|
||||
return;
|
||||
}
|
||||
|
||||
$files = array_diff(scandir($dir), ['.', '..']);
|
||||
foreach ($files as $file) {
|
||||
$path = $dir . '/' . $file;
|
||||
if (is_dir($path)) {
|
||||
$this->deleteDirectory($path);
|
||||
} else {
|
||||
unlink($path);
|
||||
}
|
||||
}
|
||||
|
||||
rmdir($dir);
|
||||
}
|
||||
|
||||
/**
 * Map a document MIME type to the value passed to Pandoc's `-f` option.
 *
 * Only `application/msword` yields `doc`. The OOXML word-processing MIME
 * type (`application/vnd.openxmlformats-officedocument.wordprocessingml.document`)
 * and every unrecognised MIME type yield `docx`.
 *
 * NOTE(review): pandoc's documented input formats do not appear to include
 * `doc` — confirm that legacy .doc files can actually be converted this way.
 *
 * @param string $mimeType MIME type recorded for the document.
 * @return string Pandoc input format name.
 */
protected function getInputFormat(string $mimeType): string
{
    if ($mimeType === 'application/msword') {
        return 'doc';
    }

    // Covers the explicit .docx MIME type as well as the fallback case.
    return 'docx';
}
|
||||
|
||||
/**
|
||||
* 将 Markdown 内容和媒体文件保存到存储
|
||||
*
|
||||
* @param Document $document
|
||||
* @param string $markdown
|
||||
* @param string|null $mediaDir 临时媒体目录路径
|
||||
* @return string 返回 Markdown 文件路径
|
||||
* @throws \Exception
|
||||
*/
|
||||
public function saveMarkdownToFile(Document $document, string $markdown, ?string $mediaDir = null): string
|
||||
{
|
||||
// 生成文件路径
|
||||
$path = $this->generateMarkdownPath($document);
|
||||
$directory = dirname($path);
|
||||
|
||||
// 如果有媒体文件,先保存它们
|
||||
if ($mediaDir && is_dir($mediaDir)) {
|
||||
$this->saveMediaFiles($mediaDir, $directory);
|
||||
}
|
||||
|
||||
// 保存 Markdown 文件
|
||||
$saved = Storage::disk('markdown')->put($path, $markdown);
|
||||
|
||||
if (!$saved) {
|
||||
throw new \Exception("无法保存 Markdown 文件");
|
||||
}
|
||||
@@ -219,83 +57,33 @@ class DocumentConversionService
|
||||
return $path;
|
||||
}
|
||||
|
||||
/**
 * Copy extracted media files onto the `markdown` storage disk.
 *
 * Every regular file directly inside $sourceDir is written to
 * "{$targetDir}/media/{basename}". Sub-directories are not descended into.
 * Copying is best-effort: a file that cannot be read or written is logged
 * as a warning and skipped, and the remaining files are still processed.
 *
 * @param string $sourceDir Absolute path of the directory holding the extracted media.
 * @param string $targetDir Destination directory, relative to the `markdown` disk.
 * @return void
 */
protected function saveMediaFiles(string $sourceDir, string $targetDir): void
{
    // glob() returns false on error; treat that like an empty directory
    // instead of iterating over a non-array.
    $files = glob($sourceDir . '/*') ?: [];

    foreach ($files as $file) {
        if (!is_file($file)) {
            continue;
        }

        $filename = basename($file);
        $targetPath = $targetDir . '/media/' . $filename;

        $content = file_get_contents($file);
        if ($content === false) {
            // Do not write a bogus payload when the source cannot be read.
            Log::warning('媒体文件读取失败', [
                'filename' => $filename,
                'source' => $file,
            ]);
            continue;
        }

        if (!Storage::disk('markdown')->put($targetPath, $content)) {
            // Only report success when the disk confirms the write.
            Log::warning('媒体文件保存失败', [
                'filename' => $filename,
                'path' => $targetPath,
            ]);
            continue;
        }

        Log::info('媒体文件已保存', [
            'filename' => $filename,
            'path' => $targetPath,
        ]);
    }
}
|
||||
|
||||
/**
 * Build the storage path for a document's Markdown file.
 *
 * A fresh UUID is generated on every call and used both as the directory
 * name and as the file name, giving either
 * "YYYY/MM/DD/{uuid}/{uuid}.md" (when `documents.storage.organize_by_date`
 * is truthy, the default) or "{uuid}/{uuid}.md".
 *
 * @param Document $document Its `created_at` supplies the date segment; now() is used when it is null.
 * @return string Path of the Markdown file.
 */
protected function generateMarkdownPath(Document $document): string
{
    $organizeByDate = config('documents.storage.organize_by_date', true);

    // One UUID serves as both the per-document directory and the file name.
    $uuid = Str::uuid()->toString();

    $directory = $uuid;
    if ($organizeByDate) {
        $date = $document->created_at ?? now();
        $directory = $date->format('Y/m/d') . '/' . $uuid;
    }

    return "{$directory}/{$uuid}.md";
}
|
||||
|
||||
/**
|
||||
* 获取 Markdown 内容的预览(前 N 个字符)
|
||||
*
|
||||
* @param string $markdown
|
||||
* @param int|null $length
|
||||
* @return string
|
||||
*/
|
||||
public function getMarkdownPreview(string $markdown, ?int $length = null): string
|
||||
{
|
||||
$length = $length ?? $this->previewLength;
|
||||
|
||||
// 移除多余的空白字符
|
||||
$cleaned = preg_replace('/\s+/', ' ', $markdown);
|
||||
$cleaned = trim($cleaned);
|
||||
|
||||
// 截取指定长度
|
||||
if (mb_strlen($cleaned) <= $length) {
|
||||
return $cleaned;
|
||||
}
|
||||
@@ -305,14 +93,9 @@ class DocumentConversionService
|
||||
|
||||
/**
|
||||
* 更新文档的 Markdown 信息
|
||||
*
|
||||
* @param Document $document
|
||||
* @param string $markdownPath
|
||||
* @return void
|
||||
*/
|
||||
public function updateDocumentMarkdown(Document $document, string $markdownPath): void
|
||||
{
|
||||
// 读取 Markdown 内容以生成预览
|
||||
$markdown = Storage::disk('markdown')->get($markdownPath);
|
||||
|
||||
if ($markdown === false) {
|
||||
@@ -325,7 +108,6 @@ class DocumentConversionService
|
||||
$preview = $this->getMarkdownPreview($markdown);
|
||||
}
|
||||
|
||||
// 更新文档记录
|
||||
$document->update([
|
||||
'markdown_path' => $markdownPath,
|
||||
'markdown_preview' => $preview,
|
||||
@@ -336,21 +118,17 @@ class DocumentConversionService
|
||||
|
||||
/**
|
||||
* 处理转换失败
|
||||
*
|
||||
* @param Document $document
|
||||
* @param \Exception $exception
|
||||
* @return void
|
||||
*/
|
||||
public function handleConversionFailure(Document $document, \Exception $exception): void
|
||||
{
|
||||
Log::error('文档转换失败', [
|
||||
'document_id' => $document->id,
|
||||
'document_title' => $document->title,
|
||||
'file_name' => $document->file_name,
|
||||
'error' => $exception->getMessage(),
|
||||
'trace' => $exception->getTraceAsString(),
|
||||
]);
|
||||
|
||||
// 更新文档状态
|
||||
$document->update([
|
||||
'conversion_status' => 'failed',
|
||||
'conversion_error' => $exception->getMessage(),
|
||||
@@ -359,21 +137,15 @@ class DocumentConversionService
|
||||
|
||||
/**
 * Mark a document as being converted and dispatch the conversion job.
 *
 * The document's `conversion_status` is set to `processing` and any previous
 * `conversion_error` is cleared before the job is dispatched onto the queue
 * named by `documents.conversion.queue` (fallback: `documents`).
 *
 * @param Document $document Document to convert.
 * @return void
 */
public function queueConversion(Document $document): void
{
    $processingState = [
        'conversion_status' => 'processing',
        'conversion_error' => null,
    ];
    $document->update($processingState);

    $targetQueue = config('documents.conversion.queue', 'documents');

    $pendingJob = \App\Jobs\ConvertDocumentToMarkdown::dispatch($document);
    $pendingJob->onQueue($targetQueue);
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,33 +4,25 @@ namespace App\Services;
|
||||
|
||||
use App\Models\Document;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
use PhpOffice\PhpWord\IOFactory;
|
||||
use PhpOffice\PhpWord\Settings;
|
||||
|
||||
class DocumentPreviewService
|
||||
{
|
||||
/**
|
||||
* 将文档转换为 HTML 用于预览
|
||||
* 在 Filament 后台中,直接从 Word 转换以保证图片正确显示
|
||||
*
|
||||
* 将文档的 Markdown 内容转换为 HTML 用于预览
|
||||
* 统一用于 Filament 后台内联预览和独立预览页面
|
||||
*
|
||||
* @param Document $document
|
||||
* @return string HTML 内容
|
||||
* @throws \Exception
|
||||
*/
|
||||
public function convertToHtml(Document $document): string
|
||||
{
|
||||
try {
|
||||
// 直接从 Word 转换,以确保图片正确显示
|
||||
// Markdown 转换的图片路径问题较复杂,暂时不使用
|
||||
return $this->convertWordToHtml($document);
|
||||
} catch (\Exception $e) {
|
||||
throw new \Exception('文档预览失败:' . $e->getMessage());
|
||||
}
|
||||
return $this->convertMarkdownToHtml($document);
|
||||
}
|
||||
|
||||
/**
|
||||
* 将 Markdown 转换为 HTML(用于专门的 Markdown 预览页面)
|
||||
*
|
||||
* 将 Markdown 转换为 HTML
|
||||
*
|
||||
* @param Document $document
|
||||
* @return string HTML 内容
|
||||
* @throws \Exception
|
||||
@@ -38,15 +30,15 @@ class DocumentPreviewService
|
||||
public function convertMarkdownToHtml(Document $document): string
|
||||
{
|
||||
$markdownContent = $document->getMarkdownContent();
|
||||
|
||||
|
||||
if (empty($markdownContent)) {
|
||||
throw new \Exception('Markdown 内容为空');
|
||||
}
|
||||
|
||||
// 获取 Markdown 文件的目录(例如:2025/12/04)
|
||||
// 获取 Markdown 文件的目录
|
||||
$markdownDir = dirname($document->markdown_path);
|
||||
|
||||
// 修复图片路径:将 ./media/ 替换为 /markdown/{date}/media/
|
||||
// 修复图片路径:将 ./media/ 替换为 /markdown/{dir}/media/
|
||||
$markdownContent = preg_replace_callback(
|
||||
'/\(\.\/media\/([^)]+)\)/',
|
||||
function ($matches) use ($markdownDir) {
|
||||
@@ -58,250 +50,19 @@ class DocumentPreviewService
|
||||
|
||||
// 使用 MarkdownRenderService 转换为 HTML
|
||||
$renderService = app(MarkdownRenderService::class);
|
||||
$htmlContent = $renderService->render($markdownContent);
|
||||
|
||||
return $htmlContent;
|
||||
}
|
||||
|
||||
/**
 * Render a stored Word document to an HTML fragment via PHPWord.
 *
 * Steps: load the file from the `local` disk, collect its images as data
 * URIs, let PHPWord's HTML writer render into a temporary file, read that
 * file back, inline the images, and pass the result through cleanHtml().
 * The temporary file is removed whether or not rendering succeeds.
 *
 * @param Document $document Document whose `file_path` points at the Word file on the `local` disk.
 * @return string HTML content.
 * @throws \Exception When the source file is missing or the rendered HTML cannot be read back.
 */
protected function convertWordToHtml(Document $document): string
{
    $disk = Storage::disk('local');

    if (!$disk->exists($document->file_path)) {
        throw new \Exception('文档文件不存在');
    }

    $filePath = $disk->path($document->file_path);

    // PHPWord is pointed at this directory for its own temporary files.
    $tempDir = storage_path('app/temp');
    if (!is_dir($tempDir)) {
        mkdir($tempDir, 0755, true);
    }
    Settings::setTempDir($tempDir);

    $phpWord = IOFactory::load($filePath);

    // Collected up front so they can be substituted into the rendered HTML.
    $images = $this->extractImagesFromDocument($phpWord);

    $htmlWriter = IOFactory::createWriter($phpWord, 'HTML');

    $tempFileName = 'temp/doc_preview_' . uniqid() . '.html';
    if (!$disk->exists('temp')) {
        $disk->makeDirectory('temp');
    }

    try {
        $htmlWriter->save($disk->path($tempFileName));
        $htmlContent = $disk->get($tempFileName);
    } finally {
        // Always remove the scratch file, including when save()/get() throws,
        // so failed previews do not accumulate files on disk.
        $disk->delete($tempFileName);
    }

    // get() may yield a non-string when the file could not be read; fail with
    // a clear message instead of passing it on to a string-typed parameter.
    if (!is_string($htmlContent)) {
        throw new \Exception('无法读取生成的 HTML 预览文件');
    }

    $htmlContent = $this->embedImagesInHtml($htmlContent, $images);

    return $this->cleanHtml($htmlContent);
}
|
||||
|
||||
/**
 * Collect the images of a PHPWord document as base64 data URIs.
 *
 * Walks every element of every section. For an element that exposes
 * getElements(), only its direct children are inspected; otherwise the
 * element itself is inspected. Each PHPWord Image whose source file exists
 * is appended to the result in encounter order.
 *
 * @param \PhpOffice\PhpWord\PhpWord $phpWord Loaded document.
 * @return array Zero-based list of "data:{mime};base64,{payload}" strings.
 */
protected function extractImagesFromDocument($phpWord): array
{
    $images = [];

    // Appends $candidate to $images when it is an Image with a readable source.
    $collect = function ($candidate) use (&$images): void {
        if (!($candidate instanceof \PhpOffice\PhpWord\Element\Image)) {
            return;
        }

        $source = $candidate->getSource();
        if (!file_exists($source)) {
            return;
        }

        $raw = file_get_contents($source);
        $mime = $this->getImageMimeType($candidate->getImageType());
        $payload = base64_encode($raw);

        // $images only ever grows by appending, so keys stay 0, 1, 2, ...
        $images[] = "data:{$mime};base64,{$payload}";
    };

    foreach ($phpWord->getSections() as $section) {
        foreach ($section->getElements() as $element) {
            if (method_exists($element, 'getElements')) {
                // Container element: look at its direct children only.
                foreach ($element->getElements() as $child) {
                    $collect($child);
                }
                continue;
            }

            $collect($element);
        }
    }

    return $images;
}
|
||||
|
||||
/**
 * Translate an image type token into a MIME type.
 *
 * Matching is case-insensitive. Unknown tokens fall back to `image/jpeg`.
 *
 * @param string $imageType Image type token such as `png` or `JPG`.
 * @return string MIME type.
 */
protected function getImageMimeType(string $imageType): string
{
    return match (strtolower($imageType)) {
        'png' => 'image/png',
        'gif' => 'image/gif',
        'bmp' => 'image/bmp',
        'svg' => 'image/svg+xml',
        // 'jpg', 'jpeg' and anything unrecognised all resolve to JPEG.
        default => 'image/jpeg',
    };
}
|
||||
|
||||
/**
 * Replace <img> sources in an HTML string with pre-built data URIs.
 *
 * <img> tags are visited in document order. A tag whose `src` already starts
 * with `data:` or `http` is returned untouched and does not consume an entry
 * from $images. Any other tag is rebuilt with a double-quoted `src`, taking
 * the next unused entry of $images when one exists and keeping its original
 * source otherwise.
 *
 * @param string $html   HTML to rewrite.
 * @param array  $images Zero-based list of replacement sources, in document order.
 * @return string Rewritten HTML.
 */
protected function embedImagesInHtml(string $html, array $images): string
{
    // Index of the next entry in $images that has not been used yet.
    $cursor = 0;

    $rewriteTag = function ($match) use ($images, &$cursor) {
        [$wholeTag, $leading, $source, $trailing] = $match;

        if (str_starts_with($source, 'data:') || str_starts_with($source, 'http')) {
            return $wholeTag;
        }

        if (isset($images[$cursor])) {
            $source = $images[$cursor];
            $cursor++;
        }

        return "<img{$leading}src=\"{$source}\"{$trailing}>";
    };

    return preg_replace_callback(
        '/<img([^>]*?)src=["\']([^"\']+)["\']([^>]*?)>/i',
        $rewriteTag,
        $html
    );
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 清理和美化 HTML 内容
|
||||
*
|
||||
* @param string $html
|
||||
* @return string
|
||||
*/
|
||||
protected function cleanHtml(string $html): string
|
||||
{
|
||||
// 提取 body 内容
|
||||
if (preg_match('/<body[^>]*>(.*?)<\/body>/is', $html, $matches)) {
|
||||
$html = $matches[1];
|
||||
}
|
||||
|
||||
// 添加基本样式
|
||||
$styledHtml = '<div class="document-preview" style="
|
||||
font-family: -apple-system, BlinkMacSystemFont, \'Segoe UI\', Roboto, \'Helvetica Neue\', Arial, sans-serif;
|
||||
line-height: 1.6;
|
||||
color: #333;
|
||||
padding: 20px;
|
||||
background: white;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
|
||||
">';
|
||||
|
||||
$styledHtml .= $html;
|
||||
$styledHtml .= '</div>';
|
||||
|
||||
return $styledHtml;
|
||||
return $renderService->render($markdownContent);
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查文档是否可以预览
|
||||
*
|
||||
*
|
||||
* @param Document $document
|
||||
* @return bool
|
||||
*/
|
||||
public function canPreview(Document $document): bool
|
||||
{
|
||||
// 检查文件扩展名
|
||||
$extension = strtolower(pathinfo($document->file_name, PATHINFO_EXTENSION));
|
||||
|
||||
// 目前支持 .doc 和 .docx
|
||||
return in_array($extension, ['doc', 'docx']);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取文档预览的纯文本内容(用于搜索等)
|
||||
*
|
||||
* @param Document $document
|
||||
* @return string
|
||||
* @throws \Exception
|
||||
*/
|
||||
public function extractText(Document $document): string
|
||||
{
|
||||
try {
|
||||
if (!Storage::disk('local')->exists($document->file_path)) {
|
||||
throw new \Exception('文档文件不存在');
|
||||
}
|
||||
|
||||
$filePath = Storage::disk('local')->path($document->file_path);
|
||||
$phpWord = IOFactory::load($filePath);
|
||||
|
||||
$text = '';
|
||||
foreach ($phpWord->getSections() as $section) {
|
||||
foreach ($section->getElements() as $element) {
|
||||
if (method_exists($element, 'getText')) {
|
||||
$text .= $element->getText() . "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return trim($text);
|
||||
} catch (\Exception $e) {
|
||||
throw new \Exception('文本提取失败:' . $e->getMessage());
|
||||
}
|
||||
return $document->conversion_status === 'completed'
|
||||
&& !empty($document->markdown_path);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,93 +5,22 @@ namespace App\Services;
|
||||
use App\Models\Document;
|
||||
use App\Models\DownloadLog;
|
||||
use App\Models\User;
|
||||
use Illuminate\Http\UploadedFile;
|
||||
use Illuminate\Support\Facades\DB;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
use Symfony\Component\HttpFoundation\StreamedResponse;
|
||||
|
||||
class DocumentService
|
||||
{
|
||||
/**
 * Store an uploaded Word document and queue its Markdown conversion.
 *
 * The file is written to the `local` disk under "documents/YYYY/MM/DD/"
 * using a random prefix plus the original file name, so two uploads that
 * share a name on the same day no longer overwrite each other. The original
 * name is still recorded in `file_name`. The database row is created with
 * `conversion_status` = `pending` and the conversion is queued, all inside
 * one transaction; if anything fails after the file was written, the file
 * is removed again before the error is re-thrown.
 *
 * @param UploadedFile $file       Uploaded file; only .doc and .docx are accepted.
 * @param string       $title      Document title.
 * @param string       $type       Document type ('global' or 'dedicated').
 * @param int|null     $groupId    Group ID; required when $type is 'dedicated'.
 * @param int          $uploaderId ID of the uploading user.
 * @return Document The newly created document record.
 * @throws \InvalidArgumentException On an unsupported extension or a dedicated document without a group.
 * @throws \Exception When the file cannot be written to storage.
 */
public function uploadDocument(
    UploadedFile $file,
    string $title,
    string $type,
    ?int $groupId,
    int $uploaderId
): Document {
    $extension = strtolower($file->getClientOriginalExtension());
    if (!in_array($extension, ['doc', 'docx'], true)) {
        throw new \InvalidArgumentException('文件格式不支持,请上传 Word 文档(.doc 或 .docx)');
    }

    if ($type === 'dedicated' && empty($groupId)) {
        throw new \InvalidArgumentException('专用知识库文档必须指定所属分组');
    }

    return DB::transaction(function () use ($file, $title, $type, $groupId, $uploaderId) {
        $originalFileName = $file->getClientOriginalName();

        // A random prefix keeps same-named uploads on the same day from
        // sharing (and silently overwriting) one storage path.
        $directory = 'documents/' . date('Y/m/d');
        $storedName = bin2hex(random_bytes(8)) . '_' . $originalFileName;
        $filePath = $file->storeAs($directory, $storedName, 'local');

        // storeAs() reports failure by returning false rather than throwing.
        if ($filePath === false) {
            throw new \Exception('文件保存失败');
        }

        try {
            $document = Document::create([
                'title' => $title,
                'file_path' => $filePath,
                'file_name' => $originalFileName,
                'file_size' => $file->getSize(),
                'mime_type' => $file->getMimeType(),
                'type' => $type,
                'group_id' => $groupId,
                'uploaded_by' => $uploaderId,
                'description' => '',
                'conversion_status' => 'pending',
            ]);

            // NOTE(review): the job is dispatched before the transaction
            // commits; presumably a fast worker could run before the row is
            // visible — confirm whether the job should dispatch after commit.
            $conversionService = app(DocumentConversionService::class);
            $conversionService->queueConversion($document);

            return $document;
        } catch (\Throwable $e) {
            // The database work is rolled back by the transaction; remove the
            // stored file as well so it is not left orphaned on disk.
            Storage::disk('local')->delete($filePath);

            throw $e;
        }
    });
}
|
||||
|
||||
/**
|
||||
* 验证用户是否有权访问指定文档
|
||||
*
|
||||
* @param Document $document 要访问的文档
|
||||
* @param User $user 用户
|
||||
* @return bool
|
||||
*/
|
||||
public function validateDocumentAccess(Document $document, User $user): bool
|
||||
{
|
||||
// 如果是全局文档,所有用户都可以访问
|
||||
if ($document->type === 'global') {
|
||||
return true;
|
||||
}
|
||||
|
||||
// 如果是专用文档,检查用户是否属于该文档的分组
|
||||
if ($document->type === 'dedicated') {
|
||||
// 获取用户所属的所有分组 ID
|
||||
$userGroupIds = $user->groups()->pluck('groups.id')->toArray();
|
||||
|
||||
// 检查文档的分组 ID 是否在用户的分组列表中
|
||||
return in_array($document->group_id, $userGroupIds);
|
||||
}
|
||||
|
||||
@@ -100,25 +29,17 @@ class DocumentService
|
||||
|
||||
/**
|
||||
* 下载文档
|
||||
*
|
||||
* @param Document $document 要下载的文档
|
||||
* @param User $user 用户
|
||||
* @return StreamedResponse
|
||||
* @throws \Exception
|
||||
*/
|
||||
public function downloadDocument(Document $document, User $user): StreamedResponse
|
||||
{
|
||||
// 验证用户权限
|
||||
if (!$this->validateDocumentAccess($document, $user)) {
|
||||
throw new \Exception('您没有权限访问此文档');
|
||||
}
|
||||
|
||||
// 检查文件是否存在
|
||||
if (!Storage::disk('local')->exists($document->file_path)) {
|
||||
throw new \Exception('文档不存在或已被删除');
|
||||
}
|
||||
|
||||
// 返回文件流式响应,使用原始文件名
|
||||
return Storage::disk('local')->download(
|
||||
$document->file_path,
|
||||
$document->file_name
|
||||
@@ -127,11 +48,6 @@ class DocumentService
|
||||
|
||||
/**
|
||||
* 记录文档下载日志
|
||||
*
|
||||
* @param Document $document 被下载的文档
|
||||
* @param User $user 下载的用户
|
||||
* @param string|null $ipAddress IP 地址
|
||||
* @return DownloadLog
|
||||
*/
|
||||
public function logDownload(Document $document, User $user, ?string $ipAddress = null): DownloadLog
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user