feat: 初始化知识库系统项目

- 实现基于 Laravel 11 和 Filament 3.X 的文档管理系统
- 添加用户认证和分组管理功能
- 实现文档上传、分类和权限控制
- 集成 Word 文档自动转换为 Markdown
- 集成 Meilisearch 全文搜索引擎
- 实现文档在线预览功能
- 添加安全日志和审计功能
- 完整的简体中文界面
- 包含完整的项目文档和部署指南

技术栈:
- Laravel 11.x
- Filament 3.X
- Meilisearch 1.5+
- Pandoc 文档转换
- Redis 队列系统
- Pest PHP 测试框架
2025-12-05 14:44:44 +08:00
commit acf549c43c
165 changed files with 32838 additions and 0 deletions
--- a/app/Services/DocumentPreviewService.php
+++ b/app/Services/DocumentPreviewService.php
@@ -0,0 +1,296 @@
+<?php
+
+namespace App\Services;
+
+use App\Models\Document;
+use Illuminate\Support\Facades\Storage;
+use PhpOffice\PhpWord\IOFactory;
+use PhpOffice\PhpWord\Settings;
+
+class DocumentPreviewService
+{
+    /**
+     * Convert a document to HTML for preview.
+     *
+     * Intended for the Filament admin panel. The HTML is produced directly from
+     * the Word file (via convertWordToHtml()) so that images display correctly;
+     * the Markdown rendition is not used here because resolving its image paths
+     * is more involved.
+     *
+     * @param Document $document Document to preview.
+     * @return string HTML content.
+     * @throws \Exception When the conversion fails; the underlying exception is
+     *                    attached as the "previous" exception.
+     */
+    public function convertToHtml(Document $document): string
+    {
+        try {
+            return $this->convertWordToHtml($document);
+        } catch (\Exception $e) {
+            // Chain the original exception so its type and stack trace stay
+            // available to logging and debugging instead of being flattened
+            // into the message text only.
+            throw new \Exception('文档预览失败：' . $e->getMessage(), 0, $e);
+        }
+    }
+
+    /**
+     * Render the document's Markdown rendition as HTML (used by the dedicated
+     * Markdown preview page).
+     *
+     * Relative image links of the form `(./media/<file>)` are rewritten to
+     * `(/markdown/<markdown dir>/media/<file>)` before rendering, where
+     * `<markdown dir>` is the directory part of the document's markdown_path
+     * (for example `2025/12/04`).
+     *
+     * @param Document $document Document whose Markdown content is rendered.
+     * @return string HTML content.
+     * @throws \Exception When the Markdown content is empty.
+     */
+    public function convertMarkdownToHtml(Document $document): string
+    {
+        $source = $document->getMarkdownContent();
+
+        if (empty($source)) {
+            throw new \Exception('Markdown 内容为空');
+        }
+
+        // Directory that contains the Markdown file, e.g. 2025/12/04.
+        $baseDir = dirname($document->markdown_path);
+
+        // Point ./media/ image links at the publicly served /markdown/... path.
+        $rewritten = preg_replace_callback(
+            '/\(\.\/media\/([^)]+)\)/',
+            fn ($m) => '(/markdown/' . $baseDir . '/media/' . $m[1] . ')',
+            $source
+        );
+
+        // Delegate the actual Markdown -> HTML rendering to MarkdownRenderService.
+        return app(MarkdownRenderService::class)->render($rewritten);
+    }
+
+    /**
+     * Convert a Word document stored on the "local" disk straight to HTML.
+     *
+     * Steps: load the file with PHPWord, collect its images as data URIs,
+     * write the document through PHPWord's HTML writer into a temporary file,
+     * read that file back, substitute the collected images into the <img>
+     * tags and finally wrap the body in the preview container.
+     *
+     * @param Document $document Document whose file_path points at the Word file.
+     * @return string HTML content.
+     * @throws \Exception When the file is missing, a temporary file cannot be
+     *                    created, or the generated HTML cannot be read back.
+     */
+    protected function convertWordToHtml(Document $document): string
+    {
+        $disk = Storage::disk('local');
+
+        if (!$disk->exists($document->file_path)) {
+            throw new \Exception('文档文件不存在');
+        }
+
+        // Absolute filesystem path of the stored document.
+        $filePath = $disk->path($document->file_path);
+
+        // Directory PHPWord uses for its own temporary files.
+        Settings::setTempDir(storage_path('app/temp'));
+
+        $phpWord = IOFactory::load($filePath);
+
+        // Images as data URIs, in document order.
+        $images = $this->extractImagesFromDocument($phpWord);
+
+        $htmlWriter = IOFactory::createWriter($phpWord, 'HTML');
+
+        // tempnam() creates the file it names. Write to that exact path:
+        // appending a suffix would make the writer create a second file and
+        // leave the one created by tempnam() behind on every call.
+        $tempHtmlFile = tempnam(sys_get_temp_dir(), 'doc_preview_');
+        if ($tempHtmlFile === false) {
+            throw new \Exception('无法创建临时文件');
+        }
+
+        try {
+            $htmlWriter->save($tempHtmlFile);
+            $htmlContent = file_get_contents($tempHtmlFile);
+        } finally {
+            // Always remove the temporary file, even when the writer throws.
+            if (is_file($tempHtmlFile)) {
+                unlink($tempHtmlFile);
+            }
+        }
+
+        if ($htmlContent === false) {
+            throw new \Exception('无法读取转换后的 HTML 内容');
+        }
+
+        // Replace image references with the collected data URIs.
+        $htmlContent = $this->embedImagesInHtml($htmlContent, $images);
+
+        // Keep only the body and wrap it in the styled preview container.
+        return $this->cleanHtml($htmlContent);
+    }
+
+    /**
+     * Collect the images of a Word document as data URIs.
+     *
+     * Only two positions are inspected for each section: the section's direct
+     * elements, and the direct children of any element that exposes
+     * getElements(). Deeper nesting is not traversed. Images whose source
+     * does not exist according to file_exists() are skipped.
+     *
+     * @param \PhpOffice\PhpWord\PhpWord $phpWord Loaded document.
+     * @return array Zero-based list of `data:<mime>;base64,<payload>` strings
+     *               in the order the images were encountered.
+     */
+    protected function extractImagesFromDocument($phpWord): array
+    {
+        $encoded = [];
+
+        foreach ($phpWord->getSections() as $section) {
+            foreach ($section->getElements() as $element) {
+                // A container contributes its direct children; any other
+                // element is examined on its own.
+                $candidates = method_exists($element, 'getElements')
+                    ? $element->getElements()
+                    : [$element];
+
+                foreach ($candidates as $candidate) {
+                    if (!($candidate instanceof \PhpOffice\PhpWord\Element\Image)) {
+                        continue;
+                    }
+
+                    $source = $candidate->getSource();
+                    if (!file_exists($source)) {
+                        continue;
+                    }
+
+                    $bytes = file_get_contents($source);
+                    $mime = $this->getImageMimeType($candidate->getImageType());
+
+                    // Appending keeps the keys sequential from 0.
+                    $encoded[] = 'data:' . $mime . ';base64,' . base64_encode($bytes);
+                }
+            }
+        }
+
+        return $encoded;
+    }
+
+    /**
+     * Resolve an image type to a MIME type.
+     *
+     * Accepts either a file extension (`png`, `jpg`, ...) or a value that is
+     * already a MIME type (`image/png`). The latter is returned unchanged
+     * (lower-cased) instead of being mislabelled by the fallback.
+     * NOTE(review): PHPWord's Image::getImageType() is documented to return a
+     * media type rather than an extension — confirm against the installed version.
+     *
+     * @param string $imageType File extension or MIME type, case-insensitive.
+     * @return string MIME type; `image/jpeg` when the type is not recognised.
+     */
+    protected function getImageMimeType(string $imageType): string
+    {
+        $normalized = strtolower($imageType);
+
+        // Already a MIME type: pass it through rather than falling back to JPEG.
+        if (strncmp($normalized, 'image/', 6) === 0) {
+            return $normalized;
+        }
+
+        $mimeTypes = [
+            'jpg' => 'image/jpeg',
+            'jpeg' => 'image/jpeg',
+            'png' => 'image/png',
+            'gif' => 'image/gif',
+            'bmp' => 'image/bmp',
+            'svg' => 'image/svg+xml',
+            'webp' => 'image/webp',
+        ];
+
+        return $mimeTypes[$normalized] ?? 'image/jpeg';
+    }
+
+    /**
+     * Substitute the `src` of <img> tags with the supplied data URIs.
+     *
+     * Tags are visited in document order. A tag whose `src` already starts
+     * with `data:` or `http` is returned untouched and does not consume an
+     * entry from $images. Every other tag takes the next unused entry; when
+     * none is left the original `src` is kept. Rewritten tags are always
+     * emitted with a double-quoted `src` attribute.
+     *
+     * @param string $html   HTML produced by the PHPWord HTML writer.
+     * @param array  $images Zero-based list of data URIs.
+     * @return string HTML with image sources replaced.
+     */
+    protected function embedImagesInHtml(string $html, array $images): string
+    {
+        // Position of the next unused entry in $images; shared with the callback.
+        $cursor = 0;
+
+        return preg_replace_callback(
+            '/<img([^>]*?)src=["\']([^"\']+)["\']([^>]*?)>/i',
+            function ($m) use ($images, &$cursor) {
+                [$wholeTag, $leading, $source, $trailing] = $m;
+
+                // Inline data and remote links are left exactly as written.
+                $isInline = strncmp($source, 'data:', 5) === 0;
+                $isRemote = strncmp($source, 'http', 4) === 0;
+                if ($isInline || $isRemote) {
+                    return $wholeTag;
+                }
+
+                // Consume the next extracted image, if there is one.
+                if (isset($images[$cursor])) {
+                    $source = $images[$cursor];
+                    $cursor++;
+                }
+
+                return '<img' . $leading . 'src="' . $source . '"' . $trailing . '>';
+            },
+            $html
+        );
+    }
+
+
+
+    /**
+     * Reduce an HTML document to its body and wrap it in the preview container.
+     *
+     * When a <body>...</body> pair is found (case-insensitive, spanning lines)
+     * only its inner markup is kept; otherwise the input is used as is. The
+     * result is enclosed in a `div.document-preview` carrying inline styles.
+     *
+     * @param string $html Full HTML document or fragment.
+     * @return string Styled wrapper containing the body markup.
+     */
+    protected function cleanHtml(string $html): string
+    {
+        // Keep only what sits between <body> and </body>, when present.
+        $inner = preg_match('/<body[^>]*>(.*?)<\/body>/is', $html, $captured)
+            ? $captured[1]
+            : $html;
+
+        // Opening tag of the wrapper with the basic preview styling.
+        $wrapperOpen = '<div class="document-preview" style="
+            font-family: -apple-system, BlinkMacSystemFont, \'Segoe UI\', Roboto, \'Helvetica Neue\', Arial, sans-serif;
+            line-height: 1.6;
+            color: #333;
+            padding: 20px;
+            background: white;
+            border-radius: 8px;
+            box-shadow: 0 1px 3px rgba(0,0,0,0.1);
+        ">';
+
+        return $wrapperOpen . $inner . '</div>';
+    }
+
+    /**
+     * Tell whether a document can be previewed.
+     *
+     * The decision is based solely on the extension of the document's
+     * file_name, compared case-insensitively; the file itself is not inspected.
+     *
+     * @param Document $document Document to check.
+     * @return bool True for `.doc` and `.docx` files, false otherwise.
+     */
+    public function canPreview(Document $document): bool
+    {
+        $suffix = pathinfo($document->file_name, PATHINFO_EXTENSION);
+        $suffix = strtolower($suffix);
+
+        // Only Word formats are supported at the moment.
+        return $suffix === 'doc' || $suffix === 'docx';
+    }
+
+    /**
+     * Extract the plain text of a Word document (for search and similar uses).
+     *
+     * Only the direct elements of each section that expose getText() are
+     * read; each contributes one line. Nested containers are not traversed.
+     *
+     * @param Document $document Document whose file_path points at the Word file.
+     * @return string Text with one element per line, trimmed.
+     * @throws \Exception When the file is missing or cannot be processed; the
+     *                    underlying exception is attached as "previous".
+     */
+    public function extractText(Document $document): string
+    {
+        try {
+            if (!Storage::disk('local')->exists($document->file_path)) {
+                throw new \Exception('文档文件不存在');
+            }
+
+            $filePath = Storage::disk('local')->path($document->file_path);
+            $phpWord = IOFactory::load($filePath);
+
+            $text = '';
+            foreach ($phpWord->getSections() as $section) {
+                foreach ($section->getElements() as $element) {
+                    if (!method_exists($element, 'getText')) {
+                        continue;
+                    }
+
+                    $value = $element->getText();
+
+                    // getText() is not guaranteed to return a string.
+                    // NOTE(review): a PHPWord Title may hold a TextRun object —
+                    // confirm against the installed version. Concatenating such
+                    // an object would raise an \Error that the catch below does
+                    // not handle, so unwrap it first.
+                    if (is_object($value) && method_exists($value, 'getText')) {
+                        $value = $value->getText();
+                    }
+                    if (is_object($value) && method_exists($value, '__toString')) {
+                        $value = (string) $value;
+                    }
+
+                    // Skip anything that still cannot be rendered as text.
+                    if ($value !== null && !is_scalar($value)) {
+                        continue;
+                    }
+
+                    $text .= $value . "\n";
+                }
+            }
+
+            return trim($text);
+        } catch (\Exception $e) {
+            // Chain the original exception so its type and trace are preserved.
+            throw new \Exception('文本提取失败：' . $e->getMessage(), 0, $e);
+        }
+    }
+}