diff --git a/.forgejo/workflows/deploy.yml b/.forgejo/workflows/deploy.yml index 98f157d..dba7bd6 100644 --- a/.forgejo/workflows/deploy.yml +++ b/.forgejo/workflows/deploy.yml @@ -51,6 +51,10 @@ jobs: rsync_deploy config/ "config/" rsync_deploy src/ "src/" rsync_deploy public/ "public/" + rsync_deploy patches/ "patches/" + rsync -az --no-perms --no-owner --no-group -e 'ssh -i ~/.ssh/id_ed25519' \ + package.json worker.php \ + ${{ vars.USERNAME }}@${{ vars.HOST }}:/tmp/web2print-deploy/ - name: Apply on server run: | @@ -62,6 +66,10 @@ jobs: sudo cp -r /tmp/web2print-deploy/config/. \$DEPLOY_PATH/config/ sudo cp -r /tmp/web2print-deploy/src/. \$DEPLOY_PATH/src/ sudo cp -r /tmp/web2print-deploy/public/. \$DEPLOY_PATH/public/ + sudo mkdir -p \$DEPLOY_PATH/patches + sudo cp -r /tmp/web2print-deploy/patches/. \$DEPLOY_PATH/patches/ + sudo cp /tmp/web2print-deploy/package.json \$DEPLOY_PATH/package.json + sudo cp /tmp/web2print-deploy/worker.php \$DEPLOY_PATH/worker.php echo 'Configuration des permissions...' sudo chown -R www-data:www-data \$DEPLOY_PATH @@ -70,6 +78,9 @@ jobs: sudo chmod -R 775 \$DEPLOY_PATH/tmp sudo chmod 640 \$DEPLOY_PATH/config/config.php + echo 'Installation des dépendances npm + application des patches...' + cd \$DEPLOY_PATH && sudo -u www-data npm install --omit=optional 2>&1 | tail -5 + echo 'Nettoyage...' sudo rm -rf /tmp/web2print-deploy diff --git a/.gitignore b/.gitignore index b13ebc3..99cef54 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ -.claude/ \ No newline at end of file +.claude/ +node_modules/ +package-lock.json +tmp/ +logs/ diff --git a/config/config.php b/config/config.php index a6bf1df..feb144d 100644 --- a/config/config.php +++ b/config/config.php @@ -11,13 +11,19 @@ return [ 'tmp_dir' => '/var/www/web2print/tmp', 'log_file' => '/var/www/web2print/logs/app.log', - // Paged.js CLI - 'pagedjs_bin' => '/usr/bin/pagedjs-cli', // Vérifier avec: which pagedjs-cli - 'pagedjs_timeout' => 60, // secondes + // Paged.js CLI (install local via npm dans /var/www/web2print) + 'pagedjs_bin' => '/var/www/web2print/node_modules/.bin/pagedjs-cli', + 'pagedjs_timeout' => 240, // secondes + + // PHP CLI (utilisé pour spawn le worker async) + 'php_bin' => '/usr/bin/php', + + // Jobs asynchrones + 'job_max_age' => 3600, // 1h, age au-delà duquel un job orphelin est supprimé // Limites 'max_html_size' => 5 * 1024 * 1024, // 5 MB - 'max_execution_time' => 90, + 'max_execution_time' => 300, // Options PDF par défaut 'pdf_defaults' => [ diff --git a/package.json b/package.json new file mode 100644 index 0000000..b3d76a3 --- /dev/null +++ b/package.json @@ -0,0 +1,15 @@ +{ + "name": "web2print-service", + "version": "1.0.0", + "private": true, + "description": "PDF generation service using pagedjs-cli", + "scripts": { + "postinstall": "patch-package" + }, + "dependencies": { + "pagedjs-cli": "0.4.3" + }, + "devDependencies": { + "patch-package": "^8.0.0" + } +} diff --git a/patches/pagedjs-cli+0.4.3.patch b/patches/pagedjs-cli+0.4.3.patch new file mode 100644 index 0000000..ff93af4 --- /dev/null +++ b/patches/pagedjs-cli+0.4.3.patch @@ -0,0 +1,14 @@ +diff --git a/node_modules/pagedjs-cli/src/printer.js b/node_modules/pagedjs-cli/src/printer.js +index 206b4fc..e9d296b 100644 +--- a/node_modules/pagedjs-cli/src/printer.js ++++ b/node_modules/pagedjs-cli/src/printer.js +@@ -48,7 +48,8 @@ class Printer extends EventEmitter { + let puppeteerOptions = { + headless: this.headless, + args: ["--disable-dev-shm-usage", "--export-tagged-pdf"], +- ignoreHTTPSErrors: this.ignoreHTTPSErrors ++ ignoreHTTPSErrors: this.ignoreHTTPSErrors, ++ protocolTimeout: 300000 + }; + + if (this.allowLocal) { diff --git a/public/index.php b/public/index.php index 51abbdd..7701d61 100644 --- a/public/index.php +++ b/public/index.php @@ -50,11 +50,31 @@ if (!$auth->authenticate()) { exit; } -// Endpoint de génération PDF +// Endpoints if ($uri === '/generate') { + // Endpoint synchrone existant (rétro-compat) $generator = new \Web2Print\Services\PdfGenerator($config); $controller = new \Web2Print\Controllers\GenerateController($generator, $config); $controller->handle(); +} elseif ($uri === '/jobs') { + // POST /jobs : créer un job async + $jobs = new \Web2Print\Services\JobManager($config); + $controller = new \Web2Print\Controllers\JobsController($jobs, $config); + $controller->create(); +} elseif (preg_match('#^/jobs/([a-f0-9]+)/result$#', $uri, $m)) { + // GET /jobs/{id}/result : récupérer le PDF + $jobs = new \Web2Print\Services\JobManager($config); + $controller = new \Web2Print\Controllers\JobsController($jobs, $config); + $controller->result($m[1]); +} elseif (preg_match('#^/jobs/([a-f0-9]+)$#', $uri, $m)) { + // GET /jobs/{id} : status — DELETE /jobs/{id} : cleanup + $jobs = new \Web2Print\Services\JobManager($config); + $controller = new \Web2Print\Controllers\JobsController($jobs, $config); + if ($_SERVER['REQUEST_METHOD'] === 'DELETE') { + $controller->delete($m[1]); + } else { + $controller->status($m[1]); + } } else { http_response_code(404); header('Content-Type: application/json'); diff --git a/src/Controllers/JobsController.php b/src/Controllers/JobsController.php new file mode 100644 index 0000000..dbca109 --- /dev/null +++ b/src/Controllers/JobsController.php @@ -0,0 +1,120 @@ +jobs = $jobs; + $this->config = $config; + } + + public function create(): void + { + if ($_SERVER['REQUEST_METHOD'] !== 'POST') { + $this->sendError(405, 'Method not allowed'); + return; + } + + $input = file_get_contents('php://input'); + if (strlen($input) > $this->config['max_html_size']) { + $this->sendError(413, 'Request too large'); + return; + } + + $data = json_decode($input, true); + if (json_last_error() !== JSON_ERROR_NONE) { + $this->sendError(400, 'Invalid JSON'); + return; + } + + if (empty($data['url'])) { + $this->sendError(400, 'url required (only URL mode is supported for async jobs)'); + return; + } + + $jobId = $this->jobs->create([ + 'url' => $data['url'], + 'filename' => !empty($data['filename']) ? basename($data['filename']) : 'document.pdf', + 'options' => $data['options'] ?? [], + ]); + + http_response_code(202); + header('Content-Type: application/json'); + echo json_encode(['job_id' => $jobId]); + } + + public function status(string $jobId): void + { + if ($_SERVER['REQUEST_METHOD'] !== 'GET') { + $this->sendError(405, 'Method not allowed'); + return; + } + + $status = $this->jobs->getStatus($jobId); + if ($status === null) { + $this->sendError(404, 'Job not found'); + return; + } + + header('Content-Type: application/json'); + echo json_encode($status); + } + + public function result(string $jobId): void + { + if ($_SERVER['REQUEST_METHOD'] !== 'GET') { + $this->sendError(405, 'Method not allowed'); + return; + } + + $status = $this->jobs->getStatus($jobId); + if ($status === null) { + $this->sendError(404, 'Job not found'); + return; + } + + if (($status['status'] ?? null) !== 'done') { + $this->sendError(409, 'Job not ready (status: ' . ($status['status'] ?? 'unknown') . ')'); + return; + } + + $path = $this->jobs->getResultPath($jobId); + if ($path === null) { + $this->sendError(500, 'Result file missing'); + return; + } + + $request = $this->jobs->getRequest($jobId); + $filename = $request['filename'] ?? 'document.pdf'; + + header('Content-Type: application/pdf'); + header('Content-Disposition: attachment; filename="' . $filename . '"'); + header('Content-Length: ' . filesize($path)); + readfile($path); + } + + public function delete(string $jobId): void + { + if ($_SERVER['REQUEST_METHOD'] !== 'DELETE') { + $this->sendError(405, 'Method not allowed'); + return; + } + + $this->jobs->delete($jobId); + http_response_code(204); + } + + private function sendError(int $code, string $message): void + { + http_response_code($code); + header('Content-Type: application/json'); + echo json_encode(['error' => $message]); + } +} diff --git a/src/Services/JobManager.php b/src/Services/JobManager.php new file mode 100644 index 0000000..38a0812 --- /dev/null +++ b/src/Services/JobManager.php @@ -0,0 +1,104 @@ +config = $config; + } + + public function create(array $request): string + { + $jobId = bin2hex(random_bytes(16)); + + $requestPath = $this->path($jobId, 'request.json'); + $statusPath = $this->path($jobId, 'status.json'); + + file_put_contents($requestPath, json_encode($request, JSON_UNESCAPED_SLASHES)); + $this->writeStatus($jobId, ['status' => 'pending', 'createdAt' => time()]); + + $cmd = sprintf( + 'nohup %s %s %s > /dev/null 2>&1 &', + escapeshellcmd($this->config['php_bin']), + escapeshellarg(__DIR__ . '/../../worker.php'), + escapeshellarg($jobId) + ); + exec($cmd); + + return $jobId; + } + + public function getStatus(string $jobId): ?array + { + $path = $this->path($jobId, 'status.json'); + if (!file_exists($path)) { + return null; + } + $raw = @file_get_contents($path); + $data = json_decode($raw, true); + return is_array($data) ? $data : null; + } + + public function writeStatus(string $jobId, array $status): void + { + $status['updatedAt'] = time(); + file_put_contents( + $this->path($jobId, 'status.json'), + json_encode($status, JSON_UNESCAPED_SLASHES) + ); + } + + public function getRequest(string $jobId): ?array + { + $path = $this->path($jobId, 'request.json'); + if (!file_exists($path)) { + return null; + } + $data = json_decode(@file_get_contents($path), true); + return is_array($data) ? $data : null; + } + + public function getResultPath(string $jobId): ?string + { + $path = $this->path($jobId, 'pdf'); + return file_exists($path) ? $path : null; + } + + public function pdfPath(string $jobId): string + { + return $this->path($jobId, 'pdf'); + } + + public function delete(string $jobId): void + { + foreach (glob($this->config['tmp_dir'] . '/job_' . $jobId . '.*') ?: [] as $file) { + @unlink($file); + } + } + + public function cleanOrphans(): int + { + $maxAge = $this->config['job_max_age'] ?? 3600; + $cutoff = time() - $maxAge; + $count = 0; + + foreach (glob($this->config['tmp_dir'] . '/job_*.status.json') ?: [] as $statusFile) { + if (filemtime($statusFile) < $cutoff) { + if (preg_match('/job_([a-f0-9]+)\.status\.json$/', $statusFile, $m)) { + $this->delete($m[1]); + $count++; + } + } + } + return $count; + } + + private function path(string $jobId, string $suffix): string + { + return $this->config['tmp_dir'] . '/job_' . $jobId . '.' . $suffix; + } +} diff --git a/src/Services/PdfGenerator.php b/src/Services/PdfGenerator.php index 6e91688..e7aed80 100644 --- a/src/Services/PdfGenerator.php +++ b/src/Services/PdfGenerator.php @@ -44,6 +44,7 @@ class PdfGenerator // Nettoyer les fichiers temporaires @unlink($htmlFile); @unlink($pdfFile); + $this->cleanChromiumTmp(); } } @@ -80,6 +81,98 @@ class PdfGenerator } finally { @unlink($pdfFile); + $this->cleanChromiumTmp(); + } + } + + public function generateFromUrlToFile(string $url, string $pdfFile, array $options = [], ?callable $onProgress = null): void + { + $cmd = 'TMPDIR=' . escapeshellarg($this->config['tmp_dir']) . ' '; + $cmd .= escapeshellcmd($this->config['pagedjs_bin']); + $cmd .= ' ' . escapeshellarg($url); + $cmd .= ' -o ' . escapeshellarg($pdfFile); + + if (!empty($options['timeout'])) { + $cmd .= ' --timeout ' . (int)$options['timeout']; + } else { + $cmd .= ' --timeout ' . ($this->config['pagedjs_timeout'] * 1000); + } + + // proc_open pour streamer stdout/stderr ligne par ligne + $descriptors = [ + 0 => ['pipe', 'r'], + 1 => ['pipe', 'w'], + 2 => ['pipe', 'w'], + ]; + $proc = proc_open($cmd, $descriptors, $pipes); + if (!is_resource($proc)) { + throw new \Exception('Failed to spawn pagedjs-cli'); + } + fclose($pipes[0]); + + stream_set_blocking($pipes[1], false); + stream_set_blocking($pipes[2], false); + + $stderrBuffer = ''; + $partial = ['', '']; + + try { + while (true) { + $status = proc_get_status($proc); + $read = [$pipes[1], $pipes[2]]; + $w = null; $e = null; + $changed = @stream_select($read, $w, $e, 1); + + if ($changed > 0) { + foreach ($read as $stream) { + $idx = ($stream === $pipes[1]) ? 0 : 1; + $chunk = fread($stream, 4096); + if ($chunk === '' || $chunk === false) continue; + if ($idx === 1) $stderrBuffer .= $chunk; + $partial[$idx] .= $chunk; + // Découpe par ligne (\n ou \r — Ora utilise \r pour les spinners) + while (preg_match('/^([^\r\n]*)[\r\n](.*)$/s', $partial[$idx], $m)) { + $line = $m[1]; + $partial[$idx] = $m[2]; + if ($line !== '' && $onProgress !== null) { + $onProgress($line); + } + } + } + } + + if (!$status['running']) { + // Vider les buffers restants + foreach ([$pipes[1], $pipes[2]] as $i => $stream) { + $idx = ($stream === $pipes[1]) ? 0 : 1; + while (($chunk = fread($stream, 4096)) !== false && $chunk !== '') { + if ($idx === 1) $stderrBuffer .= $chunk; + $partial[$idx] .= $chunk; + } + if ($partial[$idx] !== '' && $onProgress !== null) { + foreach (preg_split('/[\r\n]+/', $partial[$idx]) as $line) { + if ($line !== '') $onProgress($line); + } + } + } + break; + } + } + + fclose($pipes[1]); + fclose($pipes[2]); + $exitCode = proc_close($proc); + + if ($exitCode !== 0) { + $this->log('Paged.js CLI error (URL streaming): ' . $stderrBuffer); + throw new \Exception('PDF generation failed: ' . trim($stderrBuffer)); + } + + if (!file_exists($pdfFile)) { + throw new \Exception('PDF file not created'); + } + } finally { + $this->cleanChromiumTmp(); } } @@ -130,6 +223,30 @@ class PdfGenerator return $cmd; } + private function cleanChromiumTmp(): void + { + $tmpDir = $this->config['tmp_dir']; + foreach (glob($tmpDir . '/org.chromium.Chromium.*') ?: [] as $dir) { + $this->rmdirRecursive($dir); + } + foreach (glob($tmpDir . '/puppeteer_dev_chrome_profile-*') ?: [] as $dir) { + $this->rmdirRecursive($dir); + } + } + + private function rmdirRecursive(string $path): void + { + if (!is_dir($path)) { + @unlink($path); + return; + } + foreach (scandir($path) as $item) { + if ($item === '.' || $item === '..') continue; + $this->rmdirRecursive($path . '/' . $item); + } + @rmdir($path); + } + private function log(string $message): void { $timestamp = date('Y-m-d H:i:s'); diff --git a/worker.php b/worker.php new file mode 100644 index 0000000..ea5a11d --- /dev/null +++ b/worker.php @@ -0,0 +1,89 @@ + + * php worker.php --cleanup (nettoyage des jobs orphelins, lancé par cron) + */ + +// Autoloader (mêmes règles que public/index.php) +spl_autoload_register(function ($class) { + $prefix = 'Web2Print\\'; + $baseDir = __DIR__ . '/src/'; + $len = strlen($prefix); + if (strncmp($prefix, $class, $len) !== 0) return; + $relativeClass = substr($class, $len); + $file = $baseDir . str_replace('\\', '/', $relativeClass) . '.php'; + if (file_exists($file)) require $file; +}); + +$config = require __DIR__ . '/config/config.php'; + +$jobs = new \Web2Print\Services\JobManager($config); + +// Mode cleanup orphan (cron) +if (isset($argv[1]) && $argv[1] === '--cleanup') { + $count = $jobs->cleanOrphans(); + echo "Cleaned {$count} orphan jobs.\n"; + exit(0); +} + +if (!isset($argv[1])) { + fwrite(STDERR, "Usage: php worker.php | --cleanup\n"); + exit(1); +} + +$jobId = $argv[1]; +if (!preg_match('/^[a-f0-9]+$/', $jobId)) { + fwrite(STDERR, "Invalid job_id\n"); + exit(1); +} + +$request = $jobs->getRequest($jobId); +if ($request === null) { + fwrite(STDERR, "Request not found for job {$jobId}\n"); + exit(1); +} + +set_time_limit(0); // Le worker tourne en arrière-plan, pas de limite + +$generator = new \Web2Print\Services\PdfGenerator($config); +$pdfPath = $jobs->pdfPath($jobId); + +$pageCount = 0; +$onProgress = function (string $line) use ($jobs, $jobId, &$pageCount) { + // pagedjs-cli utilise ora qui peut émettre via \r ; on a déjà découpé. + // Lignes typiques : "- Loading: ...", "✔ Loaded", "- Rendering: Page X" + if (preg_match('/Rendering:\s*Page\s*(\d+)/i', $line, $m)) { + $pageCount = max($pageCount, (int)$m[1]); + $jobs->writeStatus($jobId, [ + 'status' => 'rendering', + 'page' => $pageCount, + ]); + } elseif (stripos($line, 'Loaded') !== false) { + $jobs->writeStatus($jobId, ['status' => 'rendering', 'page' => 0]); + } +}; + +try { + $jobs->writeStatus($jobId, ['status' => 'rendering', 'page' => 0]); + + $generator->generateFromUrlToFile( + $request['url'], + $pdfPath, + $request['options'] ?? [], + $onProgress + ); + + $jobs->writeStatus($jobId, [ + 'status' => 'done', + 'pages' => $pageCount, + ]); +} catch (\Throwable $e) { + $jobs->writeStatus($jobId, [ + 'status' => 'error', + 'message' => $e->getMessage(), + ]); + fwrite(STDERR, "Job {$jobId} failed: " . $e->getMessage() . "\n"); + exit(1); +}