Génération PDF asynchrone : jobs avec polling + progression
All checks were successful
Deploy / deploy (push) Successful in 20s

- Nouveau système de jobs : POST /jobs (async), GET /jobs/{id} (status), GET /jobs/{id}/result (PDF), DELETE /jobs/{id}
- worker.php spawné en arrière-plan via nohup, lit la sortie pagedjs-cli ligne par ligne via proc_open et écrit la progression dans tmp/job_{id}.status.json
- Migration de pagedjs-cli en install local (node_modules) pour persister le patch protocolTimeout via patch-package
- CI : déploie package.json, worker.php, patches/, et lance npm install (qui réapplique les patches via postinstall)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
isUnknown 2026-05-04 09:37:00 +02:00
parent a005c982bd
commit 0b954ed494
10 changed files with 506 additions and 6 deletions

View file

@ -51,6 +51,10 @@ jobs:
rsync_deploy config/ "config/"
rsync_deploy src/ "src/"
rsync_deploy public/ "public/"
rsync_deploy patches/ "patches/"
rsync -az --no-perms --no-owner --no-group -e 'ssh -i ~/.ssh/id_ed25519' \
package.json worker.php \
${{ vars.USERNAME }}@${{ vars.HOST }}:/tmp/web2print-deploy/
- name: Apply on server
run: |
@ -62,6 +66,10 @@ jobs:
sudo cp -r /tmp/web2print-deploy/config/. \$DEPLOY_PATH/config/
sudo cp -r /tmp/web2print-deploy/src/. \$DEPLOY_PATH/src/
sudo cp -r /tmp/web2print-deploy/public/. \$DEPLOY_PATH/public/
sudo mkdir -p \$DEPLOY_PATH/patches
sudo cp -r /tmp/web2print-deploy/patches/. \$DEPLOY_PATH/patches/
sudo cp /tmp/web2print-deploy/package.json \$DEPLOY_PATH/package.json
sudo cp /tmp/web2print-deploy/worker.php \$DEPLOY_PATH/worker.php
echo 'Configuration des permissions...'
sudo chown -R www-data:www-data \$DEPLOY_PATH
@ -70,6 +78,9 @@ jobs:
sudo chmod -R 775 \$DEPLOY_PATH/tmp
sudo chmod 640 \$DEPLOY_PATH/config/config.php
echo 'Installation des dépendances npm + application des patches...'
cd \$DEPLOY_PATH && sudo -u www-data npm install --omit=optional 2>&1 | tail -5
echo 'Nettoyage...'
sudo rm -rf /tmp/web2print-deploy

4
.gitignore vendored
View file

@ -1 +1,5 @@
.claude/
node_modules/
package-lock.json
tmp/
logs/

View file

@ -11,13 +11,19 @@ return [
'tmp_dir' => '/var/www/web2print/tmp',
'log_file' => '/var/www/web2print/logs/app.log',
// Paged.js CLI
'pagedjs_bin' => '/usr/bin/pagedjs-cli', // Vérifier avec: which pagedjs-cli
'pagedjs_timeout' => 60, // secondes
// Paged.js CLI (install local via npm dans /var/www/web2print)
'pagedjs_bin' => '/var/www/web2print/node_modules/.bin/pagedjs-cli',
'pagedjs_timeout' => 240, // secondes
// PHP CLI (utilisé pour spawn le worker async)
'php_bin' => '/usr/bin/php',
// Jobs asynchrones
'job_max_age' => 3600, // 1h, age au-delà duquel un job orphelin est supprimé
// Limites
'max_html_size' => 5 * 1024 * 1024, // 5 MB
'max_execution_time' => 90,
'max_execution_time' => 300,
// Options PDF par défaut
'pdf_defaults' => [

15
package.json Normal file
View file

@ -0,0 +1,15 @@
{
"name": "web2print-service",
"version": "1.0.0",
"private": true,
"description": "PDF generation service using pagedjs-cli",
"scripts": {
"postinstall": "patch-package"
},
"dependencies": {
"pagedjs-cli": "0.4.3"
},
"devDependencies": {
"patch-package": "^8.0.0"
}
}

View file

@ -0,0 +1,14 @@
diff --git a/node_modules/pagedjs-cli/src/printer.js b/node_modules/pagedjs-cli/src/printer.js
index 206b4fc..e9d296b 100644
--- a/node_modules/pagedjs-cli/src/printer.js
+++ b/node_modules/pagedjs-cli/src/printer.js
@@ -48,7 +48,8 @@ class Printer extends EventEmitter {
let puppeteerOptions = {
headless: this.headless,
args: ["--disable-dev-shm-usage", "--export-tagged-pdf"],
- ignoreHTTPSErrors: this.ignoreHTTPSErrors
+ ignoreHTTPSErrors: this.ignoreHTTPSErrors,
+ protocolTimeout: 300000
};
if (this.allowLocal) {

View file

@ -50,11 +50,31 @@ if (!$auth->authenticate()) {
exit;
}
// Endpoint de génération PDF
// Endpoints
if ($uri === '/generate') {
// Endpoint synchrone existant (rétro-compat)
$generator = new \Web2Print\Services\PdfGenerator($config);
$controller = new \Web2Print\Controllers\GenerateController($generator, $config);
$controller->handle();
} elseif ($uri === '/jobs') {
// POST /jobs : créer un job async
$jobs = new \Web2Print\Services\JobManager($config);
$controller = new \Web2Print\Controllers\JobsController($jobs, $config);
$controller->create();
} elseif (preg_match('#^/jobs/([a-f0-9]+)/result$#', $uri, $m)) {
// GET /jobs/{id}/result : récupérer le PDF
$jobs = new \Web2Print\Services\JobManager($config);
$controller = new \Web2Print\Controllers\JobsController($jobs, $config);
$controller->result($m[1]);
} elseif (preg_match('#^/jobs/([a-f0-9]+)$#', $uri, $m)) {
// GET /jobs/{id} : status — DELETE /jobs/{id} : cleanup
$jobs = new \Web2Print\Services\JobManager($config);
$controller = new \Web2Print\Controllers\JobsController($jobs, $config);
if ($_SERVER['REQUEST_METHOD'] === 'DELETE') {
$controller->delete($m[1]);
} else {
$controller->status($m[1]);
}
} else {
http_response_code(404);
header('Content-Type: application/json');

View file

@ -0,0 +1,120 @@
<?php
namespace Web2Print\Controllers;
use Web2Print\Services\JobManager;
/**
 * HTTP controller for the asynchronous PDF job endpoints.
 *
 * Every public method checks the HTTP verb itself, writes the response
 * (status code, headers, body) directly and returns nothing.
 */
class JobsController
{
    /** Job storage / worker launcher used by every endpoint. */
    private JobManager $jobs;

    /** Application configuration; this class only reads 'max_html_size'. */
    private array $config;

    public function __construct(JobManager $jobs, array $config)
    {
        $this->jobs = $jobs;
        $this->config = $config;
    }

    /**
     * POST: validates the JSON body and registers a new job.
     *
     * Responds 202 with {"job_id": ...} on success, 405 for a wrong verb,
     * 413 when the raw body exceeds config 'max_html_size', 400 for
     * malformed JSON or invalid fields.
     */
    public function create(): void
    {
        if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
            $this->sendError(405, 'Method not allowed');
            return;
        }

        $input = file_get_contents('php://input');
        if ($input === false) {
            // An unreadable body is handled like an empty one (rejected as invalid JSON below).
            $input = '';
        }
        if (strlen($input) > $this->config['max_html_size']) {
            $this->sendError(413, 'Request too large');
            return;
        }

        $data = json_decode($input, true);
        if (json_last_error() !== JSON_ERROR_NONE) {
            $this->sendError(400, 'Invalid JSON');
            return;
        }
        if (!is_array($data) || empty($data['url'])) {
            $this->sendError(400, 'url required (only URL mode is supported for async jobs)');
            return;
        }

        // The URL is later handed to an external renderer by the worker, so only
        // absolute http(s) URLs are accepted: this rejects non-string values,
        // other schemes (e.g. file://) and values starting with '-'.
        $url = $data['url'];
        if (!is_string($url) || preg_match('#^https?://#i', $url) !== 1) {
            $this->sendError(400, 'url must be an absolute http(s) URL');
            return;
        }

        $filename = 'document.pdf';
        if (!empty($data['filename'])) {
            if (!is_scalar($data['filename'])) {
                $this->sendError(400, 'filename must be a string');
                return;
            }
            // basename() strips any directory part; it can yield '' (e.g. for "/"),
            // in which case the default name is kept.
            $base = basename((string)$data['filename']);
            if ($base !== '') {
                $filename = $base;
            }
        }

        $options = $data['options'] ?? [];
        if (!is_array($options)) {
            $this->sendError(400, 'options must be an object');
            return;
        }

        $jobId = $this->jobs->create([
            'url' => $url,
            'filename' => $filename,
            'options' => $options,
        ]);

        http_response_code(202);
        header('Content-Type: application/json');
        echo json_encode(['job_id' => $jobId]);
    }

    /**
     * GET: returns the job's current status document as JSON,
     * or 404 when the job manager has no status for this id.
     */
    public function status(string $jobId): void
    {
        if ($_SERVER['REQUEST_METHOD'] !== 'GET') {
            $this->sendError(405, 'Method not allowed');
            return;
        }

        $status = $this->jobs->getStatus($jobId);
        if ($status === null) {
            $this->sendError(404, 'Job not found');
            return;
        }

        header('Content-Type: application/json');
        echo json_encode($status);
    }

    /**
     * GET: streams the generated PDF as an attachment.
     *
     * Responds 404 for an unknown job, 409 while the status is not 'done',
     * 500 when the status is 'done' but the PDF file is missing.
     */
    public function result(string $jobId): void
    {
        if ($_SERVER['REQUEST_METHOD'] !== 'GET') {
            $this->sendError(405, 'Method not allowed');
            return;
        }

        $status = $this->jobs->getStatus($jobId);
        if ($status === null) {
            $this->sendError(404, 'Job not found');
            return;
        }
        if (($status['status'] ?? null) !== 'done') {
            $this->sendError(409, 'Job not ready (status: ' . ($status['status'] ?? 'unknown') . ')');
            return;
        }

        $path = $this->jobs->getResultPath($jobId);
        if ($path === null) {
            $this->sendError(500, 'Result file missing');
            return;
        }

        $request = $this->jobs->getRequest($jobId);
        $filename = $request['filename'] ?? 'document.pdf';
        // The name originates from the client: neutralise characters that would
        // break out of the quoted header parameter (quotes, backslashes, controls).
        $filename = is_string($filename)
            ? (string)preg_replace('/[\x00-\x1F\x7F"\\\\]/', '_', $filename)
            : '';
        if ($filename === '') {
            $filename = 'document.pdf';
        }

        header('Content-Type: application/pdf');
        header('Content-Disposition: attachment; filename="' . $filename . '"');
        $size = filesize($path);
        if ($size !== false) {
            // Only advertise a length we could actually determine.
            header('Content-Length: ' . $size);
        }
        readfile($path);
    }

    /**
     * DELETE: removes the job's files and answers 204, whether or not the job existed.
     */
    public function delete(string $jobId): void
    {
        if ($_SERVER['REQUEST_METHOD'] !== 'DELETE') {
            $this->sendError(405, 'Method not allowed');
            return;
        }

        $this->jobs->delete($jobId);
        http_response_code(204);
    }

    /**
     * Emits a JSON error body of the form {"error": "<message>"} with the given status code.
     */
    private function sendError(int $code, string $message): void
    {
        http_response_code($code);
        header('Content-Type: application/json');
        echo json_encode(['error' => $message]);
    }
}

104
src/Services/JobManager.php Normal file
View file

@ -0,0 +1,104 @@
<?php
namespace Web2Print\Services;
/**
 * File-backed store for asynchronous PDF jobs.
 *
 * Each job owns a set of files in config 'tmp_dir', all named
 * "job_<id>.<suffix>": request.json (the original request), status.json
 * (the latest status document) and pdf (the rendered output).
 */
class JobManager
{
    /** Application configuration; reads 'tmp_dir', 'php_bin' and 'job_max_age'. */
    private array $config;

    public function __construct(array $config)
    {
        $this->config = $config;
    }

    /**
     * Persists the request, writes an initial 'pending' status and launches
     * worker.php in the background for the new job.
     *
     * @param array $request Data stored verbatim as JSON for the worker.
     * @return string The new job id (32 lowercase hex characters).
     * @throws \RuntimeException When the request file cannot be written.
     */
    public function create(array $request): string
    {
        $jobId = bin2hex(random_bytes(16));

        $payload = json_encode($request, JSON_UNESCAPED_SLASHES);
        if ($payload === false
            || file_put_contents($this->path($jobId, 'request.json'), $payload) === false
        ) {
            // Without its request file the worker cannot do anything; fail loudly
            // instead of handing out an id that will never produce a result.
            throw new \RuntimeException('Unable to store job request');
        }

        $this->writeStatus($jobId, ['status' => 'pending', 'createdAt' => time()]);

        // Detached from the current process: output discarded, not waited for.
        $cmd = sprintf(
            'nohup %s %s %s > /dev/null 2>&1 &',
            escapeshellcmd($this->config['php_bin']),
            escapeshellarg(__DIR__ . '/../../worker.php'),
            escapeshellarg($jobId)
        );
        exec($cmd);

        return $jobId;
    }

    /**
     * @return array|null The decoded status document, or null when the file
     *                    is absent, unreadable or not a JSON object/array.
     */
    public function getStatus(string $jobId): ?array
    {
        return $this->readJson($this->path($jobId, 'status.json'));
    }

    /**
     * Replaces the job's status document; 'updatedAt' is always set to now.
     *
     * The document is written to a temporary sibling file and then renamed
     * over status.json, so a concurrent getStatus() never observes a
     * half-written file. Failures are swallowed (best effort), as callers
     * have no way to react to them.
     */
    public function writeStatus(string $jobId, array $status): void
    {
        $status['updatedAt'] = time();
        $json = json_encode($status, JSON_UNESCAPED_SLASHES);
        if ($json === false) {
            return;
        }

        $target = $this->path($jobId, 'status.json');
        // The temp name keeps the "job_<id>." prefix so delete() also removes leftovers.
        $tmp = $target . '.' . bin2hex(random_bytes(4)) . '.tmp';

        if (file_put_contents($tmp, $json) === false || !@rename($tmp, $target)) {
            @unlink($tmp);
        }
    }

    /**
     * @return array|null The stored request, or null when missing or undecodable.
     */
    public function getRequest(string $jobId): ?array
    {
        return $this->readJson($this->path($jobId, 'request.json'));
    }

    /**
     * @return string|null Path of the rendered PDF if the file exists, else null.
     */
    public function getResultPath(string $jobId): ?string
    {
        $path = $this->path($jobId, 'pdf');
        return file_exists($path) ? $path : null;
    }

    /**
     * @return string Path where the job's PDF is (or will be) stored; existence is not checked.
     */
    public function pdfPath(string $jobId): string
    {
        return $this->path($jobId, 'pdf');
    }

    /**
     * Removes every file belonging to the job. Unknown ids are a no-op.
     */
    public function delete(string $jobId): void
    {
        // The id is interpolated into a glob pattern: refuse anything that is not
        // plain hex so wildcard characters can never widen the match.
        if (preg_match('/^[a-f0-9]+$/', $jobId) !== 1) {
            return;
        }
        foreach (glob($this->config['tmp_dir'] . '/job_' . $jobId . '.*') ?: [] as $file) {
            @unlink($file);
        }
    }

    /**
     * Deletes all jobs whose status file was last modified more than
     * config 'job_max_age' seconds ago (default 3600).
     *
     * @return int Number of jobs removed.
     */
    public function cleanOrphans(): int
    {
        $maxAge = $this->config['job_max_age'] ?? 3600;
        $cutoff = time() - $maxAge;
        $count = 0;

        foreach (glob($this->config['tmp_dir'] . '/job_*.status.json') ?: [] as $statusFile) {
            if (filemtime($statusFile) < $cutoff) {
                if (preg_match('/job_([a-f0-9]+)\.status\.json$/', $statusFile, $m)) {
                    $this->delete($m[1]);
                    $count++;
                }
            }
        }

        return $count;
    }

    /**
     * Reads and decodes a JSON file, returning null for anything but an array.
     */
    private function readJson(string $path): ?array
    {
        if (!file_exists($path)) {
            return null;
        }
        $raw = @file_get_contents($path);
        if ($raw === false) {
            return null;
        }
        $data = json_decode($raw, true);
        return is_array($data) ? $data : null;
    }

    /**
     * Builds "<tmp_dir>/job_<id>.<suffix>".
     */
    private function path(string $jobId, string $suffix): string
    {
        return $this->config['tmp_dir'] . '/job_' . $jobId . '.' . $suffix;
    }
}

View file

@ -44,6 +44,7 @@ class PdfGenerator
// Nettoyer les fichiers temporaires
@unlink($htmlFile);
@unlink($pdfFile);
$this->cleanChromiumTmp();
}
}
@ -80,6 +81,98 @@ class PdfGenerator
} finally {
@unlink($pdfFile);
$this->cleanChromiumTmp();
}
}
/**
 * Renders a URL to a PDF file by running pagedjs-cli, forwarding each line
 * the tool prints (stdout and stderr) to an optional progress callback.
 *
 * @param string        $url        Page to render; passed to pagedjs-cli as a single escaped argument.
 * @param string        $pdfFile    Output path given to "-o".
 * @param array         $options    Only 'timeout' is read: it is cast to int and passed to
 *                                  "--timeout"; when absent/empty, config 'pagedjs_timeout' × 1000 is used.
 * @param callable|null $onProgress Called with every non-empty output line (string).
 *
 * @throws \Exception When the process cannot be started, exits with a non-zero
 *                    code, or finishes without creating $pdfFile.
 */
public function generateFromUrlToFile(string $url, string $pdfFile, array $options = [], ?callable $onProgress = null): void
{
    $cmd = 'TMPDIR=' . escapeshellarg($this->config['tmp_dir']) . ' ';
    $cmd .= escapeshellcmd($this->config['pagedjs_bin']);
    $cmd .= ' ' . escapeshellarg($url);
    $cmd .= ' -o ' . escapeshellarg($pdfFile);
    if (!empty($options['timeout'])) {
        $cmd .= ' --timeout ' . (int)$options['timeout'];
    } else {
        $cmd .= ' --timeout ' . ($this->config['pagedjs_timeout'] * 1000);
    }

    // proc_open so stdout/stderr can be streamed line by line while the tool runs.
    $descriptors = [
        0 => ['pipe', 'r'],
        1 => ['pipe', 'w'],
        2 => ['pipe', 'w'],
    ];
    $proc = proc_open($cmd, $descriptors, $pipes);
    if (!is_resource($proc)) {
        throw new \Exception('Failed to spawn pagedjs-cli');
    }

    fclose($pipes[0]); // nothing is ever written to the child's stdin
    stream_set_blocking($pipes[1], false);
    stream_set_blocking($pipes[2], false);

    $stderrBuffer = '';
    $partial = ['', '']; // unterminated tail of stdout (0) and stderr (1)
    $exitCode = null;    // stays null until the process has been seen to exit

    try {
        while (true) {
            // Sampled before reading so that output written just before the
            // process exits is still collected in this iteration.
            $status = proc_get_status($proc);

            $read = [$pipes[1], $pipes[2]];
            $w = null;
            $e = null;
            $changed = @stream_select($read, $w, $e, 1);
            if ($changed > 0) {
                foreach ($read as $stream) {
                    $idx = ($stream === $pipes[1]) ? 0 : 1;
                    $chunk = fread($stream, 4096);
                    if ($chunk === '' || $chunk === false) {
                        continue;
                    }
                    if ($idx === 1) {
                        $stderrBuffer .= $chunk;
                    }
                    $partial[$idx] .= $chunk;

                    // Split on \n or \r (per the original note, Ora redraws its spinner with \r).
                    while (preg_match('/^([^\r\n]*)[\r\n](.*)$/s', $partial[$idx], $m)) {
                        $line = $m[1];
                        $partial[$idx] = $m[2];
                        if ($line !== '' && $onProgress !== null) {
                            $onProgress($line);
                        }
                    }
                }
            }

            if (!$status['running']) {
                // Take the exit code from the status that first reports termination:
                // before PHP 8.3 the child is reaped by this call and a later
                // proc_close() returns -1 even when the command succeeded.
                $exitCode = $status['exitcode'];

                // Drain whatever is still buffered in the pipes.
                foreach ([$pipes[1], $pipes[2]] as $stream) {
                    $idx = ($stream === $pipes[1]) ? 0 : 1;
                    while (($chunk = fread($stream, 4096)) !== false && $chunk !== '') {
                        if ($idx === 1) {
                            $stderrBuffer .= $chunk;
                        }
                        $partial[$idx] .= $chunk;
                    }
                    if ($partial[$idx] !== '' && $onProgress !== null) {
                        foreach (preg_split('/[\r\n]+/', $partial[$idx]) as $line) {
                            if ($line !== '') {
                                $onProgress($line);
                            }
                        }
                    }
                }
                break;
            }
        }

        if ($exitCode !== 0) {
            $this->log('Paged.js CLI error (URL streaming): ' . $stderrBuffer);
            throw new \Exception('PDF generation failed: ' . trim($stderrBuffer));
        }

        if (!file_exists($pdfFile)) {
            throw new \Exception('PDF file not created');
        }
    } finally {
        // Always release the pipes and the process handle, including when the
        // progress callback threw while the tool was still running.
        foreach ($pipes as $pipe) {
            if (is_resource($pipe)) {
                fclose($pipe);
            }
        }
        if (is_resource($proc)) {
            if ($exitCode === null) {
                // Abnormal exit from the loop: ask the child to stop so proc_close() does not wait on it.
                proc_terminate($proc);
            }
            proc_close($proc);
        }
        $this->cleanChromiumTmp();
    }
}
@ -130,6 +223,30 @@ class PdfGenerator
return $cmd;
}
/**
 * Removes leftover browser scratch entries from config 'tmp_dir':
 * everything matching "org.chromium.Chromium.*" and then everything
 * matching "puppeteer_dev_chrome_profile-*".
 *
 * NOTE(review): every match is removed regardless of which run created it —
 * confirm this is safe if several generations can run at the same time.
 */
private function cleanChromiumTmp(): void
{
    $patterns = [
        '/org.chromium.Chromium.*',
        '/puppeteer_dev_chrome_profile-*',
    ];

    foreach ($patterns as $pattern) {
        $matches = glob($this->config['tmp_dir'] . $pattern);
        if (!$matches) {
            // glob() yields false or an empty array when nothing matches.
            continue;
        }
        foreach ($matches as $entry) {
            $this->rmdirRecursive($entry);
        }
    }
}
/**
 * Deletes a file, or a directory together with everything beneath it.
 * Failures are ignored (best effort).
 *
 * Symbolic links are removed as links and never followed, so a link that
 * points at a directory cannot cause that directory's contents to be deleted.
 *
 * @param string $path File, symlink or directory to remove.
 */
private function rmdirRecursive(string $path): void
{
    // is_dir() follows symlinks, hence the explicit is_link() check first.
    if (is_link($path) || !is_dir($path)) {
        @unlink($path);
        return;
    }

    // scandir() returns false when the directory cannot be read; treat it as empty
    // and still attempt the rmdir() below.
    $items = @scandir($path) ?: [];
    foreach ($items as $item) {
        if ($item === '.' || $item === '..') {
            continue;
        }
        $this->rmdirRecursive($path . '/' . $item);
    }

    @rmdir($path);
}
private function log(string $message): void
{
$timestamp = date('Y-m-d H:i:s');

89
worker.php Normal file
View file

@ -0,0 +1,89 @@
<?php
/**
 * Asynchronous PDF generation worker.
 *
 * Usage: php worker.php <job_id>
 *        php worker.php --cleanup   (purges orphaned jobs; meant to be run from cron)
 */

// Autoloader for the Web2Print\ namespace, rooted at ./src/
// (per the original note, the same rules as public/index.php).
spl_autoload_register(function ($class) {
    $namespace = 'Web2Print\\';
    $prefixLength = strlen($namespace);

    if (strncmp($namespace, $class, $prefixLength) === 0) {
        $candidate = __DIR__ . '/src/' . str_replace('\\', '/', substr($class, $prefixLength)) . '.php';
        if (file_exists($candidate)) {
            require $candidate;
        }
    }
});

$config = require __DIR__ . '/config/config.php';
$jobs = new \Web2Print\Services\JobManager($config);

// First CLI argument: either the literal "--cleanup" or a job id.
$argument = $argv[1] ?? null;

// Cleanup mode: purge stale jobs and stop.
if ($argument === '--cleanup') {
    $count = $jobs->cleanOrphans();
    echo "Cleaned {$count} orphan jobs.\n";
    exit(0);
}

if ($argument === null) {
    fwrite(STDERR, "Usage: php worker.php <job_id> | --cleanup\n");
    exit(1);
}

// Job ids must consist solely of lowercase hex characters.
$jobId = $argument;
if (preg_match('/^[a-f0-9]+$/', $jobId) !== 1) {
    fwrite(STDERR, "Invalid job_id\n");
    exit(1);
}

$request = $jobs->getRequest($jobId);
if ($request === null) {
    fwrite(STDERR, "Request not found for job {$jobId}\n");
    exit(1);
}

// Background process: lift PHP's execution time limit.
set_time_limit(0);

$generator = new \Web2Print\Services\PdfGenerator($config);
$pdfPath = $jobs->pdfPath($jobId);
$pageCount = 0;

// Receives one output line of the renderer at a time (already split on \r / \n)
// and mirrors the progress into the job's status file.
// Typical lines, per the original note: "- Loading: ...", "✔ Loaded", "- Rendering: Page X".
$onProgress = function (string $line) use ($jobs, $jobId, &$pageCount) {
    if (preg_match('/Rendering:\s*Page\s*(\d+)/i', $line, $m)) {
        // Keep the highest page number seen so far.
        $pageCount = max($pageCount, (int)$m[1]);
        $jobs->writeStatus($jobId, [
            'status' => 'rendering',
            'page' => $pageCount,
        ]);
        return;
    }

    if (stripos($line, 'Loaded') !== false) {
        $jobs->writeStatus($jobId, ['status' => 'rendering', 'page' => 0]);
    }
};

try {
    $jobs->writeStatus($jobId, ['status' => 'rendering', 'page' => 0]);

    $generator->generateFromUrlToFile(
        $request['url'],
        $pdfPath,
        $request['options'] ?? [],
        $onProgress
    );

    $jobs->writeStatus($jobId, [
        'status' => 'done',
        'pages' => $pageCount,
    ]);
} catch (\Throwable $e) {
    // Record the failure for pollers, then report it on stderr.
    $reason = $e->getMessage();
    $jobs->writeStatus($jobId, [
        'status' => 'error',
        'message' => $reason,
    ]);
    fwrite(STDERR, "Job {$jobId} failed: " . $reason . "\n");
    exit(1);
}