/** * @name French Typography Regex * @file A collection of regex rules for French text normalization * @description * This file provides a set of regular expressions to clean and standardize * French typography. It automatically fixes spacing, quotation marks, * apostrophes, ellipses, and common orthotypographic conventions * (non-breaking spaces before punctuation, French guillemets, etc.). * * @author Julie Blanc (contact@julie-blanc.fr) * @created 2025-08-26 * @updated 2025-08-26 * @see {@link https://gitlab.com/csspageweaver/frenchTypoRegex/ } */ import { Handler } from '/csspageweaver/lib/paged.esm.js'; export default class frenchTypoRegex extends Handler { constructor(chunker, polisher, caller) { super(chunker, polisher, caller); } beforeParsed(content) { // SPECIFIC, delete span with apostroph content.querySelectorAll('span[dir="rtl"]').forEach(span => { if (span.textContent.trim() === '’' || span.textContent.trim() === '"’"' ) { span.replaceWith(document.createTextNode('’')); } }); applyRegex(content); } } const arrayRegexFrenchTypo = [ { // Remplacement des doubles espaces (ou plus) par un seul espace normal reg: /\s{2,}/g, repl: " ", }, { // XIème = XIe reg: /(X|I|V)ème/g, repl: "$1e", }, { // guillemets français ouvrants reg: /"([^\s])/g, repl: "«" }, { // guillemets français fermants reg: /([^\s])"/g, repl: "»" }, { // espace insécable avant ; : ? ! » reg: /\s+([;:?!»])/g, repl: "\u00A0$1", }, { // espace insécable après « reg: /(«)\s+/g, repl: "$1\u00A0", }, { // real apostrophe reg: /(\w)'(\w)/g, repl: '$1’$2' }, { // real suspension points reg: /\.{3,}/g, repl: '\u2026' }, { // espace insécable après certains mots de deux lettres reg: /\b(le|la|ce|on|il|de|du|un|au|en)\s+/gi, repl: '$1\u00A0' }, { // no break space after one letter words reg: /\s+([a-zà])\s+/gi, repl: ' $1\u00A0' }, { // no break space after first word (2-3 letter) of the sentence reg: /\.\s([A-ZÀ-Ö])([A-Za-zÀ-ÖØ-öø-ÿœŒ]{1,3})\s+/g, repl: '. 
$1$2\u00A0' }, { // delete all spaces before punctuation !?;:»›”%€)]., reg: /\s+([!?;:»›”%€$)\]\.\,])/g, repl: '$1' }, { // add narrow no break space before !?;:»›%€ (sauf http://) reg: /(? match.replace(/(? ou
    // Leave code samples untouched: skip the current node when its parent
    // element is, or sits inside, a <code> or <pre> element. The optional
    // chaining covers a node that has no parent element.
    const codeParent = node.parentElement?.closest("code, pre");
    if (codeParent) continue;

    // Run every rule of arrayRegexFrenchTypo in array order. The node's text is
    // rewritten after each rule, so each rule operates on the output of the
    // rules before it; the order of the array is therefore significant.
    for (let i = 0; i < arrayRegexFrenchTypo.length; i++) {
      node.textContent = node.textContent.replace(arrayRegexFrenchTypo[i].reg, arrayRegexFrenchTypo[i].repl);
    }

  // NOTE(review): closes the enclosing loop over nodes; its header is not
  // visible in this part of the file, confirm against the full source.
  }

// NOTE(review): presumably closes the applyRegex function called from
// beforeParsed; its signature is not visible here, confirm.
}