Initial commit

This commit is contained in:
sarahgarcin1 2026-01-05 19:33:15 +01:00
commit 388079e6bb
1108 changed files with 330121 additions and 0 deletions

View file

@ -0,0 +1 @@
.DS_Store

View file

@ -0,0 +1,7 @@
{
"name": "Regex Typo",
"description": "Script for handling orthotypography.",
"author": "Julie Blanc et Sarah Garcin",
"repository": "https://gitlab.com/JulieBlanc/typesetting-tools",
"hook": "regexTypo-hook.js"
}

View file

@ -0,0 +1,24 @@
/**
* @name Regex Typo
* @file Script for handling orthotypography.
* @author Julie Blanc <contact@julie-blanc.fr> modified by Sarah Garcin
* @see { @link https://gitlab.com/JulieBlanc/typesetting-tools }
* @see { @link https://gitlab.com/csspageweaver/plugins/regexTypo/ }
*/
import { Handler } from '/csspageweaver/lib/paged.esm.js';
import { orthotypo, exposants, noHyphens } from '/csspageweaver/plugins/regexTypo/regexTypo.js';
/**
 * Paged.js handler that runs the orthotypography passes on the document.
 *
 * The three passes come from `regexTypo.js` and are applied in a fixed
 * order: `orthotypo`, then `exposants`, then `noHyphens`.
 */
export default class regextypoHook extends Handler {
  /**
   * Forwards the three arguments unchanged to the `Handler` constructor.
   */
  constructor(chunker, polisher, caller) {
    super(chunker, polisher, caller);
  }

  /**
   * Runs every typography pass on `content`, in order.
   * NOTE(review): presumably invoked by Paged.js before the content is
   * parsed/chunked — confirm against the Handler API.
   *
   * @param {*} content - Root handed over by the caller; passed as-is to each pass.
   */
  beforeParsed(content) {
    const passes = [orthotypo, exposants, noHyphens];
    for (const runPass of passes) {
      runPass(content);
    }
  }
}

View file

@ -0,0 +1,312 @@
/**
 * Applies French orthotypographic corrections to every text node found
 * under `content`, skipping text located inside a `<code>` element.
 *
 * The rules run in the order listed below on each text node; later rules
 * see the output of earlier ones, so the order matters.
 *
 * @param {Node} content - Root node whose descendant text nodes are rewritten in place.
 * @returns {void}
 */
export function orthotypo(content){
  const rules = [
    {
      // XIème => XIe
      reg: /(X|I|V)ème/g,
      repl: '$1e'
    },
    {
      // French opening quote: straight quote directly followed by a letter
      reg: /\"([A-Za-zÀ-ÖØ-öø-ÿœŒ])/g,
      repl: '«$1'
    },
    {
      // French closing quote: straight quote directly preceded by a letter
      reg: /([A-Za-zÀ-ÖØ-öø-ÿœŒ])\"/g,
      repl: '$1»'
    },
    {
      // Typographic apostrophe (U+2019). The replacement used to be an
      // empty string, which deleted every apostrophe from the text.
      reg: /\'/g,
      repl: '\u2019'
    },
    {
      // Three or more dots => single ellipsis character
      reg: /\.+\.+\./g,
      repl: '\u2026'
    },
    {
      // Delete all spaces before punctuation !?;:»›”)].,
      reg: /\s+([!?;:»›”)\]\.\,])/g,
      repl: '$1'
    },
    {
      // Add a narrow no-break space before !?;:»›
      reg: /([!?;:»›])/g,
      repl: '\u202F$1'
    },
    {
      // Delete all spaces after «‹“[(
      reg: /([«‹“\[(])\s+/g,
      repl: '$1'
    },
    {
      // Add a narrow no-break space after «‹
      reg: /([«‹])/g,
      repl: '$1\u202F'
    },
    {
      // No-break space after a fixed list of short words (articles, pronouns).
      // An alternative considered upstream was "any two-letter word not
      // followed by another two-letter word"; the explicit list is used instead.
      reg: /\s(le|la|un|une|ce|ces|il|on|les|des|du|ils)\s+/g,
      repl: ' $1\u00A0'
    },
    {
      // Complements the rule above: two successive two-letter words
      reg: /\s+([a-zØ-öø-ÿœ]{2})\s+([A-Za-zÀ-ÖØ-öø-ÿœŒ]{2})\s+/g,
      repl: ' $1 $2\u00A0'
    },
    {
      // No-break space after one-letter words
      reg: /\s+([a-zà])\s+/gi,
      repl: ' $1\u00A0'
    },
    {
      // No-break space after a short first word of a sentence
      reg: /\.\s([A-ZÀ-Ö])([A-Za-zÀ-ÖØ-öø-ÿœŒ]{1,5})\s+/g,
      repl: '. $1$2\u00A0'
    },
    {
      // No-break space between two consecutive capitalised words (names)
      reg: /([A-ZÀ-ÖØŒ])([A-Za-zÀ-ÖØ-öø-ÿœŒ]+)\s+([A-ZÀ-ÖØŒ])([A-Za-zÀ-ÖØ-öø-ÿœŒ]+)/g,
      repl: '$1$2\u00A0$3$4'
    },
    {
      // No-break space before a single capital followed by a period (initials)
      reg: /\s([A-ZÀ-ÖØŒ])\./g,
      repl: '\u00A0$1. '
    },
    {
      // No-break space before "siècle". The replacement must not append an
      // "s": the pattern stops after "siècle", so an existing plural "s"
      // is left untouched and a singular stays singular.
      reg: /(X|I|V)(er|e)\s+siècle/g,
      repl: '$1$2\u00A0siècle'
    },
  ];

  const walker = document.createTreeWalker(content, NodeFilter.SHOW_TEXT, null, null);
  let node;
  while ((node = walker.nextNode())) {
    const parent = node.parentElement;
    // A text node without a parent element cannot be checked for <code>;
    // skip it but keep walking instead of aborting the whole pass.
    if (!parent) {
      continue;
    }
    // Leave code samples untouched.
    if (parent.closest("code") != null) {
      continue;
    }
    const original = node.textContent;
    let text = original;
    for (const { reg, repl } of rules) {
      text = text.replace(reg, repl);
    }
    if (text !== original) {
      node.textContent = text;
    }
  }
}
/**
 * Rewrites the HTML of every `<p>` and `<span>` under `content` by passing
 * its `innerHTML` through `spacesRegex`.
 *
 * @param {ParentNode} content - Root queried with `querySelectorAll('p, span')`.
 * @returns {void}
 */
function spaces( content ){
  const targets = content.querySelectorAll('p, span');
  for (const target of targets) {
    const rewritten = spacesRegex(target.innerHTML);
    target.innerHTML = rewritten;
  }
}
/**
 * Applies the French spacing and punctuation rules to a string and
 * returns the rewritten string. Rules run in the order listed; later
 * rules see the output of earlier ones.
 *
 * NOTE(review): the only visible caller passes `innerHTML`, so the quote
 * and apostrophe rules also see markup (attribute quotes included) —
 * confirm this is acceptable before wiring this helper into the hook.
 *
 * @param {string} elem - Text (or HTML source) to rewrite.
 * @returns {string} The rewritten string.
 */
function spacesRegex(elem){
  const rules = [
    {
      // French opening quote: straight quote directly followed by a letter
      reg: /\"([A-Za-zÀ-ÖØ-öø-ÿœŒ])/g,
      repl: '«$1'
    },
    {
      // French closing quote: straight quote directly preceded by a letter
      reg: /([A-Za-zÀ-ÖØ-öø-ÿœŒ])\"/g,
      repl: '$1»'
    },
    {
      // Typographic apostrophe (U+2019). The replacement used to be an
      // empty string, which deleted every apostrophe.
      reg: /\'/g,
      repl: '\u2019'
    },
    {
      // Three or more dots => single ellipsis character
      reg: /\.+\.+\./g,
      repl: '\u2026'
    },
    {
      // Delete all spaces before punctuation !?;:»›”)].,
      reg: /\s+([!?;:»›”)\]\.\,])/g,
      repl: '$1'
    },
    {
      // Add a narrow no-break space before !?;:»›
      reg: /([!?;:»›])/g,
      repl: '\u202F$1'
    },
    {
      // Delete all spaces after «‹“[(
      reg: /([«‹“\[(])\s+/g,
      repl: '$1'
    },
    {
      // Add a narrow no-break space after «‹
      reg: /([«‹])/g,
      repl: '$1\u202F'
    },
    {
      // No-break space after a fixed list of short words (articles, pronouns)
      reg: /\s(le|la|un|une|ce|ces|il|on|les|des|du|ils)\s+/g,
      repl: ' $1\u00A0'
    },
    {
      // Complements the rule above: two successive two-letter words
      reg: /\s+([a-zØ-öø-ÿœ]{2})\s+([A-Za-zÀ-ÖØ-öø-ÿœŒ]{2})\s+/g,
      repl: ' $1 $2\u00A0'
    },
    {
      // No-break space after one-letter words
      reg: /\s+([a-zà])\s+/gi,
      repl: ' $1\u00A0'
    },
    {
      // No-break space after a short first word of a sentence
      reg: /\.\s([A-ZÀ-Ö])([A-Za-zÀ-ÖØ-öø-ÿœŒ]{1,5})\s+/g,
      repl: '. $1$2\u00A0'
    },
    {
      // No-break space between two consecutive capitalised words (names)
      reg: /([A-ZÀ-ÖØŒ])([A-Za-zÀ-ÖØ-öø-ÿœŒ]+)\s+([A-ZÀ-ÖØŒ])([A-Za-zÀ-ÖØ-öø-ÿœŒ]+)/g,
      repl: '$1$2\u00A0$3$4'
    },
    {
      // No-break space before a single capital followed by a period (initials)
      reg: /\s([A-ZÀ-ÖØŒ])\./g,
      repl: '\u00A0$1. '
    },
    {
      // No-break space before "siècle"; the replacement must not append
      // an "s" (it used to turn "siècle" into "siècles").
      reg: /(X|I|V)(er|e)\s+siècle/g,
      repl: '$1$2\u00A0siècle'
    },
  ];

  // The per-rule debug console.log was removed: it dumped the whole string
  // once per rule for every element processed.
  let result = elem;
  for (const { reg, repl } of rules) {
    result = result.replace(reg, repl);
  }
  return result;
}
/**
 * Rewrites the HTML of every `<p>` under `content` by passing its
 * `innerHTML` through `noHyphensRegex`.
 *
 * @param {ParentNode} content - Root queried with `querySelectorAll('p')`.
 * @returns {void}
 */
export function noHyphens( content ){
  // Letter class used by the rules for French text: [A-Za-zÀ-ÖØ-öø-ÿœŒ]
  const paragraphs = content.querySelectorAll('p');
  for (const paragraph of paragraphs) {
    const rewritten = noHyphensRegex(paragraph.innerHTML);
    paragraph.innerHTML = rewritten;
  }
}
/**
 * Applies the no-break rules (names, initials, "siècle") to an HTML string
 * and undoes one unwanted superscript. Rules run in the order listed.
 *
 * @param {string} elem - HTML source to rewrite.
 * @returns {string} The rewritten HTML source.
 */
function noHyphensRegex(elem){
  const rules = [
    {
      // No-break space between two consecutive capitalised words (names)
      reg: /([A-ZÀ-ÖØŒ])([A-Za-zÀ-ÖØ-öø-ÿœŒ]+)\s+([A-ZÀ-ÖØŒ])([A-Za-zÀ-ÖØ-öø-ÿœŒ]+)/g,
      repl: '$1$2\u00A0$3$4'
    },
    {
      // No-break space before a single capital followed by a period (initials)
      reg: /\s([A-ZÀ-ÖØŒ])\./g,
      repl: '\u00A0$1. '
    },
    {
      // No-break space before "siècle"; the replacement must not append
      // an "s" (it used to turn "siècle" into "siècles").
      reg: /(X|I|V)(er|e)\s+siècle/g,
      repl: '$1$2\u00A0siècle'
    },
    {
      // Undo the superscript wrongly applied to the "e" of "1ep".
      // A global regex is used because a plain-string pattern only
      // replaces the first occurrence.
      reg: /1<sup>e<\/sup>p/g,
      repl: '1ep'
    }
  ];

  let result = elem;
  for (const { reg, repl } of rules) {
    result = result.replace(reg, repl);
  }
  return result;
}
/**
 * Rewrites the HTML of every `<p>` and `<span>` under `content` by passing
 * its `innerHTML` through `exposantsRegex` (superscript markup).
 *
 * @param {ParentNode} content - Root queried with `querySelectorAll('p, span')`.
 * @returns {void}
 */
export function exposants(content){
  const targets = content.querySelectorAll('p, span');
  for (const target of targets) {
    const rewritten = exposantsRegex(target.innerHTML);
    target.innerHTML = rewritten;
  }
}
/**
 * Adds superscript markup for French ordinals and "no" abbreviations in
 * an HTML string. Rules run in the order listed.
 *
 * @param {string} elem - HTML source to rewrite.
 * @returns {string} The rewritten HTML source.
 */
function exposantsRegex(elem){
  const rules = [
    {
      // Numbers: " no 12" / " no.12" => " n<sup>o</sup>&nbsp;12"
      reg: /\sno\.?\s?(\d+)/g,
      repl: ' n<sup>o</sup>&nbsp;$1'
    },
    {
      // Centuries: roman numeral in small caps + superscript ordinal suffix
      reg: /(XXI|XX|XIX|XVIII|XVII|XVI|XV|xxi|xx|xix|xviii|xvii|xvi|xv)(e|er)/g,
      repl: '<span style="text-transform: lowercase; font-variant: small-caps;">$1</span><sup>$2</sup>'
    },
    {
      // Superscript "e"/"er" after digits, when followed by whitespace or
      // a serialised no-break space. A lookahead is used so the following
      // space is kept (it used to be consumed and dropped). `\s` already
      // covers U+00A0; the former `\\u00A0` inside the character class
      // matched a backslash and the letters "u", "0", "A" instead.
      reg: /(\d+)(er|e)(?=\s|&nbsp;)/g,
      repl: '$1<sup>$2</sup>'
    },
    {
      // Special case not covered by the generic digit rule.
      // Global regex: a plain-string pattern only replaces the first hit.
      reg: /22e/g,
      repl: '22<sup>e</sup>'
    },
    {
      // Special case: "4e éd." (edition)
      reg: /4e éd\./g,
      repl: '4<sup>e</sup> éd.'
    },
    {
      // Special case: roman "IVe"
      reg: /IVe/g,
      repl: 'IV<sup>e</sup>'
    },
  ];

  let result = elem;
  for (const { reg, repl } of rules) {
    result = result.replace(reg, repl);
  }
  return result;
}