208 lines
6.8 KiB
Plaintext
208 lines
6.8 KiB
Plaintext
# Copyright (C) 2020-2021 Samuel Thibault <samuel.thibault@ens-lyon.org>
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify it under
|
|
# the terms of the GNU General Public License as published by the Free Software
|
|
# Foundation; either version 2 of the License, or (at your option) any later
|
|
# version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
|
|
# PARTICULAR PURPOSE. See the GNU General Public License for more details (file
|
|
# COPYING in the root directory).
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
complexSymbols:
|
|
# Pattern matching first:
|
|
# match name\tperl regexp to be matched
|
|
|
|
# Note: we separate out the plural case because we want to catch all of its
|
|
# marks, and keep one only
|
|
#
|
|
# Always make sure to match a letter after matching ·, to avoid matching . at
|
|
# end of sentences.
|
|
|
|
|
|
# Référence: Bescherelle Hatier 1997, Grammaire, article 50
|
|
|
|
# petit(e)s, petit(e)
|
|
# The closing parenthesis is mandatory and no letter is required after it:
# with an optional ")" followed by a word-boundary check, "petit(e)" before a
# space or punctuation only matched "(e" and left a stray ")" behind.
end-words (e) (?<=[^\W_])[(][eE][)]
|
|
# petits·es
|
|
end-words es (?<=[^\W_])[sS]?[.·•‧⋅‐‑-][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# petit·e
|
|
end-words e (?<=[^\W_])[.·•‧⋅‐‑-][eE](?=\b)
|
|
|
|
# bons·nnes
|
|
end-words nes (?<=[^\W_][nN])[sS]?[.·•‧⋅‐‑-][nN]?[nN][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# bon·nne
|
|
end-words ne (?<=[^\W_][nN])[.·•‧⋅‐‑-][nN]?[nN][eE](?=\b)
|
|
|
|
# cruels·lles
|
|
end-words les (?<=[^\W_][lL])[sS]?[.·•‧⋅‐‑-][lL]?[lL][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# cruel·lle
|
|
end-words le (?<=[^\W_][lL])[.·•‧⋅‐‑-][lL]?[lL][eE](?=\b)
|
|
|
|
# coquets·tes
|
|
end-words tes (?<=[^\W_][tT])[sS]?[.·•‧⋅‐‑-][tT]?[tT][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# coquet·te
|
|
end-words te (?<=[^\W_][tT])[.·•‧⋅‐‑-][tT]?[tT][eE](?=\b)
|
|
|
|
# bas·ses
|
|
end-words ses (?<=[^\W_][sS])[.·•‧⋅‐‑-][sS]?[sS][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# bas·se
|
|
end-words se (?<=[^\W_][sS])[.·•‧⋅‐‑-][sS]?[sS][eE](?=\b)
|
|
|
|
# nerveux·ses
|
|
end-words uxses (?<=[^\W_][uU])[xX]?[.·•‧⋅‐‑-][sS][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# nerveux·se
|
|
end-words uxse (?<=[^\W_][uU])[xX]?[.·•‧⋅‐‑-][sS][eE](?=\b)
|
|
# doux·ces
|
|
end-words xces (?<=[^\W_][uU])[xX]?[.·•‧⋅‐‑-][cC][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# doux·ce
|
|
end-words xce (?<=[^\W_][uU])[xX]?[.·•‧⋅‐‑-][cC][eE](?=\b)
|
|
|
|
# légers·ères
|
|
end-words ères (?<=[^\W_][eE][rR])[sS]?[.·•‧⋅‐‑-][èÈE][rR][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# léger·ère
|
|
end-words ère (?<=[^\W_][eE][rR])[.·•‧⋅‐‑-][èÈE][rR][eE](?=\b)
|
|
|
|
# neufs·ves
|
|
end-words ives (?<=[^\W_][fF])[sS]?[.·•‧⋅‐‑-][iI]?[vV][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# neuf·ve
|
|
end-words ive (?<=[^\W_][fF])[.·•‧⋅‐‑-][iI]?[vV][eE](?=\b)
|
|
|
|
# beaux·elles nouveaux·elles
|
|
end-words elles (?<=[^\W_][eE][aA][uU])[xX]?[.·•‧⋅‐‑-][bB]?[eE]?[lL][lL][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# beau·elle nouveau·elle
|
|
end-words elle (?<=[^\W_][eE][aA][uU])[.·•‧⋅‐‑-][bB]?[eE]?[lL][lL][eE](?=\b)
|
|
|
|
# fous·folles (the misspelling foux·folles is accepted too)
|
|
# The plural of "fou" is "fous", so the optional plural mark must accept s;
# x is still accepted for texts that use the incorrect "foux".
end-words folles (?<=\b[fF][oO][uU])[sSxX]?[.·•‧⋅‐‑-][fF]?[oO]?[lL][lL][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# fou·folle
|
|
end-words folle (?<=\b[fF][oO][uU])[.·•‧⋅‐‑-][fF]?[oO]?[lL][lL][eE](?=\b)
|
|
|
|
# mous·molles
|
|
end-words molles (?<=\b[mM][oO][uU])[sS]?[.·•‧⋅‐‑-][mM]?[oO]?[lL][lL][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# mou·molle
|
|
end-words molle (?<=\b[mM][oO][uU])[.·•‧⋅‐‑-][mM]?[oO]?[lL][lL][eE](?=\b)
|
|
|
|
# vieux·vieilles
|
|
end-words vieilles (?<=\b[vV][iI][eE][uU][xX])[.·•‧⋅‐‑-][vV]?[iI]?[eE]?[iI][lL][lL][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# vieux·vieille
|
|
end-words vieille (?<=\b[vV][iI][eE][uU][xX])[.·•‧⋅‐‑-][vV]?[iI]?[eE]?[iI][lL][lL][eE](?=\b)
|
|
|
|
# francs·ches
|
|
end-words ches (?<=[^\W_][cC])[sS]?[.·•‧⋅‐‑-][cC]?[hH][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# franc·che
|
|
end-words che (?<=[^\W_][cC])[.·•‧⋅‐‑-][cC]?[hH][eE](?=\b)
|
|
|
|
# francs·ques
|
|
end-words ques (?<=[^\W_][cC])[sS]?[.·•‧⋅‐‑-][qQ][uU][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# franc·que
|
|
# The q is mandatory, exactly as in the plural rule (end-words ques):
# a bare "c·ue" is not a feminine ending and must not be swallowed here.
end-words que (?<=[^\W_][cC])[.·•‧⋅‐‑-][qQ][uU][eE](?=\b)
|
|
|
|
|
|
# Noms
|
|
|
|
# auteurs·trices
|
|
end-words trices (?<=[^\W_])[sS]?[.·•‧⋅‐‑-][tT]?[rR]?[iI][cC][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# auteur·trice
|
|
end-words trice (?<=[^\W_])[.·•‧⋅‐‑-][tT]?[rR]?[iI][cC][eE](?=\b)
|
|
|
|
# chercheurs·euses
|
|
end-words euses (?<=[^\W_])[sS]?[.·•‧⋅‐‑-][eE][uU][sS][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# chercheur·euse
|
|
end-words euse (?<=[^\W_])[.·•‧⋅‐‑-][eE][uU][sS][eE](?=\b)
|
|
|
|
# chefs·fes
|
|
end-words fes (?<=[^\W_][fF])[sS]?[.·•‧⋅‐‑-][fF][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
# chef·fe
|
|
end-words fe (?<=[^\W_][fF])[.·•‧⋅‐‑-][fF][eE](?=\b)
|
|
|
|
# tous·tes
|
|
end-words stes (?<=[^\W_][sS])[.·•‧⋅‐‑-][tT][eE][.·•‧⋅‐‑-]?[sS](?=\b)
|
|
|
|
# ce·cette
|
|
end-words cette1 (?<=\b[cC][eE])[.·•‧⋅‐‑-][cC]?[eE]?[tT][tT][eE](?=\b)
|
|
end-words cette2 (?<=\b[cC][eE][tT])[.·•‧⋅‐‑-][cC]?[eE]?[tT][tT][eE](?=\b)
|
|
|
|
# le·la
|
|
word le (?<=\b)[lL][eE][/.·•‧⋅‐‑-][lL]?[aA](?=\b)
|
|
|
|
|
|
# As last resort, when it's inside a word just drop it and hope for the best
|
|
# Intentionally *not* putting . and - here of course :)
|
|
dot-fallback (?<=[^\W_])[·⋅‧‐‑](?=[^\W_])
|
|
|
|
symbols:
|
|
# Replacement:
|
|
# match name\treplacement\tnone\tliteral
|
|
#
|
|
# none makes the rule always apply independently of the punctuation level
|
|
# literal avoids introducing spurious spaces
|
|
|
|
end-words (e) none literal
|
|
end-words es s none literal
|
|
end-words e none literal
|
|
|
|
end-words nes s none literal
|
|
end-words ne none literal
|
|
|
|
end-words les s none literal
|
|
end-words le none literal
|
|
|
|
end-words tes s none literal
|
|
end-words te none literal
|
|
|
|
# Empty replacement: the regexp for this rule keeps the final s of the stem
# (it is in the lookbehind) and consumes no plural mark, so re-adding an s
# would produce "bass" for "bas·ses".
end-words ses none literal
|
|
end-words se none literal
|
|
|
|
end-words uxses x none literal
|
|
end-words uxse x none literal
|
|
end-words xces x none literal
|
|
end-words xce x none literal
|
|
|
|
end-words ères s none literal
|
|
end-words ère none literal
|
|
|
|
end-words ives s none literal
|
|
end-words ive none literal
|
|
|
|
end-words elles x none literal
|
|
end-words elle none literal
|
|
|
|
# The plural of "fou" is "fous" (not "foux"), so the plural mark put back
# after the stem is an s.
end-words folles s none literal
|
|
end-words folle none literal
|
|
|
|
# The regexp for this rule consumes the optional plural s of "mous", so the
# mark put back after the stem must be an s (an x would produce "moux").
end-words molles s none literal
|
|
end-words molle none literal
|
|
|
|
end-words vieilles none literal
|
|
end-words vieille none literal
|
|
|
|
end-words ches s none literal
|
|
end-words che none literal
|
|
|
|
end-words ques s none literal
|
|
end-words que none literal
|
|
|
|
|
|
end-words trices s none literal
|
|
end-words trice none literal
|
|
|
|
end-words euses s none literal
|
|
end-words euse none literal
|
|
|
|
end-words fes s none literal
|
|
end-words fe none literal
|
|
|
|
end-words stes none literal
|
|
|
|
end-words cette1 none literal
|
|
end-words cette2 none literal
|
|
|
|
word le le none literal
|
|
|
|
dot-fallback none literal
|