Reverting usage of RE2 (WIP):

**Breaking changes**

The livechat v13 introduced a new library to handle regular expressions in forbidden words, to avoid
[ReDOS](https://en.wikipedia.org/wiki/ReDoS) attacks.
Unfortunately, this library was not able to install itself properly on some systems, and some admins were not able
to install the livechat plugin.

That's why we have disabled this library in v14, and introduced a new setting to enable regexp in forbidden words.
By default this setting is disabled, and your users won't be able to use regexp in their forbidden words.

The risk of enabling this feature is that a malicious user could cause a denial of service for the chat bot, by using a
specially crafted regular expression in their channel options, and sending a specially crafted message in one of their
rooms. If you trust your users (those who have rights to livestream), you can enable the setting. Otherwise it is not
recommended. See the documentation for more information.

**Minor changes and fixes**

* Channel's forbidden words: new "enable" column.
* New setting to enable regular expressions for channel forbidden words.
* "Channel advanced configuration" settings: removing the "experimental feature" label.
This commit is contained in:
John Livingston
2025-06-19 12:07:39 +02:00
parent e41529b61f
commit 3624dd5c3c
65 changed files with 343 additions and 2145 deletions

View File

@ -11,9 +11,6 @@ import {
noDuplicateDefaultDelay,
noDuplicateMaxDelay
} from '../../../../shared/lib/constants'
import * as RE2 from 're2'
type SanitizeMode = 'validation' | 'read'
/**
* Sanitize data so that they can safely be used/stored for channel configuration.
@ -25,10 +22,9 @@ type SanitizeMode = 'validation' | 'read'
* @param mode Sanitization mode. 'validation': when verifiying user input. 'read': when reading from disk.
*/
async function sanitizeChannelConfigurationOptions (
_options: RegisterServerOptions,
options: RegisterServerOptions,
_channelId: number | string,
data: unknown,
mode: SanitizeMode
data: unknown
): Promise<ChannelConfigurationOptions> {
if (!_assertObjectType(data)) {
throw new Error('Invalid data type')
@ -96,7 +92,7 @@ async function sanitizeChannelConfigurationOptions (
bot: {
enabled: _readBoolean(botData, 'enabled'),
nickname: _readSimpleInput(botData, 'nickname', true),
forbiddenWords: await _readForbiddenWords(botData, mode),
forbiddenWords: await _readForbiddenWords(options, botData),
forbidSpecialChars: await _readForbidSpecialChars(botData),
noDuplicate: await _readNoDuplicate(botData),
quotes: _readQuotes(botData),
@ -206,7 +202,7 @@ function _readMultiLineString (data: Record<string, unknown>, f: string): string
return s
}
async function _readRegExpArray (data: Record<string, unknown>, f: string, mode: SanitizeMode): Promise<string[]> {
async function _readRegExpArray (data: Record<string, unknown>, f: string): Promise<string[]> {
// Note: this function can instanciate a lot of RegExp.
// To avoid freezing the server, we make it async, and will validate each regexp in a separate tick.
if (!(f in data)) {
@ -224,24 +220,11 @@ async function _readRegExpArray (data: Record<string, unknown>, f: string, mode:
// ignore empty values
continue
}
// value must be a valid RE2 regexp
// value must be a valid regexp
try {
async function _validate (v: string): Promise<void> {
// Before livechat v13, the bot was using RegExp.
// Now it is using RE2, to avoid ReDOS attacks.
// RE2 does not accept all regular expressions.
// So, here come the question about settings saved before...
// So we introduce the "mode" parameter.
// When reading from disk, we want to be more permissive.
// When validating frontend data, we want to be more restrictive.
// Note: the bot will simply ignore any invalid RE2 expression, and generate an error log on loading.
if (mode === 'read') {
// eslint-disable-next-line no-new
new RegExp(v)
} else {
// eslint-disable-next-line no-new, new-cap
new RE2.default(v)
}
// eslint-disable-next-line no-new
new RegExp(v)
}
await _validate(v)
} catch (err: any) {
@ -253,9 +236,11 @@ async function _readRegExpArray (data: Record<string, unknown>, f: string, mode:
}
async function _readForbiddenWords (
botData: Record<string, unknown>,
mode: SanitizeMode
options: RegisterServerOptions,
botData: Record<string, unknown>
): Promise<ChannelConfigurationOptions['bot']['forbiddenWords']> {
const enableUsersRegexp = (await options.settingsManager.getSetting('enable-users-regexp')) === true
if (!Array.isArray(botData.forbiddenWords)) {
throw new Error('Invalid forbiddenWords data')
}
@ -265,9 +250,10 @@ async function _readForbiddenWords (
throw new Error('Invalid entry in botData.forbiddenWords')
}
const regexp = !!fw.regexp
let entries
if (regexp) {
entries = await _readRegExpArray(fw, 'entries', mode)
entries = await _readRegExpArray(fw, 'entries')
} else {
entries = _readStringArray(fw, 'entries')
}
@ -276,7 +262,17 @@ async function _readForbiddenWords (
const reason = fw.reason ? _readSimpleInput(fw, 'reason') : undefined
const comments = fw.comments ? _readMultiLineString(fw, 'comments') : undefined
// Enabled was introduced in v14. So we must set to true if not present.
let enabled = !('enabled' in fw) ? true : _readBoolean(fw, 'enabled')
if (enabled && regexp && !enableUsersRegexp) {
// here we don't fail, we just change the value.
// This is usefull when the settings changes:
// RoomChannel.singleton().rebuildData() will automatically update data.
enabled = false
}
result.push({
enabled,
regexp,
entries,
applyToModerators,

View File

@ -38,7 +38,7 @@ async function getChannelConfigurationOptions (
const content = await fs.promises.readFile(filePath, {
encoding: 'utf-8'
})
const sanitized = await sanitizeChannelConfigurationOptions(options, channelId, JSON.parse(content), 'read')
const sanitized = await sanitizeChannelConfigurationOptions(options, channelId, JSON.parse(content))
return sanitized
}
@ -188,7 +188,7 @@ function _getForbiddenWordsHandler (
return handler
}
handler.enabled = true
handler.enabled = forbiddenWords.enabled
const rule: any = {
name: id
}
@ -262,7 +262,6 @@ function _getForbidSpecialCharsHandler (
name: id,
regexp,
modifiers: 'us',
regexp_engine: 'regexp', // FIXME: node-re2 is not compatible with \p{Emoji} and co, so we ensure to use RegExp here
reason: forbidSpecialChars.reason
}
handler.options.rules.push(rule)

View File

@ -3,101 +3,20 @@
// SPDX-License-Identifier: AGPL-3.0-only
import type { RegisterServerOptions } from '@peertube/peertube-types'
import * as path from 'path'
import * as fs from 'fs'
/**
* Livechat v13.0.0: now using xmppjs-chat-bot 0.6.0, which replaced RegExp by RE2.
* we must change the forbidspecialchar regexp configuration, to be compatible.
* Livechat v14.0.0: we removed RE2 because of some incompatibility issues.
* So this update is no longer necessary.
* We won't do any update script to remove the `regexp_engine` attribute we added,
* the bot will just ignore it. But we keep this function, so that dev can understand
* the history, and understand why some files have the `regexp_engine` attribute.
*
* This script will only be launched one time.
*/
async function updateForbidSpecialCharsHandler (options: RegisterServerOptions): Promise<void> {
const logger = options.peertubeHelpers.logger
// First, detect if we already run this script.
const doneFilePath = path.resolve(options.peertubeHelpers.plugin.getDataDirectoryPath(), 'fix-v13-forbidspecialchars')
if (fs.existsSync(doneFilePath)) {
logger.debug('[migratev13_ForbidSpecialChars] Special Chars Regex already updated.')
return
}
logger.info('[migratev13_ForbidSpecialChars] Updating Special Chars Regex')
const confDir = path.resolve(
options.peertubeHelpers.plugin.getDataDirectoryPath(),
'bot',
)
// In this directory, we should find a subdir named as the mucDomain.
// To be sure to migrate everything, including in case of instance name change,
// we will loop on this dir content.
let directories: fs.Dirent[]
try {
directories = await fs.promises.readdir(confDir, { withFileTypes: true })
} catch (_err) {
logger.info('[migratev13_ForbidSpecialChars] can\'t read config dir, probably a fresh install.')
directories = []
}
for (const dirent of directories) {
if (!dirent.isDirectory()) { continue }
const dir = path.resolve(confDir, dirent.name, 'rooms')
logger.debug('[migratev13_ForbidSpecialChars] Checking directory ' + dir)
let files: string[]
try {
files = await fs.promises.readdir(dir)
} catch (_err) {
logger.info('[migratev13_ForbidSpecialChars] can\'t read dir ' + dir)
files = []
}
logger.debug('[migratev13_ForbidSpecialChars] Found ' + files.length.toString() + ' files.')
for (const file of files) {
if (!file.endsWith('.json')) { continue }
const filePath = path.join(dir, file)
try {
logger.debug('[migratev13_ForbidSpecialChars] check file ' + filePath)
const content = (await fs.promises.readFile(filePath, {
encoding: 'utf-8'
})).toString()
const config = JSON.parse(content)
const handlers = config?.handlers ?? []
let modified = false
for (const handler of handlers) {
if (handler?.type === 'moderate' && handler?.id === 'forbid_special_chars') {
for (const r of handler.options?.rules ?? []) {
if (r.name === 'forbid_special_chars') {
if (r.regexp_engine !== 'regexp') {
r.regexp_engine = 'regexp'
modified = true
}
}
}
}
}
if (modified) {
logger.info('[migratev13_ForbidSpecialChars] Must fix file ' + filePath)
await fs.promises.writeFile(filePath, JSON.stringify(config), {
encoding: 'utf-8'
})
}
} catch (err) {
logger.error(
'[migratev13_ForbidSpecialChars] Failed to fix file ' +
filePath + ', skipping. Error: ' + (err as string)
)
continue
}
}
}
await fs.promises.writeFile(doneFilePath, '')
async function updateForbidSpecialCharsHandler (_options: RegisterServerOptions): Promise<void> {
// deprecated (see comments)
}
export {

View File

@ -0,0 +1,24 @@
// SPDX-FileCopyrightText: 2024-2025 John Livingston <https://www.john-livingston.fr/>
//
// SPDX-License-Identifier: AGPL-3.0-only
import type { RegisterServerOptions } from '@peertube/peertube-types'
import * as path from 'path'
import * as fs from 'fs'
/**
 * Tells the caller whether the v14 migration still has to be done.
 *
 * A marker file named `fix-v14-regexp`, located in the plugin data directory,
 * records that this check already returned true once:
 * - if the marker exists, a debug line is logged and the function resolves to false;
 * - otherwise the (empty) marker is created and the function resolves to true.
 *
 * Note that the marker is written before returning, so before the caller
 * has performed any work based on the result.
 *
 * @param options server options; only `peertubeHelpers.logger` and
 *   `peertubeHelpers.plugin.getDataDirectoryPath()` are used here.
 * @returns true the first time (marker was missing), false afterwards.
 */
async function mustMigrateV14 (options: RegisterServerOptions): Promise<boolean> {
  const { logger, plugin } = options.peertubeHelpers
  const markerPath = path.resolve(plugin.getDataDirectoryPath(), 'fix-v14-regexp')

  const alreadyMigrated = fs.existsSync(markerPath)
  if (!alreadyMigrated) {
    // First call: leave the marker behind so that next calls answer false.
    await fs.promises.writeFile(markerPath, '')
    return true
  }

  logger.debug('[migratev14] Already migrated.')
  return false
}
export {
mustMigrateV14
}

View File

@ -110,7 +110,7 @@ async function initConfigurationApiRouter (options: RegisterServerOptions, route
// req.body.bot.forbidSpecialChars.enabled = false
// ... NoDuplicate...
// }
channelOptions = await sanitizeChannelConfigurationOptions(options, channelInfos.id, req.body, 'validation')
channelOptions = await sanitizeChannelConfigurationOptions(options, channelInfos.id, req.body)
} catch (err: any) {
logger.warn(err.message as string)
if (err.validationErrorMessage && (typeof err.validationErrorMessage === 'string')) {

View File

@ -60,7 +60,9 @@ async function initSettings (options: RegisterServerOptions): Promise<void> {
}
loadOidcs() // we don't have to wait (can take time, it will do external http requests)
let currentProsodyRoomtype = (await settingsManager.getSettings(['prosody-room-type']))['prosody-room-type']
const tmpSettings = await settingsManager.getSettings(['prosody-room-type', 'enable-users-regexp'])
let currentProsodyRoomtype = tmpSettings['prosody-room-type']
let currentUsersRegexp = tmpSettings['enable-users-regexp']
// ********** settings changes management
settingsManager.onSettingsChange(async (settings: any) => {
@ -84,8 +86,12 @@ async function initSettings (options: RegisterServerOptions): Promise<void> {
await BotsCtl.singleton().start()
// In case prosody-room-type changed, we must rebuild room-channel links.
if (settings['prosody-room-type'] !== currentProsodyRoomtype) {
peertubeHelpers.logger.info('Setting prosody-room-type has changed value, must rebuild room-channel infos')
// In case enable-users-regexp becomes false, we must rebuild to make sure regexp lines are disabled
if (
settings['prosody-room-type'] !== currentProsodyRoomtype ||
(currentUsersRegexp && !settings['enable-users-regexp'])
) {
peertubeHelpers.logger.info('Settings changed, must rebuild room-channel infos')
// doing it without waiting, could be long!
RoomChannel.singleton().rebuildData().then(
() => peertubeHelpers.logger.info('Room-channel info rebuild ok.'),
@ -93,6 +99,7 @@ async function initSettings (options: RegisterServerOptions): Promise<void> {
)
}
currentProsodyRoomtype = settings['prosody-room-type']
currentUsersRegexp = settings['enable-users-regexp']
})
}
@ -363,11 +370,6 @@ function initAdvancedChannelCustomizationSettings ({ registerSetting }: Register
private: true,
descriptionHTML: loc('configuration_description')
})
registerSetting({
type: 'html',
private: true,
descriptionHTML: loc('experimental_warning')
})
registerSetting({
name: 'disable-channel-configuration',
label: loc('disable_channel_configuration_label'),
@ -376,6 +378,19 @@ function initAdvancedChannelCustomizationSettings ({ registerSetting }: Register
default: false,
private: false
})
registerSetting({
// For now (v14), this settings is used to enable/disable regexp for forbidden words.
// This settings is basically here to say if you trust your users or not concerning regexp
// (because there is a risk of ReDOS on the chatbot).
// This settings could be used for other purpose later on (if we implement regexp anywhere else).
// So we use a pretty standard name, `enable-users-regexp`, that could apply for other uses.
name: 'enable-users-regexp',
label: loc('enable_users_regexp'),
descriptionHTML: loc('enable_users_regexp_description'),
type: 'input-checkbox',
default: false,
private: false
})
}
/**

View File

@ -20,6 +20,7 @@ import { ExternalAuthOIDC } from './lib/external-auth/oidc'
import { migrateMUCAffiliations } from './lib/prosody/migration/migrateV10'
import { updateProsodyChannelEmojisRegex } from './lib/prosody/migration/migrateV12'
import { updateForbidSpecialCharsHandler } from './lib/prosody/migration/migrateV13'
import { mustMigrateV14 } from './lib/prosody/migration/migratev14'
import { Emojis } from './lib/emojis'
import { LivechatProsodyAuth } from './lib/prosody/auth'
import decache from 'decache'
@ -52,8 +53,10 @@ async function register (options: RegisterServerOptions): Promise<any> {
await BotConfiguration.initSingleton(options)
// Then load the RoomChannel singleton
const roomChannelSingleton = await RoomChannel.initSingleton(options)
// roomChannelNeedsDataInit: if true, means that the data file does not exist (or is invalid), so we must initiate it
const roomChannelNeedsDataInit = !await roomChannelSingleton.readData()
// roomChannelNeedsDataRebuild: if true, means that we must rebuild the data file
// (for example because it does not exist, or is invalid, so we must initiate it)
let roomChannelNeedsDataRebuild = !await roomChannelSingleton.readData()
if (await mustMigrateV14(options)) { roomChannelNeedsDataRebuild = true }
// BotsCtl.initSingleton() will force reload the bots conf files, so must be done before generating Prosody Conf.
await BotsCtl.initSingleton(options)
@ -76,8 +79,8 @@ async function register (options: RegisterServerOptions): Promise<any> {
await ensureProsodyRunning(options)
let preBotPromise: Promise<void>
if (roomChannelNeedsDataInit) {
logger.info('The RoomChannel singleton has not found any data, we must rebuild')
if (roomChannelNeedsDataRebuild) {
logger.info('The RoomChannel singleton must rebuild data')
// no need to wait here, can be done without await.
preBotPromise = roomChannelSingleton.rebuildData().then(
() => { logger.info('RoomChannel singleton rebuild done') },