import-wikimedia.mjs script
This commit is contained in:
parent
6705cd9e3d
commit
d7d7b9e1df
1
package-lock.json
generated
1
package-lock.json
generated
@ -30,6 +30,7 @@
|
|||||||
"@types/winston": "^2.4.4",
|
"@types/winston": "^2.4.4",
|
||||||
"@typescript-eslint/eslint-plugin": "^4.29.0",
|
"@typescript-eslint/eslint-plugin": "^4.29.0",
|
||||||
"@typescript-eslint/parser": "^4.29.0",
|
"@typescript-eslint/parser": "^4.29.0",
|
||||||
|
"commander": "^11.0.0",
|
||||||
"esbuild": "^0.16.1",
|
"esbuild": "^0.16.1",
|
||||||
"eslint": "^7.32.0",
|
"eslint": "^7.32.0",
|
||||||
"eslint-config-standard": "^16.0.3",
|
"eslint-config-standard": "^16.0.3",
|
||||||
|
@ -54,6 +54,7 @@
|
|||||||
"@types/winston": "^2.4.4",
|
"@types/winston": "^2.4.4",
|
||||||
"@typescript-eslint/eslint-plugin": "^4.29.0",
|
"@typescript-eslint/eslint-plugin": "^4.29.0",
|
||||||
"@typescript-eslint/parser": "^4.29.0",
|
"@typescript-eslint/parser": "^4.29.0",
|
||||||
|
"commander": "^11.0.0",
|
||||||
"esbuild": "^0.16.1",
|
"esbuild": "^0.16.1",
|
||||||
"eslint": "^7.32.0",
|
"eslint": "^7.32.0",
|
||||||
"eslint-config-standard": "^16.0.3",
|
"eslint-config-standard": "^16.0.3",
|
||||||
|
@ -5,6 +5,12 @@ This page lists some common options you can use to configure the bot forbidden w
|
|||||||
Note: this page is still "Work In Progress". Feel free to contribute, by suggesting new entries on the
|
Note: this page is still "Work In Progress". Feel free to contribute, by suggesting new entries on the
|
||||||
[livechat plugin github page](https://github.com/JohnXLivingston/peertube-plugin-livechat).
|
[livechat plugin github page](https://github.com/JohnXLivingston/peertube-plugin-livechat).
|
||||||
|
|
||||||
|
## Importing rules from wiktionary
|
||||||
|
|
||||||
|
There is a [wiktionary import script](./import-wikimedia.md) that can list all page titles in a wiktionary category.
|
||||||
|
This script was used to generate some of the forbidden-words lists that are present in this repository.
|
||||||
|
In such cases, the command that was used is provided next to the word list.
|
||||||
|
|
||||||
## URLs
|
## URLs
|
||||||
|
|
||||||
If you want to prevent users from sending URLs in the chat, you can use the following values for the bot configuration.
|
If you want to prevent users from sending URLs in the chat, you can use the following values for the bot configuration.
|
||||||
|
27
support/forbidden_words/import-wikimedia.md
Normal file
27
support/forbidden_words/import-wikimedia.md
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
# import-wikimedia.mjs
|
||||||
|
|
||||||
|
The [import-wikimedia.mjs](./import-wikimedia.mjs) script can be used to generate some word lists.
|
||||||
|
|
||||||
|
It can parse some categories from the [wiktionary](https://www.wiktionary.org), and return the result as a flat text list.
|
||||||
|
This list can be pasted in the "forbidden words" field (do not forget to uncheck "consider as a regular expression").
|
||||||
|
|
||||||
|
## Pre-requisite
|
||||||
|
|
||||||
|
To use it, you have to install NodeJS (version >= 18, as the script relies on the built-in `fetch` API) on your computer.
|
||||||
|
|
||||||
|
You also need the `commander` package. To get it, you have two choices:
|
||||||
|
|
||||||
|
* you can install the plugin dev dependencies (`npm install` if you are not on the production server)
|
||||||
|
* or `npm install -g commander`
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
To run the script:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# commands to run from the livechat plugin directory.
|
||||||
|
# to get the script help:
|
||||||
|
node ./support/forbidden_words/import-wikimedia.mjs --help
|
||||||
|
# to get a category content, and save it to /tmp/result.txt:
|
||||||
|
node ./support/forbidden_words/import-wikimedia.mjs category --lang fr --service wiktionary --category 'Insultes_en_français' > /tmp/result.txt
|
||||||
|
```
|
64
support/forbidden_words/import-wikimedia.mjs
Normal file
64
support/forbidden_words/import-wikimedia.mjs
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
import { Command } from 'commander'
|
||||||
|
|
||||||
|
// Root CLI object; sub-commands are attached to it below.
const program = new Command()
program.usage('[command] [options]')
// On a usage error, print the help text after the error message.
program.showHelpAfterError()

// The "category" sub-command. Its action handler is attached separately,
// so the command object is kept in `runCommand`.
const runCommand = program
  .command('category')
  .description('Loads data from wikimedia services (wiktionary, ...), and prints it on the standard output.')
  // Mandatory: name of the category whose members must be listed.
  .requiredOption('-c, --category <category>', 'The category to request.')
  // Optional: which wikimedia service to query (third argument is the default value).
  .option(
    '-s, --service <service>',
    'The service to query. Possible values: "wiktionary", "wikipedia". Default: "wiktionary".',
    'wiktionary'
  )
  // Optional: language sub-domain of the service (third argument is the default value).
  .option(
    '-l, --lang <lang>',
    'The code lang for the requested service. For example "en", "fr", ... (as it appears in the wikimedia site url). Default: "en"',
    'en'
  )
|
||||||
|
/**
 * Action of the "category" sub-command.
 *
 * Queries the `categorymembers` list of the MediaWiki API on
 * `https://<lang>.<service>.org`, follows the continuation data returned by
 * the API until no more pages are announced, and prints each member title on
 * its own line on the standard output. Titles containing ":" are skipped.
 *
 * @param {object} options - parsed command line options.
 * @param {string} options.category - category name (appended to "Category:").
 * @param {string} [options.service] - "wiktionary" or "wikipedia".
 * @param {string} options.lang - language code used as sub-domain.
 * @throws {Error} on invalid service/lang, on a non-2xx HTTP response, when
 *   the API answers with an `error` member, or when the response has no
 *   `query.categorymembers` array.
 */
runCommand.action(async (options) => {
  const service = options.service ?? 'wiktionary'
  if (service !== 'wiktionary' && service !== 'wikipedia') {
    throw new Error('Invalid service ' + service)
  }
  const lang = options.lang
  // The lang is interpolated in the host name: only accept letters and dashes.
  if (!/^[a-z-]+$/.test(lang)) {
    throw new Error('Invalid lang ' + lang)
  }
  const category = options.category

  const url = new URL(`https://${lang}.${service}.org/w/api.php?action=query&format=json&formatversion=2&list=categorymembers&cmlimit=200`)
  url.searchParams.append('cmtitle', 'Category:' + category)

  let hasMore = true
  while (hasMore) {
    const r = await fetch(url.toString())
    if (!r.ok) {
      throw new Error(`Failed requesting ${url} (HTTP ${r.status} ${r.statusText})`)
    }
    const json = await r.json()
    if (!json) {
      throw new Error('Invalid JSON content')
    }
    // The MediaWiki API can answer with HTTP 200 and an `error` member
    // (for example for an invalid title): report it instead of crashing
    // later on the missing `query` member.
    if (json.error) {
      const reason = json.error.info ?? json.error.code ?? JSON.stringify(json.error)
      throw new Error(`API error while requesting ${url}: ${reason}`)
    }
    const list = json.query?.categorymembers
    if (!Array.isArray(list)) {
      throw new Error('Invalid JSON content: missing query.categorymembers')
    }

    for (const entry of list) {
      // Note: at the end, there might be some links to other categories.
      // These links have title like "Category:xxx", or "Catégorie:xxx".
      // We don't want to test every translation of "Category"...
      // So we just ignore titles with ":"
      if (entry.title.includes(':')) { continue }
      console.log(entry.title)
    }

    // When there are more results, the API returns a `continue` object whose
    // entries must be sent back as query parameters to get the next page.
    hasMore = Boolean(json.continue)
    if (hasMore) {
      for (const [key, value] of Object.entries(json.continue)) {
        url.searchParams.set(key, value)
      }
    }
  }
})
|
||||||
|
|
||||||
|
// The "category" action handler is async: commander requires parseAsync in
// that case, so that the returned promise is awaited and any error thrown by
// the action propagates from here (top-level await is available in .mjs files).
await program.parseAsync(process.argv)
|
Loading…
x
Reference in New Issue
Block a user