More accurate nsfw domain check

2026-06-03 23:36:43 +02:00 · 2021-04-03 19:50:05 -04:00
parent 07db7d5325
commit b6bf3e4bfa
4 changed files with 33 additions and 18 deletions
@@ -40,10 +40,11 @@ luck on this one.
 14. Download [the NSFW model](https://github.com/gantman/nsfw_model) and extract the contents to `<xiao folder>/tf_models/nsfw`.
 15. Download DECTalk and extract it to `<xiao folder>/dectalk`. You will have to find this yourself. You need the files `say.exe`, `dectalk.dll`, and `dtalk_us.dic`.
 16. Run `apt install wine` to install wine.
-17. Run `dpkg --add-architecture i386` to allow installation of `wine32`.
+17. Run `dpkg --add-architecture i386` to allow installation of wine32.
 18. Run `apt update` again.
 19. Run `apt install wine32` to install wine32.
 20. Run `apt install xvfb` to install xvfb.
 21. Run `npm i --production` in the folder you cloned the bot.
-22. Run `npm i -g pm2` to install PM2.
-23. Run `pm2 start Xiao.js --name xiao` to run the bot.
+22. Run `npx parse-domain-update` to update the domain list for `parse-domain`.
+23. Run `npm i -g pm2` to install PM2.
+24. Run `pm2 start Xiao.js --name xiao` to run the bot.
@@ -1,7 +1,7 @@
 const Command = require('../../structures/Command');
 const request = require('node-superfetch');
 const { URL } = require('url');
-const { isImageNSFW } = require('../../util/Util');
+const { isImageNSFW, isUrlNSFW } = require('../../util/Util');

 module.exports = class ScreenshotCommand extends Command {
 	constructor(client) {
@@ -36,18 +36,8 @@ module.exports = class ScreenshotCommand extends Command {
 	async run(msg, { url }) {
 		try {
 			if (!msg.channel.nsfw) {
-				let nsfw;
-				if (this.client.adultSiteList.includes(url.host)) {
-					nsfw = true;
-				} else {
-					try {
-						const { url: newURL } = await request.get(url);
-						const parsedNewURL = new URL(newURL);
-						if (this.client.adultSiteList.includes(parsedNewURL.host)) nsfw = true;
-					} catch {
-						return msg.reply('This site did not respond, or sent an error.');
-					}
-				}
+				const nsfw = await isUrlNSFW(url.href);
+				if (nsfw === null) return msg.reply('This site did not respond, or sent an error.');
 				if (nsfw) return msg.reply('This site is NSFW.');
 			}
 			const { body } = await request.get(`https://image.thum.io/get/width/1920/crop/675/noanimate/${url.href}`);
@@ -1,6 +1,6 @@
 {
 	"name": "xiao",
-	"version": "134.6.1",
+	"version": "134.6.2",
 	"description": "Your personal server companion.",
 	"main": "Xiao.js",
 	"private": true,
@@ -70,6 +70,7 @@
 		"moment-timezone": "^0.5.33",
 		"node-superfetch": "^0.1.11",
 		"nsfwjs": "^2.3.0",
+		"parse-domain": "^3.0.3",
 		"pokersolver": "^2.1.4",
 		"random-js": "^2.1.0",
 		"rss-parser": "^3.12.0",
@@ -1,5 +1,7 @@
 const crypto = require('crypto');
+const request = require('node-superfetch');
 const tf = require('@tensorflow/tfjs-node');
+const { parseDomain, ParseResultType } = require('parse-domain');
 const { decode: decodeHTML } = require('html-entities');
 const { stripIndents } = require('common-tags');
 const { URL } = require('url');
@@ -206,12 +208,33 @@ module.exports = class Util {
 		return str.replace(/(https?:\/\/\S+)/g, '<$1>');
 	}

+	static async isUrlNSFW(uri, siteList) {
+		const parsed = new URL(uri);
+		const { type, domain, topLevelDomains } = parseDomain(parsed.hostname);
+		if (type !== ParseResultType.Listed) return null;
+		if (siteList.includes(`${domain}.${topLevelDomains.join('.')}`)) return true;
+		let redirectURL;
+		try {
+			const { url: redirected } = await request.get(uri);
+			redirectURL = redirected;
+		} catch {
+			return null;
+		}
+		const parsedRedirect = new URL(redirectURL);
+		const { type: reType, domain: reDomain, topLevelDomains: reTop } = parseDomain(parsedRedirect.hostname);
+		if (reType !== ParseResultType.Listsed) return null;
+		if (siteList.includes(`${reDomain}.${reTop.join('.')}`)) return true;
+		return false;
+	}
+
 	static stripNSFWURLs(str, siteList, text = '[redacted nsfw url]') {
 		const uris = str.match(/(https?:\/\/\S+)/g);
 		if (!uris) return str;
 		for (const uri of uris) {
 			const parsed = new URL(uri);
-			if (!siteList.includes(parsed.host)) continue;
+			const { type, domain, topLevelDomains } = parseDomain(parsed.hostname);
+			if (type !== ParseResultType.Listed) continue;
+			if (!siteList.includes(`${domain}.${topLevelDomains.join('.')}`)) continue;
 			str = str.replace(uri, text);
 		}
 		return str;