From b6bf3e4bfa48d94f13e046fd7b12f27c521b41d4 Mon Sep 17 00:00:00 2001 From: Dragon Fire Date: Sat, 3 Apr 2021 19:50:05 -0400 Subject: [PATCH] More accurate nsfw domain check --- README.md | 7 ++++--- commands/other/screenshot.js | 16 +++------------- package.json | 3 ++- util/Util.js | 25 ++++++++++++++++++++++++- 4 files changed, 33 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 548b2fa3..ffcf90f7 100644 --- a/README.md +++ b/README.md @@ -40,10 +40,11 @@ luck on this one. 14. Download [the NSFW model](https://github.com/gantman/nsfw_model) and extract the contents to `/tf_models/nsfw`. 15. Download DECTalk and extract it to `/dectalk`. You will have to find this yourself. You need the files `say.exe`, `dectalk.dll`, and `dtalk_us.dic`. 16. Run `apt install wine` to install wine. -17. Run `dpkg --add-architecture i386` to allow installation of `wine32`. +17. Run `dpkg --add-architecture i386` to allow installation of wine32. 18. Run `apt update` again. 19. Run `apt install wine32` to install wine32. 20. Run `apt install xvfb` to install xvfb. 21. Run `npm i --production` in the folder you cloned the bot. -22. Run `npm i -g pm2` to install PM2. -23. Run `pm2 start Xiao.js --name xiao` to run the bot. +22. Run `npx parse-domain-update` to update the domain list for `parse-domain`. +23. Run `npm i -g pm2` to install PM2. +24. Run `pm2 start Xiao.js --name xiao` to run the bot. 
diff --git a/commands/other/screenshot.js b/commands/other/screenshot.js index 21670c3c..4893b6e1 100644 --- a/commands/other/screenshot.js +++ b/commands/other/screenshot.js @@ -1,7 +1,7 @@ const Command = require('../../structures/Command'); const request = require('node-superfetch'); const { URL } = require('url'); -const { isImageNSFW } = require('../../util/Util'); +const { isImageNSFW, isUrlNSFW } = require('../../util/Util'); module.exports = class ScreenshotCommand extends Command { constructor(client) { @@ -36,18 +36,8 @@ module.exports = class ScreenshotCommand extends Command { async run(msg, { url }) { try { if (!msg.channel.nsfw) { - let nsfw; - if (this.client.adultSiteList.includes(url.host)) { - nsfw = true; - } else { - try { - const { url: newURL } = await request.get(url); - const parsedNewURL = new URL(newURL); - if (this.client.adultSiteList.includes(parsedNewURL.host)) nsfw = true; - } catch { - return msg.reply('This site did not respond, or sent an error.'); - } - } + const nsfw = await isUrlNSFW(url.href, this.client.adultSiteList); + if (nsfw === null) return msg.reply('This site did not respond, or sent an error.'); if (nsfw) return msg.reply('This site is NSFW.'); } const { body } = await request.get(`https://image.thum.io/get/width/1920/crop/675/noanimate/${url.href}`); diff --git a/package.json b/package.json index f557937b..a24df304 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "xiao", - "version": "134.6.1", + "version": "134.6.2", "description": "Your personal server companion.", "main": "Xiao.js", "private": true, @@ -70,6 +70,7 @@ "moment-timezone": "^0.5.33", "node-superfetch": "^0.1.11", "nsfwjs": "^2.3.0", + "parse-domain": "^3.0.3", "pokersolver": "^2.1.4", "random-js": "^2.1.0", "rss-parser": "^3.12.0", diff --git a/util/Util.js b/util/Util.js index e3b254fd..254c8048 100644 --- a/util/Util.js +++ b/util/Util.js @@ -1,5 +1,7 @@ const crypto = require('crypto'); +const request = require('node-superfetch'); const tf = 
require('@tensorflow/tfjs-node'); +const { parseDomain, ParseResultType } = require('parse-domain'); const { decode: decodeHTML } = require('html-entities'); const { stripIndents } = require('common-tags'); const { URL } = require('url'); @@ -206,12 +208,33 @@ module.exports = class Util { return str.replace(/(https?:\/\/\S+)/g, '<$1>'); } + static async isUrlNSFW(uri, siteList) { + const parsed = new URL(uri); + const { type, domain, topLevelDomains } = parseDomain(parsed.hostname); + if (type !== ParseResultType.Listed) return null; + if (siteList.includes(`${domain}.${topLevelDomains.join('.')}`)) return true; + let redirectURL; + try { + const { url: redirected } = await request.get(uri); + redirectURL = redirected; + } catch { + return null; + } + const parsedRedirect = new URL(redirectURL); + const { type: reType, domain: reDomain, topLevelDomains: reTop } = parseDomain(parsedRedirect.hostname); + if (reType !== ParseResultType.Listed) return null; + if (siteList.includes(`${reDomain}.${reTop.join('.')}`)) return true; + return false; + } + static stripNSFWURLs(str, siteList, text = '[redacted nsfw url]') { const uris = str.match(/(https?:\/\/\S+)/g); if (!uris) return str; for (const uri of uris) { const parsed = new URL(uri); - if (!siteList.includes(parsed.host)) continue; + const { type, domain, topLevelDomains } = parseDomain(parsed.hostname); + if (type !== ParseResultType.Listed) continue; + if (!siteList.includes(`${domain}.${topLevelDomains.join('.')}`)) continue; str = str.replace(uri, text); } return str;