More accurate nsfw domain check

This commit is contained in:
Dragon Fire
2021-04-03 19:50:05 -04:00
parent 07db7d5325
commit b6bf3e4bfa
4 changed files with 33 additions and 18 deletions
+4 -3
View File
@@ -40,10 +40,11 @@ luck on this one.
14. Download [the NSFW model](https://github.com/gantman/nsfw_model) and extract the contents to `<xiao folder>/tf_models/nsfw`.
15. Download DECTalk and extract it to `<xiao folder>/dectalk`. You will have to find this yourself. You need the files `say.exe`, `dectalk.dll`, and `dtalk_us.dic`.
16. Run `apt install wine` to install wine.
17. Run `dpkg --add-architecture i386` to allow installation of `wine32`.
17. Run `dpkg --add-architecture i386` to allow installation of wine32.
18. Run `apt update` again.
19. Run `apt install wine32` to install wine32.
20. Run `apt install xvfb` to install xvfb.
21. Run `npm i --production` in the folder where you cloned the bot.
22. Run `npm i -g pm2` to install PM2.
23. Run `pm2 start Xiao.js --name xiao` to run the bot.
22. Run `npx parse-domain-update` to update the domain list for `parse-domain`.
23. Run `npm i -g pm2` to install PM2.
24. Run `pm2 start Xiao.js --name xiao` to run the bot.
+3 -13
View File
@@ -1,7 +1,7 @@
const Command = require('../../structures/Command');
const request = require('node-superfetch');
const { URL } = require('url');
const { isImageNSFW } = require('../../util/Util');
const { isImageNSFW, isUrlNSFW } = require('../../util/Util');
module.exports = class ScreenshotCommand extends Command {
constructor(client) {
@@ -36,18 +36,8 @@ module.exports = class ScreenshotCommand extends Command {
async run(msg, { url }) {
try {
if (!msg.channel.nsfw) {
let nsfw;
if (this.client.adultSiteList.includes(url.host)) {
nsfw = true;
} else {
try {
const { url: newURL } = await request.get(url);
const parsedNewURL = new URL(newURL);
if (this.client.adultSiteList.includes(parsedNewURL.host)) nsfw = true;
} catch {
return msg.reply('This site did not respond, or sent an error.');
}
}
const nsfw = await isUrlNSFW(url.href);
if (nsfw === null) return msg.reply('This site did not respond, or sent an error.');
if (nsfw) return msg.reply('This site is NSFW.');
}
const { body } = await request.get(`https://image.thum.io/get/width/1920/crop/675/noanimate/${url.href}`);
+2 -1
View File
@@ -1,6 +1,6 @@
{
"name": "xiao",
"version": "134.6.1",
"version": "134.6.2",
"description": "Your personal server companion.",
"main": "Xiao.js",
"private": true,
@@ -70,6 +70,7 @@
"moment-timezone": "^0.5.33",
"node-superfetch": "^0.1.11",
"nsfwjs": "^2.3.0",
"parse-domain": "^3.0.3",
"pokersolver": "^2.1.4",
"random-js": "^2.1.0",
"rss-parser": "^3.12.0",
+24 -1
View File
@@ -1,5 +1,7 @@
const crypto = require('crypto');
const request = require('node-superfetch');
const tf = require('@tensorflow/tfjs-node');
const { parseDomain, ParseResultType } = require('parse-domain');
const { decode: decodeHTML } = require('html-entities');
const { stripIndents } = require('common-tags');
const { URL } = require('url');
@@ -206,12 +208,33 @@ module.exports = class Util {
return str.replace(/(https?:\/\/\S+)/g, '<$1>');
}
static async isUrlNSFW(uri, siteList) {
const parsed = new URL(uri);
const { type, domain, topLevelDomains } = parseDomain(parsed.hostname);
if (type !== ParseResultType.Listed) return null;
if (siteList.includes(`${domain}.${topLevelDomains.join('.')}`)) return true;
let redirectURL;
try {
const { url: redirected } = await request.get(uri);
redirectURL = redirected;
} catch {
return null;
}
const parsedRedirect = new URL(redirectURL);
const { type: reType, domain: reDomain, topLevelDomains: reTop } = parseDomain(parsedRedirect.hostname);
if (reType !== ParseResultType.Listsed) return null;
if (siteList.includes(`${reDomain}.${reTop.join('.')}`)) return true;
return false;
}
static stripNSFWURLs(str, siteList, text = '[redacted nsfw url]') {
const uris = str.match(/(https?:\/\/\S+)/g);
if (!uris) return str;
for (const uri of uris) {
const parsed = new URL(uri);
if (!siteList.includes(parsed.host)) continue;
const { type, domain, topLevelDomains } = parseDomain(parsed.hostname);
if (type !== ParseResultType.Listed) continue;
if (!siteList.includes(`${domain}.${topLevelDomains.join('.')}`)) continue;
str = str.replace(uri, text);
}
return str;