This commit is contained in:
Tutur33
2023-11-24 22:35:41 +01:00
parent 3c0b507a93
commit 7644b2a0f7
45165 changed files with 4803356 additions and 3 deletions
+22
View File
@@ -0,0 +1,22 @@
# Auto detect text files and perform LF normalization
* text=auto
# Custom for Visual Studio
*.cs diff=csharp
*.sln merge=union
*.csproj merge=union
*.vbproj merge=union
*.fsproj merge=union
*.dbproj merge=union
# Standard to msysgit
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain
+21
View File
@@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2013 Marcus Noble
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
+40
View File
@@ -0,0 +1,40 @@
# Truncatise
## JavaScript module to truncate HTML strings.
### Description
Provides the ability to truncate HTML strings down to excerpts based on character length, word length or paragraph length.
**Features:**
* Truncate based on number of Characters, Words or Paragraphs.
* Strip HTML from returned string.
* Provides a strict flag to specify whether the text may be cut off mid-word.
* Configurable suffix appended to the end of the returned excerpt.
### Install
npm install truncatise
### Options (with default values)
```javascript
{
TruncateBy: 'words', // Options are 'words', 'characters' or 'paragraphs'
TruncateLength: 50, // The count to be used with TruncateBy
StripHTML: false, // Whether or not the truncated text should contain HTML tags
Strict: true, // If set to false, the truncated text finishes at the end of a word instead of cutting mid-word
Suffix: '...' // Text to be appended to the end of the truncated text
}
```
### Example Usage
```javascript
var options = {
TruncateLength: 4,
TruncateBy : "words",
Strict : false,
StripHTML : true,
Suffix : ' (Read More)'
};
var excerpt = truncatise("<p>This is a test of Truncatise</p>", options);
console.log(excerpt); // This is a test (Read More)
```
+188
View File
@@ -0,0 +1,188 @@
(function(exportTo) {
    "use strict";

    // HTML void elements: they never get a closing tag, so they must not be
    // tracked on the stack of currently open tags.
    var selfClosingTags = ["area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "menuitem", "meta", "param", "source", "track", "wbr"];

    // Scanner states used while walking the text one character at a time.
    var NOT_TAG = 0,        // inside regular text
        TAG_START = 1,      // inside "<...", still reading the tag name
        TAG_ATTRIBUTES = 2; // inside a tag, past the first space after its name

    // Characters considered part of a word when Strict is false.
    var WORD_CHARACTER = /[a-zA-ZÇ-Ü']/i;

    /**
     * Returns `value` when it has the expected primitive type, otherwise the fallback.
     */
    function optionOfType(value, type, fallback) {
        return typeof value === type ? value : fallback;
    }

    /**
     * Updates the stack of open elements for a tag that has just been closed by ">".
     * @param {string[]} tagStack Names of the elements that are currently open
     * @param {string}   tagName  Lower-cased tag name as scanned (e.g. "p", "/p", "br/", "!--")
     */
    function trackTag(tagStack, tagName) {
        var firstChar = tagName.charAt(0),
            isSelfClosed = tagName.charAt(tagName.length - 1) === "/",
            elementName = isSelfClosed ? tagName.slice(0, -1) : tagName;

        // Comments and declarations ("<!-- -->", "<!DOCTYPE>") are not elements;
        // putting them on the stack would make the next closing tag pop the wrong entry.
        if (firstChar === "!") {
            return;
        }
        if (firstChar === "/") {
            tagStack.pop();
            return;
        }
        // "<br/>" style tags and void elements never need a closing tag.
        if (isSelfClosed || selfClosingTags.indexOf(elementName) !== -1) {
            return;
        }
        tagStack.push(elementName);
    }

    /**
     * Truncates a given HTML string to the specified length.
     * @param {string} text This is the HTML string to be truncated
     * @param {object} options An options object defining how to truncate.
     *      The object is only read, never modified.
     *      Default values:
     *      {
     *          TruncateBy : 'words',   // Options are 'words', 'characters' or 'paragraphs' (case-insensitive)
     *          TruncateLength : 50,    // The count to be used with TruncateBy
     *          StripHTML : false,      // Whether or not the truncated text should contain HTML tags
     *          Strict : true,          // When set to false the truncated text finishes at the end of the word
     *          Suffix : '...'          // Text to be appended to the end of the truncated text
     *      }
     *      Options of the wrong type fall back to their default value.
     * @return {string} This returns the provided string truncated to the
     *      length provided by the options. HTML tags may be stripped based
     *      on the given options. Elements left open by the cut are closed again.
     */
    var truncatise = function(text, options) {
        var given = options || {},
            requestedMode = optionOfType(given.TruncateBy, "string", ""),
            truncateBy = /word|character|paragraph/i.test(requestedMode) ? requestedMode.toLowerCase() : 'words',
            truncateLength = optionOfType(given.TruncateLength, "number", 50),
            stripHTML = optionOfType(given.StripHTML, "boolean", false),
            strict = optionOfType(given.Strict, "boolean", true),
            suffix = optionOfType(given.Suffix, "string", '...');

        // The mode never changes while scanning, so resolve it once up front.
        var byWords = truncateBy.indexOf("word") !== -1,
            byCharacters = truncateBy.indexOf("character") !== -1,
            byParagraphs = truncateBy.indexOf("paragraph") !== -1;

        var content = String(text || "").trim(),
            truncatedText = "",
            currentState = NOT_TAG,
            currentTag = "",
            tagStack = [],
            currentChar = "",
            nextChar = "",
            isEndOfWord = false,
            pointer = 0;

        //Counters
        var charCounter = 0,
            wordCounter = 0,
            paragraphCounter = 0;

        // Nothing to do: the text cannot exceed any limit expressed in
        // characters, words or paragraphs if it is not longer than the limit.
        if (content === "" || (content.length <= truncateLength && !stripHTML)) {
            return content;
        }

        if (stripHTML) {
            // Line breaks separate words, so keep a space where they were.
            content = content.replace(/<br\s*\/?>/gi, ' ');
        }

        //If not splitting on paragraphs we can quickly remove tags using regex
        if (stripHTML && !byParagraphs) {
            content = content.replace(/<!--(.*?)-->/gm, '').replace(/<\/?[^>]+>/gi, '');
        }

        //Remove newline separating paragraphs
        content = content.replace(/<\/p>(\r?\n)+<p>/gm, '</p><p>');

        //Replace double newlines with paragraphs
        if (stripHTML && /\r?\n\r?\n/.test(content)) {
            content = content.replace(/((.+)(\r?\n\r?\n|$))/gi, "<p>$2</p>");
        }

        for (pointer = 0; pointer < content.length; pointer++) {
            currentChar = content[pointer];

            switch (currentChar) {
                case "<":
                    if (currentState === NOT_TAG) {
                        currentState = TAG_START;
                        currentTag = "";
                    }
                    if (!stripHTML) {
                        truncatedText += currentChar;
                    }
                    break;
                case ">":
                    if (currentState === TAG_START || currentState === TAG_ATTRIBUTES) {
                        currentState = NOT_TAG;
                        currentTag = currentTag.toLowerCase();
                        if (currentTag === "/p") {
                            paragraphCounter++;
                            if (stripHTML) {
                                // Keep the paragraphs apart once their tags are gone.
                                truncatedText += " ";
                            }
                        }
                        trackTag(tagStack, currentTag);
                    }
                    if (!stripHTML) {
                        truncatedText += currentChar;
                    }
                    break;
                case " ":
                    if (currentState === TAG_START) {
                        // The tag name is complete; what follows are attributes.
                        currentState = TAG_ATTRIBUTES;
                    }
                    if (currentState === NOT_TAG) {
                        wordCounter++;
                        charCounter++;
                    }
                    if (currentState === NOT_TAG || !stripHTML) {
                        truncatedText += currentChar;
                    }
                    break;
                default:
                    if (currentState === NOT_TAG) {
                        charCounter++;
                    }
                    if (currentState === TAG_START) {
                        currentTag += currentChar;
                    }
                    if (currentState === NOT_TAG || !stripHTML) {
                        truncatedText += currentChar;
                    }
                    break;
            }

            if (byWords && truncateLength <= wordCounter) {
                truncatedText = truncatedText.replace(/\s+$/, '');
                break;
            }

            if (byCharacters && truncateLength <= charCounter) {
                nextChar = content[pointer + 1] || "";
                // In non-strict mode keep going until the current word is complete.
                isEndOfWord = strict || !WORD_CHARACTER.test(currentChar) || !WORD_CHARACTER.test(nextChar);
                if (isEndOfWord) {
                    break;
                }
            }

            if (byParagraphs && truncateLength === paragraphCounter) {
                break;
            }
        }

        // Close whatever elements the cut left open, innermost first.
        if (!stripHTML) {
            while (tagStack.length > 0) {
                truncatedText += "</" + tagStack.pop() + ">";
            }
        }

        // Only add the suffix when something was actually cut off.
        if (pointer < content.length - 1) {
            if (/<\/p>$/i.test(truncatedText)) {
                // Place the suffix inside the final paragraph. Plain slicing is used
                // so that "$" sequences in the suffix are never treated as
                // String.replace substitution patterns.
                truncatedText = truncatedText.slice(0, -4) + suffix + truncatedText.slice(-4);
            } else {
                truncatedText = truncatedText + suffix;
            }
        }

        return truncatedText.trim();
    };

    // Export to node
    if (typeof module !== 'undefined' && module.exports) {
        return module.exports = truncatise;
    }

    // Nope, export to the browser instead.
    exportTo.truncatise = truncatise;

// `this` is undefined at the top level of an ES module, so fall back to the global object.
}(typeof this !== "undefined" ? this : (typeof globalThis !== "undefined" ? globalThis : {})));
+36
View File
@@ -0,0 +1,36 @@
{
"name": "truncatise",
"version": "0.0.8",
"description": "Truncate HTML based on characters, words or paragraphs. Has the ability to strip tags.",
"main": "index.js",
"scripts": {
"test": "mocha -R spec test.js"
},
"repository": {
"type": "git",
"url": "git://github.com/AverageMarcus/Truncatise"
},
"keywords": [
"text",
"truncate",
"truncatise",
"word",
"character",
"limit",
"html",
"tag",
"strip",
"paragraph",
"excerpt"
],
"author": "Marcus Noble",
"license": "MIT",
"devDependencies": {
"chai": "~1.5.0",
"mocha": "~1.9.0"
},
"readmeFilename": "README.md",
"bugs": {
"url": "https://github.com/AverageMarcus/Truncatise/issues"
}
}
+241
View File
@@ -0,0 +1,241 @@
var truncatise = require("./"),
chai = require("chai"),
assert = require("chai").assert,
mocha = require("mocha");
chai.should();
// Character-based truncation: tag/comment stripping, entities, and the Strict flag.
describe("Truncating to characters", function(){
    // One entry per test: title, input HTML, options handed to truncatise, expected output.
    var scenarios = [
        {
            title: "should be able to strip html",
            html: "<p>This is a test of <b>html</b> <strong>tag</strong> <span class='cssClass'>stripping</span></p>",
            options: {TruncateLength: 10, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is a"
        },
        {
            title: "should be able handle and ignore self-closing tags",
            html: "<p>This<img src=\"test.jpg\" /> is a test of self-closing tags such as <img src=\"test.jpg\" /></p>",
            options: {TruncateLength: 10, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is a"
        },
        {
            title: "should ignore comments",
            html: "<p>This <!-- is a test --> <strong>is a test of comments</strong></p>",
            options: {TruncateLength: 10, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is a"
        },
        {
            title: "should strip comments",
            html: "<p>This <!-- comment <a href='/'>link</a> test --><strong>is a test of comments</strong></p>",
            options: {TruncateLength: 7, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is"
        },
        {
            title: "should ignore tags in comments",
            html: "<p>This <!-- is <a href='/'>a</a> test --><strong>is a test of comments</strong></p>",
            options: {TruncateLength: 9, TruncateBy : "character", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is a"
        },
        {
            title: "should correctly handle comments when not stripping tags",
            html: "<p>This <!-- comment --><strong>is a test of comments</strong></p>",
            options: {TruncateLength: 9, TruncateBy : "character", Strict : true, StripHTML : false, Suffix : ''},
            expected: "<p>This <!-- comment --><strong>is a</strong></p>"
        },
        {
            title: "should return all if truncate length is longer than input",
            html: "<p>This is a test of length</p>",
            options: {TruncateLength: 100, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is a test of length"
        },
        {
            title: "should handle encoded entities",
            html: "<p>This is &amp; test of length</p>",
            options: {TruncateLength: 100, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is &amp; test of length"
        },
        {
            title: "should strip custom tags",
            html: "<p>This is <faketag>a</faketag> test of length</p>",
            options: {TruncateLength: 100, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is a test of length"
        },
        {
            title: "should not split a word when not strict",
            html: "<p>This is a test of strictness</p>",
            options: {TruncateLength: 12, TruncateBy : "characters", Strict : false, StripHTML : true, Suffix : ''},
            expected: "This is a test"
        },
        {
            title: "should split a word when strict is true",
            html: "<p>This is a test of strictness</p>",
            options: {TruncateLength: 12, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is a te"
        }
    ];

    scenarios.forEach(function(scenario){
        it(scenario.title, function(){
            var actual = truncatise(scenario.html, scenario.options);
            actual.should.equal(scenario.expected);
        });
    });
});
// Word-based truncation of the same markup, with and without tag stripping.
describe("Truncating to words", function(){
    var markup = "<p>This is a test of <b>html</b> <strong>tag</strong> <span class='cssClass'>stripping</span></p>";

    // Truncates the shared markup to three words with an empty suffix.
    function firstThreeWords(stripHTML){
        return truncatise(markup, {TruncateLength: 3, TruncateBy : "words", StripHTML : stripHTML, Suffix : ''});
    }

    it("should be able to strip html", function(){
        var actual = firstThreeWords(true);
        actual.should.equal("This is a");
    });

    it("should be able to handle html tags", function(){
        var actual = firstThreeWords(false);
        actual.should.equal("<p>This is a</p>");
    });
});
// Paragraph-based truncation: <p> elements as well as plain text split by blank lines.
describe("Truncating to paragraphs", function(){
    var twoParagraphs = "<p>This is a test of <b>html</b> <strong>tag</strong> <span class='cssClass'>stripping</span></p><p>With multiple paragraphs</p>",
        sevenParagraphs = "<p>This</p><p>is</p><p>a</p><p>test</p><p>of</p><p>multiple</p><p>paragraphs</p>";

    // One entry per test: title, input, options handed to truncatise, expected output.
    // Every title is unique so that a failure report (or `mocha --grep`) identifies
    // exactly one case; the two double-newline cases previously shared one title.
    var scenarios = [
        {
            title: "should be able to strip html",
            input: twoParagraphs,
            options: {TruncateLength: 1, TruncateBy : "paragraph", StripHTML : true, Suffix : ''},
            expected: "This is a test of html tag stripping"
        },
        {
            title: "should be able to handle html tags",
            input: twoParagraphs,
            options: {TruncateLength: 1, TruncateBy : "paragraph", StripHTML : false, Suffix : ''},
            expected: "<p>This is a test of <b>html</b> <strong>tag</strong> <span class='cssClass'>stripping</span></p>"
        },
        {
            title: "should be able to handle several paragraphs",
            input: sevenParagraphs,
            options: {TruncateLength: 3, TruncateBy : "paragraph", StripHTML : true, Suffix : ''},
            expected: "This is a"
        },
        {
            title: "should append the suffix inside the paragraph",
            input: sevenParagraphs,
            options: {TruncateLength: 3, TruncateBy : "paragraph", StripHTML : false, Suffix : '...'},
            expected: "<p>This</p><p>is</p><p>a...</p>"
        },
        {
            // Plain text: blank lines (LF or CRLF) act as paragraph separators.
            title: "should be able to handle double newline",
            input: "This\n\nIs\r\n\r\nA\n\nTest",
            options: {TruncateLength: 3, TruncateBy : "paragraph", StripHTML : true, Suffix : ''},
            expected: "This Is A"
        },
        {
            // Markup: <p> elements that are additionally separated by blank lines.
            title: "should be able to handle paragraphs separated by double newline",
            input: "<p>This</p>\n\n<p>Is</p>\n\n<p>A</p>\n\n<p>Test</p>",
            options: {TruncateLength: 3, TruncateBy : "paragraph", StripHTML : true, Suffix : ''},
            expected: "This Is A"
        }
    ];

    scenarios.forEach(function(scenario){
        it(scenario.title, function(){
            truncatise(scenario.input, scenario.options).should.equal(scenario.expected);
        });
    });
});
// Suffix handling: default and custom suffixes, placement inside open tags,
// and the cases where nothing is cut off and no suffix may be added.
describe("Appending a suffix", function(){
    var sentence = "This is a long paragraph that I intend to truncate.",
        wrappedSentence = "<p>This is a long paragraph that I intend to truncate.</p>",
        shortLine = "This is a short line.";

    // One entry per test: title, input, options handed to truncatise, expected output.
    var scenarios = [
        {
            title: "should append ... by default",
            input: sentence,
            options: {TruncateLength: 14, TruncateBy : "characters", Strict : true, StripHTML : true},
            expected: "This is a long..."
        },
        {
            title: "should append the provided suffix",
            input: sentence,
            options: {TruncateLength: 14, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ' (Read More)'},
            expected: "This is a long (Read More)"
        },
        {
            title: "should place the suffix correctly within open tags",
            input: wrappedSentence,
            options: {TruncateLength: 2, TruncateBy : "words", StripHTML : false},
            expected: "<p>This is...</p>"
        },
        {
            title: "shouldn't add suffix if text length is less than truncate lengh (paragraph)",
            input: wrappedSentence,
            options: {TruncateLength: 2, TruncateBy : "paragraphs", StripHTML : false},
            expected: "<p>This is a long paragraph that I intend to truncate.</p>"
        },
        {
            title: "shouldn't add suffix if text length is same as truncate lengh (paragraph)",
            input: wrappedSentence,
            options: {TruncateLength: 1, TruncateBy : "paragraphs", StripHTML : false},
            expected: "<p>This is a long paragraph that I intend to truncate.</p>"
        },
        {
            title: "shouldn't add suffix if text length is less than truncate lengh (word)",
            input: shortLine,
            options: {TruncateLength: 200, TruncateBy : "words", StripHTML : false},
            expected: "This is a short line."
        },
        {
            title: "shouldn't add suffix if text length is same as truncate lengh (word)",
            input: shortLine,
            options: {TruncateLength: 5, TruncateBy : "words", StripHTML : false},
            expected: "This is a short line."
        },
        {
            title: "shouldn't add suffix if text length is less than truncate lengh (chars)",
            input: shortLine,
            options: {TruncateLength: 200, TruncateBy : "characters", StripHTML : false},
            expected: "This is a short line."
        },
        {
            title: "shouldn't add suffix if text length is same as truncate lengh (chars)",
            input: shortLine,
            options: {TruncateLength: 21, TruncateBy : "characters", StripHTML : false},
            expected: "This is a short line."
        },
        {
            title: "shouldn't add suffix if text length is same as truncate lengh (paragraph with trailing space)",
            input: "<p>This is a long paragraph that I intend to truncate.</p> ",
            options: {TruncateLength: 1, TruncateBy : "paragraphs", StripHTML : false},
            expected: "<p>This is a long paragraph that I intend to truncate.</p>"
        }
    ];

    scenarios.forEach(function(scenario){
        it(scenario.title, function(){
            var actual = truncatise(scenario.input, scenario.options);
            actual.should.equal(scenario.expected);
        });
    });
});
// Tag balancing: elements left open by the cut are closed, void elements are not.
describe("Handling tags", function(){
    // Keeps the first two words, retains the markup and appends no suffix.
    function firstTwoWords(html){
        return truncatise(html, {TruncateLength: 2, TruncateBy : "words", StripHTML : false, Suffix : ''});
    }

    // [title, input, expected output]
    var scenarios = [
        [
            "should be able to close an open tag",
            "<p>This is a long paragraph that I intend to truncate.</p>",
            "<p>This is</p>"
        ],
        [
            "should be able to close multiple open tags",
            "<p>This <a href=\"/\">is a long paragraph</a> that I intend to truncate.</p>",
            "<p>This <a href=\"/\">is</a></p>"
        ],
        [
            "should not append self-closing br tags to the end of the string",
            "<p>This<br>handles<br></p>",
            "<p>This<br>handles<br></p>"
        ]
    ];

    scenarios.forEach(function(scenario){
        it(scenario[0], function(){
            firstTwoWords(scenario[1]).should.equal(scenario[2]);
        });
    });
});
// Timing checks: truncating a very large document to a few words must stay fast.
describe("Performance testing",function() {
    var paragraph = "<p>This is a paragraph used for performance testing</p>\n",
        input = "",
        remaining = 1000000;

    // Build the one-million-paragraph fixture shared by all timing checks.
    while (remaining > 0) {
        input += paragraph;
        remaining--;
    }

    // Same check for each word limit; only the limit differs.
    [400, 1000, 10000].forEach(function(wordLimit) {
        describe("truncate to only " + wordLimit + " words from a 1000000 paragraph long string",function() {
            it("should take less than a second",function() {
                var startTime = Date.now();
                truncatise(input,{TruncateLength: wordLimit, TruncateBy : "words", StripHTML : true, Suffix : ''});
                var elapsed = Date.now() - startTime;
                elapsed.should.be.lte(1000);
            });
        });
    });
});
// The Strict flag: whether a character limit may cut through the middle of a word.
describe("Strict", function(){
    // Character-based truncation that keeps markup and appends no suffix.
    function truncateChars(text, length, strict){
        return truncatise(text, {TruncateLength: length, TruncateBy : "characters", StripHTML : false, Strict : strict, Suffix : ''});
    }

    // [title, input, character limit, Strict flag, expected output]
    var scenarios = [
        ["should close in the middle of a word when strict enabled", "This is a test of strict mode", 12, true, "This is a te"],
        ["should not close in the middle of a word when strict disabled", "This is a test of strict mode", 12, false, "This is a test"],
        ["should not close at an apostrophe when strict disabled", "This is a test I'm doing of strict mode", 16, false, "This is a test I'm"],
        ["should not close on accented char when strict disabled", "test test tést test", 12, false, "test test tést"],
        ["should not close on capital letter when strict disabled", "This is a TEST of strict mode", 12, false, "This is a TEST"]
    ];

    scenarios.forEach(function(scenario){
        it(scenario[0], function(){
            truncateChars(scenario[1], scenario[2], scenario[3]).should.equal(scenario[4]);
        });
    });
});
// <br> elements must act as word separators once the markup is stripped.
describe("Handing newlines", function() {
    it("should replace newlines with spaces", function(){
        var html = "<div><!--block-->Laborum. <strong>Odit</strong> in omn.<br>Lorem Ipsum Abc<br>Lorem Ipsum<br>Lorem Ipsum</div>",
            settings = {TruncateLength: 8, TruncateBy : "words", StripHTML: true},
            actual = truncatise(html, settings);

        actual.should.equal("Laborum. Odit in omn. Lorem Ipsum Abc Lorem...");
    });
});