modified

2026-06-14 08:08:24 +02:00 · 2023-11-24 22:35:41 +01:00
parent 3c0b507a93
commit 7644b2a0f7
45165 changed files with 4803356 additions and 3 deletions
@@ -0,0 +1,22 @@
+# Auto detect text files and perform LF normalization
+* text=auto
+
+# Custom for Visual Studio
+*.cs     diff=csharp
+*.sln    merge=union
+*.csproj merge=union
+*.vbproj merge=union
+*.fsproj merge=union
+*.dbproj merge=union
+
+# Standard to msysgit
+*.doc	 diff=astextplain
+*.DOC	 diff=astextplain
+*.docx diff=astextplain
+*.DOCX diff=astextplain
+*.dot  diff=astextplain
+*.DOT  diff=astextplain
+*.pdf  diff=astextplain
+*.PDF	 diff=astextplain
+*.rtf	 diff=astextplain
+*.RTF	 diff=astextplain
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2013 Marcus Noble
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
@@ -0,0 +1,40 @@
+# Truncatise
+## JavaScript module to truncate HTML strings.
+
+### Description
+Provides the ability to truncate HTML strings down to excerpts based on character length, word length or paragraph length.
+
+**Features:**
+*	Truncate based on number of Characters, Words or Paragraphs.
+*	Strip HTML from returned string.
+*	Provides a strict flag to specify whether or not the text may be cut off mid-word.
+*	Configurable suffix appended to the end of the returned excerpt.
+
+### Install
+  npm install truncatise
+
+### Options (with default values)
+
+```javascript
+{
+  TruncateBy:     'words',  // Options are 'words', 'characters' or 'paragraphs'
+  TruncateLength: 50,       // The count to be used with TruncateBy
+  StripHTML:      false,    // Whether or not the truncated text should contain HTML tags
+  Strict:         true,     // If set to false, the truncated text finishes at the end of a word instead of mid-word
+  Suffix:         '...'     // Text to be appended to the end of the truncated text
+}
+```
+
+### Example Usage
+
+```javascript
+var options = {
+  TruncateLength: 4,
+  TruncateBy : "words",
+  Strict : false,
+  StripHTML : true,
+  Suffix : ' (Read More)'
+};
+var excerpt = truncatise("<p>This is a test of Truncatise</p>", options);
+console.log(excerpt); // This is a test (Read More)
+```
@@ -0,0 +1,188 @@
(function(exportTo) {
    "use strict";

    // Void elements never take a closing tag, so they are never tracked as "open".
    var selfClosingTags = ["area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "menuitem", "meta", "param", "source", "track", "wbr"];

    //Parser states
    var NOT_TAG         = 0,    // reading text content
        TAG_START       = 1,    // reading a tag name (directly after "<")
        TAG_ATTRIBUTES  = 2,    // inside a tag, after its name
        COMMENT         = 3;    // inside <!-- ... -->

    //Patterns are hoisted so they are not rebuilt for every character scanned
    var TRUNCATE_BY_PATTERN = /word|character|paragraph/i,
        WORD_CHARACTER      = /[a-zA-Z\u00C7-\u00DC']/i,    // same range as the literal class [a-zA-ZÇ-Ü']
        WHITESPACE          = /\s/;

    /**
     * Builds a fresh, fully-defaulted settings object.
     * The caller's options object is only read, never written to.
     * @param {object} [options] User supplied options (may be undefined/null)
     * @return {object} Settings with TruncateBy (lower-cased), TruncateLength,
     *      StripHTML, Strict and Suffix always present and of the right type.
     */
    function resolveOptions(options) {
        var given = options || {};
        // The check is case-insensitive so that e.g. "Characters" is honoured
        // instead of silently falling back to 'words'.
        var truncateBy = (typeof given.TruncateBy === "string" && TRUNCATE_BY_PATTERN.test(given.TruncateBy))
            ? given.TruncateBy.toLowerCase()
            : 'words';

        return {
            TruncateBy      : truncateBy,
            TruncateLength  : typeof given.TruncateLength === "number" ? given.TruncateLength : 50,
            StripHTML       : typeof given.StripHTML === "boolean" ? given.StripHTML : false,
            Strict          : typeof given.Strict === "boolean" ? given.Strict : true,
            Suffix          : typeof given.Suffix === "string" ? given.Suffix : '...'
        };
    }

    /**
     * Updates the stack of currently open tags for a tag that has just ended.
     * @param {string[]} openTags Stack of open tag names (mutated in place)
     * @param {string} tagName Lower-cased tag name as scanned, e.g. "p", "/p", "br/"
     */
    function trackTag(openTags, tagName) {
        var first = tagName.charAt(0);
        if (first === "/") {
            // A stray closing tag for a void element (</br>) closes nothing.
            if (selfClosingTags.indexOf(tagName.slice(1)) === -1) {
                openTags.pop();
            }
            return;
        }
        // Empty names, declarations (<!DOCTYPE>) and processing instructions
        // (<?xml ?>) must never receive a generated closing tag.
        if (tagName === "" || first === "!" || first === "?") {
            return;
        }
        // XML-style self-closed tag such as <br/> or <foo/>: nothing stays open.
        if (tagName.charAt(tagName.length - 1) === "/") {
            return;
        }
        if (selfClosingTags.indexOf(tagName) === -1) {
            openTags.push(tagName);
        }
    }

    /**
     * Reports whether any visible (non-tag, non-whitespace) text exists from
     * the given index onwards. Stops at the first such character.
     * @param {string} source The text being truncated
     * @param {number} from Index to start scanning at
     * @return {boolean} true when some text content was left out
     */
    function hasTextAfter(source, from) {
        var insideTag = false;
        for (var i = from; i < source.length; i++) {
            var c = source.charAt(i);
            if (c === "<") {
                insideTag = true;
            } else if (c === ">") {
                insideTag = false;
            } else if (!insideTag && !WHITESPACE.test(c)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Truncates a given HTML string to the specified length.
     * @param {string} text This is the HTML string to be truncated
     * @param {object} options An options object defining how to truncate.
     *      The object is not modified.
     *      Default values:
     *      {
     *          TruncateBy : 'words',   // Options are 'words', 'characters' or 'paragraphs' (case-insensitive)
     *          TruncateLength : 50,    // The count to be used with TruncateBy
     *          StripHTML : false,      // Whether or not the truncated text should contain HTML tags
     *          Strict : true,          // When set to false the truncated text finishes at the end of the word
     *          Suffix : '...'          // Text to be appended to the end of the truncated text
     *      }
     * @return {string} This returns the provided string truncated to the
     *      length provided by the options. HTML tags may be stripped based
     *      on the given options. Tags left open by the cut are closed again,
     *      and the suffix is only added when text content was actually removed.
     */
    var truncatise = function(text, options) {
        var settings        = resolveOptions(options),
            source          = String(text || "").trim(),
            keepMarkup      = !settings.StripHTML,
            limit           = settings.TruncateLength,
            byWords         = settings.TruncateBy.indexOf("word") !== -1,
            byCharacters    = settings.TruncateBy.indexOf("character") !== -1,
            byParagraphs    = settings.TruncateBy.indexOf("paragraph") !== -1;
        var output          = "",
            state           = NOT_TAG,
            tagName         = "",
            openTags        = [];
        //Counters
        var charCounter         = 0,
            wordCounter         = 0,
            paragraphCounter    = 0;
        var index, currentChar, nextChar, isEndOfWord;

        if (source === "" || (source.length <= limit && keepMarkup)) {
            return source;
        }

        if (!keepMarkup) {
            //Line breaks become spaces so the words either side stay separated
            source = source.replace(/<br\s*\/?>/gi, ' ');
            //If not splitting on paragraphs we can quickly remove tags using regex
            if (!byParagraphs) {
                source = source.replace(/<!--[\s\S]*?-->/g, '').replace(/<\/?[^>]+>/gi, '');
            }
        }
        //Remove newlines separating paragraphs
        source = source.replace(/<\/p>(\r?\n)+<p>/gm, '</p><p>');
        //Replace double newlines with paragraphs
        if (!keepMarkup && /\r?\n\r?\n/.test(source)) {
            source = source.replace(/((.+)(\r?\n\r?\n|$))/gi, "<p>$2</p>");
        }

        for (index = 0; index < source.length; index++) {
            currentChar = source.charAt(index);

            switch (currentChar) {
                case "<":
                    if (state === NOT_TAG) {
                        state = TAG_START;
                        tagName = "";
                    }
                    if (keepMarkup) {
                        output += currentChar;
                    }
                    break;
                case ">":
                    if (state === COMMENT) {
                        // Only "-->" ends a comment; a bare ">" inside it is comment text.
                        if (source.substring(index - 2, index) === "--") {
                            state = NOT_TAG;
                        }
                    } else if (state === TAG_START || state === TAG_ATTRIBUTES) {
                        state = NOT_TAG;
                        tagName = tagName.toLowerCase();
                        if (tagName === "/p") {
                            paragraphCounter++;
                            if (!keepMarkup) {
                                output += " ";
                            }
                        }
                        trackTag(openTags, tagName);
                    }
                    if (keepMarkup) {
                        output += currentChar;
                    }
                    break;
                case " ":
                    if (state === TAG_START) {
                        state = TAG_ATTRIBUTES;
                    }
                    if (state === NOT_TAG) {
                        wordCounter++;
                        charCounter++;
                    }
                    if (state === NOT_TAG || keepMarkup) {
                        output += currentChar;
                    }
                    break;
                default:
                    if (state === NOT_TAG) {
                        charCounter++;
                    } else if (state === TAG_START) {
                        tagName += currentChar;
                        if (tagName === "!--") {
                            // Everything up to "-->" is opaque, including any tags inside.
                            state = COMMENT;
                        }
                    }
                    if (state === NOT_TAG || keepMarkup) {
                        output += currentChar;
                    }
                    break;
            }

            nextChar = source.charAt(index + 1);
            isEndOfWord = settings.Strict || !WORD_CHARACTER.test(currentChar) || !WORD_CHARACTER.test(nextChar);

            if (byWords && limit <= wordCounter) {
                output = output.replace(/\s+$/, '');
                break;
            }
            if (byCharacters && limit <= charCounter && isEndOfWord) {
                break;
            }
            if (byParagraphs && limit === paragraphCounter) {
                break;
            }
        }

        //Close whatever the cut left open, innermost first
        if (keepMarkup) {
            while (openTags.length > 0) {
                output += "</" + openTags.pop() + ">";
            }
        }

        //Only mark the text as truncated when real content was dropped
        if (hasTextAfter(source, index + 1)) {
            if (/<\/p>$/i.test(output)) {
                // Spliced by position rather than String.replace so that "$"
                // sequences in the suffix are inserted literally.
                output = output.slice(0, -4) + settings.Suffix + output.slice(-4);
            } else {
                output = output + settings.Suffix;
            }
        }

        return output.trim();
    };

    // Export to node
    if (typeof module !== 'undefined' && module.exports){
        return module.exports = truncatise;
    }

    // Nope, export to the browser instead. `this` is undefined at the top
    // level of an ES module, so fall back to the real global object there.
    var globalTarget = exportTo
        || (typeof globalThis !== 'undefined' ? globalThis : undefined)
        || (typeof self !== 'undefined' ? self : undefined);
    if (globalTarget) {
        globalTarget.truncatise = truncatise;
    }
}(this));
@@ -0,0 +1,36 @@
+{
+  "name": "truncatise",
+  "version": "0.0.8",
+  "description": "Truncate HTML based on characters, words or paragraphs. Has the ability to strip tags.",
+  "main": "index.js",
+  "scripts": {
+    "test": "mocha -R spec test.js"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git://github.com/AverageMarcus/Truncatise"
+  },
+  "keywords": [
+    "text",
+    "truncate",
+    "truncatise",
+    "word",
+    "character",
+    "limit",
+    "html",
+    "tag",
+    "strip",
+    "paragraph",
+    "excerpt"
+  ],
+  "author": "Marcus Noble",
+  "license": "MIT",
+  "devDependencies": {
+    "chai": "~1.5.0",
+    "mocha": "~1.9.0"
+  },
+  "readmeFilename": "README.md",
+  "bugs": {
+    "url": "https://github.com/AverageMarcus/Truncatise/issues"
+  }
+}
@@ -0,0 +1,241 @@
+var truncatise		= require("./"),
+    chai			= require("chai"),
+    assert			= require("chai").assert,
+    mocha           = require("mocha");
+    chai.should();
+
describe("Truncating to characters", function(){
    // One entry per test: the title passed to it(), the markup handed to
    // truncatise, the options used, and the exact string expected back.
    var scenarios = [
        {
            title: "should be able to strip html",
            input: "<p>This is a test of <b>html</b> <strong>tag</strong> <span class='cssClass'>stripping</span></p>",
            options: {TruncateLength: 10, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is a"
        },
        {
            title: "should be able handle and ignore self-closing tags",
            input: "<p>This<img src=\"test.jpg\" /> is a test of self-closing tags such as <img src=\"test.jpg\" /></p>",
            options: {TruncateLength: 10, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is a"
        },
        {
            title: "should ignore comments",
            input: "<p>This <!-- is a test --> <strong>is a test of comments</strong></p>",
            options: {TruncateLength: 10, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This  is a"
        },
        {
            title: "should strip comments",
            input: "<p>This <!-- comment <a href='/'>link</a> test --><strong>is a test of comments</strong></p>",
            options: {TruncateLength: 7, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is"
        },
        {
            title: "should ignore tags in comments",
            input: "<p>This <!-- is <a href='/'>a</a> test --><strong>is a test of comments</strong></p>",
            options: {TruncateLength: 9, TruncateBy : "character", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is a"
        },
        {
            title: "should correctly handle comments when not stripping tags",
            input: "<p>This <!-- comment --><strong>is a test of comments</strong></p>",
            options: {TruncateLength: 9, TruncateBy : "character", Strict : true, StripHTML : false, Suffix : ''},
            expected: "<p>This <!-- comment --><strong>is a</strong></p>"
        },
        {
            title: "should return all if truncate length is longer than input",
            input: "<p>This is a test of length</p>",
            options: {TruncateLength: 100, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is a test of length"
        },
        {
            title: "should handle encoded entities",
            input: "<p>This is &amp; test of length</p>",
            options: {TruncateLength: 100, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is &amp; test of length"
        },
        {
            title: "should strip custom tags",
            input: "<p>This is <faketag>a</faketag> test of length</p>",
            options: {TruncateLength: 100, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is a test of length"
        },
        {
            title: "should not split a word when not strict",
            input: "<p>This is a test of strictness</p>",
            options: {TruncateLength: 12, TruncateBy : "characters", Strict : false, StripHTML : true, Suffix : ''},
            expected: "This is a test"
        },
        {
            title: "should split a word when strict is true",
            input: "<p>This is a test of strictness</p>",
            options: {TruncateLength: 12, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ''},
            expected: "This is a te"
        }
    ];

    // Register the scenarios in declaration order so reports read as before.
    scenarios.forEach(function(scenario){
        it(scenario.title, function(){
            truncatise(scenario.input, scenario.options).should.equal(scenario.expected);
        });
    });
});
+
describe("Truncating to words", function(){
    // Both tests cut the same markup down to its first three words; they
    // differ only in whether the surrounding tags are kept.
    var markup = "<p>This is a test of <b>html</b> <strong>tag</strong> <span class='cssClass'>stripping</span></p>";

    function firstThreeWords(stripHtml){
        return truncatise(markup, {TruncateLength: 3, TruncateBy : "words", StripHTML : stripHtml, Suffix : ''});
    }

    it("should be able to strip html", function(){
        var result = firstThreeWords(true);
        result.should.equal("This is a");
    });

    it("should be able to handle html tags", function(){
        var result = firstThreeWords(false);
        result.should.equal("<p>This is a</p>");
    });
});
+
describe("Truncating to paragraphs", function(){
    // Two paragraphs, the first containing nested inline tags.
    var twoParagraphs = "<p>This is a test of <b>html</b> <strong>tag</strong> <span class='cssClass'>stripping</span></p><p>With multiple paragraphs</p>";
    // Seven single-word paragraphs.
    var manyParagraphs = "<p>This</p><p>is</p><p>a</p><p>test</p><p>of</p><p>multiple</p><p>paragraphs</p>";

    it("should be able to strip html", function(){
        truncatise(twoParagraphs, {TruncateLength: 1, TruncateBy : "paragraph", StripHTML : true, Suffix : ''})
            .should.equal("This is a test of html tag stripping");
    });

    it("should be able to handle html tags", function(){
        truncatise(twoParagraphs, {TruncateLength: 1, TruncateBy : "paragraph", StripHTML : false, Suffix : ''})
            .should.equal("<p>This is a test of <b>html</b> <strong>tag</strong> <span class='cssClass'>stripping</span></p>");
    });

    it("should be able to handle several paragraphs", function(){
        truncatise(manyParagraphs, {TruncateLength: 3, TruncateBy : "paragraph", StripHTML : true, Suffix : ''})
            .should.equal("This is a");
    });

    it("should append the suffix inside the paragraph", function(){
        truncatise(manyParagraphs, {TruncateLength: 3, TruncateBy : "paragraph", StripHTML : false, Suffix : '...'})
            .should.equal("<p>This</p><p>is</p><p>a...</p>");
    });

    // Plain text: blank lines (LF or CRLF) act as paragraph separators.
    it("should be able to handle double newline", function(){
        truncatise("This\n\nIs\r\n\r\nA\n\nTest", {TruncateLength: 3, TruncateBy : "paragraph", StripHTML : true, Suffix : ''})
            .should.equal("This Is A");
    });

    // Markup: blank lines sit between real <p> elements. This test used to
    // share its title with the one above, which made failures ambiguous.
    it("should be able to handle double newline between paragraph tags", function(){
        truncatise("<p>This</p>\n\n<p>Is</p>\n\n<p>A</p>\n\n<p>Test</p>", {TruncateLength: 3, TruncateBy : "paragraph", StripHTML : true, Suffix : ''})
            .should.equal("This Is A");
    });
});
+
describe("Appending a suffix", function(){
    var sentence    = "This is a long paragraph that I intend to truncate.",
        paragraph   = "<p>This is a long paragraph that I intend to truncate.</p>",
        shortLine   = "This is a short line.";

    // --- Input is longer than the limit: a suffix is expected ---

    it("should append ... by default",function(){
        truncatise(sentence,{TruncateLength: 14, TruncateBy : "characters", Strict : true, StripHTML : true})
            .should.equal("This is a long...");
    });

    it("should append the provided suffix",function(){
        truncatise(sentence,{TruncateLength: 14, TruncateBy : "characters", Strict : true, StripHTML : true, Suffix : ' (Read More)'})
            .should.equal("This is a long (Read More)");
    });

    it("should place the suffix correctly within open tags",function(){
        truncatise(paragraph,{TruncateLength: 2, TruncateBy : "words", StripHTML : false})
            .should.equal("<p>This is...</p>");
    });

    // --- Input fits within the limit: it must come back untouched ---

    it("shouldn't add suffix if text length is less than truncate length (paragraph)",function() {
        truncatise(paragraph,{TruncateLength: 2, TruncateBy : "paragraphs", StripHTML : false})
            .should.equal("<p>This is a long paragraph that I intend to truncate.</p>");
    });

    it("shouldn't add suffix if text length is same as truncate length (paragraph)",function() {
        truncatise(paragraph,{TruncateLength: 1, TruncateBy : "paragraphs", StripHTML : false})
            .should.equal("<p>This is a long paragraph that I intend to truncate.</p>");
    });

    it("shouldn't add suffix if text length is less than truncate length (word)",function() {
        truncatise(shortLine,{TruncateLength: 200, TruncateBy : "words", StripHTML : false})
            .should.equal("This is a short line.");
    });

    it("shouldn't add suffix if text length is same as truncate length (word)",function() {
        truncatise(shortLine,{TruncateLength: 5, TruncateBy : "words", StripHTML : false})
            .should.equal("This is a short line.");
    });

    it("shouldn't add suffix if text length is less than truncate length (chars)",function() {
        truncatise(shortLine,{TruncateLength: 200, TruncateBy : "characters", StripHTML : false})
            .should.equal("This is a short line.");
    });

    it("shouldn't add suffix if text length is same as truncate length (chars)",function() {
        truncatise(shortLine,{TruncateLength: 21, TruncateBy : "characters", StripHTML : false})
            .should.equal("This is a short line.");
    });

    // Trailing whitespace after the final </p> must not count as remaining text.
    it("shouldn't add suffix if text length is same as truncate length (paragraph with trailing space)",function() {
        truncatise(paragraph + "   ",{TruncateLength: 1, TruncateBy : "paragraphs", StripHTML : false})
            .should.equal("<p>This is a long paragraph that I intend to truncate.</p>");
    });
});
+
describe("Handling tags", function(){
    // Every test here keeps the markup and truncates after two words with no suffix.
    function keepTagsTwoWords(html){
        return truncatise(html, {TruncateLength: 2, TruncateBy : "words", StripHTML : false, Suffix : ''});
    }

    it("should be able to close an open tag",function(){
        var result = keepTagsTwoWords("<p>This is a long paragraph that I intend to truncate.</p>");
        result.should.equal("<p>This is</p>");
    });

    it("should be able to close multiple open tags",function(){
        var result = keepTagsTwoWords("<p>This <a href=\"/\">is a long paragraph</a> that I intend to truncate.</p>");
        result.should.equal("<p>This <a href=\"/\">is</a></p>");
    });

    it("should not append self-closing br tags to the end of the string",function(){
        var result = keepTagsTwoWords("<p>This<br>handles<br></p>");
        result.should.equal("<p>This<br>handles<br></p>");
    });
});
+
describe("Performance testing",function() {
    // One million copies of the same paragraph, each followed by a newline.
    // Joining an array of 1000001 empty slots yields exactly 1000000 separators.
    var input = new Array(1000001).join("<p>This is a paragraph used for performance testing</p>\n");

    // The same timing check is repeated for increasingly large word limits.
    [400, 1000, 10000].forEach(function(wordLimit) {
        describe("truncate to only " + wordLimit + " words from a 1000000 paragraph long string",function() {
            it("should take less than a second",function() {
                var startedAt = Date.now();
                truncatise(input,{TruncateLength: wordLimit, TruncateBy : "words", StripHTML : true, Suffix : ''});
                var elapsed = Date.now() - startedAt;
                elapsed.should.be.lte(1000);
            });
        });
    });
});
+
describe("Strict", function(){
    // Each row: [test title, input text, TruncateLength, Strict flag, expected result].
    // All rows truncate by characters, keep markup and use an empty suffix.
    var rows = [
        ["should close in the middle of a word when strict enabled",
            "This is a test of strict mode", 12, true, "This is a te"],
        ["should not close in the middle of a word when strict disabled",
            "This is a test of strict mode", 12, false, "This is a test"],
        ["should not close at an apostrophe when strict disabled",
            "This is a test I'm doing of strict mode", 16, false, "This is a test I'm"],
        ["should not close on accented char when strict disabled",
            "test test tést test", 12, false, "test test tést"],
        ["should not close on capital letter when strict disabled",
            "This is a TEST of strict mode", 12, false, "This is a TEST"]
    ];

    rows.forEach(function(row){
        var title = row[0],
            input = row[1],
            length = row[2],
            strict = row[3],
            expected = row[4];

        it(title, function(){
            var settings = {TruncateLength: length, TruncateBy : "characters", StripHTML : false, Strict : strict, Suffix : ''};
            truncatise(input, settings).should.equal(expected);
        });
    });
});
+
describe("Handling newlines", function() {
    // When HTML is stripped, a <br> must turn into a space; otherwise the
    // words on either side of the line break would be glued together.
    it("should replace line breaks (<br>) with spaces", function(){
        truncatise("<div><!--block-->Laborum. <strong>Odit</strong> in omn.<br>Lorem Ipsum Abc<br>Lorem Ipsum<br>Lorem Ipsum</div>", {TruncateLength: 8, TruncateBy : "words", StripHTML: true})
            .should.equal("Laborum. Odit in omn. Lorem Ipsum Abc Lorem...");
    });
});