Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Change matching words in a webpage's text to buttons

I am trying to make a Chrome extension that parses through a website looking for keywords and then replaces those keywords with buttons. However, when I change the text, the image path becomes corrupted.

// This is a content script (isolated environment)
//    It will have partial access to the chrome API
// TODO 
//    Consider adding a "run_at": "document_end" in the manifest... 
//      don't want to run before full load
//      Might also be able to do this via the chrome API 
console.log("Scraper Running");
var keywords = ["sword", "gold", "yellow", "blue", "green", "china", "civil", "state"];

// Matches the keywords against the page and wraps every match in a <button>.
// Also holds the (still unimplemented) button factory stubs.
(function() {
  // Replaces each keyword, case-insensitively, in the markup of <body>.
  // NOTE: the replacement runs over the serialized HTML string, so matches
  //   inside tags and attribute values (e.g. image URLs) are rewritten too.
  function runScraper() {
    var bodySelector = "body:not([href]):not(:image)";
    console.log($('body'));
    keywords.forEach(function(word, index) {
      var $target = $(bodySelector);
      var markup = $(bodySelector).html();
      var pattern = new RegExp(word, "ig");
      $target.html(markup.replace(pattern, "<button> " + word + " </button>"));
      console.log("Ran it " + index);
    });
  }

  // Placeholder: intended to build a button programmatically and return it.
  // Currently always returns null.
  function createResourceButton() {
    return null;
  }

  // Placeholder: not implemented yet.
  function createActionButton() {
  }

  runScraper();
})();
// TODO create the functions that the buttons will call
//      These will pass data to the chrome extension (see message passing)
//      Or we can consider a hack like this: 
// "Building a Chrome Extension - Inject code in a page using a Content script"
// http://stackoverflow.com/questions/9515704

Image of current results:

Wikipedia images will not load

like image 675
user2918160 Avatar asked Nov 13 '16 10:11

user2918160


People also ask

How to replace text in HTML using JavaScript?

The JavaScript replace() method is used to replace any occurrence of a character in a string or the entire string. It searches for a string corresponding to either a particular value or regular expression and returns a new string with the modified values.


1 Answer

Your approach to this problem is wrong. To do this, you need to walk through the document only changing text nodes, not the HTML of all nodes.

Modifying the code from this other answer of mine, the following complete extension changes all matching words on the page to buttons.

The extension in action:

button-izing matching words in Wikipedia page used in image by OP

manifest.json

{
    "description": "Upon action button click, make all matching words buttons.",
    "manifest_version": 2,
    "name": "Button all matching words",
    "version": "0.1",

    "permissions": [
        "activeTab"
    ],

    "background": {
        "scripts": [
            "background.js"
        ]
    },

    "browser_action": {
        "default_icon": {
            "32": "myIcon.png"
        },
        "default_title": "Make Buttons of specified words"
    }
}

background.js:

//Handles a click on the extension's browser action button by injecting the
//  script that turns all matching words in the clicked tab into buttons.
function injectContentScript(tab) {
    var injectionDetails = {file: 'contentScript.js'};
    chrome.tabs.executeScript(tab.id, injectionDetails);
}

chrome.browserAction.onClicked.addListener(injectContentScript);

contentScript.js:

(function(){
    var keywords = ["sword", "gold", "yellow", "blue", "green", "china", "civil", "state"];

    //Escape RegExp metacharacters so that every keyword is matched literally,
    //  even if the list is later extended with words containing e.g. '.' or '+'.
    function escapeRegExp(text) {
        return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    }

    //Build the RegExp once. Doing it for every replace is inefficient.
    //  Build one RegExp that matches all of the words instead of multiple RegExp.
    //  The single capture group makes split() keep the matched words in its result.
    var myRegExp = new RegExp('\\b(' + keywords.map(escapeRegExp).join('|') + ')\\b', 'gi');

    //Replace one text node with a <span> in which every matching word is a <button>.
    //  textNode: the DOM node to process; anything that is not a text node is ignored.
    //  Returns nothing; the DOM is only changed when at least one word matched.
    function handleTextNode(textNode) {
        if(textNode.nodeName !== '#text' || !textNode.parentNode) {
            return;
        }
        let parentElement = textNode.parentElement;
        if(parentElement && parentElement.closest('script, style, button')) {
            //Don't touch the contents of <script> or <style>. Also skip text that
            //  is already inside a <button>: this script is injected again on every
            //  click of the action button, and re-processing the buttons created by
            //  an earlier run would nest buttons inside buttons.
            return;
        }
        //With one capture group, split() returns
        //  [text, match, text, match, ..., text]: matches sit at the odd indexes.
        let parts = textNode.textContent.split(myRegExp);
        //Only change the DOM if we actually found a match in the text.
        if(parts.length === 1) {
            return;
        }
        //Build the replacement from DOM nodes instead of assigning innerHTML, so
        //  that page text containing '<' or '&' stays text and is never parsed
        //  as markup.
        let newSpan = document.createElement('span');
        parts.forEach(function(part, index) {
            if(index % 2 === 1) {
                let button = document.createElement('button');
                button.textContent = part;
                newSpan.appendChild(button);
            } else if(part !== '') {
                newSpan.appendChild(document.createTextNode(part));
            }
        });
        textNode.parentNode.replaceChild(newSpan, textNode);
    }

    //This assumes that you want all matching words in the document changed, without
    //  limiting it to only certain sub portions of the document (i.e. not 'not(a)').
    let textNodes = [];
    //Create a NodeIterator to get the text node descendants
    let nodeIter = document.createNodeIterator(document.body, NodeFilter.SHOW_TEXT);
    let currentNode;
    //Collect the text nodes first and process them afterwards, so that the DOM
    //  is not modified while it is being iterated.
    while((currentNode = nodeIter.nextNode()) !== null) {
        textNodes.push(currentNode);
    }
    //Process each text node
    textNodes.forEach(handleTextNode);
})();

myIcon.png:

Icojam-Weathy-24-tornado.png

The code in handleTextNode to make changes to text nodes was modified from code in another answer of mine.

like image 129
Makyen Avatar answered Sep 22 '22 18:09

Makyen