Assume I have the following html:
<html>
<head>
</head>
<body>
    <div id="wrapper" >
         <div class="s2">I am going <a title="some title" href="">by flying</a>
           <p>mr tt</p>
         </div> 
    </div>
</body>    
</html>
Any word in the text nodes that is 4 or more characters long (for example, the word 'going') should be replaced with HTML content (not text), i.e. <span>going</span>, in the original HTML, without changing anything else.
If I try to do something like element.html(replacement), the problem is that if, let's say, the current element is <div class="s2">, it will also wipe out the <a title="some title" ...> element inside it.
In this case you must traverse your document as suggested by this answer. Here's a way of doing it using Jsoup APIs:
NodeTraversor and NodeVisitor allow you to traverse the DOM. Node.replaceWith(...) allows for replacing a node in the DOM. Here's the code:
/**
 * Demonstrates wrapping every "long" word found in the text nodes of an HTML document in a
 * {@code <span>} element with jsoup, leaving all other markup (attributes, child elements,
 * whitespace) untouched.
 *
 * <p>A word is a maximal run of non-whitespace characters; it is considered long when it has
 * at least {@link #MIN_WORD_LENGTH} characters.
 */
public class JsoupReplacer {

  /** Minimum number of characters a word must have to be wrapped in a span. */
  private static final int MIN_WORD_LENGTH = 4;

  public static void main(String[] args) {
    so6527876();
  }

  /**
   * Parses a fixed sample document, wraps the long words of every text node under
   * {@code <body>} in {@code <span>} tags and prints the resulting document to standard output.
   */
  public static void so6527876() {
    String html = 
    "<html>" +
    "<head>" +
    "</head>" +
    "<body>" +
    "    <div id=\"wrapper\" >" +
    "         <div class=\"s2\">I am going <a title=\"some title\" href=\"\">by flying</a>" +
    "           <p>mr tt</p>" +
    "         </div> " +
    "    </div>" +
    "</body>    " +
    "</html>";
    Document doc = Jsoup.parse(html);

    // Collect first, replace afterwards: the tree is not modified while it is being traversed.
    final List<TextNode> nodesToChange = new ArrayList<TextNode>();
    NodeTraversor nd  = new NodeTraversor(new NodeVisitor() {
      @Override
      public void tail(Node node, int depth) {
        if (node instanceof TextNode) {
          TextNode textNode = (TextNode) node;
          if (containsLongWord(textNode.getWholeText())) {
            nodesToChange.add(textNode);
          }
        }
      }
      @Override
      public void head(Node node, int depth) {
        // Nothing to do when entering a node; all work happens in tail().
      }
    });
    nd.traverse(doc.body());

    for (TextNode textNode : nodesToChange) {
      Node newNode = buildElementForText(textNode);
      textNode.replaceWith(newNode);
    }

    System.out.println("result: ");
    System.out.println();
    System.out.println(doc);
  }

  /**
   * Builds the node that replaces {@code textNode}: a {@link DataNode} holding the node's text
   * with every long word wrapped in {@code <span>...</span>}.
   *
   * @param textNode the text node whose content is to be converted
   * @return a data node carrying the generated markup and the same base URI as {@code textNode}
   */
  private static Node buildElementForText(TextNode textNode) {
    String newText = wrapLongWords(textNode.getWholeText());
    return new DataNode(newText, textNode.baseUri());
  }

  /**
   * Tells whether {@code text} contains at least one whitespace-delimited word of
   * {@link #MIN_WORD_LENGTH} characters or more.
   */
  private static boolean containsLongWord(String text) {
    int run = 0;
    for (int i = 0; i < text.length(); i++) {
      if (Character.isWhitespace(text.charAt(i))) {
        run = 0;
      } else if (++run >= MIN_WORD_LENGTH) {
        return true;
      }
    }
    return false;
  }

  /**
   * Returns {@code text} as an HTML fragment in which each long word is wrapped in a span.
   *
   * <p>The text is rebuilt in a single left-to-right pass instead of using
   * {@code String.replaceAll}: words are therefore never interpreted as regular expressions,
   * a long word is never matched inside another word, and markup that has already been
   * generated is never rewritten. Whitespace is copied through unchanged.
   */
  private static String wrapLongWords(String text) {
    StringBuilder out = new StringBuilder(text.length() + 16);
    int length = text.length();
    int pos = 0;
    while (pos < length) {
      int start = pos;
      if (Character.isWhitespace(text.charAt(pos))) {
        while (pos < length && Character.isWhitespace(text.charAt(pos))) {
          pos++;
        }
        out.append(text, start, pos);
      } else {
        while (pos < length && !Character.isWhitespace(text.charAt(pos))) {
          pos++;
        }
        boolean isLong = pos - start >= MIN_WORD_LENGTH;
        if (isLong) {
          out.append("<span>");
        }
        appendEscaped(out, text, start, pos);
        if (isLong) {
          out.append("</span>");
        }
      }
    }
    return out.toString();
  }

  /**
   * Appends {@code text[start, end)} to {@code out}, escaping the characters that would
   * otherwise be read as markup. This is needed because the result is stored in a
   * {@link DataNode}, whose content is written out as-is rather than escaped like text.
   */
  private static void appendEscaped(StringBuilder out, String text, int start, int end) {
    for (int i = start; i < end; i++) {
      char c = text.charAt(i);
      switch (c) {
        case '&':
          out.append("&amp;");
          break;
        case '<':
          out.append("&lt;");
          break;
        case '>':
          out.append("&gt;");
          break;
        default:
          out.append(c);
      }
    }
  }
}
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With