Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

get docx file contents using javascript/jquery

I want to open / read docx file using client side technologies (HTML/JS).

I have found a Javascript library named docx.js but personally cannot seem to locate any documentation for it. (http://blog.innovatejs.com/?p=184)

The goal is to make a browser based search tool for docx files and txt files.

like image 576
Abdul Ali Avatar asked Feb 10 '15 19:02

Abdul Ali


2 Answers

With docxtemplater, you can easily get the full text of a Word document (this works with .docx files only) by using the doc.getFullText() method.

HTML code:

<body>
    <button onclick="gettext()">Get document text</button>
</body>
<script src="https://cdnjs.cloudflare.com/ajax/libs/docxtemplater/3.26.2/docxtemplater.js"></script>
<script src="https://unpkg.com/pizzip@3.0.6/dist/pizzip.js"></script>
<script src="https://unpkg.com/pizzip@3.0.6/dist/pizzip-utils.js"></script>
<script>
    /**
     * Fetches a file by forwarding both arguments unchanged to
     * PizZipUtils.getBinaryContent.
     *
     * @param {string} url - Location of the file to fetch.
     * @param {Function} callback - Handed straight to getBinaryContent; the
     *     caller in this snippet supplies a function taking (error, content).
     */
    function loadFile(url, callback) {
        PizZipUtils.getBinaryContent(url, callback);
    }
    /**
     * Click handler: asks loadFile for the sample .docx at a hard-coded URL,
     * extracts its full text with docxtemplater, then logs the text and shows
     * it in an alert.
     */
    function gettext() {
        const sampleUrl = "https://docxtemplater.com/tag-example.docx";

        // Runs once loadFile reports back with either an error or the content.
        const onLoaded = (error, content) => {
            if (error) {
                throw error;
            }
            const archive = new PizZip(content);
            const fullText = new window.docxtemplater(archive).getFullText();
            console.log(fullText);
            alert("Text is " + fullText);
        };

        loadFile(sampleUrl, onLoaded);
    }
</script>
like image 141
edi9999 Avatar answered Nov 07 '22 17:11

edi9999


I know this is an old post, but docxtemplater has moved on and the accepted answer no longer works. This worked for me (in Node.js, using the adm-zip and cheerio packages):

function loadDocx(filename) {
  // Read document.xml from docx document
  const AdmZip = require("adm-zip");
  const zip = new AdmZip(filename);
  const xml = zip.readAsText("word/document.xml");
  // Load xml DOM
  const cheerio = require('cheerio');
  $ = cheerio.load(xml, {
    normalizeWhitespace: true,
    xmlMode: true
  })
  // Extract text
  let out = new Array()
  $('w\\:t').each((i, el) => {
    out.push($(el).text())
  })
  return out
}
like image 5
Brian Dobby Avatar answered Nov 07 '22 17:11

Brian Dobby