I want to open and read a .docx file using client-side technologies (HTML/JS).
I have found a Javascript library named docx.js but personally cannot seem to locate any documentation for it. (http://blog.innovatejs.com/?p=184)
The goal is to make a browser based search tool for docx files and txt files.
With docxtemplater, you can easily get the full text of a Word document (this works with .docx files only) by using the doc.getFullText() method.
<body>
<button onclick="gettext()">Get document text</button>
</body>
<script src="https://cdnjs.cloudflare.com/ajax/libs/docxtemplater/3.26.2/docxtemplater.js"></script>
<script src="https://unpkg.com/pizzip@3.0.6/dist/pizzip.js"></script>
<script src="https://unpkg.com/pizzip@3.0.6/dist/pizzip-utils.js"></script>
<script>
/**
 * Fetch the resource at `url` as binary content.
 *
 * Simply delegates to `PizZipUtils.getBinaryContent`, forwarding both
 * arguments unchanged; nothing is returned to the caller.
 *
 * @param {string} url - Location of the file to download.
 * @param {Function} callback - Forwarded as-is to PizZipUtils; the caller in
 *   this file supplies an `(error, content)` style function.
 */
function loadFile(url, callback) {
  const fetcher = PizZipUtils;
  fetcher.getBinaryContent(url, callback);
}
/**
 * Button handler: download the sample .docx, extract its plain text, then
 * log it to the console and show it in an alert box.
 *
 * The download goes through `loadFile`; the archive is opened with `PizZip`
 * and handed to `window.docxtemplater`, whose `getFullText()` result is the
 * text that is displayed. A load error is rethrown from the callback.
 */
function gettext() {
  const sampleUrl = "https://docxtemplater.com/tag-example.docx";

  // Runs once loadFile has either failed (error set) or produced the content.
  const onLoaded = (error, content) => {
    if (error) {
      throw error;
    }

    const archive = new PizZip(content);
    const template = new window.docxtemplater(archive);
    const fullText = template.getFullText();

    console.log(fullText);
    alert("Text is " + fullText);
  };

  loadFile(sampleUrl, onLoaded);
}
</script>
I know this is an old post, but docxtemplater has moved on and the accepted answer no longer works. This worked for me:
function loadDocx(filename) {
// Read document.xml from docx document
const AdmZip = require("adm-zip");
const zip = new AdmZip(filename);
const xml = zip.readAsText("word/document.xml");
// Load xml DOM
const cheerio = require('cheerio');
$ = cheerio.load(xml, {
normalizeWhitespace: true,
xmlMode: true
})
// Extract text
let out = new Array()
$('w\\:t').each((i, el) => {
out.push($(el).text())
})
return out
}
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With