Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Automatically paraphrasing sentences in JavaScript

In JavaScript, is it possible to automatically replace a regular expression in a sentence with a randomly generated match of that regular expression? I'm trying to use this approach to automatically paraphrase a sentence using a list of regular expressions, like so:

// Desired call: the sentence to paraphrase, followed by an array of regex source strings.
// Each string is a parenthesized group whose "|"-separated alternatives are interchangeable.
replaceWithRandomFromRegexes("You aren't a crackpot! You're a prodigy!", ["(genius|prodigy)", "(freak|loony|crackpot|crank|crazy)", "(You're |You are |Thou art )", "(aren't|ain't|are not)"])

Here, each match of each regular expression in the input string should be replaced with a randomly generated match of the regular expression.

/**
 * Placeholder for the function being asked about; the body is intentionally empty,
 * so calling it as written returns undefined.
 *
 * @param {string} theString - Sentence to paraphrase.
 * @param {string[]} theRegexes - Regex source strings such as "(genius|prodigy)".
 */
function replaceWithRandomFromRegexes(theString, theRegexes){
    // TODO: for each regex in theRegexes, replace the first match of the regex in the string
    // with a randomly generated match of that regex.
}
like image 474
Anderson Green Avatar asked May 18 '13 17:05

Anderson Green


2 Answers

This seems to be much simpler than you think. How about:

/**
 * Replaces every occurrence of a known word in `subject` with a randomly chosen
 * member of the same synonym group (the choice may be the original word itself).
 *
 * @param {string} subject - Text to paraphrase.
 * @param {string[][]} groups - Synonym groups; every word in a group may be swapped
 *     for any other word of that group. If a word appears in several groups, the
 *     last group listed wins.
 * @param {boolean} [wordsOnly] - When truthy, the alternation is wrapped in `\b...\b`
 *     so that only matches delimited by word boundaries are replaced.
 * @returns {string} The paraphrased text; `subject` unchanged when there is nothing to match.
 */
function randomReplace(subject, groups, wordsOnly) {
    var meta = /([.?*+^$[\]\\(){}|-])/g;
    // Prototype-less dictionary: lookups can never resolve to Object.prototype members.
    var all = Object.create(null);
    groups.forEach(function(group) {
        group.forEach(function(word) {
            // An empty word would make the pattern match at every position of the
            // subject, so it is never used as a search key (it stays a valid replacement).
            if (word !== "")
                all[word] = group;
        });
    });

    var words = Object.keys(all);
    // Without this guard the pattern would be empty, match everywhere, and the
    // lookup of all[""] below would throw.
    if (words.length === 0)
        return subject;

    var r = words.
        // Longest first, so "You are " is preferred over a shorter word sharing its prefix.
        sort(function(x, y) { return y.length - x.length; }).
        // Escape regex metacharacters so words are matched literally.
        map(function(x) { return x.replace(meta, "\\$&"); }).
        join("|");
    if (wordsOnly)
        r = "\\b(" + r + ")\\b";

    return subject.replace(new RegExp(r, "g"), function($0) {
        var group = all[$0];
        return group[Math.floor(Math.random() * group.length)];
    });
}

Example:

// Example: paraphrase a sentence using four synonym groups.
// `var` keeps `s` from becoming an implicit global (a ReferenceError in strict mode).
var s = randomReplace(
    "You aren't a crackpot! You're a prodigy!",
    [
        ["genius", "prodigy"],
        ["freak", "loony", "crackpot", "crank", "crazy"],
        ["You're ", "You are ", "Thou art "],
        ["aren't", "ain't", "are not"]
    ]
);
// The output is random; one possible result: You ain't a crank! Thou art a genius!
console.log(s);

The expansion function, as discussed in the comments, could be like this:

/**
 * Expands a pattern made of literal text, parenthesized groups and "|" alternation
 * into the list of every string the pattern can produce.
 *
 * The pattern is first rewritten into "~N" tokens, where N indexes `entries`:
 * each run of literal text becomes a token, then innermost groups are collapsed
 * into tokens repeatedly until no parentheses remain. The token string is finally
 * expanded recursively.
 *
 * @param {string} s - Pattern, e.g. "(B|R)ob(by|)".
 * @returns {string[]} All expansions, in left-to-right alternative order.
 */
function expand(s) {
    // entries[N] backs token "~N": either literal text or the token body of a group.
    var entries = [];

    // Every concatenation x + y with x taken from a and y from b, a-major order.
    function product(a, b) {
        var p = [];
        a.forEach(function(x) {
            b.forEach(function(y) { p.push(x + y); });
        });
        return p;
    }

    // Literal text is returned verbatim and never re-parsed, so text that happens
    // to look like a token (e.g. "~0") cannot be mistaken for one.
    function resolve(index) {
        var entry = entries[index];
        return entry.literal ? [entry.text] : reduce(entry.text);
    }

    // Expands a token string: alternation first, then token-by-token concatenation.
    function reduce(s) {
        var m;
        if (s.indexOf("|") >= 0)
            return [].concat.apply([], s.split("|").map(reduce));
        if ((m = s.match(/~(\d+)(.*)/)))
            return product(resolve(m[1]), reduce(m[2]));
        return [s];
    }

    function token(entry) {
        entries.push(entry);
        return "~" + (entries.length - 1);
    }

    function addLiteral($0) { return token({ literal: true, text: $0 }); }

    function addGroup($0, $1) { return token({ literal: false, text: $1 }); }

    // After this step s consists only of "(", ")", "|" and "~N" tokens.
    s = s.replace(/[^()|]+/g, addLiteral);
    // Collapse innermost groups until none are left.
    for (var r = /\(([^()]*)\)/g; s.match(r);)
        s = s.replace(r, addGroup);

    return reduce(s);
}

Example:

// `var` keeps `z` from becoming an implicit global (a ReferenceError in strict mode).
var z = "(He|She|It|(B|R)ob(by|)) (real|tru|sure)ly is";
// Logs every sentence the pattern can produce.
console.log(expand(z));

Result:

[
 "He really is",
 "He truly is",
 "He surely is",
 "She really is",
 "She truly is",
 "She surely is",
 "It really is",
 "It truly is",
 "It surely is",
 "Bobby really is",
 "Bobby truly is",
 "Bobby surely is",
 "Bob really is",
 "Bob truly is",
 "Bob surely is",
 "Robby really is",
 "Robby truly is",
 "Robby surely is",
 "Rob really is",
 "Rob truly is",
 "Rob surely is"
]
like image 172
georg Avatar answered Oct 23 '22 13:10

georg


Yes, this is certainly possible. I created a function called replaceWithRandomFromRegexes to accomplish this task.

http://jsfiddle.net/KZyZW/2/

// Demo: append ten independently randomized paraphrases of the same sentence
// to the page, one per line.
var i = 0;
while (i < 10) {
    document.body.innerHTML += replaceWithRandomFromRegexes(
        "You aren't a crackpot! You're a prodigy!",
        [
            "(genius|prodigy)",
            "(freak|loony|crackpot|crank|crazy)",
            "(You're |You are |Thou art )",
            "(aren't|ain't|are not)"
        ]
    ) + "<br/>";
    i++;
}

/**
 * Paraphrases a sentence by applying each pattern in turn: the output of one
 * pattern's replacement pass is the input of the next.
 *
 * @param {string} theString - Sentence to paraphrase.
 * @param {string[]} theRegexes - Regex source strings, applied in array order.
 * @returns {string} The sentence after all patterns have been applied.
 */
function replaceWithRandomFromRegexes(theString, theRegexes) {
    var paraphrased = theString;
    var patternIndex = 0;
    while (patternIndex < theRegexes.length) {
        paraphrased = globalReplaceWithRandomFromRegex(paraphrased, theRegexes[patternIndex]);
        patternIndex++;
    }
    return paraphrased;
}

/**
 * Replaces every case-insensitive match of a pattern in `theString` with a
 * freshly generated random expansion of that same pattern.
 *
 * A replacer callback is used instead of a placeholder string, so text already
 * present in the input is never mistaken for a placeholder, and "$" sequences
 * in the generated text are inserted literally rather than being interpreted
 * as replacement patterns.
 *
 * @param {string} theString - Text to rewrite.
 * @param {string} theRegexString - Regex source, e.g. "(genius|prodigy)"; compiled
 *     with the "gi" flags and also used as the template for the random expansion.
 * @returns {string} The rewritten text.
 */
function globalReplaceWithRandomFromRegex(theString, theRegexString) {
    var theRegex = new RegExp(theRegexString, "gi");
    // The callback runs once per match, left to right, so each match gets its
    // own independent random choice.
    return theString.replace(theRegex, function () {
        return getRandomStringFromNestedParentheses(theRegexString);
    });
}

/**
 * Resolves a pattern to plain text by repeatedly substituting its first
 * parenthesized group until no opening parenthesis is left.
 *
 * @param {string} theString - Pattern such as "(a|b(c|d))".
 * @returns {string} The pattern with every group substituted.
 */
function getRandomStringFromNestedParentheses(theString) {
    var expanded = theString;
    while (expanded.indexOf("(") >= 0) {
        expanded = replaceInFirstParentheses(expanded);
    }
    return expanded;
}

/**
 * Substitutes the first parenthesized group of `theString` with one randomly
 * chosen alternative from that group.
 *
 * The result is assembled by slicing around the group rather than with
 * String.prototype.replace, so "$" sequences in the chosen alternative are
 * inserted literally and the group is not searched for a second time.
 *
 * @param {string} theString - Pattern containing at least one "(".
 * @returns {string} The pattern with its first group replaced.
 */
function replaceInFirstParentheses(theString) {
    var openIndex = theString.indexOf("(");
    var randomString = getRandomStringInsideParentheses(theString, openIndex);
    // NOTE(review): getCorrespondingParenthesesIndex returns false on malformed
    // input; the result here is not meaningful in that case.
    var closeIndex = getCorrespondingParenthesesIndex(theString, openIndex);
    return theString.substring(0, openIndex) + randomString + theString.substring(closeIndex + 1);
}

/**
 * Picks one alternative, uniformly at random, from the group that opens at
 * `parenthesesIndex`.
 *
 * @param {string} string - Pattern containing the group.
 * @param {number} parenthesesIndex - Index of the group's opening parenthesis.
 * @returns {string} One of the group's top-level alternatives.
 */
function getRandomStringInsideParentheses(string, parenthesesIndex) {
    var alternatives = getStringsInsideParentheses(string, parenthesesIndex);
    var pick = Math.floor(Math.random() * alternatives.length);
    return alternatives[pick];
}

/**
 * Splits the contents of the group opening at `parenthesesIndex` into its
 * top-level alternatives. A "|" inside a nested group is not a separator.
 *
 * The split is done in a single pass that tracks nesting depth, so a nested
 * "||" (two empty alternatives in an inner group) is left intact.
 *
 * @param {string} string - Pattern containing the group.
 * @param {number} parenthesesIndex - Index of the group's opening parenthesis.
 * @returns {string[]} The alternatives, in order; nested groups are kept verbatim.
 */
function getStringsInsideParentheses(string, parenthesesIndex) {
    var inner = getStringFromParentheses(string, parenthesesIndex);
    var alternatives = [];
    var depth = 0;
    var segmentStart = 0;
    for (var i = 0; i < inner.length; i++) {
        var ch = inner.charAt(i);
        if (ch === "(") {
            depth++;
        } else if (ch === ")") {
            depth--;
        } else if (ch === "|" && depth === 0) {
            // Only a pipe outside every nested group separates alternatives.
            alternatives.push(inner.substring(segmentStart, i));
            segmentStart = i + 1;
        }
    }
    // The final alternative has no trailing pipe (it may be empty, as in "(by|)").
    alternatives.push(inner.substring(segmentStart));
    return alternatives;
}

/**
 * Returns the text between an opening parenthesis and its matching closing
 * parenthesis, excluding both.
 *
 * @param {string} theString - Pattern containing the group.
 * @param {number} parenthesesIndex - Index of the opening parenthesis.
 * @returns {string} The group's contents.
 */
function getStringFromParentheses(theString, parenthesesIndex) {
    var contentStart = parenthesesIndex + 1;
    var contentEnd = getCorrespondingParenthesesIndex(theString, parenthesesIndex);
    return theString.substring(contentStart, contentEnd);
}

/**
 * Finds the closing parenthesis that pairs with the opening parenthesis at
 * `openingParenthesesIndex`.
 *
 * @param {string} theString - Text to scan.
 * @param {number} openingParenthesesIndex - Index that must hold "(".
 * @returns {number|boolean} Index of the matching ")"; false (after reporting
 *     through writeMessage) when the parentheses are unbalanced, the index does
 *     not point at "(", or no matching ")" follows.
 */
function getCorrespondingParenthesesIndex(theString, openingParenthesesIndex) {
    // Validate first; the balance check takes precedence over the index check.
    var failure = null;
    if (!parenthesesAreMatching(theString)) {
        failure = "Error: The parentheses do not match!";
    } else if (theString.charAt(openingParenthesesIndex) != "(") {
        failure = "Error: The index must be an opening parentheses!";
    }
    if (failure !== null) {
        writeMessage(failure);
        return false;
    }

    // Walk right from the opening parenthesis; the match is where depth returns to zero.
    var depth = 0;
    var position = openingParenthesesIndex;
    while (position < theString.length) {
        var current = theString.charAt(position);
        if (current == "(") {
            depth++;
        } else if (current == ")") {
            depth--;
        }
        if (depth == 0) {
            return position;
        }
        position++;
    }

    writeMessage("Error: The parentheses do not match!");
    return false;
}

/**
 * Reports whether the parentheses in `theString` are balanced: every ")" closes
 * an earlier "(", and no "(" is left open at the end.
 *
 * Always returns a real boolean. A string without parentheses, including the
 * empty string, is balanced; a ")" that appears before its "(" is not.
 *
 * @param {string} theString - Text to check.
 * @returns {boolean} true when balanced, false otherwise.
 */
function parenthesesAreMatching(theString) {
    var depth = 0;
    for (var i = 0; i < theString.length; i++) {
        var ch = theString.charAt(i);
        if (ch === "(") {
            depth++;
        } else if (ch === ")") {
            depth--;
            // A closer with no pending opener can never be repaired later.
            if (depth < 0) {
                return false;
            }
        }
    }
    return depth === 0;
}

/**
 * Computes the parenthesis nesting depth just before position `index`: the
 * number of "(" minus the number of ")" among the characters preceding it.
 *
 * @param {string} theString - Text to scan.
 * @param {number} index - Exclusive end of the scanned prefix.
 * @returns {number} The depth; negative when more ")" than "(" precede `index`.
 */
function getParenthesesNum(theString, index) {
    var depth = 0;
    var position = 0;
    while (position < index) {
        switch (theString.charAt(position)) {
            case "(":
                depth++;
                break;
            case ")":
                depth--;
                break;
        }
        position++;
    }
    return depth;
}

The original sentence was "You aren't a crackpot! You're a prodigy!" Here are some automatically paraphrased versions of this sentence, as produced by this script:

You are not a freak! Thou art a genius!
You aren't a crackpot! You're a prodigy!
You ain't a crackpot! You are a genius!
You ain't a freak! You're a prodigy!
You are not a crackpot! You are a prodigy!
You are not a loony! You're a prodigy!
You are not a loony! You are a genius!
You are not a loony! You are a prodigy!
You ain't a crackpot! Thou art a prodigy!
You are not a loony! Thou art a prodigy!
like image 5
Anderson Green Avatar answered Oct 23 '22 13:10

Anderson Green