Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Javascript: Regular Expression to parse a formula

I have been working on a function to parse a formula for some time, but haven't been able to make it work properly. It seems to not always work - it filters some parts of the text but not all.

parseFormula(e) {
    var formula = e.value, value = 0.00, tValue = 0.00, tFormula = '', dObj = {};
    if(formula !== undefined && formula !== "") {
        dObj._formulaIn = formula;
        var f = formula.split(/\s/g);    

        for(var i = 0; i < f.length; i++) {
            tFormula = f[i];
            // Replacing PI
            tFormula = tFormula.replace(/(pi)/gi,Math.PI);
            dObj._1_pi_done = tFormula;

            // Replacing Squareroot with placeholder
            tFormula = tFormula.replace(/(sqrt)/gi,"__sqrt__");
            tFormula = tFormula.replace(/(sqr)/gi,"__sqrt__");
            tFormula = tFormula.replace(/(kvrt)/gi,"__sqrt__");
            tFormula = tFormula.replace(/(kvr)/gi,"__sqrt__");
            dObj._2_sqrt_done = tFormula;

            // Removing units that may cause trouble
            tFormula = tFormula.replace(/(m2||m3||t2||t3||c2||c3)/gi,"");
            dObj._3_units_done = tFormula;

            // Removing text
            tFormula = tFormula.replace(/\D+[^\*\/\+\-]+[^\,\.]/gi,"");
            dObj._4_text_done = tFormula;

            // Removing language specific decimals
            if(Language.defaultLang === "no_NB") {
                tFormula = tFormula.replace(/(\.)/gi,"");
                tFormula = tFormula.replace(/(\,)/gi,".");
            } else {
                tFormula = tFormula.replace(/(\,)/gi,"");           
            }
            dObj._5_lang_done = tFormula;

            // Re-applying Squareroot
            tFormula = tFormula.replace(/(__sqrt__)/g,"Math.sqrt");
            dObj._6_sqrt_done = tFormula;

            if(tFormula === "") {
                f.splice(i,1);
            } else {
                f[i] = tFormula;
            }
            dObj._7_splice_done = tFormula;
            console.log(dObj);
        }

        formula = "";
        for(var j = 0; j < f.length; j++) {
            formula += f[j];   
        }

        try {
            value = eval(formula);
        } 
        catch(err) {}
        return value === 0 ? 0 : value.toFixed(4);
    } else {
        return 0;
    }
}

I am not sure about any of the RegEx used in this function, hence why I am asking for help. For example, I am not sure if /(pi)/ is the right way to get the exact text "pi" and replace it with 3.141.

(I am using eval at the moment, but it's merely used for development)

Any help appreciated.

Edit:

The formula I am trying to parse is user input: the user would type something like 2/0.6 pcs of foo * pi bar + sqrt(4) foobar, and I want to strip all the non-math text and calculate the rest. That is, the above formula would be interpreted as (2/0.6) * 3.141 + Math.sqrt(4) => 12.47

Edit 2:

e is an ExtJS object passed through by a field in a grid; it contains the following variables:

  • colIdx (int)
  • column (Ext.grid.column.Column)
  • field (string)
  • grid (Ext.grid.Panel)
  • originalValue (string)
  • record (Ext.data.Model)
  • row (css selector)
  • rowIdx (int)
  • store (Ext.data.Store)
  • value (string)
  • view (Ext.grid.View)

I am currently unable to get the JSFiddle to work properly.

like image 683
GauteR Avatar asked Mar 23 '23 15:03

GauteR


1 Answers

It's probably easier to tokenize the expression you want to parse. When tokenized it's way easier to read that stream of tokens and build your own expressions.

I've put up a demo on jsFiddle which can parse your given formula.

In the demo I used this Tokenizer class and tokens to build a TokenStream from the formula.

/**
 * Regular-expression based tokenizer.
 *
 * Token patterns are registered with addToken(); they are combined into one
 * alternation in which every token occupies exactly one capturing group, so
 * the index of the group that matched identifies the token.
 *
 * Limitation: a token expression must not contain capturing groups of its
 * own (use non-capturing `(?:...)` groups), otherwise the group indexes no
 * longer line up with `tokenNames`.
 */
function Tokenizer() {
    // Maps token name -> regular expression source string.
    this.tokens = {};
    // The regular expression which matches a token per group.
    this.regex = null;
    // Holds the names of the tokens. Index matches group. See buildExpression()
    this.tokenNames = [];
}

Tokenizer.prototype = {
    /**
     * Registers (or replaces) a token.
     * @param {string} name - Token name reported in the output.
     * @param {string} expression - Regular expression source for the token.
     */
    addToken: function (name, expression) {
        this.tokens[name] = expression;
    },

    /**
     * Tokenizes `data` and wraps the result in a TokenStream.
     * @param {string} data - Text to tokenize.
     * @returns {TokenStream} Stream over the tokens found.
     */
    tokenize: function (data) {
        this.buildExpression(data);
        var tokens = this.findTokens(data);
        return new TokenStream(tokens);
    },

    /**
     * (Re)builds `regex` and `tokenNames` from the registered tokens.
     * Earlier registered tokens take precedence in the alternation.
     * @param {string} [data] - Unused; kept for interface compatibility.
     */
    buildExpression: function (data) {
        var tokenRegex = [];

        // Start from scratch so repeated calls do not keep appending
        // duplicate names.
        this.tokenNames = [];

        for (var tokenName in this.tokens) {
            if (!Object.prototype.hasOwnProperty.call(this.tokens, tokenName)) {
                continue;
            }
            this.tokenNames.push(tokenName);
            tokenRegex.push('(' + this.tokens[tokenName] + ')');
        }

        this.regex = new RegExp(tokenRegex.join('|'), 'g');
    },

    /**
     * Scans `data` and returns every token found, in order. Text that no
     * token matches is skipped.
     * @param {string} data - Text to scan.
     * @returns {Array<{name: string, data: string}>} Tokens found.
     */
    findTokens: function (data) {
        var tokens = [];
        var match;

        if (this.regex === null) {
            this.buildExpression(data);
        }
        this.regex.lastIndex = 0;

        while ((match = this.regex.exec(data)) !== null) {
            // A zero-length match does not advance lastIndex; step over it
            // manually or exec() would return the same match forever.
            if (match[0] === '') {
                this.regex.lastIndex++;
                continue;
            }

            for (var group = 1; group < match.length; group++) {
                if (!match[group]) continue;

                tokens.push({
                    name: this.tokenNames[group - 1],
                    data: match[group]
                });
            }
        }

        return tokens;
    }
};


/**
 * Cursor over an array of tokens.
 *
 * Declared as a function so that it does not rely on creating an implicit
 * global (which throws in strict mode and ES modules).
 *
 * @param {Array} tokens - Tokens to iterate over; stored by reference.
 */
function TokenStream(tokens) {
    // Index of the token that next() will return.
    this.cursor = 0;
    this.tokens = tokens;
}

TokenStream.prototype = {
    /**
     * Returns the token at the cursor and advances the cursor by one.
     * @returns {*} The token, or undefined once the end has been passed.
     */
    next: function () {
        return this.tokens[this.cursor++];
    },

    /**
     * Returns a token relative to the cursor without moving the cursor.
     * @param {number} [direction=0] - Offset from the cursor; may be negative.
     * @returns {*} The token at that position, or undefined if out of range.
     */
    peek: function (direction) {
        if (direction === undefined) {
            direction = 0;
        }

        return this.tokens[this.cursor + direction];
    }
};

Defined tokens

// Token table as [name, pattern] pairs, registered in this exact order.
// Order matters: earlier entries win when several patterns could match at
// the same position ('float' before 'int', 'constant' before 'id').
[
    ['whitespace', '\\s+'],
    ['l_paren', '\\('],
    ['r_paren', '\\)'],
    ['float', '[0-9]+\\.[0-9]+'],
    ['int', '[0-9]+'],
    ['div', '\\/'],
    ['mul', '\\*'],
    ['add', '\\+'],
    ['constant', 'pi|PI'],
    ['id', '[a-zA-Z_][a-zA-Z0-9_]*']
].forEach(function (definition) {
    tokenizer.addToken(definition[0], definition[1]);
});

With the above tokens defined the tokenizer can recognize everything in your formula. When the formula

2/0.6 pcs of foo * pi bar + sqrt(4) foobar

is tokenized the result would be a token stream similar to

int(2), div(/), float(0.6), whitespace( ), id(pcs), whitespace( ), id(of), whitespace( ), id(foo), whitespace( ), mul(*), whitespace( ), constant(pi), whitespace( ), id(bar), whitespace( ), add(+), whitespace( ), id(sqrt), l_paren((), int(4), r_paren()), whitespace( ), id(foobar)
like image 104
Bart Avatar answered Mar 31 '23 16:03

Bart