I am looking for a way to use Regex Replace functions on IBM iseries.
As far as I know, I can use the C/C++ library (regex.h) (source). With this, I can only match a regex, not replace (using regcomp() to compile the regex and regexec() to match it).
Does anyone know a way to do it?
It's true that the C/C++ POSIX regular expression library doesn't have a built-in regexp replace function, but you can accomplish the same thing using positional information from regexec() and the RPGLE %replace() built-in function. (I'm assuming you're going to use RPGLE, but you could use another language.)
For example, if you wanted to mask all but the last four digits of a phone number you could do this:
  ** Demo: mask the area code and exchange of every phone number in a
  **   string.  The C regex library can only locate matches, so the
  **   offsets reported by regexec() are fed to %replace() to do the
  **   actual substitution.
  /include qcpysrc,regex_h
 d regex_phone_number...
 d                 ds                  inz likeds(regex_t)
  * Match offsets from regexec(): element 1 is the whole match,
  *   element n+1 is capture group n.
 d dsrm            ds                  inz likeds(regmatch_t) dim(20)
 d data            s             52a   inz varying
 d pattern         s            256a   inz varying
 d rc              s             10i 0 inz(0)
  /FREE
   *inlr = *on ;
   data = 'My phone #''s are: (444) 555 - 6666 and 777.888.9999' ;
   dsply data ;
   // Groups: 1 = area code, 2 = exchange, 3 = last four digits (kept).
   pattern = '\(?([0-9]{3})[ .)]*([0-9]{3})[ .-]*([0-9]{4})' ;
   rc = regcomp(regex_phone_number :pattern :REG_EXTENDED) ;
   if rc = 0 ;
     // Each pass masks one phone number.  The masked text no longer
     // matches [0-9]{3}, so the next regexec() finds the next number
     // and the loop ends with REG_NOMATCH.
     dow '1' ;
       // nmatch is the capacity of dsrm, not re_nsub: slot 1 holds the
       // whole match, so re_nsub slots would leave the last group unset.
       rc = regexec(regex_phone_number :data
              :%elem(dsrm) :%addr(dsrm) :0) ;
       if rc <> 0 ;
         leave ;
       endif ;
       // Replace right-to-left so the offsets of the earlier group stay
       // valid even if the mask length differs from the matched length.
       data = %replace('***': data :dsrm(3).rm_so+1
                :dsrm(3).rm_eo - dsrm(3).rm_so) ;
       data = %replace('***': data :dsrm(2).rm_so+1
                :dsrm(2).rm_eo - dsrm(2).rm_so) ;
     enddo ;
     // Only a successfully compiled expression owns memory to release.
     regfree(regex_phone_number) ;
   else ;
     // Report the compile failure instead of silently showing the
     // unchanged string as if it were the result.
     dsply ('regcomp() failed, rc=' + %char(rc)) ;
   endif ;
   dsply data ;
  /END-FREE
Here's what the copy book regex_h looks like:
  ** Header file for calling the "Regular Expression" functions
  **   provided by the ILE C Runtime Library from an RPG IV
  **   program.                 Scott Klement, 2001-05-04
  **                       Converted to qualified DS 2003-11-29
  **                       Modified by Jarrett Gilliam 2014-11-05
  **
  ** This copy book is for using the C regular expression library, regex.h, in RPG.
  ** You can go to http://www.regular-expressions.info/ to learn more about
  ** regular expressions. This regex flavor is POSIX ERE. You can go to
  ** http://www-01.ibm.com/support/knowledgecenter/ssw_ibm_i_71/rtref/regexec.htm
  ** to learn more about how the C functions work.
 d/if defined(REGEX_H)
 d/eof
 d/endif
 d/define REGEX_H
  **------------------------------------------------------------
  * cflags for regcomp()
  *   Add together the flags wanted and pass the sum as the cflags
  *   parameter of regcomp().  Meanings below are as documented for
  *   POSIX regcomp(); the numeric values are those of this platform.
  **------------------------------------------------------------
  * Basic regular expression syntax (no flag set)
 d REG_BASIC       c                   CONST(0)
  * Extended regular expression (ERE) syntax
 d REG_EXTENDED    c                   CONST(1)
  * Ignore case when matching
 d REG_ICASE       c                   CONST(2)
  * Treat newline as a line boundary for the ^ and $ anchors
 d REG_NEWLINE     c                   CONST(4)
  * Report only match / no match; pmatch is not filled in
 d REG_NOSUB       c                   CONST(8)
  **------------------------------------------------------------
  * eflags for regexec()
  *   Add together the flags wanted and pass the sum as the eflags
  *   parameter of regexec() (0 = none).  Meanings are as documented
  *   for POSIX regexec().
  **------------------------------------------------------------
  * Start of string is not a beginning of line (^ will not match there)
 d REG_NOTBOL      c                   CONST(256)
  * End of string is not an end of line ($ will not match there)
 d REG_NOTEOL      c                   CONST(512)
  **------------------------------------------------------------
  *  errors returned
  *    Non-zero return values of regcomp() / regexec().  Pass the
  *    value to regerror() to obtain a message text.
  **------------------------------------------------------------
  * RE pattern not found
 d REG_NOMATCH     c                   CONST(1)
  * Invalid Regular Expression
 d REG_BADPAT      c                   CONST(2)
  * Invalid collating element
 d REG_ECOLLATE    c                   CONST(3)
  * Invalid character class
 d REG_ECTYPE      c                   CONST(4)
  * Last character is \
 d REG_EESCAPE     c                   CONST(5)
  * Invalid number in \digit
 d REG_ESUBREG     c                   CONST(6)
  * [ ] imbalance
 d REG_EBRACK      c                   CONST(7)
  * \( \) or () imbalance
 d REG_EPAREN      c                   CONST(8)
  * \{ \} or { } imbalance
 d REG_EBRACE      c                   CONST(9)
  * Invalid \{ \} range exp
 d REG_BADBR       c                   CONST(10)
  * Invalid range exp endpoint
 d REG_ERANGE      c                   CONST(11)
  * Out of memory
 d REG_ESPACE      c                   CONST(12)
  * ?*+ not preceded by valid RE
 d REG_BADRPT      c                   CONST(13)
  * invalid multibyte character
 d REG_ECHAR       c                   CONST(14)
  * ^ anchor and not BOL
 d REG_EBOL        c                   CONST(15)
  * $ anchor and not EOL
 d REG_EEOL        c                   CONST(16)
  * Unknown error in regcomp() call
 d REG_ECOMP       c                   CONST(17)
  * Unknown error in regexec() call
 d REG_EEXEC       c                   CONST(18)
  **------------------------------------------------------------
  *  Structure of a compiled regular expression:
  *
  *  Layout template for the regex_t that regcomp() fills in and
  *  regfree() releases.  It is declared BASED(template) and the
  *  basing pointer is not set anywhere in this copy book, so define
  *  real variables with LIKEDS(regex_t) rather than using it directly.
  *
  *  REG_SUBEXP_MAX is the dimension of the re_lsub / re_esub arrays.
  *
  *  NOTE(review): the subfields are presumably internal to the C
  *  runtime and must mirror the C regex_t for the target release --
  *  confirm against the ILE C <regex.h> before changing anything.
  **------------------------------------------------------------
 d REG_SUBEXP_MAX  c                   20
 d regex_t         ds                  qualified align based(template)
 d   re_nsub                     10i 0
 d   re_comp                       *
 d   re_cflags                   10i 0
 d   re_erroff                   10i 0
 d   re_len                      10i 0
 d   re_ucoll                    10i 0 dim(2)
 d   re_lsub                       *   DIM(REG_SUBEXP_MAX)
 d   re_esub                       *   DIM(REG_SUBEXP_MAX)
 d   re_map                     256a
 d   re_shift                     5i 0
 d   re_dbcs                      5i 0
  **------------------------------------------------------------
  *  structure used to report matches found by regexec()
  *
  *  Layout template (BASED, basing pointer never set here): define
  *  the real array with LIKEDS(regmatch_t) DIM(n) and pass its
  *  address as the pmatch parameter of regexec().
  *
  *  NOTE(review): as in POSIX regmatch_t, rm_so is presumably the
  *  0-based offset of the first matched byte and rm_eo the offset
  *  just past the last one, so an RPG start position is rm_so + 1
  *  and the length is rm_eo - rm_so -- confirm.
  *  NOTE(review): rm_ss / rm_es look like the shift state at the
  *  start / end of the match -- confirm against <regex.h>.
  **------------------------------------------------------------
 d regmatch_t      ds                  qualified align based(template)
 d   rm_so                       10i 0
 d   rm_ss                        5i 0
 d   rm_eo                       10i 0
 d   rm_es                        5i 0
  **------------------------------------------------------------
  * regcomp() -- Compile a Regular Expression ("RE")
  *
  *     int regcomp(regex_t *preg, const char *pattern,
  *              int cflags);
  *
  * where:
  *       preg (output) = the compiled regular expression.
  *                       Passed by reference; declare the variable
  *                       with LIKEDS(regex_t).
  *    pattern (input)  = the RE to be compiled.  OPTIONS(*STRING)
  *                       lets the caller pass an RPG character
  *                       expression, which is handed to C as a
  *                       null-terminated string.
  *     cflags (input)  = the sum of the cflag constants
  *                       (listed above) for this RE.
  *
  * Returns 0 = success, otherwise an error number (one of the
  *   REG_xxx error constants above).  After a successful call,
  *   release the expression with regfree() when finished.
  **------------------------------------------------------------
 d regcomp         pr            10i 0 extproc('regcomp')
 d   preg                              like(regex_t)
 d   pattern                       *   value options(*string)
 d   cflags                      10i 0 value
  **------------------------------------------------------------
  * regexec() -- Execute a compiled Regular Expression ("RE")
  *
  *     int regexec(const regex_t *preg, const char *string,
  *              size_t nmatch, regmatch_t *pmatch, int eflags);
  *
  * where:
  *       preg (input)  = the compiled regular expression
  *                       (the output of regcomp())
  *     string (input)  = string to run the RE upon
  *                       (OPTIONS(*STRING): passed to C as a
  *                       null-terminated string)
  *     nmatch (input)  = the number of elements available in the
  *                       pmatch array, i.e. the maximum number of
  *                       regmatch_t entries regexec() may fill.
  *     pmatch (output) = address of an array of regmatch_t DS's
  *                       showing what matches were found
  *                       (pass %addr() of the array).
  *     eflags (input)  = the sum of the flags (constants
  *                       provided above) modifying the RE
  *
  * Returns 0 = success (a match was found), otherwise an error
  *   number; REG_NOMATCH means the pattern was not found.
  **------------------------------------------------------------
 d regexec         pr            10i 0 extproc('regexec')
 d   preg                              like(regex_t) const
 d   string                        *   value options(*string)
 d   nmatch                      10u 0 value
 d   pmatch                        *   value
 d   eflags                      10i 0 value
  **------------------------------------------------------------
  * regerror() -- return error information from regcomp/regexec
  *
  *   size_t regerror(int errcode, const regex_t *preg,
  *              char *errbuf, size_t errbuf_size);
  *
  *  where:
  *    errcode (input)  = the error code to return info on
  *                      (obtained as the return value from
  *                      either regcomp() or regexec())
  *       preg (input)  = the (compiled) RE to return the
  *                      error for.
  *     errbuf (output) = address of the buffer that receives the
  *                      human-readable error message
  *                      (pass %addr() of a character field).
  * errbuf_size (input) = size of errbuf (max length of msg
  *                      that will be returned)
  *
  * returns:  length of buffer needed to get entire error msg
  *
  * NOTE(review): errbuf_size is declared 10i 0 here although the
  *   C prototype uses size_t (nmatch on regexec() is 10u 0) --
  *   confirm whether this should be 10u 0 for consistency.
  **------------------------------------------------------------
 d regerror        pr            10u 0 extproc('regerror')
 d   errcode                     10i 0 value
 d   preg                              like(regex_t) const
 d   errbuf                        *   value
 d   errbuf_size                 10i 0 value
  **------------------------------------------------------------
  * regfree() -- free the memory held by a compiled Regular
  *              Expression
  *
  *    void regfree(regex_t *preg);
  *
  *   where:
  *        preg (input) = regular expression to free mem for
  *                       (the structure filled in by regcomp(),
  *                       passed by reference).
  *
  *   NOTE:  regcomp() will always allocate extra memory
  *        to be pointed to by the various pointers in
  *        the regex_t structure.  if you don't call this,
  *        that memory will never be returned to the system!
  **------------------------------------------------------------
 d regfree         pr                  extproc('regfree')
 d   preg                              like(regex_t)
Here's the output:
DSPLY  My phone #'s are: (444) 555 - 6666 and 777.888.9999
DSPLY  My phone #'s are: (***) *** - 6666 and ***.***.9999
The code could be improved by extracting the replace logic into a procedure of its own, creating a custom regexp replace function based on the POSIX library, but that's not absolutely necessary.
The ILE C/C++ runtime library does not have a regex replace function available.
Java, however, has excellent support for regular expressions and integrates easily with RPGLE.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With