/*
 *  regexp.sli
 *
 *  This file is part of NEST.
 *
 *  Copyright (C) 2004 The NEST Initiative
 *
 *  NEST is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  NEST is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with NEST.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

/regexp /SLI ($Revision: 9988 $) provide-component
/regexp /C++ (1.3) require-component


/* BeginDocumentation
Name: regcomp - Create a regular expression 
Synopsis: string integer regcomp -> regex
          string         regcomp -> regex
Description:
regcomp will prepare a regular expression to be used with regexec.
Any allowed flags are found in dictionary regexdict.
Parameters: in:  string: defining the regular expression,
                 integer: flag, see Remarks. If in doubt, choose second
                          call of regcomp which presets integer to REG_EXTENDED.
           out:  the regular expression object

Examples: 1) (.*) regcomp -> <regextype>
          2) regexdict begin 
             (\() REG_EXTENDED regcomp -> <ERROR>
Diagnostics: 
If the string cannot be converted to a regular expression,
an error message is displayed and /InvalidRegexError is raised.

Variants:
The variant "regcomp_" never raises an error, but returns:
 -> regex true
 -> regex integer false
In case of an error, the regex error code is returned as an integer.
This error code can be translated to a string using ":regerror" (see there).

Author: Diesmann & Hehl, R Kupper (added error handling)

FirstVersion: 27.9.99

Remarks: See man regcomp for further details on POSIX regcomp.

SeeAlso: regexec, :regerror
*/ 

/regcomp [/stringtype /integertype]
{
  % Compile a pattern string with the given flags; raise on failure.
  %
  % The low-level regcomp_ never raises an error itself. It reports
  % the outcome on the operand stack instead:
  %
  %   string integer regcomp_ -> regex true            (success)
  %                           -> regex integer false   (failure)
  %
  % In the failure case the integer is an error code that :regerror
  % turns into a readable message.
  regcomp_
  {
    % success: the compiled regex is already the sole result
  }
  {
    :regerror                     % regex errorcode -> message string
    M_ERROR (regcomp) 3 -1 roll   % reorder to: level sender message-string
    message
    /regcomp /InvalidRegexError raiseerror
  } ifelse
} def

/regcomp [/stringtype]
{
  % One-argument form: supply REG_EXTENDED from regexdict as the flag
  % and delegate to the (string, integer) variant.
  regexdict /REG_EXTENDED get
  regcomp
} def


/* BeginDocumentation
Name: :regerror - return message of what went wrong with "regcomp_"
Synopsis: regex integer :regerror -> string
Description: :regerror will decode the integer error code and return the related 
      error description.
Parameters: in:  regex: a regular expression generated by "regcomp_"
                 integer code of what went wrong.
           out:  string of error description.

Examples:  preparation: 
           (\() regcomp_     -> <regextype> 8 False
           pop
           now there's a wrong regex and an errorcode on the stack
           :regerror = -> Unmatched ( or \(
           
Bugs: no known ;-)
Diagnostics: no errors raised - this _is_ a command to check an error!
Author: Diesmann & Hehl
FirstVersion: 27.9.99

Remarks:
See man regerror for further details on POSIX regerror.
Note that the command "regcomp" performs automatic error checking. You should
probably use "regcomp", not "regcomp_".

SeeAlso: regexec, regcomp
*/ 

% :regerror - decode a regcomp_ error code into its message string.
% Registers regerror_ for the signature (regex, integer). regerror_ is not
% defined in this file; presumably it is provided by the C++ regexp
% component required at the top of the file.
/:regerror trie
  [/regextype /integertype] /regerror_ load addtotrie
def

/* BeginDocumentation

Name: regexdict - dictionary with flags and codes for regular expressions
Synopsis: -
Description: This dictionary provides flags and codes signaling 
    properties of or results from regular expression evaluations.
SeeAlso: regexec
*/

/* BeginDocumentation
Name: regexec - compare string and regular expression 
Synopsis: regex string integer integer regexec -> array integer
          regex string 0       integer regexec -> integer

Description: regexec evaluates the given regular expression and looks whether it 
    is contained within a given string. It returns failure/success and the 
    offsets of the position where the regular expression was matched. 
    The same is true for any parenthesized subexpression: Those offsets are
    stored in an array up to the chosen depth.
    Flags and other codes REG_... are found in dictionary regexdict.
Parameters: in: string is the string where the regular expression should be 
                looked for; regex is a regular expression generated by regcomp;
                first integer is the depth of parenthesized subexpression offsets
                that should be returned,
                second integer is a flag, for details see POSIX regexec
                (set to zero if in doubt!)
           out: integer is 0 if it matched, nonzero else (POSIX regexec error code)
                array: an array of [beginoffset,endoffset] arrays indicating
                the matches of the regex and any parenthesized subregexes within
                the string.
 
Examples:  (remember regexdict begin for these examples!)
            1) (simple) REG_EXTENDED regcomp -> <regextype>
               (   simple   ) 0 0 regexec -> 0  %this 0 indicating success
            2)  (simple) REG_EXTENDED regcomp -> <regextype>
               (   simple   ) 1 0 regexec -> [[3 9]] 0
               % this 3,9 are begin and end offsets of matched regex!
            3) ((paranthesize)+.*example) regcomp -> <regextype>
               (This is a paranthesize using example) 2 0 regexec -> [[10 36] [10 22]] 0
 
Bugs: no known ;-)
Diagnostics: no errors raised
Author: Diesmann & Hehl
FirstVersion: 27.9.99
Remarks: See man regexec for further details on POSIX regexec.
SeeAlso: regexec, regcomp
*/ 

% regexec - type-checked entry point for regular expression matching.
% Registers regexec_ for the signature (regex, string, integer, integer).
% regexec_ is not defined in this file; presumably it is provided by the
% C++ regexp component required at the top of the file.
/regexec trie
 [/regextype /stringtype /integertype /integertype] /regexec_ load addtotrie
def

/* BeginDocumentation
Name: regex_find_sf - Check if a regex is included in a stream 
Synopsis: string istream -> boolean
Description: Takes the first argument. Converts to regex
   and calls regexec to find out if this regex matches the
   stream. Reports success/failure in a boolean true/false.
Parameters: in: first argument  : a string which will be converted
                                  to a regex by a regcomp call.
                second argument : an istream where this
                                  regex should be matched.
           out: true/false telling if there is/is no match. 
Examples: See examples of regex_find, exchange second string with a file.
Bugs: - 
Diagnostics: Will raise an /InvalidRegexError if regcomp cannot
    compile the regex. regcomp reports the reason as an
    error message before raising the error.
Author: Hehl
FirstVersion: 1.10.99
Remarks: Does _not_ return any information about the matched
         expression more than matched/not matched; use lower
         level commands regcomp, regexec if in need!
SeeAlso: regexec, regcomp, regex_replace
*/ 

/regex_find_sf
{
  % Synopsis: string istream regex_find_sf -> boolean
  % Compiles the pattern string, then delegates to regex_find_rf.
  exch          % -> istream string
  regcomp       % -> istream regex   (raises /InvalidRegexError on a bad pattern)
  exch          % -> regex istream
  regex_find_rf % -> boolean
} bind def

/* BeginDocumentation
Name: regex_find_rf - Check if a regex is included in a stream 
Synopsis: regex istream -> boolean
Description: Takes the first argument. Calls regexec to find out
             if this regex matches the stream. Reports 
             success/failure in a boolean true/false.
Parameters: in: first argument  : a regex generated by regcomp
                second argument : an istream where this
                                  regex should be matched.
           out: true/false telling if there is a/is no match. 
Examples: See examples of regex_find, exchange second string with a file.
Bugs: - 
Diagnostics: no errors raised
Author: Hehl
FirstVersion: 1.10.99
Remarks: Does _not_ return any information about the matched
         expression more than matched/not matched; use lower
         level commands regcomp, regexec if in need!
SeeAlso: regexec, regcomp, regex_replace
*/ 

/regex_find_rf
{
  % Synopsis: regex istream regex_find_rf -> boolean
  % Reads the stream line by line and stops at the first line that the
  % regex matches. Working variables live in a temporary dictionary.
  << >> begin
    /SourceStream Set
    /LineRegex Set
    /regex_found false def
    {
      SourceStream getline    % -> istream string true  |  istream false
      {
        exch pop              % discard the stream, keep the line
        LineRegex exch regex_find_r
        { /regex_found true def  exit } if
      }
      {
        pop exit              % no further line: discard the stream and stop
      } ifelse
    } loop
    regex_found
  end
} bind def

/* BeginDocumentation
Name: regex_find_s - Check if a regex is included in a string
Synopsis: string string  -> boolean
Description: Takes the first argument. Converts to regex and calls
    regexec to find out if this regex matches the string. Reports
    success/failure in a boolean true/false.
Parameters: in: first argument  :a string which will be converted
                                 to a regex by a regcomp call.
                second argument : a string where this
                                  regex should be matched.
           out: true/false telling if there is/is no match. 
Examples: (hello) (is there a hello hiding) regex_find -> true
          (hello) (is there a HeLlO hiding) regex_find -> false
Bugs: - 
Diagnostics: Will raise an /InvalidRegexError if regcomp cannot 
             compile the regex. regcomp reports the reason as an
             error message before raising the error.
Author: Hehl
FirstVersion: 1.10.99
Remarks: Compiles regex and calls regex_find_r.
         Does _not_ return any information about the matched
         expression more than matched/not matched; use lower
         level commands regcomp, regexec if in need!
SeeAlso: regexec, regcomp, regex_replace
*/ 

/regex_find_s
{
  % Synopsis: string string regex_find_s -> boolean
  % Compiles the first string as a regex, then delegates to regex_find_r.
  exch          % -> subject pattern
  regcomp       % -> subject regex   (raises /InvalidRegexError on a bad pattern)
  exch          % -> regex subject
  regex_find_r  % -> boolean
} bind def

/* BeginDocumentation
Name: regex_find_r - Check if a regex is included in a string
Synopsis: regex  string  -> boolean
Description: Takes the first argument and calls regexec to find out if
       this regex matches the string. Reports success/failure in a
       boolean true/false.
Parameters: in: first argument  : a regex generated by regcomp
                second argument : a string where this
                                  regex should be matched.
           out: true/false telling if there is/is no match. 
Examples: (hello) regexdict /REG_ICASE get regcomp
          (is there a HeLlO hiding)         regex_find -> true 
Bugs: - 
Diagnostics: no errors raised
Author: Hehl
FirstVersion: 1.10.99
Remarks: Does _not_ return any information about the matched
         expression more than matched/not matched; use lower
         level commands regcomp, regexec if in need!
SeeAlso: regexec, regcomp, regex_replace
*/ 

/regex_find_r
{
  % Synopsis: regex string regex_find_r -> boolean
  % Calls regexec with depth 0 and flags 0, so only the integer status is
  % returned; a status of 0 means the regex matched.
  0 0 regexec
  0 eq
} bind def

/* BeginDocumentation
Name: regex_find - Check if a regex is included in a string or stream 
Synopsis: string istream -> boolean
          string string  -> boolean
          regex  istream -> boolean
          regex  string  -> boolean
Description: Takes the first argument. Converts to regex, if
   necessary, and calls regexec to find out if this regex matches the
   string/stream. Reports success/failure in a boolean true/false.
Parameters: in: first argument  : a regex generated by regcomp
                               OR a string which will be converted
                                 to a regex by a regcomp call.
                second argument : an istream or a string where this
                                  regex should be matched.
           out: true/false telling if there is/is no match. 
Examples: (hello) (is there a hello hiding) regex_find -> true
          (hello) (is there a HeLlO hiding) regex_find -> false
          
          (hello) regexdict /REG_ICASE get regcomp
          (is there a HeLlO hiding)         regex_find -> true 
Bugs: - 
Diagnostics: If called with a string as first argument, will raise an
    /InvalidRegexError if regcomp cannot compile the regex. regcomp
    reports the reason as an error message before raising the error.
Author: Hehl
FirstVersion: 1.10.99
Remarks: Does _not_ return any information about the matched
         expression more than matched/not matched; use lower
         level commands regcomp, regexec if in need!
SeeAlso: regexec, regcomp, regex_replace
*/ 

% regex_find - type-dispatched front end for the regex_find_* helpers.
% Each signature is routed to the helper whose suffix encodes the argument
% kinds: s = pattern given as string, r = compiled regex, f = input stream.
/regex_find trie
  [/stringtype /istreamtype] /regex_find_sf load addtotrie
  [/regextype /istreamtype] /regex_find_rf load addtotrie
  [/stringtype /stringtype] /regex_find_s load addtotrie
  [/regextype /stringtype] /regex_find_r load addtotrie
def

/regex_replace_sf
{
  % Synopsis: string string istream ostream regex_replace_sf -> -
  % Compiles the pattern string (bottom of the four arguments), then
  % delegates to regex_replace_rf.
  4 -1 roll         % -> replacement istream ostream pattern
  regcomp           % -> replacement istream ostream regex
  4 1 roll          % -> regex replacement istream ostream
  regex_replace_rf
} bind def

/regex_replace_rf
{
  % Synopsis: regex string istream ostream regex_replace_rf -> -
  % Copies the input stream to the output stream line by line, running
  % regex_replace on every line. Each written line is terminated with endl.
  << >> begin
    /TargetStream Set
    /SourceStream Set
    /Replacement Set
    /LineRegex Set
    {
      SourceStream getline    % -> istream string true  |  istream false
      {
        exch pop              % discard the stream, keep the line
        /CurrentLine Set
        LineRegex Replacement CurrentLine regex_replace
        TargetStream exch <- endl ;
      }
      {
        pop exit              % no further line: discard the stream and stop
      } ifelse
    } loop
  end
} bind def

/regex_replace_s
{  3 -1 roll
  regcomp 
  3 1 roll regex_replace_r
} bind def

/regex_replace_r
{
  % Synopsis: regex string string regex_replace_r -> string
  %           (regex, replacement, subject)
  %
  % Replaces every match of the regex in the subject by the replacement
  % string and returns the result. The subject is consumed from the left:
  % each round matches against the not-yet-processed remainder
  % (SourceString) and appends finished text to DestString.
  %
  % Two safeguards compared to a naive "match, cut, repeat" loop:
  %  * Empty matches (e.g. from x*) consume no input. To guarantee
  %    progress, one character is copied verbatim after such a match;
  %    at the end of the input the loop stops.
  %  * After the first match the remainder no longer starts at the
  %    beginning of the subject, so REG_NOTBOL is passed to regexec to
  %    keep ^ from matching at the start of each remainder.
  %    NOTE(review): for a regex compiled with REG_NEWLINE whose match
  %    ends directly after a newline, ^ will then not match at the very
  %    start of the remainder.
  %
  % All substring operations are guarded so that the start index is
  % always inside the string.
  << >> begin
    /SourceString Set
    /ReplaceString Set
    /Regex Set
    /DestString () def
    /ExecFlags 0 def
    {
      Regex SourceString 1 ExecFlags regexec 0 eq not
      {pop exit}                          % no (further) match: drop offsets
      {
        0 get /offsets Set                % [begin end] of the whole match
        /MatchBegin   offsets 0 get def
        /MatchEnd     offsets 1 get def
        /SourceLength SourceString length def

        % text in front of the match is copied unchanged
        MatchBegin 0 gt
        {
          DestString SourceString 0 MatchBegin getinterval_s join_s
          /DestString Set
        } if

        % the match itself is replaced
        DestString ReplaceString join_s /DestString Set

        % empty match: force progress by one character
        MatchBegin MatchEnd eq
        {
          MatchEnd SourceLength lt
          {
            DestString SourceString MatchEnd 1 getinterval_s join_s
            /DestString Set
            /MatchEnd MatchEnd 1 add def
          }
          {
            % empty match at the very end: everything has been copied
            /SourceString () def
            exit
          } ifelse
        } if

        % keep only the part behind the consumed text
        MatchEnd SourceLength lt
        { SourceString MatchEnd SourceLength MatchEnd sub getinterval_s }
        { () }
        ifelse
        /SourceString Set

        % later rounds do not start at the beginning of the subject
        /ExecFlags regexdict /REG_NOTBOL get def
      } ifelse
    } loop
    DestString SourceString join_s
  end
} bind def

/* BeginDocumentation
Name: regex_replace - replace all occurrences of a regex
Synopsis: string string istreamtype ostreamtype -> -
          regex  string istreamtype ostreamtype -> -
          string string string -> string
          regex  string string -> string
Description: regex_replace tries to match the regex in
  istream/string.  Any occurrence of regex is replaced by 
  the given string.
Parameters: in: first argument  : a regex generated by regcomp
                               OR a string which will be converted
                                  to a regex by a regcomp call.
                second argument : the string with which regex should
                                  be replaced.
                third argument  : an istream or a string where this
                                  regex should be matched.
                fourth argument (if any): an ostream where the changes
                                  are saved.
           out: either a string or, if called with streams, the
                                ostream will now contain replaced stream.

Examples: 
Bugs: 
Diagnostics: if called with a string as first argument, will raise an
   /InvalidRegexError if regcomp fails to compile a regex.
Author: Hehl
FirstVersion: 4.10.99
Remarks: 
SeeAlso: regexec, regcomp, regex_find
*/ 

% regex_replace - type-dispatched front end for the regex_replace_* helpers.
% Each signature is routed to the helper whose suffix encodes the argument
% kinds: s = pattern given as string, r = compiled regex, f = stream variant
% (input stream plus output stream).
/regex_replace trie
  [/stringtype /stringtype /istreamtype /ostreamtype] 
    /regex_replace_sf load addtotrie
  [/regextype /stringtype /istreamtype/ostreamtype] 
    /regex_replace_rf load addtotrie
  [/stringtype /stringtype /stringtype] /regex_replace_s load addtotrie
  [/regextype /stringtype /stringtype] /regex_replace_r load addtotrie
def



/* BeginDocumentation
Name: grep - extract lines matching a regular expression pattern

Synopsis: 
               (filename) (expression) grep -> [(line1) (line2) ...]
[(string1) (string2) ...] (expression) grep -> [(match1) (match2) ...]

Description:
"grep" is similar to the Unix command "grep".
It performs a regular expression match, either on the lines of a
file, or on the strings in an array of strings. A valid regular
expression must be passed as a string as second argument.

"grep" returns an array of matching lines or strings. It returns the
full lines or strings that matched. If no match was found, the
empty array is returned.

Parameters:
(filename)      - name of the file to search for the pattern
[(string1) ...] - array of strings to search for the pattern
[(match1)  ...] - result: array of matching strings

Options: 
By default, "grep" uses extended regular expressions and performs
case sensitive matching on the single lines or strings. This
behaviour can be customized via the "SetOptions" command.

"grep" has the following options that can be set via the
"SetOptions" command:

 /flags_regcomp (integer) - flags passed to the "regcomp" command.
                            Must be the bitwise OR of any of
                            regexdict::REG_{EXTENDED,ICASE,NOSUB,NEWLINE}.
                            Default: REG_EXTENDED

 /flags_regexec (integer) - flags passed to the "regexec" command.
                            Must be the bitwise OR of any of
                            regexdict::REG_{NOTBOL,NOTEOL}.
                            Default: 0

Please read the Unix manpage for "regcomp" for explanation of these
flags.

To perform case insensitive matches, add regexdict::REG_ICASE to
/flags_regcomp.
                           
Examples:
[(hello) (world)] (^hell)       grep -> [(hello)]
[(hello) (world)] (not in here) grep -> []
[(hello) (world)] ()            grep -> [(hello) (world)]
statusdict /prgdocdir get (LICENSE) joinpath (http) grep -> [(or visit http://www.nest-initiative.org)]

Diagnostics:
Raises /InvalidRegexError if the expression string is not a valid
regular expression.

Author: R Kupper

FirstVersion: 23-jul-2008

Availability: standard SLI

References:
Unix manpage for "regcomp"

SeeAlso: regex_find, regcomp, regexec, regexdict
*/

% Default options of "grep". The grep variants read them with GetOption.
%   /flags_regcomp - flag value handed to regcomp (default: REG_EXTENDED
%                    taken from regexdict)
%   /flags_regexec - flag value handed to regexec (default: 0)
/grep << /flags_regcomp regexdict/REG_EXTENDED ::
         /flags_regexec 0                         >> Options

/grep[/stringtype /filename
      /stringtype /regexstr]
{
  % File variant: collect every line of the named file that matches the
  % expression. The expression is compiled once with the /flags_regcomp
  % option; every line is tested with the /flags_regexec option.
  /compiled_regex
    regexstr  /grep /flags_regcomp GetOption  regcomp
  def
  /matching_lines [] def

  filename (r) file
  {
    getline     % -> istream string true  |  istream false
    {
      /current_line Set
      % depth 0: regexec returns only the status, 0 means "matched"
      compiled_regex current_line 0  /grep /flags_regexec GetOption  regexec
      0 eq
      { /matching_lines  matching_lines current_line append  def } if
    }
    { exit } ifelse
  } loop
  pop % the stream is still on the stack after the loop
  matching_lines
} SLIFunctionWrapper

/grep[/arraytype /a
      /stringtype /regexstr]
{
  % Array variant: collect every string of the array that matches the
  % expression. The expression is compiled once with the /flags_regcomp
  % option; every string is tested with the /flags_regexec option.
  /compiled_regex
    regexstr  /grep /flags_regcomp GetOption  regcomp
  def
  /matching_strings [] def

  a
  {
    /candidate Set
    % depth 0: regexec returns only the status, 0 means "matched"
    compiled_regex candidate 0  /grep /flags_regexec GetOption  regexec
    0 eq
    { /matching_strings  matching_strings candidate append  def } if
  } forall
  matching_strings
} SLIFunctionWrapper