diff options
-rw-r--r-- | Makefile | 14 | ||||
-rw-r--r-- | README.md | 5 | ||||
-rw-r--r-- | Regex.dcl | 38 | ||||
-rw-r--r-- | Regex.icl | 50 | ||||
-rw-r--r-- | cleanregex.c | 40 | ||||
-rw-r--r-- | cleanregex.h | 12 | ||||
-rw-r--r-- | regex.c | 38 | ||||
-rw-r--r-- | regex.h | 10 | ||||
-rw-r--r-- | test.icl | 6 |
9 files changed, 101 insertions, 112 deletions
@@ -1,13 +1,18 @@ CFLAGS=-O0 -Wall -std=c99 CLM=clm -CLMFLAGS=-I /opt/clean/lib/StdLib/ -l -lpcre2-8 +CLMFLAGS=-I $$CLEAN_HOME/lib/StdEnv \ + -I $$CLEAN_HOME/lib/clean-platform/OS-Independent \ + -I $$CLEAN_HOME/lib/Generics \ + -I $$CLEAN_HOME/lib/Dynamics \ + -dynamics -all: test Clean\ System\ Files/regex.o +all: test Clean\ System\ Files/cleanregex.o -Clean\ System\ Files/regex.o: regex.c regex.h +Clean\ System\ Files/cleanregex.o: cleanregex.c cleanregex.h + mkdir -p Clean\ System\ Files $(CC) $(CFLAGS) -c $< -o "$@" -test: test.icl Regex.dcl Regex.icl Clean\ System\ Files/regex.o +test: test.icl Regex.dcl Regex.icl Clean\ System\ Files/cleanregex.o $(CLM) $(CLMFLAGS) $@ -o $@ run_test: test @@ -17,4 +22,3 @@ clean: rm -frv Clean\ System\ Files test .PHONY: all clean run_test - @@ -1,8 +1,9 @@ # CleanRegex -A PCRE-compatible regular expression library for Clean +A POSIX regular expression library for Clean ## Status -Currently, this is a wrapper for the C library. Ideally, at some point this would be pure Clean for efficiency and provability reasons. +Currently, this is a wrapper for the C library. Ideally, at some point this +would be pure Clean for efficiency and provability reasons. This is a work in progress. 
@@ -1,6 +1,6 @@ definition module Regex -from StdMaybe import ::Maybe +from Data.Maybe import ::Maybe from StdOverloaded import class toString, class fromString, class zero :: Regex @@ -8,44 +8,18 @@ from StdOverloaded import class toString, class fromString, class zero :: Flag :== Int :: Flags :== Int +instance zero Flags + class toRegex a :: !Flags !a -> Maybe Regex class fromRegex a :: !Regex -> a instance toRegex String - instance fromRegex String -instance zero Flags - instance toString Regex instance fromString (Maybe Regex) -// Nothing if no match; otherwise Just i where i is the index of the match -match :: !Regex !String -> Maybe Int - -// From pcre2.h -Regex_ALLOW_EMPTY_CLASS :== 0x00000001 /* C */ -Regex_ALT_BSUX :== 0x00000002 /* C */ -Regex_AUTO_CALLOUT :== 0x00000004 /* C */ -Regex_CASELESS :== 0x00000008 /* C */ -Regex_DOLLAR_ENDONLY :== 0x00000010 /* J M D */ -Regex_DOTALL :== 0x00000020 /* C */ -Regex_DUPNAMES :== 0x00000040 /* C */ -Regex_EXTENDED :== 0x00000080 /* C */ -Regex_FIRSTLINE :== 0x00000100 /* J M D */ -Regex_MATCH_UNSET_BACKREF :== 0x00000200 /* C J M */ -Regex_MULTILINE :== 0x00000400 /* C */ -Regex_NEVER_UCP :== 0x00000800 /* C */ -Regex_NEVER_UTF :== 0x00001000 /* C */ -Regex_NO_AUTO_CAPTURE :== 0x00002000 /* C */ -Regex_NO_AUTO_POSSESS :== 0x00004000 /* C */ -Regex_NO_DOTSTAR_ANCHOR :== 0x00008000 /* C */ -Regex_NO_START_OPTIMIZE :== 0x00010000 /* J M D */ -Regex_UCP :== 0x00020000 /* C J M D */ -Regex_UNGREEDY :== 0x00040000 /* C */ -Regex_UTF :== 0x00080000 /* C J M D */ -Regex_NEVER_BACKSLASH_C :== 0x00100000 /* C */ -Regex_ALT_CIRCUMFLEX :== 0x00200000 /* J M D */ -Regex_ALT_VERBNAMES :== 0x00400000 /* C */ -Regex_USE_OFFSET_LIMIT :== 0x00800000 /* J M D */ +freeRegex :: !Regex -> String +// Nothing on error; otherwise True iff match +match :: !Regex !String -> Maybe Bool @@ -1,39 +1,45 @@ implementation module Regex import StdEnv -import StdMaybe -import code from "regex.o" +import Data.Maybe +import code from "cleanregex.o" + 
+instance zero Flags where zero = 0 :: Regex = { ptr :: Int // pointer to a pcre2_code object , str :: String // string representation } instance toRegex String -where toRegex flags s - # r = c_compile s flags - | r == 0 = Nothing - | otherwise = Just {ptr=r, str=s} - where - c_compile :: !String !Int -> Int - c_compile reg flags = code { - ccall cleanregex_pcre2_compile "SI:p" - } +where + toRegex flags s + # (ok,r) = c_compile s flags + | ok <> 0 = Nothing + | r == 0 = Nothing + | otherwise = Just {ptr=r, str=s} + where + c_compile :: !String !Int -> (!Int,!Int) + c_compile reg flags = code { + ccall cleanrgx_compile "SI:VIp" + } instance fromRegex String where fromRegex {str} = str instance toString Regex where toString r = fromRegex r instance fromString (Maybe Regex) where fromString s = toRegex zero s -instance zero Flags where zero = 0 - -match :: !Regex !String -> Maybe Int -match {ptr} s -# res = match` ptr s -| res < 0 = Nothing -| otherwise = Just res +freeRegex :: !Regex -> String +freeRegex {ptr,str} = free ptr str where - match` :: !Int !String -> Int - match` _ _ = code { - ccall cleanregex_match "pS:I" - } + free :: !Int !String -> String + free ptr pass = code { + ccall cleanrgx_free "p:V:S" + } +match :: !Regex !String -> Maybe Bool +match {ptr} s = case match` ptr s of 0 = Just False; 1 = Just True; _ = Nothing +where + match` :: !Int !String -> Int + match` ptr s = code { + ccall cleanrgx_exec "pS:I" + } diff --git a/cleanregex.c b/cleanregex.c new file mode 100644 index 0000000..20f3154 --- /dev/null +++ b/cleanregex.c @@ -0,0 +1,40 @@ +#include "cleanregex.h" +#include <regex.h> +#include <stdio.h> +#include <string.h> + +char* clstocs(CleanString* cs) { /* returns a NUL-terminated calloc'ed copy of the Clean string; the caller releases it with free */ + char* s = calloc(CleanStringLength(cs) + 1, 1); + size_t i; /* size_t, not uint8_t: an 8-bit index wraps at 256, so the loop never terminated for strings of 256 characters or more */ + for (i = 0; i < CleanStringLength(cs); i++) + s[i] = CleanStringCharacters(cs)[i]; + s[i] = 0; + return s; +} + +void cleanrgx_compile( + CleanString* cs, int64_t flags, + int64_t* re_code, int64_t* re_regex) { + char* s = 
(char*) clstocs(cs); + regex_t* regex = malloc(sizeof(regex_t)); /* room for the whole struct; the previous pointer-sized allocation was overrun when regcomp filled it in */ + *re_code = regcomp(regex, s, flags); + *re_regex = (int64_t) regex; + free(s); +} + +int64_t cleanrgx_exec(int64_t* rgx, CleanString* cs) { + char* s = (char*) clstocs(cs); + int64_t result = regexec((regex_t*) rgx, s, 0, NULL, 0); + free(s); + if (!result) { + return 1; + } else if (result == REG_NOMATCH) { + return 0; + } else { + return -1; + } +} + +void cleanrgx_free(regex_t* rgx) { + regfree(rgx); free(rgx); /* regfree releases only what regcomp allocated; the regex_t block malloc'ed in cleanrgx_compile is released here */ +} diff --git a/cleanregex.h b/cleanregex.h new file mode 100644 index 0000000..397eab8 --- /dev/null +++ b/cleanregex.h @@ -0,0 +1,12 @@ +#include <inttypes.h> +#include <stdlib.h> +#include <regex.h> +#include "Clean.h" + +void cleanrgx_compile( + CleanString*, int64_t flags, + int64_t* re_code, int64_t* re_regex); + +int64_t cleanrgx_exec(int64_t* rgx, CleanString* cs); + +void cleanrgx_free(regex_t* rgx); diff --git a/regex.c b/regex.c deleted file mode 100644 index 8d46a31..0000000 --- a/regex.c +++ /dev/null @@ -1,38 +0,0 @@ -#include "regex.h" -#include <stdio.h> -#include <string.h> - -char* clstocs(CleanString* cs) { - char* s = calloc(CleanStringLength(cs) + 1, 1); - uint8_t i; - for (i = 0; i < CleanStringLength(cs); i++) - s[i] = CleanStringCharacters(cs)[i]; - s[i] = 0; - return s; -} - -pcre2_code* cleanregex_pcre2_compile(CleanString* cs, int64_t flags) { - uint8_t* s = (uint8_t*) clstocs(cs); - int error; PCRE2_SIZE offset; - pcre2_code* code = pcre2_compile(s, PCRE2_ZERO_TERMINATED, flags, - &error, &offset, NULL); - if (code) - return code; - else - return NULL; -} - -int64_t cleanregex_match(pcre2_code* re, CleanString* sub) { - pcre2_match_data *data = pcre2_match_data_create_from_pattern(re, NULL); - uint8_t* csub = (uint8_t*) clstocs(sub); - unsigned long len = CleanStringLength(sub); - int match = pcre2_match(re, csub, len, 0, 0, data, NULL); - if (match < 0) { - pcre2_match_data_free(data); - return match; - } - PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(data); - 
pcre2_match_data_free(data); - return ovector[0]; -} - diff --git a/regex.h b/regex.h deleted file mode 100644 index af8e0a2..0000000 --- a/regex.h +++ /dev/null @@ -1,10 +0,0 @@ -#define PCRE2_CODE_UNIT_WIDTH 8 - -#include <inttypes.h> -#include <pcre2.h> -#include <stdlib.h> -#include "Clean.h" - -pcre2_code* cleanregex_pcre2_compile(CleanString*, int64_t flags); -int64_t cleanregex_match(pcre2_code* regex, CleanString* subject); - @@ -2,9 +2,9 @@ module test import StdEnv import Regex -import StdMaybe +import Data.Maybe -Start = (match rgx "123a123", toString rgx) +Start = map (match rgx) ["hello", "abc"] where - (Just rgx) = toRegex Regex_CASELESS "[a-z]\\d+" + (Just rgx) = toRegex 0 "^a[[:alnum:]]" |