aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile14
-rw-r--r--README.md5
-rw-r--r--Regex.dcl38
-rw-r--r--Regex.icl50
-rw-r--r--cleanregex.c40
-rw-r--r--cleanregex.h12
-rw-r--r--regex.c38
-rw-r--r--regex.h10
-rw-r--r--test.icl6
9 files changed, 101 insertions, 112 deletions
diff --git a/Makefile b/Makefile
index 4babffe..3ae9509 100644
--- a/Makefile
+++ b/Makefile
@@ -1,13 +1,18 @@
CFLAGS=-O0 -Wall -std=c99
CLM=clm
-CLMFLAGS=-I /opt/clean/lib/StdLib/ -l -lpcre2-8
+CLMFLAGS=-I $$CLEAN_HOME/lib/StdEnv \
+ -I $$CLEAN_HOME/lib/clean-platform/OS-Independent \
+ -I $$CLEAN_HOME/lib/Generics \
+ -I $$CLEAN_HOME/lib/Dynamics \
+ -dynamics
-all: test Clean\ System\ Files/regex.o
+all: test Clean\ System\ Files/cleanregex.o
-Clean\ System\ Files/regex.o: regex.c regex.h
+Clean\ System\ Files/cleanregex.o: cleanregex.c cleanregex.h
+ mkdir -p Clean\ System\ Files
$(CC) $(CFLAGS) -c $< -o "$@"
-test: test.icl Regex.dcl Regex.icl Clean\ System\ Files/regex.o
+test: test.icl Regex.dcl Regex.icl Clean\ System\ Files/cleanregex.o
$(CLM) $(CLMFLAGS) $@ -o $@
run_test: test
@@ -17,4 +22,3 @@ clean:
rm -frv Clean\ System\ Files test
.PHONY: all clean run_test
-
diff --git a/README.md b/README.md
index 9692ff7..148ec86 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,9 @@
# CleanRegex
-A PCRE-compatible regular expression library for Clean
+A POSIX regular expression library for Clean
## Status
-Currently, this is a wrapper for the C library. Ideally, at some point this would be pure Clean for efficiency and provability reasons.
+Currently, this is a wrapper for the C library. Ideally, at some point this
+would be pure Clean for efficiency and provability reasons.
This is a work in progress.
diff --git a/Regex.dcl b/Regex.dcl
index 0765320..f13fc68 100644
--- a/Regex.dcl
+++ b/Regex.dcl
@@ -1,6 +1,6 @@
definition module Regex
-from StdMaybe import ::Maybe
+from Data.Maybe import ::Maybe
from StdOverloaded import class toString, class fromString, class zero
:: Regex
@@ -8,44 +8,18 @@ from StdOverloaded import class toString, class fromString, class zero
:: Flag :== Int
:: Flags :== Int
+instance zero Flags
+
class toRegex a :: !Flags !a -> Maybe Regex
class fromRegex a :: !Regex -> a
instance toRegex String
-
instance fromRegex String
-instance zero Flags
-
instance toString Regex
instance fromString (Maybe Regex)
-// Nothing if no match; otherwise Just i where i is the index of the match
-match :: !Regex !String -> Maybe Int
-
-// From pcre2.h
-Regex_ALLOW_EMPTY_CLASS :== 0x00000001 /* C */
-Regex_ALT_BSUX :== 0x00000002 /* C */
-Regex_AUTO_CALLOUT :== 0x00000004 /* C */
-Regex_CASELESS :== 0x00000008 /* C */
-Regex_DOLLAR_ENDONLY :== 0x00000010 /* J M D */
-Regex_DOTALL :== 0x00000020 /* C */
-Regex_DUPNAMES :== 0x00000040 /* C */
-Regex_EXTENDED :== 0x00000080 /* C */
-Regex_FIRSTLINE :== 0x00000100 /* J M D */
-Regex_MATCH_UNSET_BACKREF :== 0x00000200 /* C J M */
-Regex_MULTILINE :== 0x00000400 /* C */
-Regex_NEVER_UCP :== 0x00000800 /* C */
-Regex_NEVER_UTF :== 0x00001000 /* C */
-Regex_NO_AUTO_CAPTURE :== 0x00002000 /* C */
-Regex_NO_AUTO_POSSESS :== 0x00004000 /* C */
-Regex_NO_DOTSTAR_ANCHOR :== 0x00008000 /* C */
-Regex_NO_START_OPTIMIZE :== 0x00010000 /* J M D */
-Regex_UCP :== 0x00020000 /* C J M D */
-Regex_UNGREEDY :== 0x00040000 /* C */
-Regex_UTF :== 0x00080000 /* C J M D */
-Regex_NEVER_BACKSLASH_C :== 0x00100000 /* C */
-Regex_ALT_CIRCUMFLEX :== 0x00200000 /* J M D */
-Regex_ALT_VERBNAMES :== 0x00400000 /* C */
-Regex_USE_OFFSET_LIMIT :== 0x00800000 /* J M D */
+freeRegex :: !Regex -> String
+// Nothing on error; otherwise True iff match
+match :: !Regex !String -> Maybe Bool
diff --git a/Regex.icl b/Regex.icl
index 87ab703..e85c630 100644
--- a/Regex.icl
+++ b/Regex.icl
@@ -1,39 +1,45 @@
implementation module Regex
import StdEnv
-import StdMaybe
-import code from "regex.o"
+import Data.Maybe
+import code from "cleanregex.o"
+
+instance zero Flags where zero = 0
:: Regex = { ptr :: Int // pointer to a pcre2_code object
, str :: String // string representation
}
instance toRegex String
-where toRegex flags s
- # r = c_compile s flags
- | r == 0 = Nothing
- | otherwise = Just {ptr=r, str=s}
- where
- c_compile :: !String !Int -> Int
- c_compile reg flags = code {
- ccall cleanregex_pcre2_compile "SI:p"
- }
+where
+ toRegex flags s // compile pattern s with the given flags; Nothing when compilation fails
+ # (ok,r) = c_compile s flags // ok: status code, r: pointer to the compiled regex
+ | ok <> 0 = Nothing // non-zero status: cleanrgx_compile reported an error
+ | r == 0 = Nothing // null pointer: nothing usable came back
+ | otherwise = Just {ptr=r, str=s}
+ where
+ c_compile :: !String !Int -> (!Int,!Int) // (status, pointer) as written by the C wrapper
+ c_compile reg flags = code {
+ ccall cleanrgx_compile "SI:VIp"
+ }
instance fromRegex String where fromRegex {str} = str
instance toString Regex where toString r = fromRegex r
instance fromString (Maybe Regex) where fromString s = toRegex zero s
-instance zero Flags where zero = 0
-
-match :: !Regex !String -> Maybe Int
-match {ptr} s
-# res = match` ptr s
-| res < 0 = Nothing
-| otherwise = Just res
+freeRegex :: !Regex -> String // hands the regex pointer to cleanrgx_free; presumably yields the pattern string unchanged - TODO confirm
+freeRegex {ptr,str} = free ptr str
where
- match` :: !Int !String -> Int
- match` _ _ = code {
- ccall cleanregex_match "pS:I"
- }
+ free :: !Int !String -> String // foreign call into cleanrgx_free
+ free ptr pass = code {
+ ccall cleanrgx_free "p:V:S"
+ }
+match :: !Regex !String -> Maybe Bool // Just True on a 1 result, Just False on 0, Nothing on anything else
+match {ptr} s = case match` ptr s of 0 = Just False; 1 = Just True; _ = Nothing
+where
+ match` :: !Int !String -> Int // foreign call into cleanrgx_exec
+ match` ptr s = code {
+ ccall cleanrgx_exec "pS:I"
+ }
diff --git a/cleanregex.c b/cleanregex.c
new file mode 100644
index 0000000..20f3154
--- /dev/null
+++ b/cleanregex.c
@@ -0,0 +1,40 @@
+#include "cleanregex.h"
+#include <regex.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Copy cs into a freshly allocated NUL-terminated C string (caller frees); NULL if allocation fails. */
+char* clstocs(CleanString* cs) {
+    size_t len = CleanStringLength(cs);  /* full-width length: the old uint8_t index wrapped at 256 and never finished longer strings */
+    char* s = calloc(len + 1, 1);        /* zero-filled, so s[len] is already the terminator */
+    if (s != NULL)
+        memcpy(s, CleanStringCharacters(cs), len);
+    return s;
+}
+
+/* Compile cs as a POSIX regex: *re_code receives the regcomp status, *re_regex the regex_t pointer (0 on failure). */
+void cleanrgx_compile(CleanString* cs, int64_t flags, int64_t* re_code, int64_t* re_regex) {
+    char* s = (char*) clstocs(cs);
+    regex_t* regex = malloc(sizeof *regex);  /* was sizeof(regex_t*): a pointer-sized buffer that regcomp overran */
+    *re_code = (s && regex) ? regcomp(regex, s, (int) flags) : REG_ESPACE;  /* report allocation failure as out-of-space */
+    if (*re_code != 0) { free(regex); regex = NULL; }  /* the Clean side drops the pointer on error, so release it here */
+    *re_regex = (int64_t) regex;
+    free(s);
+}
+
+/*
+ * Run the compiled regex behind rgx against cs.
+ * Returns 1 on a match, 0 on REG_NOMATCH, and -1 for any other regexec status.
+ */
+int64_t cleanrgx_exec(int64_t* rgx, CleanString* cs) {
+    char* subject = (char*) clstocs(cs);
+    int64_t status = regexec((regex_t*) rgx, subject, 0, NULL, 0);
+    free(subject);
+    if (status == 0)
+        return 1;
+    return status == REG_NOMATCH ? 0 : -1;
+}
+
+void cleanrgx_free(regex_t* rgx) {  /* release the compiled pattern and the struct malloc'd by cleanrgx_compile */
+    if (rgx != NULL) { regfree(rgx); free(rgx); }  /* regfree alone leaves the regex_t allocation itself leaked */
+}
diff --git a/cleanregex.h b/cleanregex.h
new file mode 100644
index 0000000..397eab8
--- /dev/null
+++ b/cleanregex.h
@@ -0,0 +1,12 @@
+#include <inttypes.h>
+#include <stdlib.h>
+#include <regex.h>
+#include "Clean.h"
+
+void cleanrgx_compile(
+ CleanString*, int64_t flags,
+ int64_t* re_code, int64_t* re_regex);
+
+int64_t cleanrgx_exec(int64_t* rgx, CleanString* cs);
+
+void cleanrgx_free(regex_t* rgx);
diff --git a/regex.c b/regex.c
deleted file mode 100644
index 8d46a31..0000000
--- a/regex.c
+++ /dev/null
@@ -1,38 +0,0 @@
-#include "regex.h"
-#include <stdio.h>
-#include <string.h>
-
-char* clstocs(CleanString* cs) {
- char* s = calloc(CleanStringLength(cs) + 1, 1);
- uint8_t i;
- for (i = 0; i < CleanStringLength(cs); i++)
- s[i] = CleanStringCharacters(cs)[i];
- s[i] = 0;
- return s;
-}
-
-pcre2_code* cleanregex_pcre2_compile(CleanString* cs, int64_t flags) {
- uint8_t* s = (uint8_t*) clstocs(cs);
- int error; PCRE2_SIZE offset;
- pcre2_code* code = pcre2_compile(s, PCRE2_ZERO_TERMINATED, flags,
- &error, &offset, NULL);
- if (code)
- return code;
- else
- return NULL;
-}
-
-int64_t cleanregex_match(pcre2_code* re, CleanString* sub) {
- pcre2_match_data *data = pcre2_match_data_create_from_pattern(re, NULL);
- uint8_t* csub = (uint8_t*) clstocs(sub);
- unsigned long len = CleanStringLength(sub);
- int match = pcre2_match(re, csub, len, 0, 0, data, NULL);
- if (match < 0) {
- pcre2_match_data_free(data);
- return match;
- }
- PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(data);
- pcre2_match_data_free(data);
- return ovector[0];
-}
-
diff --git a/regex.h b/regex.h
deleted file mode 100644
index af8e0a2..0000000
--- a/regex.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#define PCRE2_CODE_UNIT_WIDTH 8
-
-#include <inttypes.h>
-#include <pcre2.h>
-#include <stdlib.h>
-#include "Clean.h"
-
-pcre2_code* cleanregex_pcre2_compile(CleanString*, int64_t flags);
-int64_t cleanregex_match(pcre2_code* regex, CleanString* subject);
-
diff --git a/test.icl b/test.icl
index 5346aca..a89ffe4 100644
--- a/test.icl
+++ b/test.icl
@@ -2,9 +2,9 @@ module test
import StdEnv
import Regex
-import StdMaybe
+import Data.Maybe
-Start = (match rgx "123a123", toString rgx)
+Start = map (match rgx) ["hello", "abc"]
where
- (Just rgx) = toRegex Regex_CASELESS "[a-z]\\d+"
+ (Just rgx) = toRegex 0 "^a[[:alnum:]]"