aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCamil Staps2016-03-21 19:31:24 +0100
committerCamil Staps2016-03-21 19:42:59 +0100
commit58ea61a19cb9bd0f6c600ebbb643e209fdf9d7cb (patch)
tree864264245f5f1ec8d8af0162830afd9bb5a63473
parentInitial commit (diff)
Matching works
-rw-r--r--.gitignore1
-rw-r--r--Clean.h76
-rw-r--r--Makefile20
-rw-r--r--Regex.dcl51
-rw-r--r--Regex.icl39
-rw-r--r--regex.c38
-rw-r--r--regex.h10
-rw-r--r--test.icl10
8 files changed, 245 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index 5e648b4..3076de8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@ Clean System Files/
*-data/
sapl/
+test
diff --git a/Clean.h b/Clean.h
new file mode 100644
index 0000000..bf22e6e
--- /dev/null
+++ b/Clean.h
@@ -0,0 +1,76 @@
+
+#define Clean(a)
+
+typedef struct clean_string *CleanString;
+
+/* a string in Clean is:
+ struct clean_string {
+ size_t clean_string_length;
+ char clean_string_characters[clean_string_length];
+ };
+ The string does not end with a '\0' !
+*/
+
+#ifndef _WIN64
+
+/* CleanStringLength(clean_string) returns the length of the clean_string in characters */
+#define CleanStringLength(clean_string) (*(unsigned long *)(clean_string))
+
+/* CleanStringCharacters(clean_string) returns a pointer to the characters of the clean_string */
+#define CleanStringCharacters(clean_string) ((char*)(1+(unsigned long *)(clean_string)))
+
+/* CleanStringSizeInts(string_length) returns the size of *CleanString in integers */
+#define CleanStringSizeInts(string_length) (1+(((unsigned long)(string_length)+(sizeof(unsigned long)-1))>>(1+(sizeof(unsigned long)>>2))))
+
+/* CleanStringVariable(clean_string,string_length) defines variable clean_string with length string_length,
+ before using the clean_string variable, cast to CleanString, except for the macros above */
+#define CleanStringVariable(clean_string,string_length) unsigned long clean_string[CleanStringSizeInts(string_length)]
+
+/* CleanStringSizeBytes(string_length) returns the size of *CleanString in bytes:
+ a fixed part plus string_length rounded up to a multiple of sizeof(unsigned long).
+ NOTE(review): the fixed part here is two words (sizeof(unsigned long)<<1), while the
+ _WIN64 variant of this macro adds a single 8-byte word - confirm which one is intended. */
+#define CleanStringSizeBytes(string_length) ((sizeof(unsigned long)<<1)+(((unsigned long)(string_length)+(sizeof(unsigned long)-1)) & -(sizeof(unsigned long))))
+
+typedef long *CleanIntArray;
+
+/* CleanIntArraySize(clean_array) returns the size (number of elements) of the clean_int_array */
+#define CleanIntArraySize(clean_int_array) (((unsigned long *)(clean_int_array))[-2])
+
+/* CleanRealArraySize(clean_real_array) returns the size (number of elements) of the clean_real_array */
+#define CleanRealArraySize(clean_real_array) (((unsigned long *)(clean_real_array))[-2])
+
+/* CleanCharArraySize(clean_char_array) returns the size (number of elements) of the clean_char_array */
+#define CleanCharArraySize(clean_char_array) (((unsigned long *)(clean_char_array))[-1])
+
+#else
+
+/* CleanStringLength(clean_string) returns length of the clean_string in characters */
+#define CleanStringLength(clean_string) (*(unsigned __int64 *)(clean_string))
+
+/* CleanStringCharacters(clean_string) returns a pointer to the characters of the clean_string */
+#define CleanStringCharacters(clean_string) ((char*)(1+(unsigned __int64 *)(clean_string)))
+
+/* CleanStringSizeInts(string_length) returns the size of *CleanString in integers */
+#define CleanStringSizeInts(string_length) (1+(((unsigned __int64)(string_length)+7)>>3))
+
+/* CleanStringVariable(clean_string,string_length) defines variable clean_string with length string_length,
+ before using the clean_string variable, cast to CleanString, except for the macros above */
+#define CleanStringVariable(clean_string,string_length) unsigned __int64 clean_string[CleanStringSizeInts(string_length)]
+
+/* CleanStringSizeBytes(string_length) returns the size of *CleanString in bytes */
+#define CleanStringSizeBytes(string_length) (8+(((unsigned __int64)(string_length)+7) & -8))
+
+typedef __int64 *CleanIntArray;
+
+/* CleanIntArraySize(clean_array) returns the size (number of elements) of the clean_int_array */
+#define CleanIntArraySize(clean_int_array) (((unsigned __int64 *)(clean_int_array))[-2])
+
+/* CleanRealArraySize(clean_real_array) returns the size (number of elements) of the clean_real_array */
+#define CleanRealArraySize(clean_real_array) (((unsigned __int64 *)(clean_real_array))[-2])
+
+/* CleanCharArraySize(clean_char_array) returns the size (number of elements) of the clean_char_array */
+#define CleanCharArraySize(clean_char_array) (((unsigned __int64 *)(clean_char_array))[-1])
+
+#endif
+
+typedef double *CleanRealArray;
+
+typedef unsigned char *CleanCharArray;
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..4babffe
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,20 @@
+# Flags for compiling the C glue code (regex.c).
+CFLAGS=-O0 -Wall -std=c99
+# Clean build driver and the flags used to build the test program.
+CLM=clm
+CLMFLAGS=-I /opt/clean/lib/StdLib/ -l -lpcre2-8
+
+all: test Clean\ System\ Files/regex.o
+
+# Object file imported by Regex.icl ("import code from").
+# Clean.h is pulled in through regex.h, so it is listed as a prerequisite too.
+# The output directory does not exist on a fresh checkout or after `make clean`,
+# so it is created before the compiler writes into it.
+Clean\ System\ Files/regex.o: regex.c regex.h Clean.h
+	mkdir -p "Clean System Files"
+	$(CC) $(CFLAGS) -c $< -o "$@"
+
+# Test executable; the object file must exist before clm links it.
+test: test.icl Regex.dcl Regex.icl Clean\ System\ Files/regex.o
+	$(CLM) $(CLMFLAGS) $@ -o $@
+
+run_test: test
+	./test
+
+# Remove all build products, including the object directory.
+clean:
+	rm -frv Clean\ System\ Files test
+
+.PHONY: all clean run_test
+
diff --git a/Regex.dcl b/Regex.dcl
new file mode 100644
index 0000000..0765320
--- /dev/null
+++ b/Regex.dcl
@@ -0,0 +1,51 @@
+definition module Regex
+
+from StdMaybe import ::Maybe
+from StdOverloaded import class toString, class fromString, class zero
+
+// Abstract type of a compiled regular expression; its representation is in Regex.icl.
+:: Regex
+
+// Both are plain Int synonyms. The Regex_* constants below are the values passed as flags.
+:: Flag :== Int
+:: Flags :== Int
+
+// toRegex builds a Regex from a value of type a using the given flags; Nothing signals failure.
+class toRegex a :: !Flags !a -> Maybe Regex
+// fromRegex converts a Regex back into a value of type a.
+class fromRegex a :: !Regex -> a
+
+instance toRegex String
+
+instance fromRegex String
+
+instance zero Flags
+
+instance toString Regex
+instance fromString (Maybe Regex)
+
+// Nothing if no match; otherwise Just i where i is the index of the match
+match :: !Regex !String -> Maybe Int
+
+// From pcre2.h
+Regex_ALLOW_EMPTY_CLASS :== 0x00000001 /* C */
+Regex_ALT_BSUX :== 0x00000002 /* C */
+Regex_AUTO_CALLOUT :== 0x00000004 /* C */
+Regex_CASELESS :== 0x00000008 /* C */
+Regex_DOLLAR_ENDONLY :== 0x00000010 /* J M D */
+Regex_DOTALL :== 0x00000020 /* C */
+Regex_DUPNAMES :== 0x00000040 /* C */
+Regex_EXTENDED :== 0x00000080 /* C */
+Regex_FIRSTLINE :== 0x00000100 /* J M D */
+Regex_MATCH_UNSET_BACKREF :== 0x00000200 /* C J M */
+Regex_MULTILINE :== 0x00000400 /* C */
+Regex_NEVER_UCP :== 0x00000800 /* C */
+Regex_NEVER_UTF :== 0x00001000 /* C */
+Regex_NO_AUTO_CAPTURE :== 0x00002000 /* C */
+Regex_NO_AUTO_POSSESS :== 0x00004000 /* C */
+Regex_NO_DOTSTAR_ANCHOR :== 0x00008000 /* C */
+Regex_NO_START_OPTIMIZE :== 0x00010000 /* J M D */
+Regex_UCP :== 0x00020000 /* C J M D */
+Regex_UNGREEDY :== 0x00040000 /* C */
+Regex_UTF :== 0x00080000 /* C J M D */
+Regex_NEVER_BACKSLASH_C :== 0x00100000 /* C */
+Regex_ALT_CIRCUMFLEX :== 0x00200000 /* J M D */
+Regex_ALT_VERBNAMES :== 0x00400000 /* C */
+Regex_USE_OFFSET_LIMIT :== 0x00800000 /* J M D */
+
diff --git a/Regex.icl b/Regex.icl
new file mode 100644
index 0000000..87ab703
--- /dev/null
+++ b/Regex.icl
@@ -0,0 +1,39 @@
+implementation module Regex
+
+import StdEnv
+import StdMaybe
+import code from "regex.o"
+
+// Representation of a compiled regular expression: the pointer returned by the C
+// helper together with the pattern text it was compiled from.
+// NOTE(review): no code visible in this module releases the object behind ptr - confirm
+// whether that is intended.
+:: Regex = { ptr :: Int // pointer to a pcre2_code object
+ , str :: String // string representation
+ }
+
+// Compile a pattern string into a Regex through the C helper cleanregex_pcre2_compile.
+// A result of 0 from the helper is turned into Nothing; any other result is stored as
+// ptr, next to the original pattern text in str.
+// c_compile is the foreign call itself; presumably "SI:p" declares a String and an Int
+// argument and a pointer result - verify against the Clean ccall documentation.
+instance toRegex String
+where toRegex flags s
+ # r = c_compile s flags
+ | r == 0 = Nothing
+ | otherwise = Just {ptr=r, str=s}
+ where
+ c_compile :: !String !Int -> Int
+ c_compile reg flags = code {
+ ccall cleanregex_pcre2_compile "SI:p"
+ }
+
+// The String view of a Regex is the pattern text kept in its str field.
+instance fromRegex String where fromRegex {str} = str
+
+// toString reuses fromRegex; fromString compiles with the zero flag value and yields
+// whatever toRegex yields (Nothing when compilation fails).
+instance toString Regex where toString r = fromRegex r
+instance fromString (Maybe Regex) where fromString s = toRegex zero s
+
+// The zero flag value is 0, i.e. none of the Regex_* bits set.
+instance zero Flags where zero = 0
+
+// Run the compiled expression against s through the C helper cleanregex_match.
+// A negative result from the helper is reported as Nothing; any other result is
+// wrapped in Just.
+// match` is the foreign call itself; presumably "pS:I" declares a pointer and a String
+// argument and an Int result - verify against the Clean ccall documentation.
+match :: !Regex !String -> Maybe Int
+match {ptr} s
+# res = match` ptr s
+| res < 0 = Nothing
+| otherwise = Just res
+where
+ match` :: !Int !String -> Int
+ match` _ _ = code {
+ ccall cleanregex_match "pS:I"
+ }
+
diff --git a/regex.c b/regex.c
new file mode 100644
index 0000000..8d46a31
--- /dev/null
+++ b/regex.c
@@ -0,0 +1,38 @@
+#include "regex.h"
+#include <stdio.h>
+#include <string.h>
+
+/* Copy a Clean string into a freshly allocated, NUL-terminated C string.
+ *
+ * cs: the Clean string (length word followed by the characters, no terminator).
+ * Returns a malloc'ed buffer of length+1 bytes that the caller must free(),
+ * or NULL when the allocation fails.
+ */
+char* clstocs(CleanString* cs) {
+	/* size_t rather than uint8_t: an 8-bit index wraps at 256 and would
+	 * never reach the length of a longer string. */
+	size_t len = CleanStringLength(cs);
+	char* s = malloc(len + 1);
+	if (s == NULL)
+		return NULL;
+	memcpy(s, CleanStringCharacters(cs), len);
+	s[len] = 0;
+	return s;
+}
+
+/* Compile the pattern held in a Clean string.
+ *
+ * cs:    the pattern as a Clean string.
+ * flags: PCRE2 compile options (the Regex_* values on the Clean side).
+ * Returns the compiled pattern, or NULL when compilation fails. The error code
+ * and offset reported by PCRE2 are currently discarded.
+ */
+pcre2_code* cleanregex_pcre2_compile(CleanString* cs, int64_t flags) {
+	int error;
+	PCRE2_SIZE offset;
+	/* A Clean string carries its own length, so the buffer is handed to PCRE2
+	 * directly. This avoids a temporary copy (which used to be leaked) and
+	 * does not cut the pattern short at an embedded NUL. */
+	return pcre2_compile((PCRE2_SPTR) CleanStringCharacters(cs),
+			(PCRE2_SIZE) CleanStringLength(cs), (uint32_t) flags,
+			&error, &offset, NULL);
+}
+
+/* Match a compiled pattern against a Clean string.
+ *
+ * re:  pattern produced by cleanregex_pcre2_compile.
+ * sub: subject as a Clean string.
+ * Returns the offset at which the match starts, or the negative PCRE2 error
+ * code (PCRE2_ERROR_NOMATCH among others) when nothing matched.
+ */
+int64_t cleanregex_match(pcre2_code* re, CleanString* sub) {
+	pcre2_match_data* data = pcre2_match_data_create_from_pattern(re, NULL);
+	if (data == NULL)
+		return PCRE2_ERROR_NOMEMORY;
+	/* The subject is passed with its explicit length, so no NUL-terminated
+	 * copy is needed (the copy made here before was never freed). */
+	int rc = pcre2_match(re, (PCRE2_SPTR) CleanStringCharacters(sub),
+			(PCRE2_SIZE) CleanStringLength(sub), 0, 0, data, NULL);
+	int64_t result = rc;
+	/* The ovector lives inside the match data block, so the offset has to be
+	 * read before that block is released. */
+	if (rc >= 0)
+		result = (int64_t) pcre2_get_ovector_pointer(data)[0];
+	pcre2_match_data_free(data);
+	return result;
+}
+
diff --git a/regex.h b/regex.h
new file mode 100644
index 0000000..af8e0a2
--- /dev/null
+++ b/regex.h
@@ -0,0 +1,10 @@
+#define PCRE2_CODE_UNIT_WIDTH 8
+
+#include <inttypes.h>
+#include <pcre2.h>
+#include <stdlib.h>
+#include "Clean.h"
+
+/* Compile the pattern in the given Clean string with the given PCRE2 option bits;
+   returns NULL when compilation fails. */
+pcre2_code* cleanregex_pcre2_compile(CleanString*, int64_t flags);
+/* Match regex against subject; a negative return value is the PCRE2 error code,
+   any other value is the start offset of the match. */
+int64_t cleanregex_match(pcre2_code* regex, CleanString* subject);
+
diff --git a/test.icl b/test.icl
new file mode 100644
index 0000000..5346aca
--- /dev/null
+++ b/test.icl
@@ -0,0 +1,10 @@
+module test
+
+import StdEnv
+import Regex
+import StdMaybe
+
+// Compiles "[a-z]\\d+" with Regex_CASELESS and yields the result of matching it against
+// "123a123" together with the pattern text.
+// NOTE(review): the (Just rgx) binding is partial - if toRegex returns Nothing the
+// program presumably aborts at run time; acceptable for a smoke test only.
+Start = (match rgx "123a123", toString rgx)
+where
+ (Just rgx) = toRegex Regex_CASELESS "[a-z]\\d+"
+