diff options
author | Camil Staps | 2016-03-21 19:31:24 +0100 |
---|---|---|
committer | Camil Staps | 2016-03-21 19:42:59 +0100 |
commit | 58ea61a19cb9bd0f6c600ebbb643e209fdf9d7cb (patch) | |
tree | 864264245f5f1ec8d8af0162830afd9bb5a63473 | |
parent | Initial commit (diff) |
Matching works
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Clean.h | 76 | ||||
-rw-r--r-- | Makefile | 20 | ||||
-rw-r--r-- | Regex.dcl | 51 | ||||
-rw-r--r-- | Regex.icl | 39 | ||||
-rw-r--r-- | regex.c | 38 | ||||
-rw-r--r-- | regex.h | 10 | ||||
-rw-r--r-- | test.icl | 10 |
8 files changed, 245 insertions, 0 deletions
@@ -10,3 +10,4 @@
 Clean System Files/
 *-data/
 sapl/
+test
@@ -0,0 +1,76 @@
+
+#define Clean(a)
+
+typedef struct clean_string *CleanString;
+
+/*	a string in Clean is:
+	struct clean_string {
+		size_t clean_string_length;
+		char clean_string_characters[clean_string_length];
+	};
+	The string does not end with a '\0' !
+*/
+
+#ifndef _WIN64
+
+/* CleanStringLength(clean_string) returns the length of the clean_string in characters */
+#define CleanStringLength(clean_string) (*(unsigned long *)(clean_string))
+
+/* CleanStringCharacters(clean_string) returns a pointer to the characters of the clean_string */
+#define CleanStringCharacters(clean_string) ((char*)(1+(unsigned long *)(clean_string)))
+
+/* CleanStringSizeInts(string_length) return size of *CleanString in integers */
+#define CleanStringSizeInts(string_length) (1+(((unsigned long)(string_length)+(sizeof(unsigned long)-1))>>(1+(sizeof(unsigned long)>>2))))
+
+/* CleanStringVariable(clean_string,string_length) defines variable clean_string with length string_length,
+   before using the clean_string variable, cast to CleanString, except for the macros above */
+#define CleanStringVariable(clean_string,string_length) unsigned long clean_string[CleanStringSizeInts(string_length)]
+
+/* CleanStringSizeBytes(string_length) return size of *CleanString in bytes */
+#define CleanStringSizeBytes(string_length) ((sizeof(unsigned long)<<1)+(((unsigned long)(string_length)+(sizeof(unsigned long)-1)) & -(sizeof(unsigned long))))
+
+typedef long *CleanIntArray;
+
+/* CleanIntArraySize(clean_array) returns the size (number of elements) of the clean_int_array */
+#define CleanIntArraySize(clean_int_array) (((unsigned long *)(clean_int_array))[-2])
+
+/* CleanRealArraySize(clean_real_array) returns the size (number of elements) of the clean_real_array */
+#define CleanRealArraySize(clean_real_array) (((unsigned long *)(clean_real_array))[-2])
+
+/* CleanCharArraySize(clean_char_array) returns the size (number of elements) of the clean_char_array */
+#define CleanCharArraySize(clean_char_array) (((unsigned long *)(clean_char_array))[-1])
+
+#else
+
+/* CleanStringLength(clean_string) returns length of the clean_string in characters */
+#define CleanStringLength(clean_string) (*(unsigned __int64 *)(clean_string))
+
+/* CleanStringCharacters(clean_string) returns a pointer to the characters of the clean_string */
+#define CleanStringCharacters(clean_string) ((char*)(1+(unsigned __int64 *)(clean_string)))
+
+/* CleanStringSizeInts(string_length) return size of *CleanString in integers */
+#define CleanStringSizeInts(string_length) (1+(((unsigned __int64)(string_length)+7)>>3))
+
+/* CleanStringVariable(clean_string,string_length) defines variable clean_string with length string_length,
+   before using the clean_string variable, cast to CleanString, except for the macros above */
+#define CleanStringVariable(clean_string,string_length) unsigned __int64 clean_string[CleanStringSizeInts(string_length)]
+
+/* CleanStringSizeBytes(string_length) return size of *CleanString in bytes */
+#define CleanStringSizeBytes(string_length) (8+(((unsigned __int64)(string_length)+7) & -8))
+
+typedef __int64 *CleanIntArray;
+
+/* CleanIntArraySize(clean_array) returns the size (number of elements) of the clean_int_array */
+#define CleanIntArraySize(clean_int_array) (((unsigned __int64 *)(clean_int_array))[-2])
+
+/* CleanRealArraySize(clean_real_array) returns the size (number of elements) of the clean_real_array */
+#define CleanRealArraySize(clean_real_array) (((unsigned __int64 *)(clean_real_array))[-2])
+
+/* CleanCharArraySize(clean_char_array) returns the size (number of elements) of the clean_char_array */
+#define CleanCharArraySize(clean_char_array) (((unsigned __int64 *)(clean_char_array))[-1])
+
+#endif
+
+typedef double *CleanRealArray;
+
+typedef unsigned char *CleanCharArray;
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..4babffe
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,20 @@
+CFLAGS=-O0 -Wall -std=c99
+CLM=clm
+CLMFLAGS=-I /opt/clean/lib/StdLib/ -l -lpcre2-8
+
+all: test Clean\ System\ Files/regex.o
+
+Clean\ System\ Files/regex.o: regex.c regex.h
+	$(CC) $(CFLAGS) -c $< -o "$@"
+
+test: test.icl Regex.dcl Regex.icl Clean\ System\ Files/regex.o
+	$(CLM) $(CLMFLAGS) $@ -o $@
+
+run_test: test
+	./test
+
+clean:
+	rm -frv Clean\ System\ Files test
+
+.PHONY: all clean run_test
+
diff --git a/Regex.dcl b/Regex.dcl
new file mode 100644
index 0000000..0765320
--- /dev/null
+++ b/Regex.dcl
@@ -0,0 +1,51 @@
+definition module Regex
+
+from StdMaybe import ::Maybe
+from StdOverloaded import class toString, class fromString, class zero
+
+:: Regex
+
+:: Flag :== Int
+:: Flags :== Int
+
+class toRegex a :: !Flags !a -> Maybe Regex
+class fromRegex a :: !Regex -> a
+
+instance toRegex String
+
+instance fromRegex String
+
+instance zero Flags
+
+instance toString Regex
+instance fromString (Maybe Regex)
+
+// Nothing if no match; otherwise Just i where i is the index of the match
+match :: !Regex !String -> Maybe Int
+
+// From pcre2.h
+Regex_ALLOW_EMPTY_CLASS :== 0x00000001 /* C */
+Regex_ALT_BSUX :== 0x00000002 /* C */
+Regex_AUTO_CALLOUT :== 0x00000004 /* C */
+Regex_CASELESS :== 0x00000008 /* C */
+Regex_DOLLAR_ENDONLY :== 0x00000010 /* J M D */
+Regex_DOTALL :== 0x00000020 /* C */
+Regex_DUPNAMES :== 0x00000040 /* C */
+Regex_EXTENDED :== 0x00000080 /* C */
+Regex_FIRSTLINE :== 0x00000100 /* J M D */
+Regex_MATCH_UNSET_BACKREF :== 0x00000200 /* C J M */
+Regex_MULTILINE :== 0x00000400 /* C */
+Regex_NEVER_UCP :== 0x00000800 /* C */
+Regex_NEVER_UTF :== 0x00001000 /* C */
+Regex_NO_AUTO_CAPTURE :== 0x00002000 /* C */
+Regex_NO_AUTO_POSSESS :== 0x00004000 /* C */
+Regex_NO_DOTSTAR_ANCHOR :== 0x00008000 /* C */
+Regex_NO_START_OPTIMIZE :== 0x00010000 /* J M D */
+Regex_UCP :== 0x00020000 /* C J M D */
+Regex_UNGREEDY :== 0x00040000 /* C */
+Regex_UTF :== 0x00080000 /* C J M D */
+Regex_NEVER_BACKSLASH_C :== 0x00100000 /* C */
+Regex_ALT_CIRCUMFLEX :== 0x00200000 /* J M D */
+Regex_ALT_VERBNAMES :== 0x00400000 /* C */
+Regex_USE_OFFSET_LIMIT :== 0x00800000 /* J M D */
+
diff --git a/Regex.icl b/Regex.icl
new file mode 100644
index 0000000..87ab703
--- /dev/null
+++ b/Regex.icl
@@ -0,0 +1,39 @@
+implementation module Regex
+
+import StdEnv
+import StdMaybe
+import code from "regex.o"
+
+:: Regex = { ptr :: Int // pointer to a pcre2_code object
+           , str :: String // string representation
+           }
+
+instance toRegex String
+where toRegex flags s
+	# r = c_compile s flags
+	| r == 0 = Nothing
+	| otherwise = Just {ptr=r, str=s}
+	where
+		c_compile :: !String !Int -> Int
+		c_compile reg flags = code {
+			ccall cleanregex_pcre2_compile "SI:p"
+		}
+
+instance fromRegex String where fromRegex {str} = str
+
+instance toString Regex where toString r = fromRegex r
+instance fromString (Maybe Regex) where fromString s = toRegex zero s
+
+instance zero Flags where zero = 0
+
+match :: !Regex !String -> Maybe Int
+match {ptr} s
+# res = match` ptr s
+| res < 0 = Nothing
+| otherwise = Just res
+where
+	match` :: !Int !String -> Int
+	match` _ _ = code {
+		ccall cleanregex_match "pS:I"
+	}
+
@@ -0,0 +1,38 @@
+#include "regex.h"
+#include <stdio.h>
+#include <string.h>
+
+/* Copy a Clean string into a newly allocated, NUL-terminated C string.
+ * Returns NULL when allocation fails; the caller must free() the result. */
+char* clstocs(CleanString* cs) {
+	size_t len = CleanStringLength(cs); /* size_t: strings of 256+ chars must work */
+	char* s = calloc(len + 1, 1);
+	if (s != NULL)
+		memcpy(s, CleanStringCharacters(cs), len); /* calloc already zeroed s[len] */
+	return s;
+}
+
+/* Compile the pattern held in cs using the PCRE2 option bits in flags.
+ * Returns the compiled pattern, or NULL when pcre2_compile fails. */
+pcre2_code* cleanregex_pcre2_compile(CleanString* cs, int64_t flags) {
+	int error;
+	PCRE2_SIZE offset;
+	/* Hand PCRE2 the Clean string's own length: no heap copy, nothing to free. */
+	return pcre2_compile((PCRE2_SPTR) CleanStringCharacters(cs),
+			CleanStringLength(cs), (uint32_t) flags, &error, &offset, NULL);
+}
+
+/* Match re against sub. Returns the start offset of the match, or a
+ * negative PCRE2 error code (e.g. PCRE2_ERROR_NOMATCH) when there is none. */
+int64_t cleanregex_match(pcre2_code* re, CleanString* sub) {
+	pcre2_match_data *data = pcre2_match_data_create_from_pattern(re, NULL);
+	if (data == NULL)
+		return PCRE2_ERROR_NOMEMORY;
+	int64_t result = pcre2_match(re, (PCRE2_SPTR) CleanStringCharacters(sub),
+			CleanStringLength(sub), 0, 0, data, NULL);
+	if (result >= 0) /* the ovector lives inside data: read it before freeing */
+		result = (int64_t) pcre2_get_ovector_pointer(data)[0];
+	pcre2_match_data_free(data);
+	return result;
+}
+
@@ -0,0 +1,10 @@
+#define PCRE2_CODE_UNIT_WIDTH 8
+
+#include <inttypes.h>
+#include <pcre2.h>
+#include <stdlib.h>
+#include "Clean.h"
+
+pcre2_code* cleanregex_pcre2_compile(CleanString*, int64_t flags);
+int64_t cleanregex_match(pcre2_code* regex, CleanString* subject);
+
diff --git a/test.icl b/test.icl
new file mode 100644
index 0000000..5346aca
--- /dev/null
+++ b/test.icl
@@ -0,0 +1,10 @@
+module test
+
+import StdEnv
+import Regex
+import StdMaybe
+
+Start = (match rgx "123a123", toString rgx)
+where
+	(Just rgx) = toRegex Regex_CASELESS "[a-z]\\d+"
+