Add regular-expression support backed by RE2 (through the cre2 C wrapper).

Summary of what this patch does:
  * .travis.yml      - build as a C++ project and install gcc/g++ 4.8.
  * Makefile         - download, checksum and build re2 and cre2 under
                       $(EXTERNALS), symlink the cre2 header and libraries into
                       include/ and lib/, and make every build target depend on
                       them. Directory prerequisites are order-only so that
                       adding files to $(EXTERNALS) does not re-trigger the
                       downloads, and cre2 waits for re2 because its configure
                       step links against re2/obj.
  * pixie/re.pxi     - IRegex protocol, the re-engine multimethod registry and
                       re-pattern.
  * pixie/re/cre2.pxi - FFI bindings for cre2 and the CRE2Regex type.
  * pixie/stdlib.pxi - libc bindings for mkdtemp/mkdir/rmdir/remove, a leaner
                       Iterate -reduce, and loading of pixie.re.
  * pixie/vm/reader.py - `#"..."` dispatch reader that expands to
                       (pixie.stdlib/re-pattern "..." #{opts}).

diff --git a/.travis.yml b/.travis.yml
index 77846892..00e4b840 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,10 +1,15 @@
+language: cpp
+
 sudo: false
 env:
   - JIT_OPTS='--opt=jit' TARGET_OPTS='target.py'
   - JIT_OPTS='' TARGET_OPTS='target.py'
 
 matrix:
-    fast_finish: true
+  fast_finish: true
+
+install:
+  - if [ "$CXX" = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi
 
 script:
   - make PYTHON=python build
@@ -14,12 +19,16 @@ script:
 
 addons:
   apt:
+    sources:
+      - ubuntu-toolchain-r-test
     packages:
-    - libffi-dev
-    - libedit-dev
-    - libboost-all-dev
-    - zlib1g-dev
-    - zlib-bin
+      - libffi-dev
+      - libedit-dev
+      - libboost-all-dev
+      - zlib1g-dev
+      - zlib-bin
+      - gcc-4.8
+      - g++-4.8
 
 notifications:
   irc: "chat.freenode.net#pixie-lang"
diff --git a/Makefile b/Makefile
index e27182ee..ac8dbf55 100644
--- a/Makefile
+++ b/Makefile
@@ -18,17 +18,17 @@ help:
 	@echo "make build_no_jit - build without jit"
 	@echo "make fetch_externals - download and unpack external deps"
 
-build_with_jit: fetch_externals
+build_with_jit: fetch_externals re2_cre2
 	@if [ ! -d /usr/local/include/boost -a ! -d /usr/include/boost ] ; then echo "Boost C++ Library not found" && false; fi && \
 	$(PYTHON) $(EXTERNALS)/pypy/rpython/bin/rpython $(COMMON_BUILD_OPTS) --opt=jit target.py && \
 	make compile_basics
 
-build_no_jit: fetch_externals
+build_no_jit: fetch_externals re2_cre2
 	@if [ ! -d /usr/local/include/boost -a ! -d /usr/include/boost ] ; then echo "Boost C++ Library not found" && false; fi && \
 	$(PYTHON) $(EXTERNALS)/pypy/rpython/bin/rpython $(COMMON_BUILD_OPTS) target.py && \
 	make compile_basics
 
-build_no_jit_shared: fetch_externals
+build_no_jit_shared: fetch_externals re2_cre2
 	@if [ ! -d /usr/local/include/boost -a ! -d /usr/include/boost ] ; then echo "Boost C++ Library not found" && false; fi && \
 	$(PYTHON) $(EXTERNALS)/pypy/rpython/bin/rpython $(COMMON_BUILD_OPTS) --shared target.py && \
 	make compile_basics
@@ -36,9 +36,9 @@ build_no_jit_shared: fetch_externals
 
 compile_basics:
 	@echo -e "\n\n\n\nWARNING: Compiling core libs. If you want to modify one of these files delete the .pxic files first\n\n\n\n"
-	./pixie-vm -c pixie/uv.pxi -c pixie/io.pxi -c pixie/stacklets.pxi -c pixie/stdlib.pxi -c pixie/repl.pxi
+	./pixie-vm -c pixie/uv.pxi -c pixie/io.pxi -c pixie/stacklets.pxi -c pixie/stdlib.pxi -c pixie/repl.pxi -c pixie/re.pxi -c pixie/re/cre2.pxi
 
-build: fetch_externals
+build: fetch_externals re2_cre2
 	$(PYTHON) $(EXTERNALS)/pypy/rpython/bin/rpython $(COMMON_BUILD_OPTS) $(JIT_OPTS) $(TARGET_OPTS)
 
 fetch_externals: $(EXTERNALS)/pypy externals.fetched
@@ -49,19 +49,51 @@ externals.fetched:
 	tar -jxf /tmp/externals.tar.bz2 --strip-components=2
 	touch externals.fetched
 
+$(EXTERNALS):
+	mkdir -p $@
 
-$(EXTERNALS)/pypy:
-	mkdir $(EXTERNALS); \
+$(EXTERNALS)/pypy: | $(EXTERNALS)
 	cd $(EXTERNALS); \
 	curl https://bitbucket.org/pypy/pypy/get/81254.tar.bz2 > pypy.tar.bz2; \
 	mkdir pypy; \
 	cd pypy; \
 	tar -jxf ../pypy.tar.bz2 --strip-components=1
 
+$(EXTERNALS)/re2: | $(EXTERNALS)
+	cd $(EXTERNALS) && \
+	curl -sL https://github.com/google/re2/archive/2016-02-01.tar.gz > re2.tar.gz && \
+	shasum -a 256 re2.tar.gz | grep -q f246c43897ac341568a7460622138ec0dd8de9b6f5459686376fa23e9d8c1bb8 && \
+	mkdir -p re2 && \
+	cd re2 && \
+	tar -zxf ../re2.tar.gz --strip-components=1 && \
+	make CPPFLAGS="-fPIC"
+
+$(EXTERNALS)/cre2: | $(EXTERNALS) $(EXTERNALS)/re2
+	cd $(EXTERNALS) && \
+	curl -sL https://bitbucket.org/marcomaggi/cre2/downloads/cre2-0.2.0.tar.xz > cre2.tar.xz && \
+	shasum -a 256 cre2.tar.xz | grep -q d31118dbc9d2b1cf95c1b763ca92ae2ec4e262b1f8d8e995c1ffdc8eb40a82fc && \
+	mkdir -p cre2 && \
+	cd cre2 && \
+	tar -Jxf ../cre2.tar.xz --strip-components=1 && \
+	mkdir -p build && \
+	cd build && \
+	../configure LDFLAGS="-L`pwd`/../../re2/obj" CPPFLAGS="-I`pwd`/../../re2" && \
+	chmod +x ../meta/autotools/install-sh && \
+	make
+
+re2: $(EXTERNALS)/re2
+
+cre2: $(EXTERNALS)/cre2
+	mkdir -p lib/ include/ && \
+	ln -sf ../$(EXTERNALS)/cre2/src/cre2.h include/ && \
+	cd lib && \
+	ln -sf ../$(EXTERNALS)/cre2/build/.libs/* ./
+
+re2_cre2: re2 cre2
+
 run:
 	./pixie-vm
 
-
 run_interactive:
 	@PYTHONPATH=$(PYTHONPATH) $(PYTHON) target.py
 
@@ -82,7 +114,7 @@ compile_src:
 	find * -name "*.pxi" | grep "^pixie/" | xargs -L1 ./pixie-vm $(EXTERNALS_FLAGS) -c
 
 clean_pxic:
-	find * -name "*.pxic" | xargs --no-run-if-empty rm
+	find * -name "*.pxic" -delete
 
 clean: clean_pxic
 	rm -rf ./lib
diff --git a/pixie/re.pxi b/pixie/re.pxi
new file mode 100644
index 00000000..e394c705
--- /dev/null
+++ b/pixie/re.pxi
@@ -0,0 +1,20 @@
+(in-ns :pixie.stdlib)
+
+(defprotocol IRegex
+  (re-matches [r t])
+  (re-find [r t])
+  (re-seq [r t]))
+
+(def ^:dynamic *default-re-engine* 'pixie.re.cre2)
+
+;; an "open" engine registry
+(defmulti re-engine (fn [k s o] k))
+
+(defn re-pattern
+  {:doc "Returns internal representation for regular
+         expression, used in matching functions."
+   :signatures [[pattern opts] [pattern opts engine]]}
+  ([pattern opts] (re-pattern pattern opts *default-re-engine*))
+  ([pattern opts engine] (re-engine engine pattern opts)))
+
+(load-ns *default-re-engine*)
diff --git a/pixie/re/cre2.pxi b/pixie/re/cre2.pxi
new file mode 100644
index 00000000..b22c473b
--- /dev/null
+++ b/pixie/re/cre2.pxi
@@ -0,0 +1,97 @@
+(ns pixie.re.cre2
+  (:require [pixie.ffi-infer :as f]))
+
+(f/with-config {:library "cre2"
+                :includes ["cre2.h"]}
+
+  (f/defcstruct cre2_string_t [:data :length])
+  (f/defcfn cre2_version_string)
+
+  ;; Options
+  (f/defcfn cre2_opt_new)
+  (f/defcfn cre2_opt_delete)
+  (f/defcfn cre2_opt_set_posix_syntax)
+  (f/defcfn cre2_opt_set_longest_match)
+  (f/defcfn cre2_opt_set_log_errors)
+  (f/defcfn cre2_opt_set_literal)
+  (f/defcfn cre2_opt_set_never_nl)
+  (f/defcfn cre2_opt_set_case_sensitive)
+  (f/defcfn cre2_opt_set_perl_classes)
+  (f/defcfn cre2_opt_set_word_boundary)
+  (f/defcfn cre2_opt_set_one_line)
+  (f/defcfn cre2_opt_set_max_mem)
+  (f/defcfn cre2_opt_set_encoding)
+
+  ;; Construction / destruction
+  (f/defcfn cre2_new)
+  (f/defcfn cre2_delete)
+
+  ;; Inspection
+  (f/defcfn cre2_pattern)
+  (f/defcfn cre2_error_code)
+  (f/defcfn cre2_num_capturing_groups)
+  (f/defcfn cre2_program_size)
+
+  ;; Errors something?
+  (f/defcfn cre2_error_string)
+  (f/defcfn cre2_error_arg)
+
+  ;; Matching
+  (f/defcstruct cre2_range_t [:start :past])
+  (f/defcfn cre2_match)
+  (f/defcfn cre2_easy_match)
+  (f/defcfn cre2_strings_to_ranges)
+)
+
+(def cre2-optmap
+  { :ascii         #(cre2_opt_set_encoding % 2)
+    :posix         #(cre2_opt_set_posix_syntax % 1)
+    :longest_match #(cre2_opt_set_longest_match % 1)
+    :silent        #(cre2_opt_set_log_errors % 0)
+    :literal       #(cre2_opt_set_literal % 1)
+    :never_nl      #(cre2_opt_set_never_nl % 1)
+    :dot_nl        #(cre2_opt_set_one_line % 0)
+    :never_capture #(do %) ;; ??
+    :ignore_case   #(cre2_opt_set_case_sensitive % 0) })
+
+(defn cre2-make-opts [opts]
+  (let [opt (cre2_opt_new)]
+    (doseq [key opts] ((key cre2-optmap) opt))
+    opt))
+
+(defn cre2-make-match-array [size]
+  (cre2_string_t))
+
+(defn cre2-delete-match-array [arr size]
+  )
+
+(defn cre2-matches-to-seq [matches size]
+  (repeat size true))
+
+(defn cre2-matches
+  [pattern text]
+  (let [text-size (count text)
+        match-arr-size (+ 1 (cre2_num_capturing_groups pattern))
+        match-arr (cre2-make-match-array match-arr-size)
+        result (cre2_match pattern
+                           text text-size 0 text-size 1
+                           match-arr match-arr-size)]
+    (if (= 1 result)
+      (let [match-seq (cre2-matches-to-seq match-arr match-arr-size)]
+        (cre2-delete-match-array match-arr match-arr-size)
+        match-seq)
+      nil)))
+
+(deftype CRE2Regex [pattern opts]
+  IFinalize
+  (-finalize! [this]
+    (cre2_opt_delete opts)
+    (cre2_delete pattern))
+
+  IRegex
+  (pixie.stdlib/re-matches [_ text] (cre2-matches pattern text)))
+
+;; add cre2 to registry
+(defmethod pixie.stdlib/re-engine 'pixie.re.cre2 [_ regex-str opts]
+  (let [copts (cre2-make-opts opts)]
+    (->CRE2Regex (cre2_new regex-str (count regex-str) copts) copts)))
diff --git a/pixie/stdlib.pxi b/pixie/stdlib.pxi
index 311df7fb..2305213f 100644
--- a/pixie/stdlib.pxi
+++ b/pixie/stdlib.pxi
@@ -17,6 +17,10 @@
 (def srand (ffi-fn libc "srand" [CInt] CInt))
 (def fopen (ffi-fn libc "fopen" [CCharP CCharP] CVoidP))
 (def fread (ffi-fn libc "fread" [CVoidP CInt CInt CVoidP] CInt))
+(def mkdtemp (ffi-fn libc "mkdtemp" [CCharP] CCharP))
+(def mkdir (ffi-fn libc "mkdir" [CCharP CInt] CInt))
+(def rmdir (ffi-fn libc "rmdir" [CCharP] CInt))
+(def rm (ffi-fn libc "remove" [CCharP] CInt))
 
 (def libm (ffi-library (str "libm." pixie.platform/so-ext)))
 (def atan2 (ffi-fn libm "atan2" [CDouble CDouble] CDouble))
@@ -899,7 +903,7 @@ If further arguments are passed, invokes the method named by symbol, passing the
     (if (next coll)
       (recur (next coll))
       (first coll))
-    
+
     (satisfies? ISeqable coll)
     (recur (seq coll))))
 
@@ -2102,7 +2106,7 @@ For more information, see http://clojure.org/special_forms#binding-forms"}
         val
         not-found)))
   ISeq
-  (-first [this] 
+  (-first [this]
     (when (not= start stop)
       start))
   (-next [this]
@@ -3076,11 +3080,11 @@ ex: (vary-meta x assoc :foo 42)"
 (deftype Iterate [f x]
   IReduce
   (-reduce [self rf init]
-    (loop [col (rest self)
-           acc (rf init (first self))]
+    (loop [next (f x)
+           acc (rf init x)]
       (if (reduced? acc)
         @acc
-        (recur (rest col) (rf acc (first col))))))
+        (recur (f next) (rf acc next)))))
   ISeq
   (-seq [self]
     (cons x (lazy-seq* (fn [] (->Iterate f (f x)))))))
@@ -3092,3 +3096,5 @@ ex: (vary-meta x assoc :foo 42)"
    :added "0.1"}
   [f x]
   (->Iterate f x))
+
+(load-ns 'pixie.re)
diff --git a/pixie/vm/reader.py b/pixie/vm/reader.py
index 09e4fdf9..59f44c4d 100644
--- a/pixie/vm/reader.py
+++ b/pixie/vm/reader.py
@@ -620,10 +620,42 @@ def invoke(self, rdr, ch):
         read_inner(rdr, True, always_return_form=True)
         return rdr
 
+class RegexReader(ReaderHandler):
+    def invoke(self, rdr, ch):
+        regex_str = LiteralStringReader().invoke(rdr, ch)
+        regex_opts = EMPTY_SET
+        opts_dict = {
+            u"a": keyword(u"ascii"),
+            u"p": keyword(u"posix"),
+            u"l": keyword(u"longest_match"),
+            u"s": keyword(u"silent"),
+            u"L": keyword(u"literal"),
+            u"n": keyword(u"never_nl"),
+            u"m": keyword(u"dot_nl"),
+            u"c": keyword(u"never_capture"),
+            u"i": keyword(u"ignore_case")
+        }
+
+        # read options (https://github.com/google/re2/blob/master/re2/re2.h#L517)
+        while True:
+            try:
+                opt = opts_dict.get(rdr.read(), None)
+                if opt is None:
+                    rdr.unread()
+                    break
+                else:
+                    regex_opts = regex_opts.conj(opt)
+            except EOFError:
+                break
+
+        return rt.cons(symbol(u"pixie.stdlib/re-pattern"),
+                       rt.cons(regex_str, rt.cons(regex_opts, nil)))
+
 dispatch_handlers = {
     u"{": SetReader(),
     u"(": FnReader(),
-    u"_": CommentReader()
+    u"_": CommentReader(),
+    u"\"": RegexReader()
 }
 
 class DispatchReader(ReaderHandler):