From d38b2c7b3f9a51c91a6d9886dada0e06797344b6 Mon Sep 17 00:00:00 2001 From: Yusheng Li Date: Mon, 5 Feb 2024 22:53:53 +0800 Subject: [PATCH] feat: regex pattern --- .github/workflows/examples.yml | 1 + .luacheckrc | 4 ++ Makefile | 1 + README.md | 6 ++- benchmark/simple-regex.lua | 33 ++++++++++++ examples/regular-expression.lua | 12 +++++ radix-router-dev-1.rockspec | 2 +- spec/parser_spec.lua | 40 ++++++++++++++- spec/router_spec.lua | 46 ++++++++++++++++- spec/utils_spec.lua | 14 +++++ src/parser/parser.lua | 2 +- src/parser/style/default.lua | 90 +++++++++++++++++++++++++++++---- src/route.lua | 2 +- src/router.lua | 60 +++++++++++----------- src/utils.lua | 42 ++++++++++++++- 15 files changed, 305 insertions(+), 50 deletions(-) create mode 100644 benchmark/simple-regex.lua create mode 100644 examples/regular-expression.lua diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml index ebcf1c1..a8e7836 100644 --- a/.github/workflows/examples.yml +++ b/.github/workflows/examples.yml @@ -39,3 +39,4 @@ jobs: run: | lua examples/example.lua lua examples/custom-matcher.lua + lua examples/regular-expression.lua diff --git a/.luacheckrc b/.luacheckrc index e095e99..fc15181 100644 --- a/.luacheckrc +++ b/.luacheckrc @@ -1,3 +1,7 @@ unused_args = false max_line_length = false redefined = false + +globals = { + "ngx", +} diff --git a/Makefile b/Makefile index fb247d4..7e41a44 100644 --- a/Makefile +++ b/Makefile @@ -18,6 +18,7 @@ bench: RADIX_ROUTER_ROUTES=100000 RADIX_ROUTER_TIMES=10000000 $(CMD) benchmark/simple-variable.lua RADIX_ROUTER_ROUTES=1000000 RADIX_ROUTER_TIMES=10000000 $(CMD) benchmark/simple-variable.lua RADIX_ROUTER_ROUTES=100000 RADIX_ROUTER_TIMES=10000000 $(CMD) benchmark/simple-prefix.lua + RADIX_ROUTER_ROUTES=100000 RADIX_ROUTER_TIMES=1000000 $(CMD) benchmark/simple-regex.lua RADIX_ROUTER_ROUTES=100000 RADIX_ROUTER_TIMES=1000000 $(CMD) benchmark/complex-variable.lua RADIX_ROUTER_ROUTES=100000 RADIX_ROUTER_TIMES=10000000 $(CMD) 
benchmark/simple-variable-binding.lua RADIX_ROUTER_TIMES=1000000 $(CMD) benchmark/github-routes.lua diff --git a/README.md b/README.md index dabb1e0..82b5cd0 100644 --- a/README.md +++ b/README.md @@ -27,11 +27,13 @@ The router can be run in different runtimes such as Lua, LuaJIT, or OpenResty. **Custom matcher:** The router has two efficient matchers built in, MethodMatcher(`method`) and HostMatcher(`host`). They can be disabled via `opts.matcher_names`. You can also add your custom matchers via `opts.matchers`. For example, an IpMatcher to evaluate whether the `ctx.ip` is matched with the `ips` of a route. +**Regular Expression:** You can define a regex pattern for a variable. A variable without a regex pattern is treated as `[^/]+`. + +- `/users/{id:\\d+}/profile-{year:\\d{4}}.{format:(html|pdf)}` + **Features in the roadmap**: - Expression condition: defines custom matching conditions by using expression language. -- Regex in variable - ## 📖 Getting started diff --git a/benchmark/simple-regex.lua b/benchmark/simple-regex.lua new file mode 100644 index 0000000..a6b9623 --- /dev/null +++ b/benchmark/simple-regex.lua @@ -0,0 +1,33 @@ +local Router = require "radix-router" +local utils = require "benchmark.utils" + +local route_n = os.getenv("RADIX_ROUTER_ROUTES") or 1000 * 100 +local times = os.getenv("RADIX_ROUTER_TIMES") or 1000 * 1000 * 10 + +local router +do + local routes = {} + for i = 1, route_n do + routes[i] = { paths = { string.format("/%d/{name:[^/]+}", i) }, handler = i } + end + router = Router.new(routes) +end + +local rss_mb = utils.get_rss() + +local path = "/1/a" +local elapsed = utils.timing(function() + for _ = 1, times do + router:match(path) + end +end) + +utils.print_result({ + title = "regex", + routes = route_n, + times = times, + elapsed = elapsed, + benchmark_path = path, + benchmark_handler = router:match(path), + rss = rss_mb, +}) diff --git a/examples/regular-expression.lua b/examples/regular-expression.lua new file mode 100644 index 
0000000..9f4edb2 --- /dev/null +++ b/examples/regular-expression.lua @@ -0,0 +1,12 @@ +local Router = require "radix-router" +local router, err = Router.new({ + { + paths = { "/users/{id:\\d+}/profile-{year:\\d{4}}.{format:(html|pdf)}" }, + handler = "1" + }, +}) +if not router then + error("failed to create router: " .. err) +end + +assert("1" == router:match("/users/100/profile-2024.pdf")) \ No newline at end of file diff --git a/radix-router-dev-1.rockspec b/radix-router-dev-1.rockspec index a9453a4..c704832 100644 --- a/radix-router-dev-1.rockspec +++ b/radix-router-dev-1.rockspec @@ -22,7 +22,7 @@ description = { } dependencies = { - "lua >= 5.1, < 5.5" + "lrexlib-pcre2", } build = { diff --git a/spec/parser_spec.lua b/spec/parser_spec.lua index c715714..26e7001 100644 --- a/spec/parser_spec.lua +++ b/spec/parser_spec.lua @@ -14,7 +14,8 @@ describe("parser", function() ["/aa/{var1}/cc/{var2}"] = { "/aa/", "{var1}", "/cc/", "{var2}" }, ["/user/profile.{format}"] = { "/user/profile.", "{format}" }, ["/user/{filename}.{format}"] = { "/user/", "{filename}", ".", "{format}" }, - ["/aa/{name:[0-9]+}/{*suffix}"] = { "/aa/", "{name:[0-9]+}", "/", "{*suffix}" } + ["/aa/{name:[0-9]+}/{*suffix}"] = { "/aa/", "{name:[0-9]+}", "/", "{*suffix}" }, + ["/user/{id:\\d+}/profile-{year:\\d{4}}.{format:(html|pdf)}"] = { "/user/", "{id:\\d+}", "/profile-", "{year:\\d{4}}", ".", "{format:(html|pdf)}" }, } for path, expected_tokens in pairs(tests) do @@ -63,5 +64,42 @@ describe("parser", function() assert.same(test.params, params, "assertion failed: " .. 
i) end end) + it("compile_regex()", function() + local tests = { + { + path = "/a/b/c", + regex = "^\\Q/a/b/c\\E$" + }, + { + path = "/a/{b}/c/{d}", + regex = "^\\Q/a/\\E[^/]+\\Q/c/\\E[^/]+$" + }, + { + path = "/a/{b:\\d+}/c/{d:\\d{3}}", + regex = "^\\Q/a/\\E\\d+\\Q/c/\\E\\d{3}$" + }, + { + path = "/a/{*catchall}", + regex = "^\\Q/a/\\E.*$" + }, + { + path = "/a/{b}/c/{*catchall}", + regex = "^\\Q/a/\\E[^/]+\\Q/c/\\E.*$" + }, + { + path = "/a/{b:[a-z]+}/c/{*catchall}", + regex = "^\\Q/a/\\E[a-z]+\\Q/c/\\E.*$" + }, + { + path = "/users/{id:\\d+}/profile-{year:\\d{4}}.{format:(html|pdf)}", + regex = "^\\Q/users/\\E\\d+\\Q/profile-\\E\\d{4}\\Q.\\E(html|pdf)$" + } + } + for i, test in pairs(tests) do + local parser = Parser.new("default") + local regex = parser:update(test.path):compile_regex() + assert.same(test.regex, regex, "assertion failed: " .. i) + end + end) end) end) diff --git a/spec/router_spec.lua b/spec/router_spec.lua index 9e9e012..f61ac42 100644 --- a/spec/router_spec.lua +++ b/spec/router_spec.lua @@ -88,7 +88,7 @@ describe("Router", function() local router, err = Router.new({}, { matcher_names = { "inexistent" } }) assert.is_nil(router) - assert.equal("invalid matcher name: inexistent", err) + assert.equal("invalid args opts: invalid matcher name: inexistent", err) end) end) describe("match", function() @@ -480,6 +480,50 @@ describe("Router", function() assert.same({ cat = "suffix" }, binding) end) end) + describe("regex", function() + it("sanity", function() + local router = Router.new({ + { + paths = { "/a/{b:\\d{3}}/c" }, + handler = "/a/{b:\\d{3}}/c", + }, + { + paths = { "/a/{b:\\d+}/c" }, + handler = "/a/{b:\\d+}/c", + }, + { + paths = { "/a/{b:[a-z]+}/c" }, + handler = "/a/{b:[a-z]+}/c", + }, + { + paths = { "/a/{b:[^/]+}/c" }, + handler = "/a/{b:[^/]+}/c", + }, + { + paths = { "/users/{id:\\d+}/profile-{year:\\d{4}}.{format:(html|pdf)}" }, + handler = "1", + }, + { + paths = { 
"/escape/{var}/{var1:[a-z]+}|{var2:[A-Z]+}|{var3:\\d+}|{var4:(html|pdf)}" }, + handler = "2", + } + }) + assert.equal("/a/{b:\\d+}/c", router:match("/a/2024/c")) + assert.equal("/a/{b:\\d{3}}/c", router:match("/a/123/c")) + assert.equal("/a/{b:[a-z]+}/c", router:match("/a/abc/c")) + assert.equal("/a/{b:[^/]+}/c", router:match("/a/abc0/c")) + + -- /users/{id:\\d+}/profile-{year:\\d{4}}.{format:html|pdf} + assert.equal("1", router:match("/users/123/profile-2024.html")) + assert.equal("1", router:match("/users/123/profile-2024.pdf")) + assert.equal(nil, router:match("/users/abc/profile-2024.html")) + assert.equal(nil, router:match("/users/123/profile-123.html")) + assert.equal(nil, router:match("/users/123/profile-2024.jpg")) + + -- /escape/{var}/{var1:[a-z]+}|{var2:[A-Z]+}|{var3:\\d+}|{var4:(html|pdf)} + assert.equal("2", router:match("/escape/var/aaa|AAA|111|html")) + end) + end) describe("matching order", function() it("first registered first match", function() local router = Router.new({ diff --git a/spec/utils_spec.lua b/spec/utils_spec.lua index 04e5210..398fe36 100644 --- a/spec/utils_spec.lua +++ b/spec/utils_spec.lua @@ -36,4 +36,18 @@ describe("utils", function() assert.equal(0, utils.lcp("", "/abcd")) assert.equal(0, utils.lcp("a", "c")) end) + pending("is_digit()", function() + assert.is_true(utils.is_digit(string.byte("0"))) + assert.is_true(utils.is_digit(string.byte("1"))) + assert.is_true(utils.is_digit(string.byte("2"))) + assert.is_true(utils.is_digit(string.byte("3"))) + assert.is_true(utils.is_digit(string.byte("4"))) + assert.is_true(utils.is_digit(string.byte("5"))) + assert.is_true(utils.is_digit(string.byte("6"))) + assert.is_true(utils.is_digit(string.byte("7"))) + assert.is_true(utils.is_digit(string.byte("8"))) + assert.is_true(utils.is_digit(string.byte("9"))) + assert.is_false(utils.is_digit(string.byte("/"))) + assert.is_false(utils.is_digit(string.byte(":"))) + end) end) diff --git a/src/parser/parser.lua b/src/parser/parser.lua index 
8815b9d..b23e527 100644 --- a/src/parser/parser.lua +++ b/src/parser/parser.lua @@ -12,7 +12,7 @@ local parsers = { function Parser.new(style) local parser = parsers[style] if not parser then - return nil, "invalid style: " .. style + return nil, "unknown parser style: " .. style end return parser.new() diff --git a/src/parser/style/default.lua b/src/parser/style/default.lua index cc5144d..7a04c3b 100644 --- a/src/parser/style/default.lua +++ b/src/parser/style/default.lua @@ -47,6 +47,7 @@ function _M:reset() self.anchor = 1 self.pos = 1 self.state = nil + self.bracket_depth = 0 end @@ -58,7 +59,8 @@ function _M:next() local char, token, token_type while self.pos <= self.path_n do char = byte(self.path, self.pos) - --print("pos: " .. self.pos .. "(" .. string.char(char) .. ")") + --local char_str = string.char(char) + --print("pos: " .. self.pos .. "(" .. char_str .. ")") if self.state == nil or self.state == STATES.static then if char == BYTE_LEFT_BRACKET then if self.state == STATES.static then @@ -67,12 +69,18 @@ function _M:next() self.anchor = self.pos end self.state = STATES.variable_start + self.bracket_depth = 1 else self.state = STATES.static end elseif self.state == STATES.variable_start then - if char == BYTE_RIGHT_BRACKET then - self.state = STATES.variable_end + if char == BYTE_LEFT_BRACKET then + self.bracket_depth = self.bracket_depth + 1 + elseif char == BYTE_RIGHT_BRACKET then + self.bracket_depth = self.bracket_depth - 1 + if self.bracket_depth == 0 then + self.state = STATES.variable_end + end end elseif self.state == STATES.variable_end then self.state = STATES.static @@ -93,6 +101,7 @@ function _M:next() return token, self.token_type(token) end + function _M:parse() self:reset() @@ -108,6 +117,7 @@ function _M:parse() return tokens end + function _M.token_type(token) if byte(token) == BYTE_LEFT_BRACKET and byte(token, #token) == BYTE_RIGHT_BRACKET then @@ -120,15 +130,39 @@ function _M.token_type(token) return TOKEN_TYPES.literal end -function 
_M.is_dynamic(path) - local patn_n = #path - for i = 1, patn_n do - local char = byte(path, i) - if char == BYTE_LEFT_BRACKET or char == BYTE_RIGHT_BRACKET then - return true + +local function parse_token_regex(token) + for i = 1, #token do + if byte(token, i) == BYTE_COLON then + return sub(token, i + 1, -2) end end - return false + return nil +end + + +-- compile path to regex pattern +function _M:compile_regex() + local tokens = { "^" } + + local token, token_type = self:next() + while token do + if token_type == TOKEN_TYPES.variable then + local pattern = parse_token_regex(token) or "[^/]+" + table.insert(tokens, pattern) + elseif token_type == TOKEN_TYPES.catchall then + table.insert(tokens, ".*") + else + -- quote the literal token + table.insert(tokens, "\\Q") + table.insert(tokens, token) + table.insert(tokens, "\\E") + end + token, token_type = self:next() + end + table.insert(tokens, "$") + + return table.concat(tokens) end function _M:params() @@ -240,4 +274,40 @@ function _M:bind_params(req_path, req_path_n, params, trailing_slash_mode) end end + +local function contains_regex(path) + local bracket_depth = 0 + + for i = 1, #path do + local char = byte(path, i) + if char == BYTE_LEFT_BRACKET then + bracket_depth = bracket_depth + 1 + elseif char == BYTE_RIGHT_BRACKET then + bracket_depth = bracket_depth - 1 + elseif char == BYTE_COLON and bracket_depth == 1 then + -- regex syntax {var:[^/]+} + -- return true only if the colon is in the first depth + return true + end + end + + return false +end + + +local function is_dynamic(path) + local patn_n = #path + for i = 1, patn_n do + local char = byte(path, i) + if char == BYTE_LEFT_BRACKET or char == BYTE_RIGHT_BRACKET then + return true + end + end + return false +end + + +_M.contains_regex = contains_regex +_M.is_dynamic = is_dynamic + return _M diff --git a/src/route.lua b/src/route.lua index 90b9f87..f6d9217 100644 --- a/src/route.lua +++ b/src/route.lua @@ -10,7 +10,7 @@ local Route = {} local mt = { 
__index = Route } -function Route.new(route, _) +function Route.new(route) if route.handler == nil then return nil, "handler must not be nil" end diff --git a/src/router.lua b/src/router.lua index 9054bbc..9c1ad92 100644 --- a/src/router.lua +++ b/src/router.lua @@ -15,6 +15,7 @@ local ipairs = ipairs local str_byte = string.byte local str_sub = string.sub local idx = constants.node_indexs +local regex_test = utils.regex_test local BYTE_SLASH = str_byte("/") local EMPTY = utils.readonly({}) @@ -22,6 +23,7 @@ local EMPTY = utils.readonly({}) local Router = {} local mt = { __index = Router } + local function add_route(self, path, route) local path_route = { path, route } local is_dynamic = self.parser.is_dynamic(path) @@ -57,6 +59,10 @@ local function add_route(self, path, route) return route1:compare(route2) end) end, self.parser) + + if self.parser.contains_regex(path) then + self.regexs[path] = self.parser:update(path):compile_regex() + end end @@ -85,28 +91,27 @@ function Router.new(routes, opts) local matcher, err = Matcher.new(options.matcher_names, options.matchers) if err then - return nil, err + return nil, "invalid args opts: " .. err end local self = { options = options, parser = Parser.new("default"), static = {}, + regexs = {}, + regexs_cache = {}, trie = Trie.new(), iterator = Iterator.new(options), matcher = matcher, } - local route_opts = { - parser = self.parser - } - for i, route in ipairs(routes or EMPTY) do local ok, err = self.matcher:process(route) if not ok then return nil, "unable to process route(index " .. i .. "): " .. err end - local route_t, err = Route.new(route, route_opts) + + local route_t, err = Route.new(route) if err then return nil, "invalid route(index " .. i .. "): " .. 
err end @@ -120,23 +125,26 @@ function Router.new(routes, opts) end -local function find_route(matcher, routes, ctx, matched) - if routes[0] == 1 then - local route = routes[1][2] - if matcher:match(route, ctx, matched) then - return route, routes[1][1] - end - return nil, nil - end - +local function find_route(self, path, routes, ctx, matched, evaluate_regex) for n = 1, routes[0] do + local route_path = routes[n][1] local route = routes[n][2] - if matcher:match(route, ctx, matched) then - return route, routes[n][1] + local regex_matched = true + if evaluate_regex then + local regex = self.regexs[route_path] + if regex then + regex_matched = regex_test(path, regex, self.regexs_cache) + end + end + if regex_matched and self.matcher:match(route, ctx, matched) then + if matched then + matched.path = route_path + end + return route, route_path end end - return nil, nil + return nil end @@ -155,15 +163,11 @@ function Router:match(path, ctx, params, matched) local trailing_slash_match = self.options.trailing_slash_match local matched_route, matched_path - local matcher = self.matcher local routes = self.static[path] if routes then - matched_route, matched_path = find_route(matcher, routes, ctx, matched) + matched_route, matched_path = find_route(self, path, routes, ctx, matched) if matched_route then - if matched then - matched.path = matched_path - end return matched_route.handler end end @@ -175,11 +179,8 @@ function Router:match(path, ctx, params, matched) routes = self.static[path .. 
"/"] end if routes then - matched_route, matched_path = find_route(matcher, routes, ctx, matched) + matched_route, matched_path = find_route(self, path, routes, ctx, matched) if matched_route then - if matched then - matched.path = matched_path - end return matched_route.handler end end @@ -193,11 +194,8 @@ function Router:match(path, ctx, params, matched) local values, count = self.iterator:find(node, state_path, state_path_n) if values then for n = count, 1, -1 do - matched_route, matched_path = find_route(matcher, values[n], ctx, matched) + matched_route, matched_path = find_route(self, path, values[n], ctx, matched, true) if matched_route then - if matched then - matched.path = matched_path - end break end end diff --git a/src/utils.lua b/src/utils.lua index f6b083a..b976b68 100644 --- a/src/utils.lua +++ b/src/utils.lua @@ -1,11 +1,14 @@ +--- + local str_byte = string.byte local math_min = math.min local type = type local is_luajit = type(_G.jit) == "table" + +--- clear a table local clear_table -local new_table do local ok ok, clear_table = pcall(require, "table.clear") @@ -17,9 +20,16 @@ do end end end +end + + +--- allocate a pre-sized table +local new_table +do + local ok ok, new_table = pcall(require, "table.new") if not ok then - new_table = function(narr , nrec) + new_table = function(narr, nrec) return {} end end @@ -123,6 +133,9 @@ local function lcp(str1, str2) return n end +--local function is_digit(char) +-- return char >= 48 and char <= 57 +--end local function readonly(t) return setmetatable(t, { @@ -130,6 +143,29 @@ local function readonly(t) }) end +local regex_test +do + if ngx and ngx.re then + local ngx_re_find = ngx.re.find + regex_test = function(str, regex) + local from, to = ngx_re_find(str, regex, "jo") + return from == 1 and to == #str + end + else + local lrex = require "rex_pcre2" + regex_test = function(str, regex, cache) + local compiled = cache[regex] + if not compiled then + compiled = lrex.new(regex) + compiled:jit_compile() + 
cache[regex] = compiled + end + local from, to = compiled:find(str) + return from == 1 and to == #str + end + end +end + return { lcp = lcp, @@ -139,4 +175,6 @@ return { new_table = new_table, is_luajit = is_luajit, readonly = readonly, + --is_digit = is_digit, + regex_test = regex_test, }