Skip to content

Commit

Permalink
Search: no more single quotes, fix backslash escaping, support \* (fix
Browse files Browse the repository at this point in the history
  • Loading branch information
edemaine committed Nov 21, 2023
1 parent f4c6e09 commit 65e8605
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 15 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ instead of version numbers.

* Only cluster by leading tags in the sort order, so if you click to sort by,
e.g., Updated, then there's no longer awkward clustering by repeated tags.
* Search no longer supports quoting phrases with single quotes, so that it's
easier to search for possessives (e.g. `Erik's`).
[[#638](https://github.com/edemaine/coauthor/issues/638)]
* Fix escaping search queries with backslash: `\:`, `\"`, `\|`, `\(`, `\)`, `\\`
* Newly allow escaping of `*` with `\*`
[[#29](https://github.com/edemaine/coauthor/issues/29)]

## 2023-11-17

Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -200,11 +200,13 @@ To see what's changed in Coauthor recently, check out the
* Negative match with minus sign
(e.g., `-word` excludes documents with whole `word`).
* Search for a regular expression via `regex:...`.
* Use quotes (`'...'` or `"..."`) to search for phrases or `regex:"..."`
* Use double quotes (`"..."`) to search for phrases or `regex:"..."`
to search for regular expressions with spaces in them; normally,
spaces act as an AND query.
* Connect words/phrases with `|` to get an OR query instead.
* Use parentheses to mix AND and OR arbitrarily, e.g. `always (this | that)`.
* Escape special characters with backslash:
`\:`, `\"`, `\|`, `\(`, `\)`, `\\`, `\*`.
* `by:username` searches for messages coauthored by a specified username
(which can include `*`s or use regular expressions via `regex:`);
`by:me` is shorthand for searching for your own username.
Expand Down
27 changes: 13 additions & 14 deletions lib/search.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ unescapeRegExp = (regex) ->
export parseSearch = (search, group) ->
## Quoted strings turn off separation by spaces.
## The last quoted string doesn't have to be terminated.
tokenRe = /(\s+)|((?:"[^"]*"|'[^']*'|[^'"\s\|\(\)])+)('[^']*$|"[^"]*$)?|'([^']*)$|"([^"]*)$|([\|\(\)])/g
tokenRe = ///
(\s+) |
([\|\(\)]) |
( (?: " (?:[^"\\]|\\.)* (?:"|$) | [^"\s\|\(\)\\] | \\. )+ )
///g
wants = [] # array of Mongo queries that will be $and'd together
options = [wants] # array of wants that will be $or'd together
stack = [] # stack of strict-ancestor options objects
Expand Down Expand Up @@ -63,8 +67,8 @@ export parseSearch = (search, group) ->
while (token = tokenRe.exec search)?
continue if token[1] ## ignore whitespace tokens

if token[6] # top-level grouping operators
switch token[6]
if token[2] # top-level grouping operators
switch token[2]
when '|' # OR
options.push wants = []
when '(' # start group
Expand All @@ -84,19 +88,12 @@ export parseSearch = (search, group) ->
colon = /^-?(?:(?:regex|title|body|tag|emoji|by|root|is|isnt|not):)*/.exec token[0]
colon = colon[0]
## Remove quotes (which are just used for avoiding space parsing).
if token[4]
token = token[4] ## unterminated initial '
else if token[5]
token = token[5] ## unterminated initial "
else
token = (token[2].replace /"([^"]|\\")*"|'([^']|\\')*'/g, (match) ->
match[1...match.length-1]
) + (token[3] ? '')[1..]
token = token[3].replace /(^|[^\\](?:\\\\)*)"((?:[^"\\]|\\[^])*)(?:"|$)/g, "$1$2"
## Remove leading colon part if we found one.
## (Can't have had quotes or escapes.)
token = token[colon.length..]
## Remove escapes.
token = token.replace /\\([:'"\\])/g, '$1'
token = token.replace /\\([:"\|\(\)\\])/g, '$1'
## Construct regex for token
regexMode = 0 <= colon.indexOf 'regex:'
colon = colon.replace /regex:/g, '' if regexMode
Expand All @@ -109,14 +106,16 @@ export parseSearch = (search, group) ->
if starStart
word = word[1..]
return unless word
starEnd = word[word.length-1] == '*'
starEnd = /[^\\]\*$/.test word
if starEnd
word = word[0...word.length-1]
return unless word
regex = escapeRegExp word
## Outside regex mode, lower-case letters are case-insensitive
regex = caseInsensitiveRegExp regex
regex = regex.replace /\\\*/g, '\\S*' ## * was already escaped
regex = regex
.replace /(^|[^\\])\\\*/g, '$1\\S*' # * in input becomes singly escaped
.replace /\\\\\\\*/g, '\\*' # \* in input becomes doubly escaped
if not starStart and regex.match /^[\[\w]/ ## a or [aA]
regex = "\\b#{regex}"
if not starEnd and regex.match /[\w\]]$/ ## a or [aA]
Expand Down

0 comments on commit 65e8605

Please sign in to comment.