-
Notifications
You must be signed in to change notification settings - Fork 2
/
dix.el
2676 lines (2407 loc) · 102 KB
/
dix.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
;;; dix.el --- Apertium XML editing minor mode -*- lexical-binding: t -*-
;; Copyright (C) 2009-2023 Kevin Brubeck Unhammer
;; Author: Kevin Brubeck Unhammer <[email protected]>
;; Version: 0.4.1
;; Url: http://wiki.apertium.org/wiki/Emacs
;; Keywords: languages
;; Package-Requires: ((cl-lib "0.5") (emacs "26.2"))
;; This file is not part of GNU Emacs.
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;; Basic usage:
;;
;; (add-hook 'nxml-mode-hook #'dix-on-nxml-mode)
;;
;; Unless you installed from MELPA, you'll also need
;;
;; (add-to-list 'load-path "/path/to/dix.el-folder")
;; (autoload 'dix-mode "dix"
;; "dix-mode is a minor mode for editing Apertium XML dictionary files." t)
;;
;; If you actually work on Apertium packages, you'll probaby want some
;; other related Emacs extensions as well; see
;; http://wiki.apertium.org/wiki/Emacs#Quickstart for an init file
;; that installs and configures both dix.el and some related packages.
;; Optional dependencies:
;; * `strie' – for the `dix-guess-pardef' function
;; If you want keybindings that use `C-c' followed by letters, you
;; should also add
;; (add-hook 'dix-mode-hook #'dix-C-c-letter-keybindings)
;; These are not turned on by default, since `C-c' followed by letters
;; is meant to be reserved for user preferences.
;; Useful functions (some using C-c-letter-keybindings): `C-c <left>'
;; creates an LR-restricted copy of the <e>-element at point, `C-c
;; <right>' an RL-restricted one. `C-TAB' cycles through the
;; restriction possibilities (LR, RL, none), while `M-n' and `M-p'
;; move to the next and previous "important bits" of <e>-elements
;; (just try it!). `C-c S' sorts a pardef, while `M-.' moves point
;; to the pardef of the entry at point, leaving mark where you left
;; from (`M-.' will go back). `C-c \' greps the pardef/word at point
;; using the dictionary files represented by the string
;; `dix-dixfiles', while `C-c D' gives you a list of all pardefs which
;; use these suffixes (where a suffix is the contents of an
;; <l>-element).
;; `M-x dix-suffix-sort' is a general function, useful outside of dix
;; XML files too, that just reverses each line, sorts them, and
;; reverses them back. `C-c % %' is a convenience function for
;; regexp-replacing text within certain XML elements, eg. all <e>
;; elements; `C-c % r' and `C-c % l' are specifically for <r> and <l>
;; elements, respectively.
;; I like having the following set too:
;; (setq nxml-sexp-element-flag t ; treat <e>...</e> as a sexp
;; nxml-completion-hook '(rng-complete t) ; C-RET completes based on DTD
;; rng-nxml-auto-validate-flag nil) ; 8MB of XML takes a while
;; You can always turn on validation again with C-c C-v. Validation
;; is necessary for the C-RET completion, which is really handy in
;; transfer files.
;; I haven't bothered with defining a real indentation function, but
;; if you like having all <i> elements aligned at eg. column 25, the
;; align rules defined here let you do M-x align on a region to
;; achieve that, and also aligns <p> and <r>. Set your favorite
;; column numbers with M-x customize-group RET dix.
;; Plan / long term TODO:
;; - Yank into <i/l/r> or pardef n="" should replace spaces with either
;; a <b/> or a _
;; - Functions shouldn't modify the kill-ring.
;; - Functions should be agnostic to formatting (ie. only use nxml
;; movement functions, never forward-line).
;; - Real indentation function for one-entry-one-line format.
;; - `dix-LR-restriction-copy' should work on a region of <e>'s.
;; - `dix-expand-lemma-at-point' (either using `dix-goto-pardef' or
;; `lt-expand')
;; - Some sort of interactive view of the translation process . When
;; looking at a word in monodix, you should easily get confirmation on
;; whether (and what) it is in the bidix or other monodix (possibly
;; just using `apertium-transfer' and `lt-proc' on the expanded
;; paradigm).
;; - Function for creating a prelimenary list of bidix entries from
;; monodix entries, and preferably from two such lists which
;; we "paste" side-by-side.
;; - `dix-LR-restriction-copy' (and the other copy functions) could
;; add a="author"
;; - `dix-dixfiles' could auto-add files from Makefile?
;; - `dix-sort-e-by-r' doesn't work if there's an <re> element after
;; the <r>; and doesn't sort correctly by <l>-element, possibly to
;; do with spaces
;; - `dix-reverse' should be able to reverse on a regexp match, so
;; that we can do `dix-suffix-sort' by eg. <l>-elements.
;; - Investigate if Emacs built-in `tildify-mode' should be used to
;; implement `dix-space'.
;;; Code:
(defconst dix-version "0.4.1")
(require 'nxml-mode)
(require 'cl-lib)
(require 'easymenu)
(require 'subr-x)
(eval-when-compile (require 'align))
;;;============================================================================
;;;
;;; Define the formal stuff for a minor mode named dix.
;;;
(defvar dix-mode-map (make-sparse-keymap)
"Keymap for dix minor mode.")
(defvar dix-mode-syntax-table
(let ((st (copy-syntax-table nxml-mode-syntax-table)))
(modify-syntax-entry ?< "(" st)
(modify-syntax-entry ?> ")" st)
(modify-syntax-entry ?@ "_" st)
(modify-syntax-entry ?: "_" st)
(modify-syntax-entry ?. "_" st)
st)
"Syntax table for dix minor mode.")
(defgroup dix nil
"Minor mode for editing Apertium XML dictionaries."
:tag "Apertium dix"
:group 'nxml)
;;;###autoload
(define-minor-mode dix-mode
"Toggle dix-mode.
With arg, turn on dix-mode if and only if arg is positive.
dix-mode is a minor mode for editing Apertium XML dictionary files.
KEY BINDINGS
------------
\\{dix-mode-map}
Entering dix-mode calls the hook dix-mode-hook.
------------------------------------------------------------------------------"
:init-value nil
:lighter " dix"
:keymap dix-mode-map
:require nxml-mode
(when (member (file-name-extension (buffer-file-name)) '("dix" "metadix"))
(font-lock-add-keywords nil
'(("<[lr]>\\(?:[^<]\\|<b/>\\)*\\( \\)"
. (progn ; based on rng-mark-error
(dix-mark-error "Use <b/> instead of literal space"
(match-beginning 1)
(match-end 1))
nil)))))
(when (dix-is-transfer)
(font-lock-add-keywords nil
'(("<lit-tag v=\"\"/>"
. (progn ; based on rng-mark-error
(dix-mark-error "Use lit instead of lit-tag to match empty strings"
(match-beginning 0)
(match-end 0))
nil)))))
(when (dix-is-lrx)
(font-lock-add-keywords nil
'(("<match[^>]*\\(></match>\\)"
. (progn ; based on rng-mark-error
(dix-mark-error "Use /> instead of ></match>"
(match-beginning 1)
(match-end 1))
nil)))))
(set-syntax-table dix-mode-syntax-table)
(dix-imenu-setup))
(defvar dix-file-name-patterns
"\\.\\(meta\\|multi\\)?dix$\\|\\.t[0-9s]x$\\|\\.l[sr]x$\\|\\.metalrx$\\|/modes\\.xml$\\|/cross-model\\.xml$")
(defun dix-on-nxml-mode ()
"Turn on dix-mode if suitable dix file extension.
Usage: (add-hook 'nxml-mode-hook #'dix-on-nxml-mode)."
(when (and (buffer-file-name)
(string-match dix-file-name-patterns buffer-file-name))
(modify-syntax-entry ?> ")<" nxml-mode-syntax-table)
(modify-syntax-entry ?< "(>" nxml-mode-syntax-table)
(dix-mode 1)))
(defun dix-mark-error (message beg end)
"Create an error overlay with the dix-error category.
MESSAGE, BEG and END as in `rng-mark-error'."
(let ((overlay
(make-overlay beg end nil t
(= beg end))))
(overlay-put overlay 'priority beg)
(overlay-put overlay 'category 'dix-error)
(overlay-put overlay 'help-echo message)))
(put 'dix-error 'face 'rng-error)
;;;============================================================================
;;;
;;; Menu
;;;
(easy-menu-define dix-mode-easy-menu dix-mode-map "dix-mode menu"
'("dix"
["View pardef" dix-view-pardef
:help "View the pardef in another window"]
["Go to pardef" dix-goto-pardef]
("Guess pardef of the word on this line..."
:help "Write a single word on a line, place point somewhere inside the word, and this will guess the pardef using the above entries."
["with no PoS restriction" dix-guess-pardef
:help "Write a single word on a line, place point somewhere inside the word, and this will guess the pardef using the above entries."])
"---"
["Sort pardef" dix-sort-pardef
:help "Must be called from within a pardef"]
["Grep for this pardef in dix-dixfiles" dix-grep-all
:help "Must be called from within a pardef. Uses the variable dix-dixfiles"]
["Show Duplicate pardefs" dix-find-duplicate-pardefs
:help "Must be called from within a pardef. Calculate must have been called at least once"]
["Calculate and Show Duplicate pardefs" (dix-find-duplicate-pardefs 'recompile)
:keys "C-u C-c D"
:help "Must be called from within a pardef. Slower, but must be called at least once before showing duplicate pardefs"]
"---"
["Narrow Buffer to Given sdef" dix-narrow-to-sdef
:help "Show only that part of the buffer which contains a given sdef, eg. work only on nouns for a while. Widen with `C-x n w' as per usual."]
"---"
["Change Restriction of <e> (LR, RL, none)" dix-restriction-cycle]
["Go to Next Useful Position in the Buffer" dix-next]
["Go to Previous Useful Position in the Buffer" dix-previous]
("Replace Regexp Within..."
["Certain Elements" dix-replace-regexp-within-elt
:help "Prompts for an element name"]
["<l> Elements" dix-replace-regexp-within-l]
["<r> Elements" dix-replace-regexp-within-r])
("Copy <e> and..."
["Keep Contents" dix-copy
:help "Make a copy of the current <e> element"]
["Apply an LR Restriction" dix-LR-restriction-copy
:help "Make a copy of the current <e> element"]
["Apply an RL Restriction" dix-RL-restriction-copy
:help "Make a copy of the current <e> element"]
["Clear Contents" (dix-copy 'remove-lex)
:keys "C-u C-c C"
:help "Make a copy of the current <e> element"]
["Prepend kill-buffer into lm and <i>" dix-copy-yank
:help "Make a copy of the current <e> element"])
["Turn one-word-per-line into XML using above <e> as template" dix-xmlise-using-above-elt
:help "Write one word (or colon-separated word-pair) per line, then use the above <e> as a template to turn them into XML"]
["I-search Within lm's (rather buggy)" dix-word-search-forward]
"---"
["Go to transfer rule number" dix-goto-rule-number]
"---"
["Customize dix-mode" (customize-group 'dix)]
["Help for dix-mode" (describe-function 'dix-mode)
:keys "C-h m"]
["Show dix-mode Version" (message "dix-mode version %s" dix-version)]))
;;;============================================================================
;;;
;;; Helpers
;;;
(defmacro dix-with-sexp (&rest body)
"Execute `BODY' with `nxml-sexp-element-flag' set to true."
(declare (indent 1) (debug t))
`(let ((old-sexp-element-flag nxml-sexp-element-flag))
(setq nxml-sexp-element-flag t)
(let ((ret ,@body))
(setq nxml-sexp-element-flag old-sexp-element-flag)
ret)))
(defmacro dix-with-no-case-fold (&rest body)
"Execute `BODY' with `case-fold-search' set to nil."
(declare (indent 1) (debug t))
`(let ((old-case-fold-search case-fold-search))
(setq case-fold-search nil)
,@body
(setq case-fold-search old-case-fold-search)))
(defun dix--completing-read (&rest args)
"Call `dix-completing-read-function' on ARGS."
(apply dix-completing-read-function args))
(defvar dix-parse-bound 10000
"Max amount of chars (not lines) to parse through in dix xml operations.
Useful since dix tend to get huge. Relative bound. Decrease the
number if operations ending in \"No parent element\" take too
long.")
(put 'dix-bound-error 'error-conditions '(error dix-parse-error dix-bound-error))
(put 'dix-bound-error 'error-message "Hit `dix-parse-bound' when parsing")
(put 'dix-barrier-error 'error-conditions '(error dix-parse-error dix-barrier-error))
(put 'dix-barrier-error 'error-message "Hit barrier when parsing")
(defun dix-backward-up-element (&optional arg bound)
"Modified from `nxml-backward-up-element' to include a search boundary.
Optional argument ARG says how many elements to move; won't go
past buffer position BOUND."
(interactive "p")
(or arg (setq arg 1))
(if (< arg 0)
(nxml-up-element (- arg))
(condition-case err
(while (and (> arg 0)
(< (point-min) (point)))
(let ((token-end (nxml-token-before)))
(goto-char (cond ((or (memq xmltok-type '(start-tag
partial-start-tag))
(and (memq xmltok-type
'(empty-element
partial-empty-element))
(< (point) token-end)))
xmltok-start)
((nxml-scan-element-backward
(if (and (eq xmltok-type 'end-tag)
(= (point) token-end))
token-end
xmltok-start)
t
bound)
xmltok-start)
(t (signal 'dix-bound-error "No parent element")))))
(setq arg (1- arg)))
(nxml-scan-error
(goto-char (cadr err))
(apply 'error (cddr err))))))
(defun dix-up-to (eltname &optional barrier)
"Move point to start of element `ELTNAME' (a string, eg. \"e\")
which we're looking at. Optional `BARRIER' is the outer element,
so we don't go all the way through the file looking for our
element (ultimately constrained by the variable
`dix-parse-bound'). Ideally `dix-backward-up-element' should
stop on finding another `ELTNAME' element."
(nxml-token-after)
(when (eq xmltok-type 'space)
(goto-char (1+ (nxml-token-after)))
(nxml-token-after))
(goto-char xmltok-start)
(let ((tok (xmltok-start-tag-qname))
(bound (max (point-min)
(- (point) dix-parse-bound))))
(while (not (or (equal tok eltname)
(equal tok barrier)
(equal tok (concat "<" eltname))))
(dix-backward-up-element 1 bound)
(nxml-token-after)
(setq tok (xmltok-start-tag-qname)))
(if (equal tok barrier)
(signal 'dix-barrier-error (format "Didn't find %s" eltname)))))
(defvar dix-transfer-entities
'((condition "and" "or" "not" "equal" "begins-with" "begins-with-list" "ends-with" "ends-with-list" "contains-substring" "in")
(container "var" "clip")
(sentence "let" "out" "choose" "modify-case" "call-macro" "append" "reject-current-rule")
(value "b" "clip" "lit" "lit-tag" "var" "get-case-from" "case-of" "concat" "lu" "mlu" "chunk")
(stringvalue "clip" "lit" "var" "get-case-from" "case-of")
(choice "when" "otherwise"))
"From transfer.dtd; interchunk/postchunk TODO.")
(defvar dix-transfer-elements
'((def-macro sentence)
(action sentence)
(when sentence)
(otherwise sentence)
(test condition)
(and condition)
(or condition)
(not condition)
(equal value)
(begins-with value)
(ends-with value)
(begins-with-list value)
(ends-with-list value)
(contains-substring value)
(in value)
(let value container)
(append value)
(modify-case stringvalue container)
(concat value)
(lu value)
(tag value)
(choose choice))
"From transfer.dtd; interchunk/postchunk TODO.")
(defun dix-transfer-allowed-children (parent)
"Return a list of strings of allowed child elts of PARENT."
(let* ((parent (if (stringp parent) (intern parent) parent))
(ent-types (cdr (assoc parent dix-transfer-elements))))
(cl-reduce #'append
(mapcar (lambda (type) (cdr (assoc type dix-transfer-entities)))
ent-types))))
(defun dix-transfer-enclosing-allows (child)
"Answer if the element we're inside can contain CHILD in a transfer file."
(let ((parent (dix-enclosing-elt 'noerror)))
(and parent
(member child (dix-transfer-allowed-children parent)))))
(defun dix-enclosing-is-mono-section ()
"Heuristically answer if the element we're inside is (monolingual) <section>.
A `dix-enclosing-elt' from outside an <e> in a <section> will
often hit `dix-parse-bound', in which case we just search back
for some hints."
(let ((elt (dix-enclosing-elt 'noerror)))
(or (and elt (equal elt "section"))
(save-excursion
(and (re-search-backward " lm=\"\\|<pardef\\|</section>" nil 'noerror)
(equal " lm=\"" (match-string 0)))))))
(defun dix-enclosing-elt-helper (bound)
"Get the qname of the enclosing element.
Will error if we don't find anything before the buffer position
BOUND."
(dix-backward-up-element 1 bound)
(nxml-token-after)
(xmltok-start-tag-qname))
(defun dix-enclosing-elt (&optional noerror)
"Return name of element we're in.
Optional argument NOERROR will make parse bound errors return
nil."
(let ((bound (max (point-min)
(- (point) dix-parse-bound))))
(save-excursion
(if noerror
(condition-case nil
(dix-enclosing-elt-helper bound)
(dix-bound-error nil))
(dix-enclosing-elt-helper bound)))))
(defun dix-pardef-at-point (&optional clean)
"Give the name of the pardef we're in.
Optional argument CLEAN removes trailing __n and such."
(save-excursion
(dix-up-to "pardef" "pardefs")
(re-search-forward "n=\"" nil t)
(let ((pardef (symbol-name (symbol-at-point))))
(if clean (replace-regexp-in-string
"\\([^/_]*\\)/?\\([^/_]*\\)__.*"
"\\1\\2"
pardef)
pardef))))
(defun dix-lemma-at-point ()
"Find the nearest lm attribute of this e element.
In a bidix, gives the contents of nearest of l/r."
;; TODO: handle <b>'s and <g>'s correctly, skipping <s>'s
(if (dix-is-bidix)
(dix-l/r-word-at-point) ;; bidix
(save-excursion ;; monodix
(dix-up-to "e" "section")
(re-search-forward "lm=\"\\([^\"]*\\)" nil t)
(match-string-no-properties 1))))
(defun dix-i-at-point ()
"Find the nearest i element of this e."
;; TODO less roundabout
(let ((rs (dix-split-root-suffix)))
(concat (car rs) (cdr rs))))
(defun dix-par-at-point ()
"Find the nearest par element of this e."
(save-excursion
(dix-up-to "e" "section")
(re-search-forward "<par[^/>]*n=\"\\([^\"]*\\)" nil t)
(match-string-no-properties 1)))
(defun dix-pardef-suggest-at-point ()
"Return a list of pardef names for suggestions.
First we look in the context around point (up to
`dix-parse-bound' in both directions), then append the full list
from <pardefs>. Tries to be fast, so no actual XML parsing,
meaning commented out pardefs may be suggested as well."
(save-restriction
(widen)
(let* ((par-rex "<par [^>]*n=['\"]\\([^'\"> ]+\\)")
(pardef-rex "<pardef [^>]*n=['\"]\\([^'\"> ]+\\)")
(pardefs-end (or (save-excursion
(re-search-backward "</pardefs>" nil 'noerror))
(point-min)))
(bound-above (max pardefs-end
(- (point) dix-parse-bound)))
(bound-below (min (+ (point) dix-parse-bound)
(point-max)))
pdnames)
(save-excursion
(while (re-search-backward par-rex bound-above 'noerror)
(cl-pushnew (match-string-no-properties 1) pdnames :test #'equal)))
(save-excursion
(while (re-search-forward par-rex bound-below 'noerror)
(cl-pushnew (match-string-no-properties 1) pdnames :test #'equal)))
(save-excursion
(goto-char pardefs-end)
(while (re-search-backward pardef-rex nil 'noerror)
(cl-pushnew (match-string-no-properties 1) pdnames :test #'equal)))
(nreverse pdnames))))
(defun dix-pardef-suggest-for (lemma)
"Return a list of pardef names to suggest for `LEMMA'.
Names used near point are prioritised, and names marked for
lemma-suffixes that don't match the suffix of the lemma (e.g.
pardef \"foo/er__verb\" when the lemma is \"fooable\") are
filtered out."
(cl-remove-if-not (lambda (par)
(if (string-match "/\\([^_]+\\)_" par)
(string-match (concat (match-string 1 par) "$") lemma)
'no-slash-so-match-all))
(dix-pardef-suggest-at-point)))
(defun dix-pardef-type-of-e ()
"Give the part following `__' in a pardef name, or nil."
(let ((par (dix-par-at-point)))
(when (string-match "[^_]*__\\([^\"]*\\)" par)
(match-string-no-properties 1 par))))
(defun dix-split-root-suffix ()
"Give a pair of the <i>-contents and pardef <r>-contents.
The pardef <r>-contents are guessed by the letters following the
slash of the pardef. Does not give the correct root of it's not
all contained within an <i> (eg. lemma pardefs will give wrong
roots)."
(save-excursion
(dix-up-to "e" "section")
(let ((e-end (nxml-scan-element-forward (point))))
(nxml-down-element 2)
(cons (symbol-name (dix-with-sexp (sexp-at-point)))
(progn
(nxml-up-element)
(when (re-search-forward "n=\"[^/]*/\\([^_\"]*\\)[^\"]*\"" e-end 'noerror)
(match-string-no-properties 1)))))))
(defun dix-get-attrib (attributes name)
"Look in list ATTRIBUTES for one with name NAME (a string).
Assumes ATTRIBUTES of the same format as `xmltok-attributes'.
Return nil if no such attribute is found."
(if attributes
(if (equal name (buffer-substring-no-properties
(xmltok-attribute-name-start (car attributes))
(xmltok-attribute-name-end (car attributes))))
(car attributes)
(dix-get-attrib (cdr attributes) name))))
(defun dix-attrib-start (attributes name)
"Look in ATTRIBUTES for start position of attribute NAME, or nil if no such.
Assumes ATTRIBUTES is of the format of `xmltok-attributes'."
(let ((attrib (dix-get-attrib attributes name)))
(when attrib (xmltok-attribute-value-start attrib))))
(defvar dix-interesting
'(;; dix:
("clip" "pos" "side" "part")
("e" "lm" "r" "c")
("par" "n")
("section" "id" "type")
("pardef" "n")
("s" "n")
;; transfer:
("sdef" "n")
("b" "pos")
("with-param" "pos")
("call-macro" "n")
("def-macro" "n" "npar")
("cat-item" "lemma" "tags" "name")
("attr-item" "lemma" "tags")
("list-item" "v")
("list" "n")
("def-attr" "n")
("def-cat" "n")
("def-list" "n")
("def-var" "n")
("pattern-item" "n")
("chunk" "name" "case" "namefrom")
("var" "n")
("lit" "v")
("lit-tag" "v")
;; modes:
("pipeline")
("mode" "name" "install")
("program" "name")
("file" "name")
;; lrx:
("match" "lemma" "tags")
("select" "lemma" "tags")
("seq" "n")
("def-seq" "n")
;; cross-model:
("cross-action" "id" "a")
("v" "n")
("t" "n")
;; tsx:
("def-label" "name" "closed")
("def-mult" "name")
("tags-item" "tags" "lemma")
("label-item" "label")
("tagger" "name"))
"Association list of elements and which attributes are considered interesting.
Used by `dix-next'.")
(defvar dix-skip-empty
'("dictionary" "alphabet" "sdefs" "pardefs" "lu" "p" "e" "tags" "chunk" "tag" "pattern" "rule" "action" "out" "b" "def-macro" "choose" "when" "test" "equal" "not" "otherwise" "let" "forbid" "label-sequence" "tagset")
"Skip past these elements when using `dix-next'.
They'll not be skipped if they have interesting attributes as defined by
`dix-interesting', however.")
;;; TODO: skip <[lr]><g><b/> and go to nearest CDATA in e.g. <l><g><b/>for</g></l>
(defmacro dix-filter (pred lst)
"Test PRED on each elt of LST, removing non-true values."
`(delq nil
(mapcar (lambda (elt) (when (funcall ,pred elt) elt))
,lst)))
(defun dix-nearest (pivot backward &rest args)
"Find the element numerically nearest PIVOT.
If BACKWARD, we we want only elements of ARGS that are lower than
PIVOT, otherwise only higher."
(let ((cmp (if backward '< '>))
(nearest (if backward 'max 'min)))
(let ((OK (dix-filter (lambda (x) (and x (funcall cmp x pivot)))
args)))
(when OK (apply nearest OK)))))
(defun dix-nearest-interesting (attributes pivot backward interest)
"Find the nearest \"interesting\" element.
This will return the position of the nearest member of list
INTEREST which is also a member of ATTRIBUTES (in the format of
`xmltok-attributes') but not crossing PIVOT. If BACKWARD, we we
want only elements of ARGS that are lower than PIVOT, otherwise
only higher."
(apply 'dix-nearest pivot backward
(mapcar (lambda (attname)
(dix-attrib-start attributes attname))
interest)))
(defun dix-next-one (&optional backward)
"Move forward one interesting element.
Helper for `dix-next' (move back if BACKWARD non-nil).
TODO: handle pardef entries too; make non-recursive."
(cl-flet ((move (spot)
(if (if backward (< spot (point)) (> spot (point)))
(goto-char spot)
(progn (forward-char (if backward -1 1))
(dix-next-one backward)))))
(let* ((token-end (nxml-token-before))
(token-next (if backward
xmltok-start
(1+ token-end)))
(qname (xmltok-start-tag-qname))
(interest (cdr (assoc qname dix-interesting)))
(near-int (dix-nearest-interesting xmltok-attributes
(point)
backward
interest)))
(cond ((eq (point) (if backward (point-min) (point-max)))
t)
((memq xmltok-type '(prolog comment))
(goto-char token-next)
(dix-next-one backward))
(near-int ; interesting attribute
(move near-int)) ; to go to
((or interest ; interesting element but no next interesting attribute
(member qname dix-skip-empty)) ; skip if empty
(move token-next)
(dix-next-one backward))
((memq xmltok-type '(space data end-tag))
(and (goto-char token-next)
(not (and backward ; need to goto these elts from data
(nxml-token-before) ; before looping on:
(member (xmltok-start-tag-qname) '("r" "l" "i"))))
(dix-next-one backward)))
;; TODO: should instead while-loop until the next member of
;; dix-interesting, or maybe the default should be to go to
;; the next _attribute_, whatever it is?
(t (move token-end))))))
(defun dix-compile-suffix-map (partype)
"Build a hash map where keys are sorted lists of suffixes in
pardefs, eg. '(\"en\" \"ing\" \"s\"), and the value is a list of
the pardef names containing these suffixes.
Argument PARTYPE is eg. adj, vblex, vblex_adj, ..., and is the
string following \"__\", thus assumes you keep to the Apertium
standard. Also assumes there is no \"_\" before \"__\" in pardef
names."
(let ((suffmap (make-hash-table :test 'equal)))
(save-excursion
(goto-char (point-min))
;; find all pardefs of `partype' in the file:
(while (re-search-forward
(concat "pardef[^n>]*n=\"\\([^\"]*__" partype "\\)\"") nil 'noerror)
(let ((pardef (match-string-no-properties 1))
(sufflist (dix-compile-sorted-suffix-list)))
(puthash sufflist
(cons pardef (gethash sufflist suffmap)) suffmap))))
suffmap))
(defvar dix-suffix-maps nil
"Internal association list used to store compiled suffix maps;
keys are symbols formed from the string `partype' (see
`dix-compile-suffix-map' and interactive function
`dix-find-duplicate-pardefs').")
(make-variable-buffer-local 'dix-suffix-maps)
(defun dix-get-pardefs (sufflist suffmap)
"Get the list of pardefs in SUFFMAP which have the list of suffixes SUFFLIST.
See `dix-compile-suffix-map' for more information."
(gethash (sort sufflist 'string-lessp) suffmap))
(defun dix-compile-sorted-suffix-list ()
"Make lookup keys for `dix-compile-suffix-map' and `dix-get-pardefs'."
(save-excursion
(let (sufflist)
(condition-case nil
(progn (dix-up-to "pardef" "pardefs"))
(dix-parse-error (dix-goto-pardef)))
;; find all suffixes within this pardef:
(let ((end (save-excursion (dix-with-sexp (forward-sexp))
(point))))
(while (re-search-forward "<l>\\([^<]*\\)</l>" end 'noerror)
(when (match-string 1)
(setq sufflist (cons (match-string-no-properties 1) sufflist)))))
(sort sufflist 'string-lessp))))
(defun dix-assoc-delete-all (key alist)
"Delete all instances of KEY in ALIST.
Returns a copy (does not modify the original list)."
(if alist
(if (equal (caar alist) key)
(dix-assoc-delete-all key (cdr alist))
(cons (car alist)
(dix-assoc-delete-all key (cdr alist))))))
(defun dix-invert-alist (a)
"Invert the alist A so values become keys and keys values.
Values should of course be unique. The new values will lists."
(apply #'append
(mapcar (lambda (entry)
(mapcar (lambda (c) (list c (car entry)))
(cdr entry)))
a)))
;;;============================================================================
;;;
;;; Schemas / validation
;;;
(defcustom dix-schema-locating-files nil
"List of schema locating files.
Used by `dix-schema' to populate `rng-schema-locating-files'.
If nil, a default schema will be added."
:type '(repeat file)
:group 'dix)
(defun dix-schemas ()
"Add default Apertium schemas.xml to locating rules.
If possible, adds rules for files installed through package
manager, falling back to files installed using 'sudo make
install'.
To override, copy the schemas.xml file distributed with dix.el,
edit the paths, and add the path to the list
`dix-schema-locating-files'."
(if dix-schema-locating-files
(setq rng-schema-locating-files (append dix-schema-locating-files
rng-schema-locating-files))
(let ((source-dir (file-name-directory
(concat ; nil => empty string
(find-lisp-object-file-name #'dix-schemas nil))))
(rulefile (if (file-exists-p "/usr/share/lttoolbox/dix.rnc")
"schemas.xml"
"local-schemas.xml")))
(add-to-list 'rng-schema-locating-files (concat source-dir rulefile)))))
(add-hook 'dix-load-hook #'dix-schemas)
;;;============================================================================
;;;
;;; Alignment
;;;
(defcustom dix-rp-align-column 28
"Column to align pardef <r> elements to with `align'."
:type 'integer
:group 'dix)
(defcustom dix-rb-align-column 44
"Column to align bidix <r> elements to with `align'."
:type 'integer
:group 'dix)
(defcustom dix-i-align-column 25
"Column to align <i> elements to with `align'."
:type 'integer
:group 'dix)
(defcustom dix-ep-align-column 2
"Column to align pardef <e> elements to with `align'.
Not yet implemented, only used by `dix-LR-restriction-copy'."
:type 'integer
:group 'dix)
(defcustom dix-pp-align-column 12
"Column to align pardef <p> elements to with `align'."
:type 'integer
:group 'dix)
(defcustom dix-pb-align-column 10
"Column to align bidix <p> (and <re>) elements to with `align'."
:type 'integer
:group 'dix)
(defun dix-add-align-rule (name regexp column)
(add-to-list 'align-rules-list
`(,name
(regexp . ,regexp)
(tab-stop . nil)
(spacing . 0)
(group . 1)
(modes . '(nxml-mode))
(column . ,column))))
(add-hook
'align-load-hook
(lambda ()
(dix-add-align-rule
'dix-rp-align "\\s-+\\(\\s-*\\)<r>" 'dix-rp-align-column)
(dix-add-align-rule ;
'dix-rb-align "\\(\\s-*\\)<r>" 'dix-rb-align-column)
(dix-add-align-rule
'dix-i-align "\\(\\s-*\\)<i" 'dix-i-align-column)
(dix-add-align-rule
'dix-pb-align "^\\S-*\\(\\s-*\\)<\\(p\\|re\\)>" 'dix-pb-align-column)
(dix-add-align-rule
'dix-pp-align "^\\s-+\\S-*\\(\\s-*\\)<p>" 'dix-pp-align-column)))
;;;============================================================================
;;;
;;; Interactive functions
;;;
(defun dix-find-duplicate-pardefs (&optional recompile)
"Find all pardefs with this list of suffixes.
'Suffixes' are contents of <l> elements. If there are several of
them they might be duplicates. Optional prefix argument
RECOMPILE forces a re-check of all pardefs.
Uses internal function `dix-compile-suffix-map' which assumes
that pardefs are named according to the regular Apertium scheme,
eg. \"lik/e__vblex\" (ie. all pardefs of the same group have
\"__\" before the group name, and there are no \"_\" before
\"__\").
Returns the list of pardef names."
(interactive "P")
(let* ((partype
(save-excursion
(condition-case nil
(progn (dix-up-to "pardef" "pardefs"))
(dix-parse-error (dix-goto-pardef)))
(re-search-forward
(concat "pardef[^n>]*n=\"[^\"]*__\\([^\"]*\\)" ) nil 'noerror)
(match-string-no-properties 1)))
(foundmap (cdr (assoc-string partype dix-suffix-maps))))
(let* ((suffmap
(if (or recompile (not foundmap))
(dix-compile-suffix-map partype)
foundmap))
(pardefs (dix-get-pardefs (dix-compile-sorted-suffix-list)
suffmap)))
(when (or recompile (not foundmap))
(setq dix-suffix-maps (dix-assoc-delete-all partype dix-suffix-maps))
(add-to-list 'dix-suffix-maps (cons partype suffmap) 'append))
(message (prin1-to-string pardefs))
pardefs)))
(defvar dix-vr-langs nil "List of language codes (strings) allowed in the vr attribute of this dictionary.")
(defvar dix-vl-langs nil "List of language codes (strings) allowed in the vl attribute of this dictionary.")
(put 'dix-vr-langs 'safe-local-variable 'listp)
(put 'dix-vl-langs 'safe-local-variable 'listp)
(defun dix-get-vr-vl ()
"A cons of attribute key (vr/vl) and value (nno, nob, …); or nil if none such.
Assumes we don't have both vr and vl at the same time.
Assumes we've just done (dix-up-to \"e\" \"pardef\")"
(save-excursion
(when (re-search-forward " v\\([rl]\\)=\"\\([^\"]+\\)\"" (nxml-token-after) 'noerror 1)
(cons (match-string 1)
(match-string 2)))))
(defun dix-v-cycle ()
"Cycle through possible values of the `vr' or `vl' attributes.
Only affects the <e> element at point.
Doesn't yet deal with elements that specify both vr and vl.
For this to be useful, put something like this at the end of your file:
<!--
Local Variables:
dix-vr-langs: (\"nno\" \"nob\")
End:
-->"
(interactive)
(save-excursion
(dix-up-to "e" "pardef")
(let* ((def-dir (if dix-vr-langs "r" "l"))
(langs (list (cons "r" dix-vr-langs)
(cons "l" dix-vr-langs)))
(old (dix-get-vr-vl)) ; find what, if any, restriction we have already
(dir (if old (car old) def-dir))
(old-lang (when old (cdr old)))
(dir-langs (cdr (assoc dir langs)))
(next (car-safe (if old-lang
(cdr (member old-lang dir-langs))
dir-langs)))
(new (if next
(format " v%s=\"%s\"" dir next)
"")))
;; restrict:
(forward-word)
(if old (delete-region (match-beginning 0)
(match-end 0)))
(insert new)
(unless (looking-at ">") (just-one-space))
;; formatting, remove whitespace:
(goto-char (nxml-token-after))
(unless (looking-at "<")
(goto-char (nxml-token-after)))
(delete-horizontal-space)
(cond ((looking-at "<i") (indent-to dix-i-align-column))
((save-excursion (search-forward "</pardef>" nil 'noerror 1))
(indent-to dix-pp-align-column))
((looking-at "<p\\|<re") (indent-to dix-pb-align-column))))))
(defun dix-restriction-cycle (&optional dir)
"Cycle through possible values of the `r' attribute.
Only affects the <e> element at point.
Optional argument DIR is a string, either \"\", \"LR\" or
\"RL\"."
(interactive)
(save-excursion
(dix-up-to "e" "pardef")
(let* ((old ; find what, if any, restriction we have:
(save-excursion
(if (re-search-forward " r=\"\\(..\\)\"" (nxml-token-after) 'noerror 1)
(match-string 1))))
(dir (if dir dir
(if old ; find our new restriction:
(if (equal old "LR")
"RL" ; "LR" => "RL"
"") ; "RL" => ""
"LR"))) ; "" => "LR"
(new (if (equal dir "") ""
(concat " r=\"" dir "\""))))
;; restrict:
(forward-word)
(if old (delete-region (match-beginning 0)
(match-end 0)))
(insert new)
(unless (looking-at ">") (just-one-space))
;; formatting, remove whitespace:
(goto-char (nxml-token-after))
(unless (looking-at "<")
(goto-char (nxml-token-after)))
(delete-horizontal-space)
(cond ((looking-at "<i") (indent-to dix-i-align-column))
((save-excursion (search-forward "</pardef>" nil 'noerror 1))
(indent-to dix-pp-align-column))
((looking-at "<p\\|<re") (indent-to dix-pb-align-column))))))
(defun dix--swap-outer-tag (elt newtag)