-
Notifications
You must be signed in to change notification settings - Fork 91
/
Rakefile
150 lines (134 loc) · 3.61 KB
/
Rakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
require 'bundler'
# Activate the bundle before anything else gets required. When Bundler
# reports a problem, print its message plus a hint on stderr and exit
# with the status code carried by the Bundler error.
begin
  Bundler.setup
rescue Bundler::BundlerError => error
  $stderr.puts error.message, "Run `bundle install` to install missing gems"
  exit error.status_code
end
require 'rake'
require 'rake/clean'
$:.unshift(File.join(File.dirname(__FILE__), './lib'))
require 'anystyle/version'
# Declare :default up front; its prerequisite is attached further down.
task :default

# Build the gem from its gemspec, after the :clean prerequisite has run.
#
# `sh` is used instead of `system` because `system` merely returns false
# when the command fails, which let tasks depending on :build carry on
# after a broken build. `sh` fails the task on a non-zero exit status.
task :build => [:clean] do
  sh 'gem build anystyle-parser.gemspec'
end
# Tag the current version in git, then push the built gem with `gem push`.
#
# Both steps go through `sh` so that a failing command aborts the task:
# with `system` a failed `git tag` (e.g. the tag already exists) was
# silently ignored and the gem was pushed anyway.
task :release => [:build] do
  sh "git tag #{AnyStyle::VERSION}"
  sh "gem push anystyle-parser-#{AnyStyle::VERSION}.gem"
end
# Load the parser with Ruby's verbose warnings switched on, so warnings
# emitted while requiring the library become visible, then print the
# library version.
task(:check_warnings) do
  $VERBOSE = true
  require 'anystyle/parser'
  $stdout.puts AnyStyle::VERSION
end
require 'rspec/core'
require 'rspec/core/rake_task'
# :spec runs every file matching spec/**/*_spec.rb.
RSpec::Core::RakeTask.new(:spec) { |t| t.pattern = FileList['spec/**/*_spec.rb'] }

# A bare `rake` runs the specs.
task default: :spec
desc 'Run an IRB session with AnyStyle loaded'
task :console do
  # Empty ARGV before IRB starts -- presumably so IRB does not try to
  # interpret rake's own command line arguments (TODO confirm).
  ARGV.clear

  %w[irb anystyle].each { |library| require library }
  IRB.start
end
desc 'Update model using latest source and training data'
# Trains and saves one of the AnyStyle models.
#
# Task arguments:
#   model   - 'finder' trains the finder model; any other value (or none)
#             trains the parser model.
#   threads - number of threads stored in the model defaults (default: 4).
task :train, :model, :threads do |_t, args|
  model = args[:model] || 'parser'

  # Arguments given on the command line arrive as Strings, while the
  # fallback is an Integer. Normalize so :threads is always an Integer;
  # a non-numeric value raises ArgumentError instead of being passed on.
  threads = args[:threads] ? Integer(args[:threads], 10) : 4

  require 'anystyle'
  Wapiti.debug!

  # The defaults are written before the finder/parser is first accessed,
  # mirroring the original statement order.
  case model
  when 'finder'
    AnyStyle::Finder.defaults[:threads] = threads
    AnyStyle.finder.train
    AnyStyle.finder.model.save
  else
    AnyStyle::Parser.defaults[:threads] = threads
    AnyStyle.parser.train
    AnyStyle.parser.model.save
  end
end
desc 'Check all tagged datasets'
# Runs the model's check over every bundled dataset and prints one
# report line per file.
#
# Task arguments:
#   model - 'finder' checks ./res/finder/*.ttx with the finder; any other
#           value (or none) checks ./res/parser/*.xml with the parser.
task :check, :model do |_t, args|
  model = args[:model] || 'parser'
  require 'anystyle'

  # Pair the dataset glob with the matching checker. The checker is a
  # lambda so the finder/parser is only touched once a file is processed.
  pattern, checker =
    case model
    when 'finder'
      ['./res/finder/*.ttx', ->(path) { AnyStyle.finder.check path }]
    else
      ['./res/parser/*.xml', ->(path) { AnyStyle.parser.check path }]
    end

  Dir[pattern].sort.each do |dataset|
    print 'Checking %.25s' % "#{File.basename(dataset)}....................."
    started_at = Time.now
    stats = checker.call(dataset)
    report stats, Time.now - started_at
  end
end
desc "Save delta of a tagged dataset with itself"
# Re-labels tagged data with the current model and saves whatever differs
# from the original tagging.
#
# Task arguments:
#   input - path to a single dataset file, or to a directory whose
#           non-dot entries are all processed.
#
# Files ending in .ttx are labelled by the finder; every other file is
# opened as a Wapiti dataset and labelled by the parser. A non-empty
# delta is written to delta_<name><ext> in the current directory.
task :delta, :input do |_t, args|
  require 'anystyle'

  # Fail early with a readable message; without this a missing argument
  # surfaced as a TypeError from File.directory?(nil).
  source = args[:input]
  abort 'Usage: rake delta[FILE_OR_DIRECTORY]' if source.nil? || source.empty?

  files =
    if File.directory?(source)
      # Dir.entries gives no ordering guarantee, so sort for stable output.
      Dir.entries(source)
        .reject { |entry| entry.start_with?('.') }
        .sort
        .map { |entry| File.join(source, entry) }
    else
      [source]
    end

  files.each do |file|
    extn = File.extname(file)
    print 'Checking %.25s' % "#{File.basename(file)}....................."

    # Dedicated locals are used here instead of reusing the variable that
    # holds the task argument.
    case extn
    when '.ttx'
      dataset = Wapiti::Dataset.new([AnyStyle::Document.open(file)])
      labelled = AnyStyle.finder.label dataset
      save_format = 'txt'
    else
      dataset = Wapiti::Dataset.open(file)
      labelled = AnyStyle.parser.label dataset
      save_format = 'xml'
    end

    delta = labelled - dataset

    if delta.length == 0
      puts ' ✓'
    else
      name = File.basename(file, extn)
      delta.save "delta_#{name}#{extn}", indent: 2, tagged: true, format: save_format
      puts "delta saved to delta_#{name}#{extn} (#{delta.length})"
    end
  end
end
desc "Find references in document"
# Extracts the references found in a document, labels them with the
# parser and prints the result as XML.
#
# Task arguments:
#   input - path of the document handed to the finder.
task :find, :input do |_t, args|
  require 'anystyle'

  file = args[:input]
  refs = AnyStyle.finder.find(file, format: :references)[0]

  # Leave the task body with `next`: by the time Rake runs this block the
  # `task` call that received it has returned, so `break` would raise
  # LocalJumpError. The nil check covers an empty finder result, where
  # `[0]` yields nil.
  next unless refs && refs.length > 0

  output = AnyStyle.parser.label refs.join("\n")
  puts output.to_xml(indent: 2)
end
# Prints the result of one dataset check.
#
# stats - Hash with :token and :sequence entries; each entry is a Hash
#         providing :errors and :rate.
# time  - elapsed time, formatted with %d (so fractions are dropped).
#
# With zero token errors only a check mark and the time are printed;
# otherwise sequence and token error counts and rates are printed too.
def report(stats, time)
  tokens = stats[:token]

  if tokens[:errors] == 0
    puts ' ✓ %2ds' % time
    return
  end

  sequences = stats[:sequence]
  columns = [sequences[:errors], sequences[:rate], tokens[:errors], tokens[:rate], time]
  puts '%4d seq %6.2f%% %6d tok %5.2f%% %2ds' % columns
end
# Files removed by the :clean task: built gems and compiled .rbc files.
CLEAN.include('*.gem', '*.rbc')