-
Notifications
You must be signed in to change notification settings - Fork 0
/
Document.java
131 lines (98 loc) · 2.87 KB
/
Document.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * Text pre-processing helpers: read a text file, strip accents, lower-case ASCII letters,
 * split text into alphanumeric tokens with their occurrence counts, and write text to a file.
 *
 * <p>Token counts are appended to the shared static {@link #liste}. The class performs no
 * synchronization on that list.
 */
public class Document {

    String source;

    /** Token/occurrence pairs appended by {@link #tokenisation(String)}; shared by all instances. */
    static ArrayList<Pair> liste = new ArrayList<Pair>();

    /** Creates a document and sorts the shared {@link #liste} using the natural ordering of its elements. */
    public Document() {
        Collections.sort(liste);
    }

    /**
     * Lower-cases the ASCII letters {@code A}-{@code Z} of a string.
     * Every other character (digits, punctuation, non-ASCII letters) is left unchanged.
     *
     * @param s the text to convert; must not be null
     * @return a copy of {@code s} with {@code A}-{@code Z} replaced by {@code a}-{@code z}
     */
    public static String versMin(String s) {
        StringBuilder lowered = new StringBuilder(s);
        for (int i = 0; i < lowered.length(); i++) {
            char c = lowered.charAt(i);
            if (c >= 'A' && c <= 'Z') {
                lowered.setCharAt(i, (char) (c - 'A' + 'a'));
            }
        }
        return lowered.toString();
    }

    /**
     * Splits a text into alphanumeric tokens and appends one {@code Pair(token, count)} per
     * distinct token to {@link #liste}, in order of first appearance.
     *
     * <p>Tokens are maximal runs of {@code [a-zA-Z0-9]}; matching is case-sensitive. Pairs are
     * only appended: a token already recorded by an earlier call gets a second entry rather
     * than an updated count.
     *
     * @param ch the text to tokenize; null or empty text adds nothing
     */
    public static void tokenisation(String ch) {
        if (ch == null || ch.isEmpty()) {
            return;
        }
        // LinkedHashMap keeps the tokens in first-seen order.
        Map<String, Integer> occurrences = new LinkedHashMap<String, Integer>();
        for (String token : ch.split("[^a-zA-Z0-9]+")) {
            // split() yields an empty leading token when the text starts with a separator.
            if (token.isEmpty()) {
                continue;
            }
            Integer seen = occurrences.get(token);
            occurrences.put(token, seen == null ? 1 : seen + 1);
        }
        for (Map.Entry<String, Integer> entry : occurrences.entrySet()) {
            liste.add(new Pair(entry.getKey(), entry.getValue().intValue()));
        }
    }

    /**
     * Removes accents by decomposing the text (Unicode NFD) and dropping every non-ASCII
     * character, which includes the combining marks produced by the decomposition.
     *
     * @param s the text to clean; must not be null
     * @return the text restricted to ASCII characters
     */
    public static String supprimentAccent(String s) {
        String decomposed = Normalizer.normalize(s, Normalizer.Form.NFD);
        return decomposed.replaceAll("[^\\p{ASCII}]", "");
    }

    /**
     * Parses every regular file of a hard-coded directory, prints the parsed text and writes
     * it to a hard-coded result file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String testFilepath = "C:/monRep";
        String resFilePath = "C:/monRep/test.csv";
        File folder = new File(testFilepath);
        File[] listOfFiles = folder.listFiles();
        // listFiles() returns null when the path is not a directory or cannot be read.
        if (listOfFiles == null) {
            System.err.println("Cannot list directory: " + testFilepath);
            return;
        }
        // NOTE(review): ecrire() overwrites the result file on every iteration, so only the last
        // parsed file survives, and the result file lives inside the scanned directory.
        // Confirm whether that is intended.
        for (File f : listOfFiles) {
            if (f.isFile()) {
                String parsedText = parse(f.getAbsolutePath());
                System.out.println(parsedText);
                ecrire(resFilePath, parsedText);
            }
        }
    }

    /**
     * Reads a text file and returns its content with accents removed and ASCII letters
     * lower-cased. Lines are joined with {@code '\n'} so that words on consecutive lines are
     * not fused together.
     *
     * <p>The file is decoded with the platform default charset.
     *
     * @param filePath path of the file to read
     * @return the normalized content, or an empty string when the file does not exist or
     *         cannot be read (the I/O error is printed to standard error)
     */
    public static String parse(String filePath) {
        File file = new File(filePath);
        if (!file.exists()) {
            return "";
        }
        StringBuilder text = new StringBuilder();
        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            boolean firstLine = true;
            String line;
            while ((line = reader.readLine()) != null) {
                if (!firstLine) {
                    text.append('\n');
                }
                text.append(versMin(supprimentAccent(line)));
                firstLine = false;
            }
        } catch (IOException e) {
            e.printStackTrace();
            return "";
        }
        return text.toString();
    }

    /**
     * Writes {@code content} to the given file, replacing any previous content.
     * An I/O error is printed to standard error and otherwise ignored.
     *
     * @param filePath path of the file to write
     * @param content  the text to write; must not be null
     */
    public static void ecrire(String filePath, String content) {
        // try-with-resources flushes and closes the writer even when write() fails.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File(filePath)))) {
            writer.write(content);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}