From f6c558a4d21ebc3411d85794eb5c38d0bb20ecb6 Mon Sep 17 00:00:00 2001
From: ynqa <un.pensiero.vano@gmail.com>
Date: Tue, 8 Dec 2020 02:14:50 +0900
Subject: [PATCH] Update

---
 README.md | 81 ++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 51 insertions(+), 30 deletions(-)

diff --git a/README.md b/README.md
index 3d11192..ff3c8b5 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,8 @@ Inspired by [Data Science in Go](https://speakerdeck.com/chewxy/data-science-in-
 
 ## Installation
 
+Use the `go` command to install this package.
+
 ```
 $ go get -u github.com/ynqa/wego
 $ bin/wego -h
@@ -59,45 +61,64 @@ Available Commands:
   word2vec    Word2Vec: Continuous Bag-of-Words and Skip-gram model
 ```
 
-### Go SDK
+`word2vec`, `glove` and `lexvec` execute the following workflow to generate word vectors:
+1. Build a dictionary for vocabularies and count word frequencies by scanning a given corpus.
+2. Start training. The execution time depends on the size of the corpus, the hyperparameters (flags), and so on.
+3. Save the words and their vectors as a text file.
 
-```go
-package main
+`query` and `console` are the commands which are related to nearest neighbor searching for the trained word vectors.
+
+`query` outputs the words most similar to a given word, using the word vectors generated by the above models.
+
+e.g. `wego query -i word_vector.txt microsoft`:
+```
+  RANK |   WORD    | SIMILARITY
+-------+-----------+-------------
+     1 | hypercard |   0.791492
+     2 | xp        |   0.768939
+     3 | software  |   0.763369
+     4 | freebsd   |   0.761084
+     5 | unix      |   0.749563
+     6 | linux     |   0.747327
+     7 | ibm       |   0.742115
+     8 | windows   |   0.731136
+     9 | desktop   |   0.715790
+    10 | linspire  |   0.711171
+```
+
+*wego* does not reproduce the same word vectors across trials because it adopts the HogWild! algorithm, which updates the parameters (in this case, the word vectors) asynchronously.
 
-import (
-	"os"
+`console` starts a REPL for performing basic arithmetic operations (`+` and `-`) on word vectors.
 
-	"github.com/ynqa/wego/pkg/model/modelutil/vector"
-	"github.com/ynqa/wego/pkg/model/word2vec"
+### Go SDK
+
+The hyperparameters for models can be defined with functional options.
+
+```go
+model, err := word2vec.New(
+	word2vec.Window(5),
+	word2vec.Model(word2vec.Cbow),
+	word2vec.Optimizer(word2vec.NegativeSampling),
+	word2vec.NegativeSampleSize(5),
+	word2vec.Verbose(),
 )
+```
+
+The models have some methods:
 
-func main() {
-	model, err := word2vec.New(
-		word2vec.Window(5),
-		word2vec.Model(word2vec.Cbow),
-		word2vec.Optimizer(word2vec.NegativeSampling),
-		word2vec.NegativeSampleSize(5),
-		word2vec.Verbose(),
-	)
-	if err != nil {
-		// failed to create word2vec.
-	}
-
-	input, _ := os.Open("text8")
-	if err = model.Train(input); err != nil {
-		// failed to train.
-	}
-
-	// write word vector.
-	model.Save(os.Stdin, vector.Agg)
+```go
+type Model interface {
+	Train(io.ReadSeeker) error
+	Save(io.Writer, vector.Type) error
+	WordVector(vector.Type) *matrix.Matrix
 }
 ```
 
-## Formats
+### Formats
 
-As training word vectors *wego* requires file format for inputs/outputs.
+To train word vectors, *wego* requires the following file formats for inputs/outputs.
 
-### Input
+#### Input
 
 Input corpus must be subject to the formats to be divided by space between words like [text8](http://mattmahoney.net/dc/textdata.html).
 
@@ -105,7 +126,7 @@ Input corpus must be subject to the formats to be divided by space between words
 word1 word2 word3 ...
 ```
 
-###  Output
+#### Output
 
 After training *wego* save the word vectors into a txt file with the following format (`N` is the dimension for word vectors you given):