-
Notifications
You must be signed in to change notification settings - Fork 0
/
db.go
130 lines (118 loc) · 3.43 KB
/
db.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
package parser
import (
"context"
"fmt"
"log"
"time"
"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/mongo"
"go.mongodb.org/mongo-driver/mongo/options"
)
// DbName is the name of the MongoDB database that writePackages and
// removeDuplicates operate on.
const DbName = "packagedb"
// updateCollection upserts every Package held in data into the collection
// CollectionName of database DbName and returns a handle to that collection.
//
// data is expected to hold a []Package. Any other dynamic type (including a
// nil interface) is logged and skipped rather than panicking, and the
// collection handle is still returned.
//
// Each package is matched with the filter {"name": pkg.URL} and written with
// $set; when no document matches, a new one is inserted (upsert). A failed
// upsert is logged and does not stop processing of the remaining packages.
func updateCollection(client *mongo.Client, DbName string, CollectionName string, data interface{}) *mongo.Collection {
	collection := client.Database(DbName).Collection(CollectionName)

	pkgs, ok := data.([]Package)
	if !ok {
		log.Printf("updateCollection: unexpected data type %T for collection %q", data, CollectionName)
		return collection
	}

	// The upsert option is the same for every document, so build it once.
	opt := options.Update().SetUpsert(true)

	for _, pkg := range pkgs {
		// NOTE(review): the "name" field is matched against pkg.URL, not
		// pkg.Name — confirm against the Package bson schema that this is intended.
		filter := bson.M{"name": pkg.URL}
		update := bson.M{"$set": pkg}

		result, err := collection.UpdateOne(context.Background(), filter, update, opt)
		switch {
		case err != nil:
			log.Printf("repo star update failed: %v\n", err)
		case result.UpsertedCount > 0:
			log.Printf("creating new entry for: %s", pkg.Name)
		default:
			log.Printf("updating star count for: %s", pkg.Name)
		}
	}
	return collection
}
// removeDuplicates connects to MongoDB with a fresh client and runs
// findDeleteDoc on every collection of the DbName database. It stops at the
// first collection that fails and logs the error.
func removeDuplicates() {
	client := getClient()
	// getClient connects a new client on every call, so release it here once
	// the scan is over; otherwise its connections stay open.
	defer func() {
		if err := client.Disconnect(context.TODO()); err != nil {
			log.Println("error disconnecting client", err)
		}
	}()

	for _, name := range listCollections(client, DbName) {
		if err := findDeleteDoc(client, DbName, name); err != nil {
			// There is no error return, so log the failure before giving up.
			log.Printf("removing duplicates from %q: %v", name, err)
			return
		}
	}
}
// listCollections returns the names of all collections in database DB.
// If the listing fails, the error is passed to log.Fatal, which terminates
// the process.
func listCollections(client *mongo.Client, DB string) []string {
	db := client.Database(DB)

	// An empty filter selects every collection.
	names, listErr := db.ListCollectionNames(context.TODO(), bson.D{})
	if listErr != nil {
		log.Fatal(listErr)
	}
	return names
}
// findDeleteDoc scans every document of Collection in database DB, decoding
// each into a Package, and calls deleteOneDoc for each document whose URL has
// already been seen earlier in the scan.
//
// It returns the first error from Find, Decode, deleteOneDoc, or from the
// cursor itself; a failure to close the cursor is only logged.
func findDeleteDoc(client *mongo.Client, DB string, Collection string) error {
	ctx := context.TODO()

	// Create a handle to the respective collection in the database.
	collection := client.Database(DB).Collection(Collection)

	// An empty filter matches all documents.
	cur, err := collection.Find(ctx, bson.D{})
	if err != nil {
		return err
	}
	defer func() {
		if err := cur.Close(ctx); err != nil {
			log.Println("error closing cursor", err)
		}
	}()

	// Set of URLs encountered so far.
	urls := make(map[string]struct{})
	for cur.Next(ctx) {
		// Decode into a fresh value each time so fields absent from one
		// document cannot carry over from the previous one.
		var doc Package
		if err := cur.Decode(&doc); err != nil {
			return err
		}

		if _, seen := urls[doc.URL]; seen {
			// NOTE(review): deletion is keyed on the document's name, not on
			// its _id, so the document removed is whichever one DeleteOne
			// matches first for that name — confirm that is acceptable.
			if err := deleteOneDoc(client, DB, Collection, doc.Name); err != nil {
				return err
			}
			continue
		}
		urls[doc.URL] = struct{}{}
	}

	// Next returns false both on exhaustion and on failure; Err tells them
	// apart so that a broken iteration is not reported as success.
	return cur.Err()
}
// deleteOneDoc issues a DeleteOne against Collection in database DB for a
// document whose "name" field equals name. It returns the driver error if the
// call fails; otherwise it logs the name and returns nil.
func deleteOneDoc(client *mongo.Client, DB string, Collection string, name string) error {
	target := client.Database(DB).Collection(Collection)

	if _, err := target.DeleteOne(context.TODO(), bson.M{"name": name}); err != nil {
		return err
	}

	log.Printf("Deleted duplicate document with name: %v\n", name)
	return nil
}
// getClient builds a MongoDB client for Config.MongoURL and connects it using
// a context with a 10-second timeout. Any error while creating or connecting
// the client is passed to log.Fatal, which terminates the process.
func getClient() *mongo.Client {
	client, err := mongo.NewClient(options.Client().ApplyURI(Config.MongoURL))
	if err != nil {
		log.Fatal(err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	err = client.Connect(ctx)
	// Release the timeout context's resources as soon as Connect returns.
	// cancel is called explicitly rather than deferred because log.Fatal
	// exits without running deferred functions.
	cancel()
	if err != nil {
		log.Fatal(err)
	}
	return client
}
// writePackages walks categories, printing each index to stdout, and passes
// the package details of every category that has a non-empty title and
// non-nil details to updateCollection, using the title as the collection
// name within DbName. Afterwards it calls removeDuplicates.
func writePackages(client *mongo.Client, categories Categories) {
	for idx, cat := range categories {
		fmt.Println(idx)

		// Only categories with both a title and package details are written.
		if cat.Title != "" && cat.PackageDetails != nil {
			updateCollection(client, DbName, cat.Title, cat.PackageDetails)
		}
	}

	removeDuplicates()
}