process-duplicates.go
package main
import (
"fmt"
"sort"
"strconv"
"sync"
"time"
"gopkg.in/cheggaaa/pb.v1"
"github.com/jonnenauha/obj-simplify/objectfile"
)
// replacerList
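// replacerList is a list of replacers collected for geometry values of a single type.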
type replacerList []*replacer
// FlattenGeometry returns a flat map from each replaced index to the value that replaces it.
func (rl replacerList) FlattenGeometry() map[int]*objectfile.GeometryValue {
out := make(map[int]*objectfile.GeometryValue)
for _, r := range rl {
for index := range r.replaces {
if out[index] != nil {
fmt.Printf("duplicate warning\n %#v\n %#v\n %t\n\n", out[index], r.ref, out[index].Equals(r.ref, 1e-6))
}
out[index] = r.ref
}
}
return out
}
// replacer
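// replacer holds a single reference geometry value and the duplicate values,
// keyed by their index, that it replaces. replacesSlice is a cached slice view
// of the map, rebuilt lazily when dirty is set.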
type replacer struct {
ref *objectfile.GeometryValue
replaces map[int]*objectfile.GeometryValue
replacesSlice []*objectfile.GeometryValue
dirty bool
hasItems bool
}
func (r *replacer) Index() int {
return r.ref.Index
}
func (r *replacer) IsEmpty() bool {
return !r.hasItems
}
func (r *replacer) NumReplaces() int {
if r.replaces != nil {
return len(r.replaces)
}
return 0
}
func (r *replacer) Replaces() []*objectfile.GeometryValue {
// optimization to avoid huge map iters
if r.dirty {
r.dirty = false
if r.replaces != nil {
r.replacesSlice = make([]*objectfile.GeometryValue, len(r.replaces))
i := 0
for _, ref := range r.replaces {
r.replacesSlice[i] = ref
i++
}
} else {
r.replacesSlice = nil
}
}
return r.replacesSlice
}
func (r *replacer) Remove(index int) {
if r.replaces == nil {
return
}
if _, found := r.replaces[index]; found {
r.dirty = true
delete(r.replaces, index)
r.hasItems = len(r.replaces) > 0
}
}
func (r *replacer) Hit(ref *objectfile.GeometryValue) {
// cannot hit self
if ref.Index == r.ref.Index {
return
}
if r.replaces == nil {
r.replaces = make(map[int]*objectfile.GeometryValue)
}
r.dirty = true
r.hasItems = true
r.replaces[ref.Index] = ref
}
func (r *replacer) Hits(index int) bool {
return r.hasItems && r.replaces[index] != nil
}
// Merge must be called only when r.Hits(other.Index()).
// If other could not be completely merged into r, other's own index is
// removed from r's hit list so that other survives as a separate replacer.
func (r *replacer) Merge(other *replacer) {
for _, value := range other.Replaces() {
if value.Index == r.ref.Index {
other.Remove(r.ref.Index)
continue
}
if r.hasItems && r.replaces[value.Index] != nil {
// straight up duplicate
other.Remove(value.Index)
} else if r.ref.Equals(value, StartParams.Epsilon) {
// move equals hit to r from other
r.Hit(value)
other.Remove(value.Index)
}
}
// if not completely merged at this point, we must
// reject other.Index() from our hit list.
if other.hasItems {
r.Remove(other.Index())
}
}
// replacerByIndex
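// replacerByIndex implements sort.Interface, ordering replacers by the index of their reference value.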
type replacerByIndex []*replacer
func (a replacerByIndex) Len() int { return len(a) }
func (a replacerByIndex) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a replacerByIndex) Less(i, j int) bool { return a[i].Index() < a[j].Index() }
// replacerResults
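// replacerResults holds the merged replacers found for one geometry type and the time spent finding them.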
type replacerResults struct {
Type objectfile.Type
Items []*replacer
Spent time.Duration
}
func (rr *replacerResults) Duplicates() (duplicates int) {
for _, r := range rr.Items {
duplicates += r.NumReplaces()
}
return duplicates
}
// Duplicates
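// Duplicates is a processor that removes duplicate v/vn/vt declarations and
// rewrites the vertex data references that point to them.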
type Duplicates struct{}
func (processor Duplicates) Name() string {
return "Duplicates"
}
func (processor Duplicates) Desc() string {
return "Removes duplicate v/vn/vt declarations. Rewrites vertex data references."
}
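// Execute finds duplicate geometry values of each type in parallel goroutines,
// then rewrites vertex data references to the surviving values and compacts
// the geometry slices in the main thread.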
func (processor Duplicates) Execute(obj *objectfile.OBJ) error {
var (
results = make(map[objectfile.Type]*replacerResults)
mResults = sync.RWMutex{}
wg = &sync.WaitGroup{}
preStats = obj.Geometry.Stats()
epsilon = StartParams.Epsilon
progressEnabled = !StartParams.NoProgress
)
logInfo(" - Using epsilon of %s", strconv.FormatFloat(epsilon, 'g', -1, 64))
// Doing this with channels felt a bit overkill, copying a lot of replacers etc.
setResults := func(result *replacerResults) {
mResults.Lock()
// If there are no progress bars, report results as they come in so the user knows something is happening...
if !progressEnabled {
logInfo(" - %-2s %7d duplicates found for %d unique indexes (%s%%) in %s",
result.Type, result.Duplicates(), len(result.Items), computeFloatPerc(float64(result.Duplicates()), float64(preStats.Num(result.Type))), formatDuration(result.Spent))
}
results[result.Type] = result
mResults.Unlock()
}
// find duplicates
{
mResults.Lock()
var (
bars = make(map[objectfile.Type]*pb.ProgressBar)
progressPool *pb.Pool
progressErr error
)
// progress bars
if !StartParams.NoProgress {
for _, t := range []objectfile.Type{objectfile.Vertex, objectfile.Normal, objectfile.UV, objectfile.Param} {
if slice := obj.Geometry.Get(t); len(slice) > 0 {
bar := pb.New(len(slice)).Prefix(fmt.Sprintf(" - %-2s scan ", t.String())).SetMaxWidth(130)
bar.ShowTimeLeft = false
bars[t] = bar
}
}
barsSlice := make([]*pb.ProgressBar, 0)
for _, bar := range bars {
barsSlice = append(barsSlice, bar)
}
if progressPool, progressErr = pb.StartPool(barsSlice...); progressErr != nil {
// progress pools do not work in all shells on Windows (e.g. LiteIDE)
bars = make(map[objectfile.Type]*pb.ProgressBar)
progressPool = nil
progressEnabled = false
}
}
// start goroutines
for _, t := range []objectfile.Type{objectfile.Vertex, objectfile.Normal, objectfile.UV, objectfile.Param} {
if slice := obj.Geometry.Get(t); len(slice) > 0 {
wg.Add(1)
go findDuplicates(t, slice, epsilon, wg, bars[t], setResults)
}
}
mResults.Unlock()
wg.Wait()
if progressPool != nil {
progressPool.Stop()
}
// report totals now, in the same order as they would print without progress bars
if progressEnabled {
mResults.Lock()
for _, t := range []objectfile.Type{objectfile.Vertex, objectfile.Normal, objectfile.UV, objectfile.Param} {
if result := results[t]; result != nil {
logInfo(" - %-2s %7d duplicates found for %d unique indexes (%s%%) in %s",
result.Type, result.Duplicates(), len(result.Items), computeFloatPerc(float64(result.Duplicates()), float64(preStats.Num(result.Type))), formatDuration(result.Spent))
}
}
mResults.Unlock()
}
}
// Rewrite pointer refs in vertex data that use a duplicate about to be removed.
// Executed in the main thread: accessing the vertex data arrays of the objects from
// goroutines would cause too much mutex contention, and this pass is fast enough
// that it does not need parallel execution.
// Sweeps the data and marks replaced values with .Discard.
for _, t := range []objectfile.Type{objectfile.Vertex, objectfile.Normal, objectfile.UV, objectfile.Param} {
if result := results[t]; result != nil {
replaceDuplicates(result.Type, obj, result.Items)
}
}
// Rewrite geometry: drop discarded values and reassign 1-based indexes.
for _, t := range []objectfile.Type{objectfile.Vertex, objectfile.Normal, objectfile.UV, objectfile.Param} {
src := obj.Geometry.Get(t)
if len(src) == 0 {
continue
}
dest := make([]*objectfile.GeometryValue, 0)
for _, gv := range src {
if !gv.Discard {
gv.Index = len(dest) + 1
dest = append(dest, gv)
}
}
if len(dest) != len(src) {
obj.Geometry.Set(t, dest)
}
}
return nil
}
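// findDuplicates scans slice for values that are equal within epsilon, splitting
// the pairwise scan across StartParams.Workers goroutines. The resulting replacers
// are merged and deduplicated before being passed to callback.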
func findDuplicates(t objectfile.Type, slice []*objectfile.GeometryValue, epsilon float64, wgMain *sync.WaitGroup, progress *pb.ProgressBar, callback func(*replacerResults)) {
defer wgMain.Done()
var (
started = time.Now()
results = make(replacerList, 0)
mResults sync.RWMutex
)
appendResults := func(rs []*replacer) {
mResults.Lock()
for _, result := range rs {
if !result.IsEmpty() {
results = append(results, result)
}
}
mResults.Unlock()
}
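// processSlice compares each value in [substart, subend) against all later values
// in fullslice and records values equal within epsilon as hits.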
processSlice := func(substart, subend int, fullslice []*objectfile.GeometryValue, subwg *sync.WaitGroup) {
innerResults := make(replacerList, 0)
var value *objectfile.GeometryValue
for first := substart; first < subend; first++ {
if progress != nil {
progress.Increment()
}
result := &replacer{
ref: fullslice[first],
}
for second, lenFull := first+1, len(fullslice); second < lenFull; second++ {
value = fullslice[second]
if value.Equals(result.ref, epsilon) {
result.Hit(value)
}
}
if !result.IsEmpty() {
innerResults = append(innerResults, result)
}
}
appendResults(innerResults)
subwg.Done()
}
wgInternal := &sync.WaitGroup{}
numPerRoutine := len(slice) / StartParams.Workers
for iter := 0; iter < StartParams.Workers; iter++ {
start := iter * numPerRoutine
end := start + numPerRoutine
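// the last chunk takes any remainder of the slice; forcing iter to Workers ends the loop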
if end >= len(slice) || iter == StartParams.Workers-1 {
end = len(slice)
iter = StartParams.Workers
}
wgInternal.Add(1)
go processSlice(start, end, slice, wgInternal)
}
wgInternal.Wait()
mResults.Lock()
defer mResults.Unlock()
if len(results) == 0 {
return
}
sort.Sort(replacerByIndex(results))
if progress != nil {
progress.Prefix(fmt.Sprintf(" - %-2s merge ", t))
progress.Total = int64(len(results))
progress.Set(0)
}
var r1, r2 *replacer
// 1st run: merge
for i1, lenResults := 0, len(results); i1 < lenResults; i1++ {
if progress != nil {
progress.Increment()
}
r1 = results[i1]
if !r1.hasItems {
continue
}
for i2 := i1 + 1; i2 < lenResults; i2++ {
r2 = results[i2]
/*if !r2.hasItems {
continue
}*/
/*if r1.ref.Index == r2.ref.Index {
// same primary index, this is a bug
logFatal("r1.Index() and r2.Index() are the same, something wrong with sub slice processing code\n%#v\n%#v\n\n", r1, r2)
}*/
if r2.hasItems && r1.hasItems && r1.replaces[r2.ref.Index] != nil {
// r1's geometry value equals r2's.
// Only merge those hits of r2 whose values also equal r1, otherwise
// we would do transitive merges, which is not what we want:
// e.g. r1 is within epsilon of r2, but further than epsilon from one of r2's hits.
r1.Merge(r2)
// r1 might now be empty if r2's index was its only hit
// and r2 was not completely merged into it.
if !r1.hasItems {
break
}
}
}
}
nonemptyMerged := make(replacerList, 0)
for _, r := range results {
if r.hasItems {
nonemptyMerged = append(nonemptyMerged, r)
}
}
if progress != nil {
progress.Prefix(fmt.Sprintf(" - %-2s deduplicate ", t))
progress.Total = int64(len(nonemptyMerged))
progress.Set(0)
}
// 2nd run: deduplicate. Must be done after the full merge pass to work correctly.
//
// Remove hits that appear in both r1 and r2. This can happen when a value
// lies between r1 and r2: it is within epsilon of both, while r1 and r2
// are not within epsilon of each other (see the merge pass above).
// In this case the hit is kept only in the replacer whose reference value
// is closest to it.
for i1, lenResults := 0, len(nonemptyMerged); i1 < lenResults; i1++ {
if progress != nil {
progress.Increment()
}
r1 = nonemptyMerged[i1]
if !r1.hasItems {
continue
}
for i2 := i1 + 1; i2 < lenResults; i2++ {
r2 = nonemptyMerged[i2]
if r2.hasItems {
deduplicate(r1, r2)
if !r1.hasItems {
break
}
}
}
}
// gather non-empty results
nonemptyFinal := make([]*replacer, 0)
for _, r := range nonemptyMerged {
if r.hasItems {
nonemptyFinal = append(nonemptyFinal, r)
}
}
// send results back
callback(&replacerResults{
Type: t,
Items: nonemptyFinal,
Spent: time.Since(started),
})
}
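// deduplicate removes hits shared by r1 and r2, keeping each shared hit only in
// the replacer whose reference value is closer to it.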
func deduplicate(r1, r2 *replacer) {
for _, value := range r1.Replaces() {
if !r2.hasItems {
// merged to empty during this iteration
return
} else if r2.replaces[value.Index] == nil {
// no hit for index, avoid function call
continue
}
// keep whichever is closest to value
dist1, dist2 := r1.ref.Distance(value), r2.ref.Distance(value)
if dist1 < dist2 {
r2.Remove(value.Index)
} else {
r1.Remove(value.Index)
}
}
}
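// replaceDuplicates rewrites declarations of type t in all objects to point at the
// surviving reference values and marks the replaced values with Discard.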
func replaceDuplicates(t objectfile.Type, obj *objectfile.OBJ, replacements replacerList) {
rStart := time.Now()
indexToRef := replacements.FlattenGeometry()
replaced := 0
for _, child := range obj.Objects {
for _, vt := range child.VertexData {
// catch newly added vertex data types that are not yet handled here
if vt.Type != objectfile.Face && vt.Type != objectfile.Line && vt.Type != objectfile.Point {
logFatal("Unsupported vertex data type %q for replacing duplicates\n\nPlease submit a bug report. If you can, provide this file as an attachement.\n> %s\n", vt.Type, ApplicationURL+"/issues")
}
for _, decl := range vt.Declarations {
switch t {
case objectfile.Vertex:
if ref := indexToRef[decl.Vertex]; ref != nil {
replaced++
decl.RefVertex.Discard = true
decl.RefVertex = ref
}
case objectfile.UV:
if ref := indexToRef[decl.UV]; ref != nil {
replaced++
decl.RefUV.Discard = true
decl.RefUV = ref
}
case objectfile.Normal:
if ref := indexToRef[decl.Normal]; ref != nil {
replaced++
decl.RefNormal.Discard = true
decl.RefNormal = ref
}
}
}
}
}
logInfo(" - %-2s %7d refs replaced in %s", t, replaced, formatDurationSince(rStart))
}