diff --git a/.gitignore b/.gitignore
index cfd99cf..8da8b1b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,7 @@
 docs/themes
 demo/demo.lmi
 demo/*.lexicmap.tsv.gz
+demo/seed_distance.tsv
 demo/seed_distance
 demo/seed-pos.tsv.gz
 demo/t.txt
diff --git a/docs/content/usage/utils/seed-pos.md b/docs/content/usage/utils/seed-pos.md
index ddf9456..65db302 100644
--- a/docs/content/usage/utils/seed-pos.md
+++ b/docs/content/usage/utils/seed-pos.md
@@ -75,21 +75,21 @@ Global Flags:
         ref               pos   strand   distance
         ---------------   ---   ------   --------
         GCF_000017205.1   2     +        1
-        GCF_000017205.1   41    +        39
+        GCF_000017205.1   41    -        39
         GCF_000017205.1   45    +        4
-        GCF_000017205.1   74    +        29
-        GCF_000017205.1   85    +        11
-        GCF_000017205.1   119   +        34
-        GCF_000017205.1   130   +        11
+        GCF_000017205.1   74    -        29
+        GCF_000017205.1   85    -        11
+        GCF_000017205.1   119   -        34
+        GCF_000017205.1   130   -        11
         GCF_000017205.1   185   +        55
-        GCF_000017205.1   269   +        84
+        GCF_000017205.1   269   -        84
 
     Or only list records with seed distance longer than a threshold.
 
         $ lexicmap utils seed-pos -d demo.lmi/ -n GCF_000017205.1 -D 1000 | csvtk pretty -t
         ref               pos       strand   distance
         ---------------   -------   ------   --------
-        GCF_000017205.1   5430137   +        1234
+        GCF_000017205.1   5430137   -        1235
 
     Check the biggest seed distances.
 
@@ -100,7 +100,7 @@ Global Flags:
 
         distance   frequency
         --------   ---------
-        1234       1
+        1235       1
         899        2
         898        4
         897        1
@@ -136,7 +136,7 @@ Global Flags:
         ref               pos   strand   distance   pre_pos   len_aaa   seq
         ---------------   ---   ------   --------   -------   -------   ---------------------------------------
         GCF_000017205.1   2     +        1          0         0         T
-        GCF_000017205.1   41    +        39         1         5         TAAAGAGACCGGCGATTCTAGTGAAATCGAACGGGCAGG
+        GCF_000017205.1   41    -        39         1         5         TAAAGAGACCGGCGATTCTAGTGAAATCGAACGGGCAGG
         GCF_000017205.1   45    +        4          40        1         TCAA
 
     Or only list records with seed distance longer than a threshold.
@@ -145,7 +145,7 @@ Global Flags:
              | csvtk pretty -t -W 50 --clip
         ref               pos       strand   distance   pre_pos   len_aaa   seq
         ---------------   -------   ------   --------   -------   -------   --------------------------------------------------
-        GCF_000017205.1   5430137   +        1234       5428902   4         ATCGGCGATCACGTTCAGCAGCGCCTTGGTGATGGTCAGGTTGTTGC.
+        GCF_000017205.1   5430137   -        1235       5428901   4         CATCGGCGATCACGTTCAGCAGCGCCTTGGTGATGGTCAGGTTGTTG...
 
 3. Listing seed position of all genomes.
 
@@ -158,21 +158,21 @@ Global Flags:
         $ csvtk freq -t -f ref -nr seed-pos.tsv.gz | csvtk pretty -t
         ref               frequency
         ---------------   ---------
-        GCF_000017205.1   45708
+        GCF_000017205.1   45711
         GCF_002950215.1   43617
-        GCF_002949675.1   43411
+        GCF_002949675.1   43415
         GCF_001457655.1   42112
-        GCF_006742205.1   42103
+        GCF_006742205.1   42102
         GCF_900638025.1   42008
-        GCF_001027105.1   41856
-        GCF_000742135.1   41414
-        GCF_000392875.1   41392
+        GCF_001027105.1   41855
+        GCF_000742135.1   41415
+        GCF_000392875.1   41391
         GCF_003697165.2   41189
-        GCF_009759685.1   41137
-        GCF_000006945.2   41108
+        GCF_009759685.1   41138
+        GCF_000006945.2   41110
         GCF_000148585.2   41075
-        GCF_001096185.1   40239
-        GCF_001544255.1   40159
+        GCF_001096185.1   40226
+        GCF_001544255.1   40153
 
     Plot the histograms of distances between seeds for all genomes.
diff --git a/go.mod b/go.mod
index 718653b..193a5cc 100644
--- a/go.mod
+++ b/go.mod
@@ -15,7 +15,7 @@ require (
 	github.com/shenwei356/bio v0.13.3
 	github.com/shenwei356/go-logging v0.0.0-20171012171522-c6b9702d88ba
 	github.com/shenwei356/kmers v0.1.0
-	github.com/shenwei356/lexichash v0.3.0
+	github.com/shenwei356/lexichash v0.4.0
 	github.com/shenwei356/util v0.5.2
 	github.com/spf13/cobra v1.8.0
 	github.com/twotwotwo/sorts v0.0.0-20160814051341-bf5c1f2b8553
diff --git a/lexicmap/cmd/lib-index-build.go b/lexicmap/cmd/lib-index-build.go
index dfb38e0..9107b5a 100644
--- a/lexicmap/cmd/lib-index-build.go
+++ b/lexicmap/cmd/lib-index-build.go
@@ -37,6 +37,7 @@ import (
 	"github.com/shenwei356/LexicMap/lexicmap/cmd/genome"
 	"github.com/shenwei356/LexicMap/lexicmap/cmd/kv"
 	"github.com/shenwei356/LexicMap/lexicmap/cmd/seedposition"
+	"github.com/shenwei356/LexicMap/lexicmap/cmd/util"
 	"github.com/shenwei356/bio/seqio/fastx"
 	"github.com/shenwei356/lexichash"
 	"github.com/shenwei356/lexichash/iterator"
@@ -600,7 +601,6 @@ func buildAnIndex(lh *lexichash.LexicHash, opt *IndexBuildingOptions,
 	// --------------------------------
 	// 1) parsing input genome files & mask & pack sequences
 
-	k := lh.K
 	nnn := bytes.Repeat([]byte{'N'}, opt.ContigInterval)
 	reRefName := opt.ReRefName
 	extractRefName := reRefName != nil
@@ -620,6 +620,9 @@ func buildAnIndex(lh *lexichash.LexicHash, opt *IndexBuildingOptions,
 	}()
 	startTime := time.Now()
 
+	k := lh.K
+	k8 := uint8(lh.K)
+
 	// --------------------------------
 	// read sequence
 
@@ -821,6 +824,9 @@ func buildAnIndex(lh *lexichash.LexicHash, opt *IndexBuildingOptions,
 			var hash, hashMin uint64
 			var knl *[]uint64
 
+			var lenPrefixSuffix uint8 = 1
+			ttt := uint64((1 << (lenPrefixSuffix << 1)) - 1)
+
 			extraLocs := poolInts.Get().(*[]int) // extra positions
 			*extraLocs = (*extraLocs)[:0]
 
@@ -835,6 +841,12 @@ func buildAnIndex(lh *lexichash.LexicHash, opt *IndexBuildingOptions,
 					continue
 				}
 
+				// there's a really big gap in it; it might be the interval between contigs or an assembly gap
+				if float64(lengthAAs(refseq.Seq[pre:pos]))/float64(d) >= 0.7 {
+					pre = pos
+					continue
+				}
+
 				// range of desert region +- 1000 bp,
 				// as we don't want other kmers with the same prefix exist around.
 				start = int(pre) - 1000 // start position in the sequence
@@ -896,11 +908,6 @@ func buildAnIndex(lh *lexichash.LexicHash, opt *IndexBuildingOptions,
 					*p2ks = append(*p2ks, p2k)
 				}
 
-				if (*counter)[0] > 50 { // it's in an interval region
-					pre = pos
-					continue
-				}
-
 				// fmt.Printf(" list size： %d\n", len(*p2ks))
 				// fmt.Printf(" posOfPre: %d, posOfCur: %d\n", posOfPre, posOfCur)
 
@@ -920,12 +927,13 @@ func buildAnIndex(lh *lexichash.LexicHash, opt *IndexBuildingOptions,
 
 				ok = false
 				for ; _j > _end; _j-- {
-					// fmt.Printf(" test %d\n", _j)
+					// fmt.Printf(" test u %d\n", _j)
 					p2k = (*p2ks)[_j]
 
 					// strand +
 					p, kmer = (*p2k)[0], (*p2k)[1]
-					if kmer != 0 && (*counter)[p] == 1 {
+					if kmer != 0 && (*counter)[p] == 1 &&
+						!util.MustKmerHasPrefix(kmer, 0, k8, lenPrefixSuffix) { // (gap AAAAAAAAAAAA) ACTGACTAG
 						kmerPos = uint64(start+_j) << 1
 						ok = true
 						break
@@ -933,7 +941,8 @@ func buildAnIndex(lh *lexichash.LexicHash, opt *IndexBuildingOptions,
 
 					// strand -
 					p, kmer = (*p2k)[2], (*p2k)[3]
-					if kmer != 0 && (*counter)[p] == 1 {
+					if kmer != 0 && (*counter)[p] == 1 &&
+						!util.MustKmerHasSuffix(kmer, ttt, k8, lenPrefixSuffix) { // (gap AAAAAAAAAAAA) ACTGACTAG
 						kmerPos = uint64(start+_j)<<1 | 1
 						ok = true
 						break
@@ -978,11 +987,12 @@ func buildAnIndex(lh *lexichash.LexicHash, opt *IndexBuildingOptions,
 					_end = posOfCur - 1
 				}
 				for _j = _start; _j < _end; _j++ {
+					// fmt.Printf(" test d %d\n", _j)
 					p2k = (*p2ks)[_j]
-
 					// strand +
 					p, kmer = (*p2k)[0], (*p2k)[1]
-					if kmer != 0 && (*counter)[p] == 1 {
+					if kmer != 0 && (*counter)[p] == 1 &&
+						!util.MustKmerHasPrefix(kmer, 0, k8, lenPrefixSuffix) { // (gap AAAAAAAAAAAA) ACTGACTAG
 						kmerPos = uint64(start+_j) << 1
 						ok = true
 						break
@@ -990,7 +1000,8 @@ func buildAnIndex(lh *lexichash.LexicHash, opt *IndexBuildingOptions,
 
 					// strand -
 					p, kmer = (*p2k)[2], (*p2k)[3]
-					if kmer != 0 && (*counter)[p] == 1 {
+					if kmer != 0 && (*counter)[p] == 1 &&
+						!util.MustKmerHasSuffix(kmer, ttt, k8, lenPrefixSuffix) { // (gap AAAAAAAAAAAA) ACTGACTAG
 						kmerPos = uint64(start+_j)<<1 | 1
 						ok = true
 						break
@@ -1001,10 +1012,12 @@ func buildAnIndex(lh *lexichash.LexicHash, opt *IndexBuildingOptions,
 
 				maskList = lh.MaskKmer(kmer)
 				hashMin = math.MaxUint64
-				for _, _im = range *maskList { // find the mask which capture the most similar k-mer with current one
+				for _, _im = range *maskList {
 					// fmt.Printf(" to %s\n", kmers.MustDecode((*_kmers)[_im], k))
-					hash = kmer ^ (*_kmers)[_im]
-					// hash = kmer ^ lh.Masks[_im] // both are OK
+
+					// both are OK
+					hash = kmer ^ (*_kmers)[_im] // find the mask which captures the most similar k-mer to the current one
+					// hash = kmer ^ lh.Masks[_im] // find the mask which will capture the current one
 					if hash < hashMin {
 						_imMostSimilar = _im
 					}
@@ -1153,7 +1166,7 @@ func buildAnIndex(lh *lexichash.LexicHash, opt *IndexBuildingOptions,
 	nMasks := opt.Masks
 	chunkSize := (nMasks + chunks - 1) / opt.Chunks
 	var j, begin, end int
-
+	k8 := uint8(lh.K)
 	tokens = make(chan int, 1) // hope it reduces memory
 	for j = 0; j < chunks; j++ { // each chunk
 		begin = j * chunkSize
@@ -1176,7 +1189,7 @@ func buildAnIndex(lh *lexichash.LexicHash, opt *IndexBuildingOptions,
 		// }
 		// }
 
-		_, err := kv.WriteKVData(uint8(k), begin, (*datas)[begin:end], file, opt.Partitions)
+		_, err := kv.WriteKVData(k8, begin, (*datas)[begin:end], file, opt.Partitions)
 		if err != nil {
 			checkError(fmt.Errorf("failed to write seeds data: %s", err))
 		}
diff --git a/lexicmap/cmd/seed-pos.go b/lexicmap/cmd/seed-pos.go
index 4486271..f0896b3 100644
--- a/lexicmap/cmd/seed-pos.go
+++ b/lexicmap/cmd/seed-pos.go
@@ -348,7 +348,7 @@ Extra columns:
 			continue
 		}
 
-		fmt.Fprintf(outfh, "%s\t%d\t%c\t%d", refname, pos+1, lexichash.Strands[pos2str&1], dist)
+		fmt.Fprintf(outfh, "%s\t%d\t%c\t%d", refname, pos+1, lexichash.Strands[pos2str>>1&1], dist)
 
 		if moreColumns {
 			if dist <= 0 {
@@ -411,7 +411,7 @@ Extra columns:
 
 		v = append(v, float64(dist))
 
-		fmt.Fprintf(outfh, "%s\t%d\t%c\t%d", refname, pos+1, lexichash.Strands[pos2str&1], dist)
+		fmt.Fprintf(outfh, "%s\t%d\t%c\t%d", refname, pos+1, lexichash.Strands[pos2str>>1&1], dist)
 
 		if moreColumns {
 			if dist <= 0 {
diff --git a/lexicmap/cmd/util/kmers.go b/lexicmap/cmd/util/kmers.go
index cc28885..426b4cd 100644
--- a/lexicmap/cmd/util/kmers.go
+++ b/lexicmap/cmd/util/kmers.go
@@ -72,6 +72,19 @@ func MustKmerHasPrefix(code uint64, prefix uint64, k1, k2 uint8) bool {
 	return code>>((k1-k2)<<1) == prefix
 }
 
+// KmerHasSuffix checks if a k-mer has a suffix.
+func KmerHasSuffix(code uint64, suffix uint64, k1, k2 uint8) bool {
+	if k1 < k2 {
+		return false
+	}
+	return code&((1<<(k2<<1))-1) == suffix
+}
+
+// MustKmerHasSuffix checks if a k-mer has a suffix, by assuming k1>=k2.
+func MustKmerHasSuffix(code uint64, suffix uint64, k1, k2 uint8) bool {
+	return code&((1<<(k2<<1))-1) == suffix
+}
+
 // SharingPrefixKmersMismatch counts the number of mismatch between two k-mers
 // sharing with a p-bp prefix.
 func SharingPrefixKmersMismatch(code1, code2 uint64, k, p uint8) (n uint8) {
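
Note on the new prefix/suffix checks: in the desert-filling loops above, a plus-strand candidate k-mer is now rejected when its first base is A (prefix 0), and a minus-strand candidate when its last base is T (ttt), which appears intended to keep seeds off the poly-A/poly-T runs flagged by the "(gap AAAAAAAAAAAA)" comments. The sketch below only illustrates that bit arithmetic: it copies the two helper bodies from this PR, while the encode helper, the A=0/C=1/G=2/T=3 encoding assumption, and the example k-mers are made up for demonstration and are not part of the codebase.

```go
package main

import "fmt"

// Copied from lexicmap/cmd/util/kmers.go in this PR: test the first/last
// k2 bases of a k1-bp k-mer stored in the lower 2*k1 bits of a uint64.
func MustKmerHasPrefix(code uint64, prefix uint64, k1, k2 uint8) bool {
	return code>>((k1-k2)<<1) == prefix
}

func MustKmerHasSuffix(code uint64, suffix uint64, k1, k2 uint8) bool {
	return code&((1<<(k2<<1))-1) == suffix
}

// encode is a hypothetical 2-bit encoder (A=0, C=1, G=2, T=3) used only for
// this demonstration; the real index uses the lexichash/kmers packages.
func encode(s string) (code uint64) {
	m := map[byte]uint64{'A': 0, 'C': 1, 'G': 2, 'T': 3}
	for i := 0; i < len(s); i++ {
		code = code<<2 | m[s[i]]
	}
	return code
}

func main() {
	const k uint8 = 9
	var lenPrefixSuffix uint8 = 1
	// ttt is the all-T suffix of length lenPrefixSuffix, built as in lib-index-build.go.
	ttt := uint64((1 << (lenPrefixSuffix << 1)) - 1)

	// A k-mer right after a poly-A stretch starts with A; one right before a
	// poly-T stretch (a poly-A gap seen on the minus strand) ends with T.
	fmt.Println(MustKmerHasPrefix(encode("ACTGACTAG"), 0, k, lenPrefixSuffix))   // true: leading A
	fmt.Println(MustKmerHasSuffix(encode("CTGACTAGT"), ttt, k, lenPrefixSuffix)) // true: trailing T
	fmt.Println(MustKmerHasPrefix(encode("GACTGACTA"), 0, k, lenPrefixSuffix))   // false: leads with G
}
```

Run with `go run`, this prints `true`, `true`, `false`, matching which candidates the two scanning loops would now skip.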