Skip to content

Commit 33c9bc8

Browse files
authored
flate: Add amd64 assembly matchlen (#837)
* flate: Add assembly matchlen

A few percent faster.

```
before:
github-june-2days-2019.json gzkp 1 6273951764 1073607045 17271 346.43
github-june-2days-2019.json gzkp 2 6273951764 1045461954 20016 298.92
github-june-2days-2019.json gzkp 3 6273951764 1030139729 21372 279.95
github-june-2days-2019.json gzkp 4 6273951764 992526317 26354 227.03
github-june-2days-2019.json gzkp 5 6273951764 938015731 28919 206.89
github-june-2days-2019.json gzkp 6 6273951764 918717756 32473 184.25
github-june-2days-2019.json gzkp 7 6273951764 924473679 41597 143.84
github-june-2days-2019.json gzkp 8 6273951764 905294390 52419 114.14
github-june-2days-2019.json gzkp 9 6273951764 895561157 103132 58.02

after:
github-june-2days-2019.json gzkp 1 6273951764 1073607045 16978 352.40
github-june-2days-2019.json gzkp 2 6273951764 1045461954 19362 309.01
github-june-2days-2019.json gzkp 3 6273951764 1030139729 20882 286.53
github-june-2days-2019.json gzkp 4 6273951764 992526317 25009 239.24
github-june-2days-2019.json gzkp 5 6273951764 938015731 28934 206.79
github-june-2days-2019.json gzkp 6 6273951764 918717756 32698 182.98
github-june-2days-2019.json gzkp 7 6273951764 924473679 42734 140.01
github-june-2days-2019.json gzkp 8 6273951764 905294390 53639 111.55
github-june-2days-2019.json gzkp 9 6273951764 895561157 97701 61.24
```

* Add noasm fuzz test to deflate
1 parent f71e4d3 commit 33c9bc8

File tree

5 files changed

+120
-23
lines changed

5 files changed

+120
-23
lines changed

.github/workflows/go.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,9 @@ jobs:
198198
- name: flate/FuzzEncoding
199199
run: go test -run=none -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/.
200200

201+
- name: flate/FuzzEncoding/noasm
202+
run: go test -run=none -tags=noasm -fuzz=FuzzEncoding -fuzztime=100000x -test.fuzzminimizetime=10ms ./flate/.
203+
201204
- name: zip/FuzzReader
202205
run: go test -run=none -fuzz=FuzzReader -fuzztime=500000x -test.fuzzminimizetime=10ms ./zip/.
203206

flate/fast_encoder.go

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ package flate
88
import (
99
"encoding/binary"
1010
"fmt"
11-
"math/bits"
1211
)
1312

1413
type fastEnc interface {
@@ -192,25 +191,3 @@ func (e *fastGen) Reset() {
192191
}
193192
e.hist = e.hist[:0]
194193
}
195-
196-
// matchLen reports how many leading bytes of a and b are equal.
//
// The caller must pass the shorter slice as a: b is read up to len(a)
// bytes without any further length check.
func matchLen(a, b []byte) int {
	n := 0

	// Compare one little-endian 64-bit word at a time. When two words
	// differ, the lowest set bit of their XOR lies in the first byte that
	// differs, so dividing its bit index by 8 yields that byte's offset.
	for ; len(a) >= 8; a, b = a[8:], b[8:] {
		x := binary.LittleEndian.Uint64(a)
		y := binary.LittleEndian.Uint64(b)
		if x != y {
			return n + bits.TrailingZeros64(x^y)/8
		}
		n += 8
	}

	// Fewer than 8 bytes remain in a; finish byte by byte. Re-slicing b
	// to the same length lets the compiler drop the bounds check on b[i].
	b = b[:len(a)]
	for i, c := range a {
		if c != b[i] {
			return n + i
		}
	}
	return n + len(a)
}

flate/matchlen_amd64.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
//go:build amd64 && !appengine && !noasm && gc
2+
// +build amd64,!appengine,!noasm,gc
3+
4+
// Copyright 2019+ Klaus Post. All rights reserved.
5+
// License information can be found in the LICENSE file.
6+
7+
package flate
8+
9+
// matchLen returns the length, in bytes, of the common prefix of a and b.
10+
//
11+
// It assumes that:
12+
//
13+
//   len(a) <= len(b) and len(a) > 0
14+
//
// The function has no Go body; it is implemented in assembly
// (flate/matchlen_amd64.s in this commit). That code loads only len(a)
// and never looks at len(b), so b must hold at least len(a) readable
// bytes.
//
15+
//go:noescape
16+
func matchLen(a []byte, b []byte) int

flate/matchlen_amd64.s

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// Copied from S2 implementation.
2+
3+
//go:build !appengine && !noasm && gc
4+
5+
#include "textflag.h"
6+
7+
// func matchLen(a []byte, b []byte) int
8+
// Returns the number of leading bytes that are equal in a and b. Only len(a) is read, so b must hold at least len(a) bytes. TZCNT (BMI1) is used only when GOAMD64_v3 is defined; otherwise BSF is used, so BMI is not required by default.
9+
TEXT ·matchLen(SB), NOSPLIT, $0-56
10+
MOVQ a_base+0(FP), AX // AX = pointer to a's first byte
11+
MOVQ b_base+24(FP), CX // CX = pointer to b's first byte
12+
MOVQ a_len+8(FP), DX // DX = len(a), the number of bytes still to compare
13+
14+
// SI counts matched bytes and doubles as the offset into both slices.
15+
XORL SI, SI // SI = 0
16+
CMPL DX, $0x08 // 32-bit compare of the length. NOTE(review): assumes len(a) fits in 32 bits - confirm against callers
17+
JB matchlen_match4_standalone // fewer than 8 bytes: skip the 8-byte loop
18+
19+
matchlen_loopback_standalone:
20+
MOVQ (AX)(SI*1), BX // BX = 8 bytes of a at offset SI
21+
XORQ (CX)(SI*1), BX // XOR with 8 bytes of b; a set bit marks a difference
22+
TESTQ BX, BX
23+
JZ matchlen_loop_standalone // all 8 bytes equal: advance and continue
24+
25+
#ifdef GOAMD64_v3
26+
TZCNTQ BX, BX // BX = index of lowest set bit (BX is non-zero here)
27+
#else
28+
BSFQ BX, BX // same result as TZCNT for the non-zero BX reached here
29+
#endif
30+
SARQ $0x03, BX // bit index / 8 = offset of the first differing byte in the word
31+
LEAL (SI)(BX*1), SI // SI += that offset
32+
JMP gen_match_len_end
33+
34+
matchlen_loop_standalone:
35+
LEAL -8(DX), DX // 8 fewer bytes remaining
36+
LEAL 8(SI), SI // 8 more bytes matched
37+
CMPL DX, $0x08
38+
JAE matchlen_loopback_standalone // at least 8 bytes left: compare another word
39+
40+
matchlen_match4_standalone:
41+
CMPL DX, $0x04
42+
JB matchlen_match2_standalone // fewer than 4 bytes left
43+
MOVL (AX)(SI*1), BX // compare the next 4 bytes in one step
44+
CMPL (CX)(SI*1), BX
45+
JNE matchlen_match2_standalone // mismatch somewhere in these 4: narrow down with the 2- and 1-byte steps
46+
LEAL -4(DX), DX
47+
LEAL 4(SI), SI
48+
49+
matchlen_match2_standalone:
50+
CMPL DX, $0x02
51+
JB matchlen_match1_standalone // fewer than 2 bytes left
52+
MOVW (AX)(SI*1), BX // compare the next 2 bytes in one step
53+
CMPW (CX)(SI*1), BX
54+
JNE matchlen_match1_standalone // mismatch in these 2: let the 1-byte step decide
55+
LEAL -2(DX), DX
56+
LEAL 2(SI), SI
57+
58+
matchlen_match1_standalone:
59+
CMPL DX, $0x01
60+
JB gen_match_len_end // nothing left to compare
61+
MOVB (AX)(SI*1), BL // compare one final byte
62+
CMPB (CX)(SI*1), BL
63+
JNE gen_match_len_end
64+
INCL SI // it matched: count it
65+
66+
gen_match_len_end:
67+
MOVQ SI, ret+48(FP) // return the matched byte count
68+
RET

flate/matchlen_generic.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
//go:build !amd64 || appengine || !gc || noasm
2+
// +build !amd64 appengine !gc noasm
3+
4+
// Copyright 2019+ Klaus Post. All rights reserved.
5+
// License information can be found in the LICENSE file.
6+
7+
package flate
8+
9+
import (
10+
"encoding/binary"
11+
"math/bits"
12+
)
13+
14+
// matchLen reports the length of the longest common prefix of a and b.
//
// a must not be longer than b: the byte-wise tail walks every remaining
// byte of a and indexes b at the same position.
func matchLen(a, b []byte) (n int) {
	// Fast path: compare 8 bytes per step as little-endian words. In the
	// XOR of two differing words the lowest set bit lies in the first
	// differing byte, so its bit index divided by 8 is that byte's offset.
	for len(a) >= 8 && len(b) >= 8 {
		x := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
		if x != 0 {
			n += bits.TrailingZeros64(x) / 8
			return n
		}
		n += 8
		a = a[8:]
		b = b[8:]
	}

	// Tail: whatever is left of a is compared one byte at a time.
	for i, c := range a {
		if c != b[i] {
			return n
		}
		n++
	}
	return n
}

0 commit comments

Comments
 (0)