Skip to content

Commit fce6a89

Browse files
committed
feat(builtins/method): bytes/bytearray: join
1 parent 13d3d78 commit fce6a89

File tree

3 files changed

+178
-3
lines changed

3 files changed

+178
-3
lines changed

Objects/byteobjects.nim

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import ./pyobject
55
from ./abstract/iter import PyObject_GetIter
66
import ./[listobject, tupleobjectImpl, stringobject, exceptions, iterobject]
77
import ./numobjects/intobject/[decl, ops_imp_warn]
8+
import ../Utils/addr0
89
#XXX: Nim's string ops have bugs with the NUL ('\0') char, e.g. len('1\02') gives 2
910
declarePyType Bytes(tpToken):
1011
items: seq[char]
@@ -54,15 +55,30 @@ proc contains*(s: PyByteLike, c: char): bool = c in s.items
5455
proc `[]`*(s: PyByteLike, i: int): char =
  ## Returns the char stored at index `i` of `s.items`.
  result = s.items[i]
5556
proc getInt*(s: PyByteLike, i: int): PyIntObject =
  ## Passes the char at index `i` of `s` to `newPyInt` and returns the result.
  newPyInt(s[i])
5657

57-
template impl(B, InitT, newTOfCap){.dirty.} =
58+
when not defined(js):
  type CharsView* = cstring  ## Unstable implementation detail.
  ## Storing into the CharsView of a PyBytes object is undefined behaviour.
  ## On the JS backend the "view" is currently just a copy, not a real view.
  proc getCharPtr*(s: PyByteLike; i: int): ptr char =
    ## Unstable: address of the `i`-th element of `s.items`.
    ## Not available on the JS backend.
    s.items[i].addr
else:
  type CharsView* = seq[char]
5865

66+
template impl(B, InitT, newTOfCap){.dirty.} =
5967
proc asString*(s: `Py B Object`): string = $s.items
68+
proc charsView*(s: `Py B Object`): CharsView =
69+
when defined(js): s.items
70+
else:
71+
return cast[cstring](s.items.addr0)
6072
method `$`*(s: `Py B Object`): string = s.asString
61-
proc `newPy B`*(s: InitT = default InitT): `Py B Object` =
73+
proc `newPy B`*(s: InitT): `Py B Object` =
6274
result = `newPy B Simple`()
6375
result.items = s
6476
proc `newPy B`*(size: int): `Py B Object` =
6577
`newPy B` newTOfCap size
78+
79+
let `empty B` = `newPy B` @[]
80+
proc `newPy B`*(): `Py B Object` = `empty B`
81+
6682
proc `&`*(s1, s2: `Py B Object`): `Py B Object` =
6783
`newPy B`(s1.items & s2.items)
6884

Objects/byteobjectsImpl.nim

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11

22
import std/strformat
3-
import ../Utils/[sequtils, addr0]
3+
import ../Utils/[sequtils, destroyPatch, addr0]
44
import ./byteobjects
55
import ./pyobject
66
import ./[boolobject, numobjects, stringobjectImpl, exceptions, noneobject,
77
iterobject, hash, abstract,
88
]
99
import ./tupleobjectImpl
10+
import ./stringlib/join
1011
from ./listobject import genMutableSequenceMethods
1112

1213
export byteobjects
@@ -114,6 +115,27 @@ genMutableSequenceMethods PyNumber_AsCharOrRet, newPyInt, ByteArray, char:
114115
if self.len == high int:
115116
return newOverflowError newPyAscii"cannot add more objects to bytearray"
116117

118+
#TODO:buffer
119+
# workaround:
120+
type
  Py_buffer = object
    ## Workaround record standing in for a real buffer type (see `#TODO:buffer`).
    buf: CharsView  ## chars view of the exporting object
    len: int        ## length recorded alongside `buf`
    obj: PyObject   ## the object `buf` was taken from
defdestroy(Py_buffer):
  discard
125+
#proc PyBuffer_Release(b: Py_buffer) = discard
126+
127+
proc init_Py_buffer(buf: CharsView, len: int, obj: PyObject): Py_buffer =
  ## Builds a `Py_buffer` whose three fields are set from the arguments.
  result.buf = buf
  result.len = len
  result.obj = obj
128+
129+
proc to_py_buffer(b: PyBytesObject|PyByteArrayObject): CharsView =
  ## Returns the `charsView` of a bytes or bytearray object.
  charsView(b)
130+
131+
template genJoin(B; mut: bool){.dirty.} =
  ## Emits an exported `join` proc for `Py B Object` that expands `bytes_join`
  ## with `mutable = mut`, then registers it through `impl B Method`.
  proc join*(b: `Py B Object`, iterable: PyObject): PyObject{.pyCFuncPragma.} =
    bytes_join(B, b, iterable, mutable = mut)
  `impl B Method` join(iterable):
    self.join iterable

genJoin(bytes, false)
genJoin(bytearray, true)
138+
117139
template impl(x, fromSize, fromObject) =
118140
if x.ofPyStrObject:
119141
return newTypeError newPyAscii"string argument without an encoding"

Objects/stringlib/join.nim

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
2+
import std/strformat
3+
import ../[
4+
pyobjectBase,
5+
stringobject,
6+
exceptions,
7+
]
8+
import ../abstract/sequence/list
9+
import ../../Utils/rtarrays
10+
11+
template bytes_join*(S; sep; iterable: PyObject; mutable: bool)#[: PyObject]#{.dirty.} =
  ## Body of `sep.join(iterable)` for bytes-like types; expand it inside a proc
  ## returning `PyObject` (it uses `return`).
  ##
  ## * `S`: type-name fragment used to build `newPy S` / `ofExactPy S Object`.
  ## * `sep`: the separator object; needs `charsView` and `len`.
  ## * `iterable`: the object whose items are joined.
  ## * `mutable`: when false, a single item of the exact result type is
  ##   returned as-is instead of being copied.
  ##
  ## Returns the joined `Py S` object, or an exception object:
  ## a TypeError for a non bytes-like item, an OverflowError when the total
  ## length would exceed `int.high`, a RuntimeError when the sequence changes
  ## size during the pre-pass.
  bind RtArray, initRtArray
  bind PySequence_Fast, PySequence_Fast_GET_SIZE, PySequence_Fast_GET_ITEM
  bind newPyStr, newPyAscii, newRuntimeError, newTypeError, newOverflowError
  bind formatValue, fmt
  let
    sepstr = sep.charsView
    seplen = len(sep)

  let sequ = PySequence_Fast(iterable, "can only join an iterable")
  retIfExc sequ

  let seqlen = PySequence_Fast_GET_SIZE(sequ)
  if seqlen == 0:
    return `newPy S`()

  var item: PyObject
  when not mutable:
    # An immutable result may simply be the single exact-typed item itself.
    if seqlen == 1:
      item = PySequence_Fast_GET_ITEM(sequ, 0)
      if item.`ofExactPy S Object`:
        return item

  const GIL_THRESHOLD = 1048576

  #XXX: NIM-BUG: when JS using RtArray: `Error: internal error: ("genAddr: 2", skTemp)`
  #  due to `[]=` or `[]` to RtArray
  var buffers = (when defined(js): newSeq else: initRTArray)[Py_buffer](seqlen)

  #[ Here is the general case.  Do a pre-pass to figure out the total
     amount of space we'll need (sz), and see whether all arguments are
     bytes-like.
  ]#
  var sz = 0
  var nbufs = 0
  var drop_gil = true
  for i in 0 ..< seqlen:
    item = PySequence_Fast_GET_ITEM(sequ, i)
    proc asgn(b: auto) =
      buffers[i] = init_Py_buffer(to_py_buffer(b), b.len, item)
    if item.ofExactPyBytesObject:
      # Fast path.
      let b = PyBytesObject(item)
      asgn b
    elif item.ofExactPyByteArrayObject:
      let b = PyByteArrayObject(item)
      asgn b
    else:
      template byteslikeExpect =
        return newTypeError newPyStr(
          fmt"sequence item {i}: expected a bytes-like object, {item.typeName:.80s} found"
        )
      when defined(npython_buffer):
        #TODO:buffer
        let exc: PyBaseErrorObject = PyObject_GetBuffer(item, buffers[i], PyBUF.SIMPLE)
        if not exc.isNil:
          byteslikeExpect
        #[ If the backing objects are mutable, then dropping the GIL
           opens up race conditions where another thread tries to modify
           the object which we hold a buffer on it. Such code has data
           races anyway, but this is a conservative approach that avoids
           changing the behaviour of that data race.
        ]#
        drop_gil = false
      else:
        byteslikeExpect

    nbufs = i + 1  # for error cleanup
    let itemlen = buffers[i].len
    template resTooLong =
      return newOverflowError newPyAscii"join() result is too long"
    template `+?=`(s: var int; i: int) =
      # overflow-checked `+=`: bail out before the sum can exceed int.high
      if i > int.high - s: resTooLong
      s += i
    sz +?= itemlen
    if i != 0:
      sz +?= seplen
    if seqlen != PySequence_Fast_GET_SIZE(sequ):
      return newRuntimeError newPyAscii"sequence changed size during iteration"

  # Allocate result space.
  var res = `newPy S`(sz)

  # Catenate everything.
  var p = 0  # write offset into `res`
  when declared(copyMem):
    template memcpy(_, b; n: int) = copyMem(res.getCharPtr p, b[0].addr, n)
  else:
    template memcpy(_, b; n: int) =
      for i in 0..<n:#p ..< p+n:
        res.items[p+i] = b[i]
  template addbn(b; n: int) =
    # Skip empty chunks: with n == 0 the offset `p` may equal the result's
    # length, and taking the address of that element is an index error.
    if n > 0:
      memcpy(res[p], b, n)
      p += n
  template addb(bExpr: Py_buffer) =
    let b = bExpr
    addbn(b.buf, b.len)
  if sz < GIL_THRESHOLD:
    drop_gil = false  # Benefits are likely outweighed by the overheads

  #TODO:threads
  const hasPyThrd = defined(npython_threads)
  when hasPyThrd:
    var save: PyThreadState
    if drop_gil: save = PyEval_SaveThread()

  if seplen == 0:
    # fast path
    for i in 0..<nbufs:
      addb buffers[i]
  else:
    if nbufs > 0:
      addb(buffers[0])
    for i in 1 ..< nbufs:
      # the separator goes before every item except the first
      addbn(sepstr, seplen)
      addb(buffers[i])

  when hasPyThrd:
    if drop_gil: PyEval_RestoreThread(save)

  # RtArray's `=destroy` will call buffer's destroy
  #for b in buffers: PyBuffer_Release(b)
  #if use_non_static: PyMem_Free(buffers)
  return res
136+
137+

0 commit comments

Comments
 (0)