Skip to content

Commit 709f226

Browse files
committed
fix: Reconfigure the encoding of standard input according to the --encoding option, closes #1038
1 parent 12be2ff commit 709f226

File tree

13 files changed: 51 additions and 49 deletions.

CHANGELOG.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@ Unreleased
1010
* :doc:`/scripts/csvstat` adds a :code:`--non-nulls` option to only output counts of non-null values.
1111
* :doc:`/scripts/csvstat` adds a :code:`--max-precision` option to only output the maximum number of decimal places.
1212
* feat: Add a :code:`--null-value` option to commands with the :code:`--blanks` option, to convert additional values to NULL.
13+
* fix: Reconfigure the encoding of standard input according to the :code:`--encoding` option, which defaults to ``utf-8-sig``. Affected users no longer need to set the ``PYTHONIOENCODING`` environment variable.
1314
* fix: Prompt the user if additional input is expected (i.e. if no input file or piped data is provided) in :doc:`/scripts/csvjoin`, :doc:`/scripts/csvsql` and :doc:`/scripts/csvstack`.
1415
* fix: No longer errors if a NUL byte occurs in an input file.
1516
* Add Python 3.12 support.

csvkit/cli.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -238,6 +238,7 @@ def _open_input_file(self, path):
238238
Open the input file specified on the command line.
239239
"""
240240
if not path or path == '-':
241+
sys.stdin.reconfigure(encoding=self.args.encoding)
241242
f = sys.stdin
242243
else:
243244
extension = splitext(path)[1]

tests/test_convert/test_fixed.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
from io import StringIO
1+
import io
22

33
from csvkit.convert import fixed
44
from csvkit.utilities.in2csv import In2CSV
@@ -23,7 +23,7 @@ def test_fixed_skip_lines(self):
2323
self.assertEqual(f.read(), output)
2424

2525
def test_fixed_no_inference(self):
26-
input_file = StringIO(' 1 2 3')
26+
input_file = io.BytesIO(b' 1 2 3')
2727

2828
with stdin_as_string(input_file):
2929
self.assertLines(['--no-inference', '-f', 'fixed', '--schema',
@@ -36,7 +36,7 @@ def test_fixed_no_inference(self):
3636

3737
def test_fixed_streaming(self):
3838
with open('examples/testfixed') as f, open('examples/testfixed_schema.csv') as schema:
39-
output_file = StringIO()
39+
output_file = io.StringIO()
4040
fixed.fixed2csv(f, schema, output=output_file)
4141
output = output_file.getvalue()
4242
output_file.close()
@@ -91,7 +91,7 @@ def test_schematic_line_parser(self):
9191
bar,6,2
9292
baz,8,5"""
9393

94-
f = StringIO(schema)
94+
f = io.StringIO(schema)
9595
parser = fixed.FixedWidthRowParser(f)
9696
f.close()
9797

tests/test_utilities/test_csvclean.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
1+
import io
12
import os
23
import sys
3-
from io import StringIO
44
from unittest.mock import patch
55

66
from csvkit.utilities.csvclean import CSVClean, launch_new_instance
@@ -17,7 +17,7 @@ def tearDown(self):
1717

1818
def assertCleaned(self, basename, output_lines, error_lines, additional_args=[]):
1919
args = [f'examples/{basename}.csv'] + additional_args
20-
output_file = StringIO()
20+
output_file = io.StringIO()
2121

2222
utility = CSVClean(args, output_file)
2323
utility.run()

tests/test_utilities/test_csvformat.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1+
import io
12
import sys
2-
from io import StringIO
33
from unittest.mock import patch
44

55
from csvkit.utilities.csvformat import CSVFormat, launch_new_instance
@@ -54,7 +54,7 @@ def test_tab_delimiter(self):
5454
])
5555

5656
def test_quotechar(self):
57-
input_file = StringIO('a,b,c\n1*2,3,4\n')
57+
input_file = io.BytesIO(b'a,b,c\n1*2,3,4\n')
5858

5959
with stdin_as_string(input_file):
6060
self.assertLines(['-Q', '*'], [
@@ -65,7 +65,7 @@ def test_quotechar(self):
6565
input_file.close()
6666

6767
def test_doublequote(self):
68-
input_file = StringIO('a\n"a ""quoted"" string"')
68+
input_file = io.BytesIO(b'a\n"a ""quoted"" string"')
6969

7070
with stdin_as_string(input_file):
7171
self.assertLines(['-P', '#', '-B'], [
@@ -76,7 +76,7 @@ def test_doublequote(self):
7676
input_file.close()
7777

7878
def test_escapechar(self):
79-
input_file = StringIO('a,b,c\n1"2,3,4\n')
79+
input_file = io.BytesIO(b'a,b,c\n1"2,3,4\n')
8080

8181
with stdin_as_string(input_file):
8282
self.assertLines(['-P', '#', '-U', '3'], [

tests/test_utilities/test_csvjson.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
1+
import io
12
import json
23
import sys
3-
from io import StringIO
44
from unittest.mock import patch
55

66
from csvkit.utilities.csvjson import CSVJSON, launch_new_instance
@@ -58,7 +58,7 @@ def test_keying(self):
5858
self.assertDictEqual(js, {'True': {'a': True, 'c': 3.0, 'b': 2.0}})
5959

6060
def test_duplicate_keys(self):
61-
output_file = StringIO()
61+
output_file = io.StringIO()
6262
utility = CSVJSON(['-k', 'a', 'examples/dummy3.csv'], output_file)
6363
self.assertRaisesRegex(ValueError,
6464
'Value True is not unique in the key column.',

tests/test_utilities/test_csvlook.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1+
import io
12
import sys
2-
from io import StringIO
33
from unittest.mock import patch
44

55
from csvkit.utilities.csvlook import CSVLook, launch_new_instance
@@ -127,7 +127,7 @@ def test_max_column_width(self):
127127
])
128128

129129
def test_stdin(self):
130-
input_file = StringIO('a,b,c\n1,2,3\n4,5,6\n')
130+
input_file = io.BytesIO(b'a,b,c\n1,2,3\n4,5,6\n')
131131

132132
with stdin_as_string(input_file):
133133
self.assertLines([], [

tests/test_utilities/test_csvsort.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
1+
import io
12
import sys
2-
from io import StringIO
33
from unittest.mock import patch
44

55
from csvkit.utilities.csvsort import CSVSort, launch_new_instance
@@ -78,7 +78,7 @@ def test_sort_t_and_nulls(self):
7878
self.assertEqual(test_order, new_order)
7979

8080
def test_stdin(self):
81-
input_file = StringIO('a,b,c\n4,5,6\n1,2,3\n')
81+
input_file = io.BytesIO(b'a,b,c\n4,5,6\n1,2,3\n')
8282

8383
with stdin_as_string(input_file):
8484
self.assertLines([], [

tests/test_utilities/test_csvsql.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
1+
import io
12
import os
23
import sys
3-
from io import StringIO
44
from textwrap import dedent
55
from unittest.mock import patch
66

@@ -108,7 +108,7 @@ def test_linenumbers(self):
108108
''')) # noqa: W291
109109

110110
def test_stdin(self):
111-
input_file = StringIO('a,b,c\n4,2,3\n')
111+
input_file = io.BytesIO(b'a,b,c\n4,2,3\n')
112112

113113
with stdin_as_string(input_file):
114114
sql = self.get_output(['--tables', 'foo'])
@@ -124,7 +124,7 @@ def test_stdin(self):
124124
input_file.close()
125125

126126
def test_stdin_and_filename(self):
127-
input_file = StringIO("a,b,c\n1,2,3\n")
127+
input_file = io.BytesIO(b'a,b,c\n1,2,3\n')
128128

129129
with stdin_as_string(input_file):
130130
sql = self.get_output(['-', 'examples/dummy.csv'])
@@ -135,7 +135,7 @@ def test_stdin_and_filename(self):
135135
input_file.close()
136136

137137
def test_query(self):
138-
input_file = StringIO("a,b,c\n1,2,3\n")
138+
input_file = io.BytesIO(b'a,b,c\n1,2,3\n')
139139

140140
with stdin_as_string(input_file):
141141
sql = self.get_output(['--query', 'SELECT m.usda_id, avg(i.sepal_length) AS mean_sepal_length FROM iris '
@@ -150,7 +150,7 @@ def test_query(self):
150150
input_file.close()
151151

152152
def test_query_empty(self):
153-
input_file = StringIO()
153+
input_file = io.BytesIO()
154154

155155
with stdin_as_string(input_file):
156156
output = self.get_output(['--query', 'SELECT 1'])
@@ -185,14 +185,14 @@ def test_before_after_insert(self):
185185
'SELECT 1; CREATE TABLE foobar (date DATE)', '--after-insert',
186186
'INSERT INTO dummy VALUES (0, 5, 6)'])
187187

188-
output_file = StringIO()
188+
output_file = io.StringIO()
189189
utility = SQL2CSV(['--db', 'sqlite:///' + self.db_file, '--query', 'SELECT * FROM foobar'], output_file)
190190
utility.run()
191191
output = output_file.getvalue()
192192
output_file.close()
193193
self.assertEqual(output, 'date\n')
194194

195-
output_file = StringIO()
195+
output_file = io.StringIO()
196196
utility = SQL2CSV(['--db', 'sqlite:///' + self.db_file, '--query', 'SELECT * FROM dummy'], output_file)
197197
utility.run()
198198
output = output_file.getvalue()

tests/test_utilities/test_csvstack.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ def test_skip_lines(self):
2121
])
2222

2323
def test_skip_lines_stdin(self):
24-
with open('examples/test_skip_lines.csv') as f, stdin_as_string(f):
24+
with open('examples/test_skip_lines.csv', 'rb') as f, stdin_as_string(f):
2525
self.assertRows(['--skip-lines', '3', '-', 'examples/test_skip_lines.csv'], [
2626
['a', 'b', 'c'],
2727
['1', '2', '3'],
@@ -62,14 +62,14 @@ def test_multiple_file_stack_col_ragged(self):
6262
])
6363

6464
def test_multiple_file_stack_col_ragged_stdin(self):
65-
with open('examples/dummy.csv') as f, stdin_as_string(f):
65+
with open('examples/dummy.csv', 'rb') as f, stdin_as_string(f):
6666
self.assertRows(['-', 'examples/dummy_col_shuffled_ragged.csv'], [
6767
['a', 'b', 'c', 'd'],
6868
['1', '2', '3', ''],
6969
['1', '2', '3', '4'],
7070
])
7171

72-
with open('examples/dummy.csv') as f, stdin_as_string(f):
72+
with open('examples/dummy.csv', 'rb') as f, stdin_as_string(f):
7373
self.assertRows(['examples/dummy_col_shuffled_ragged.csv', '-'], [
7474
['b', 'c', 'a', 'd'],
7575
['2', '3', '1', '4'],
@@ -101,14 +101,14 @@ def test_no_header_row_basic(self):
101101
])
102102

103103
def test_no_header_row_basic_stdin(self):
104-
with open('examples/no_header_row.csv') as f, stdin_as_string(f):
104+
with open('examples/no_header_row.csv', 'rb') as f, stdin_as_string(f):
105105
self.assertRows(['--no-header-row', '-', 'examples/no_header_row2.csv'], [
106106
['a', 'b', 'c'],
107107
['1', '2', '3'],
108108
['4', '5', '6'],
109109
])
110110

111-
with open('examples/no_header_row.csv') as f, stdin_as_string(f):
111+
with open('examples/no_header_row.csv', 'rb') as f, stdin_as_string(f):
112112
self.assertRows(['--no-header-row', 'examples/no_header_row2.csv', '-'], [
113113
['a', 'b', 'c'],
114114
['4', '5', '6'],

0 commit comments

Comments
 (0)