File tree Expand file tree Collapse file tree 3 files changed +48
-4
lines changed Expand file tree Collapse file tree 3 files changed +48
-4
lines changed Original file line number Diff line number Diff line change @@ -173,11 +173,15 @@ def run_process(
173173 """
174174 if env is None :
175175 env = self .expand_env (self .env )
176+
177+ # Decode the child process output as UTF-8 (instead of the locale-dependent default encoding)
176178 process = subprocess .Popen (
177179 command ,
178180 stdout = subprocess .PIPE ,
179181 stderr = subprocess .PIPE ,
180182 text = True ,
183+ encoding = "utf-8" ,
184+ errors = "replace" , # avoid crashes on the occasional bad byte
181185 env = env ,
182186 bufsize = 1 ,
183187 universal_newlines = True ,
@@ -189,7 +193,15 @@ def run_process(
189193 # check verbosity level
190194 quiet_mode = logger .getEffectiveLevel () <= logging .INFO
191195
192- def stream_output (pipe , output_lines , stream ):
196+ # Ensure our own stdout/stderr won't choke on non-ASCII (Windows consoles often do).
197+ for s in (sys .stdout , sys .stderr ):
198+ try :
199+ s .reconfigure (encoding = "utf-8" , errors = "replace" ) # Python 3.7+
200+ except Exception as e :
201+ logger .info (f"{ e } " )
202+ pass # OK if not available (e.g., redirected or older Python)
203+
204+ def stream_output (pipe , output_lines , stream ): # lines are already str decoded as UTF-8
193205 for line in iter (pipe .readline , "" ):
194206 if not quiet_mode :
195207 print (line .rstrip (), file = stream )
Original file line number Diff line number Diff line change 77 goose : {}
88
99models :
10- gpt-4o :
10+ claude-sonnet :
1111 provider : anthropic
1212 name : claude-sonnet-4-20250514
1313
3434 MONDO:0011694 (spinocerebellar ataxia type 15/16, aka SCA15)
3535 MONDO:0007298 (spinocerebellar ataxia type 29, aka SCA29)
3636 MONDO:0008795 (aniridia-cerebellar ataxia-intellectual disability syndrome; aka Gillespie syndrome)
37-
3837 threshold : 0.7
39-
38+ - name : character_encoding_test
39+ metrics : [CorrectnessMetric]
40+ input : Based on PMID 33926573 do microbes from alkaline sulphidic tailings show oxidative stresses?
41+ expected_output : ' The paper says No but it is retracted so the results should not be trusted.'
42+ threshold : 0.9
Original file line number Diff line number Diff line change 1+ name : pubmed tools evals
2+ description : |
3+ Evaluations for multiple pubmed MCPs
4+
5+
6+ coders :
7+ goose : {}
8+
9+ models :
10+ claude-sonnet :
11+ provider : anthropic
12+ name : claude-sonnet-4-20250514
13+
14+ servers :
15+ ols :
16+ name : ols
17+ command : uvx
18+ args : [mcp-ols]
19+
20+ server_combinations :
21+ - [simple-pubmed]
22+
23+ cases :
24+ - name : character_encoding_test
25+ metrics :
26+ - CorrectnessMetric
27+ input : Based on PMID 33926573 do microbes from alkaline sulphidic tailings show oxidative stresses?
28+ expected_output : ' The paper says No but it is retracted so the results should not be trusted.'
29+ threshold : 0.9
You can’t perform that action at this time.
0 commit comments