Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.ExtendedDismaxQParserPlugin;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
import org.slf4j.Logger;
Expand All @@ -27,8 +28,10 @@
import java.nio.charset.StandardCharsets;


public class AutoPhrasingQParserPlugin extends QParserPlugin implements ResourceLoaderAware {

public class AutoPhrasingQParserPlugin
extends ExtendedDismaxQParserPlugin
implements ResourceLoaderAware {

private static final Logger Log = LoggerFactory.getLogger( AutoPhrasingQParserPlugin.class );
private CharArraySet phraseSets;
private String phraseSetFiles;
Expand All @@ -38,18 +41,22 @@ public class AutoPhrasingQParserPlugin extends QParserPlugin implements Resource
private char replaceWhitespaceWith = 'x'; // preserves stemming

private boolean ignoreCase = true;
@Override
public void init( NamedList initArgs ) {
Log.info( "init ..." );
SolrParams params = SolrParams.toSolrParams(initArgs);
phraseSetFiles = params.get( "phrases" );


Log.info( "phrases :" + phraseSetFiles);

String pImpl = params.get( "defType" );
if (pImpl != null) {
parserImpl = pImpl;
}

Log.info( "defType :" + pImpl);

String replaceWith = params.get( "replaceWhitespaceWith" );
if (replaceWith != null && replaceWith.length() > 0) {
replaceWhitespaceWith = replaceWith.charAt(0);
Expand All @@ -63,7 +70,7 @@ public void init( NamedList initArgs ) {

@Override
public QParser createParser( String qStr, SolrParams localParams, SolrParams params,
SolrQueryRequest req) {
SolrQueryRequest req) {
Log.info( "createParser" );
ModifiableSolrParams modparams = new ModifiableSolrParams( params );
String modQ = filter( qStr );
Expand All @@ -73,14 +80,14 @@ public QParser createParser( String qStr, SolrParams localParams, SolrParams par
.createParser(modQ, localParams, modparams, req);
}

private String filter( String qStr ) {
private String filter( String qStr ) {
// 1) collapse " :" to ":" to protect field names
// 2) expand ":" to ": " to free terms from field names
// 3) expand "+" to "+ " to free terms from "+" operator
// 4) expand "-" to "- " to free terms from "-" operator
// 5) Autophrase with whitespace tokenizer
// 6) collapse "+ " and "- " to "+" and "-" to glom operators.
// 5) Autophrase with whitespace tokenizer
// 6) collapse "+ " and "- " to "+" and "-" to glom operators.
String query = qStr;
while( query.contains( " :" ))
query = query.replaceAll( "\\s:", ": " );
Expand All @@ -94,9 +101,11 @@ private String filter( String qStr ) {
}

try {
Log.info("before, q:" + query);
query = autophrase( query );
Log.info("after, q:" + query);
}
catch (IOException ioe ) { }
catch (IOException ioe ) { Log.info("ioe exception"); }

query = query.replaceAll( "\\+ ", "+" );
query = query.replaceAll( "\\- ", "-" );
Expand All @@ -105,23 +114,27 @@ private String filter( String qStr ) {
query = query.replaceAll( "&&", "AND" );
query = query.replaceAll( "\\|\\|", "OR" );
}
return query;
}
private String autophrase( String input ) throws IOException {
WhitespaceTokenizer wt = new WhitespaceTokenizer( new StringReader( input ));
WhitespaceTokenizer wt = new WhitespaceTokenizer();
wt.setReader( new StringReader( input ));
TokenStream ts = wt;
if (ignoreCase) {
Log.info("to lower...");
ts = new LowerCaseFilter( wt );
}

AutoPhrasingTokenFilter aptf = new AutoPhrasingTokenFilter( ts, phraseSets, false );
aptf.setReplaceWhitespaceWith( new Character( replaceWhitespaceWith ) );
CharTermAttribute term = aptf.addAttribute(CharTermAttribute.class);
aptf.reset();

StringBuffer strbuf = new StringBuffer( );
while( aptf.incrementToken( )) {
Log.info("increment token :" + term.toString());
strbuf.append( term.toString( ) ).append( " " );
}

Expand All @@ -134,26 +147,26 @@ public void inform(ResourceLoader loader) throws IOException {
phraseSets = getWordSet(loader, phraseSetFiles, true );
}
}
private CharArraySet getWordSet( ResourceLoader loader,
String wordFiles, boolean ignoreCase)
throws IOException {
String wordFiles, boolean ignoreCase)
throws IOException {
List<String> files = splitFileNames(wordFiles);
CharArraySet words = null;
CharArraySet words = null;
if (files.size() > 0) {
// default stopwords list has 35 or so words, but maybe don't make it that
// big to start
words = new CharArraySet( files.size() * 10, ignoreCase);
for (String file : files) {
List<String> wlist = getLines(loader, file.trim());
words.addAll(StopFilter.makeStopSet( wlist, ignoreCase));
words.addAll(StopFilter.makeStopSet( wlist, ignoreCase));
}
}
return words;
}
private List<String> getLines(ResourceLoader loader, String resource) throws IOException {
return WordlistLoader.getLines(loader.openResource(resource), StandardCharsets.UTF_8);
return WordlistLoader.getLines(loader.openResource(resource), StandardCharsets.UTF_8);
}

private List<String> splitFileNames(String fileNames) {
Expand Down
Loading