replacing opensearch with solr

This commit is contained in:
Stefan Hardegger
2025-09-22 09:44:50 +02:00
parent 9e684a956b
commit 87f37567fb
40 changed files with 2000 additions and 3464 deletions

View File

@@ -0,0 +1,93 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Solr Schema for StoryCove Authors Core
Based on AuthorSearchDto data model
-->
<schema name="storycove-authors" version="1.6">
<!-- Field Types -->
<!-- Untokenized string type: the whole value is indexed as a single term -->
<fieldType name="string"
           class="solr.StrField"
           sortMissingLast="true"/>
<!-- General-purpose tokenized text type used for full-text search -->
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
  <!-- Index time: tokenize, drop stopwords, lowercase -->
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
  <!-- Query time: same chain, plus synonym expansion applied before lowercasing -->
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" words="stopwords.txt" ignoreCase="true"/>
    <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" expand="true" ignoreCase="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!--
  Prefix-matching text type for author names.

  Index time: tokens are lowercased and expanded into edge n-grams of 2 to 15 characters,
  so partial input such as "tol" matches "Tolkien". preserveOriginal="true" also indexes
  the unmodified token, which keeps one-letter initials and tokens longer than 15
  characters searchable instead of silently dropping them.

  Query time: tokens are only lowercased and then matched against the indexed grams.

  No stopword filter is applied on either side: words such as "Will", "An" or "The" are
  legitimate parts of names, and filtering them makes a query for such a name analyse
  to nothing. Changing this chain requires a reindex of existing documents.
-->
<fieldType name="text_enhanced" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="15" preserveOriginal="true"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Numeric and date point types; docValues are enabled on each of them -->
<fieldType name="pint"    class="solr.IntPointField"    docValues="true"/>
<fieldType name="plong"   class="solr.LongPointField"   docValues="true"/>
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
<fieldType name="pdate"   class="solr.DatePointField"   docValues="true"/>
<!-- Multi-valued, untokenized string type (used for the URL list) -->
<fieldType name="strings"
           class="solr.StrField"
           multiValued="true"
           sortMissingLast="true"/>
<!-- Fields -->
<!-- Required fields: the document key and Solr's internal version field -->
<field name="id"
       type="string"
       required="true"
       multiValued="false"
       indexed="true"
       stored="true"/>
<!-- Neither indexed nor stored; the plong type has docValues enabled -->
<field name="_version_" type="plong" stored="false" indexed="false"/>
<!-- Author data: searchable text, ratings, counts and links -->
<field name="name"               type="text_enhanced" required="true" stored="true" indexed="true"/>
<field name="notes"              type="text_general"  stored="true" indexed="true"/>
<field name="authorRating"       type="pint"          stored="true" indexed="true"/>
<field name="averageStoryRating" type="pdouble"       stored="true" indexed="true"/>
<field name="storyCount"         type="pint"          stored="true" indexed="true"/>
<field name="urls"               type="strings"       stored="true" indexed="true"/>
<!-- Returned with results but not searchable -->
<field name="avatarImagePath"    type="string"        stored="true" indexed="false"/>
<!-- Creation / modification timestamps -->
<field name="createdAt" type="pdate" stored="true" indexed="true"/>
<field name="updatedAt" type="pdate" stored="true" indexed="true"/>
<!-- Search-specific value: stored only, not indexed -->
<field name="searchScore" type="plong" stored="true" indexed="false"/>
<!-- Catch-all field for general queries: indexed only, populated by the copyFields below -->
<field name="text"
       type="text_general"
       multiValued="true"
       indexed="true"
       stored="false"/>
<!-- Sources copied into the catch-all field -->
<copyField dest="text" source="name"/>
<copyField dest="text" source="notes"/>
<copyField dest="text" source="urls"/>
<!-- Default search field: none is declared in this schema; the /select handler in the authors solrconfig.xml sets df=text -->
<!-- UniqueKey: the "id" field is the document key -->
<uniqueKey>id</uniqueKey>
</schema>

140
solr/authors/conf/solrconfig.xml Executable file
View File

@@ -0,0 +1,140 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Solr Configuration for StoryCove Authors Core
Optimized for author search with highlighting and faceting
-->
<config>
<!-- Lucene compatibility version -->
<luceneMatchVersion>9.9.0</luceneMatchVersion>
<!-- Index data location: value of solr.data.dir, empty when the property is unset -->
<dataDir>${solr.data.dir:}</dataDir>
<!-- Directory implementation: overridable through solr.directoryFactory -->
<directoryFactory class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}" name="DirectoryFactory"/>
<!-- Codec factory -->
<codecFactory class="solr.SchemaCodecFactory"/>
<!-- Index writer settings -->
<indexConfig>
  <!-- Lock implementation: overridable through solr.lock.type, "native" otherwise -->
  <lockType>${solr.lock.type:native}</lockType>
  <!-- Lucene info stream switched on -->
  <infoStream>true</infoStream>
</indexConfig>
<!-- JMX -->
<jmx/>
<!-- Update handler: transaction log plus automatic hard and soft commits -->
<updateHandler class="solr.DirectUpdateHandler2">
  <!-- Transaction log: location and bucket count are overridable through system properties -->
  <updateLog>
    <str name="dir">${solr.ulog.dir:}</str>
    <int name="numVersionBuckets">${solr.ulog.numVersionBuckets:65536}</int>
  </updateLog>
  <!-- Hard commit: maxTime 15000, without opening a new searcher -->
  <autoCommit>
    <maxTime>15000</maxTime>
    <openSearcher>false</openSearcher>
  </autoCommit>
  <!-- Soft commit: maxTime 1000 -->
  <autoSoftCommit>
    <maxTime>1000</maxTime>
  </autoSoftCommit>
</updateHandler>
<!-- Query settings: clause limit, caches and lazy field loading -->
<query>
  <maxBooleanClauses>1024</maxBooleanClauses>
  <!-- All three caches: 512 entries, no autowarming -->
  <filterCache      class="solr.CaffeineCache" size="512" initialSize="512" autowarmCount="0"/>
  <queryResultCache class="solr.CaffeineCache" size="512" initialSize="512" autowarmCount="0"/>
  <documentCache    class="solr.CaffeineCache" size="512" initialSize="512" autowarmCount="0"/>
  <enableLazyFieldLoading>true</enableLazyFieldLoading>
</query>
<!--
  Request dispatcher.
  Remote streaming is disabled: when enabled, the stream.url and stream.file request
  parameters let any caller make Solr fetch arbitrary URLs or read local files
  (server-side request forgery / file disclosure). Enable it only on a trusted,
  access-controlled deployment that actually needs it.
-->
<requestDispatcher handleSelect="false">
  <requestParsers enableRemoteStreaming="false"
                  multipartUploadLimitInKB="2048000"
                  formdataUploadLimitInKB="2048"
                  addHttpRequestToContext="false"/>
  <!-- HTTP caching: never304 is set to true -->
  <httpCaching never304="true"/>
</requestDispatcher>
<!-- Request Handlers -->
<!--
  Standard select handler.
  Defaults: JSON output, 10 rows, default field "text", highlighting on name and notes,
  field facet on authorRating, range facets on averageStoryRating and storyCount.
  Parameters sent with a request override these defaults.
-->
<requestHandler name="/select" class="solr.SearchHandler">
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <int name="rows">10</int>
    <str name="df">text</str>
    <str name="wt">json</str>
    <str name="indent">true</str>

    <!-- Highlighting -->
    <str name="hl">true</str>
    <str name="hl.fl">name,notes</str>
    <str name="hl.simple.pre">&lt;em&gt;</str>
    <str name="hl.simple.post">&lt;/em&gt;</str>
    <str name="hl.fragsize">150</str>
    <str name="hl.maxAnalyzedChars">51200</str>

    <!-- Field faceting -->
    <str name="facet">true</str>
    <str name="facet.field">authorRating</str>
    <str name="facet.mincount">1</str>
    <str name="facet.sort">count</str>

    <!--
      Range faceting. Solr rejects a request whose facet.range field has no
      start/end/gap, so each range field needs its own defaults here; without them
      every query relying on these defaults fails with "Missing required parameter".
    -->
    <!-- NOTE(review): bounds assume ratings on a 0 to 5 scale; confirm against the data model -->
    <str name="facet.range">averageStoryRating</str>
    <str name="f.averageStoryRating.facet.range.start">0</str>
    <str name="f.averageStoryRating.facet.range.end">5</str>
    <str name="f.averageStoryRating.facet.range.gap">1</str>
    <!-- "edge" makes the last bucket include its upper bound, so a rating of exactly 5 is counted -->
    <str name="f.averageStoryRating.facet.range.include">lower</str>
    <str name="f.averageStoryRating.facet.range.include">edge</str>

    <!-- NOTE(review): bucket size and upper bound are placeholders; tune to real story counts -->
    <str name="facet.range">storyCount</str>
    <str name="f.storyCount.facet.range.start">0</str>
    <str name="f.storyCount.facet.range.end">100</str>
    <str name="f.storyCount.facet.range.gap">10</str>
    <!-- Also report how many authors lie above the last bucket -->
    <str name="f.storyCount.facet.range.other">after</str>
  </lst>
</requestHandler>
<!-- Request handler for index updates -->
<requestHandler class="solr.UpdateRequestHandler"
                name="/update"/>
<!-- Ping handler: always runs a match-all query and echoes every parameter by default -->
<requestHandler class="solr.PingRequestHandler" name="/admin/ping">
  <lst name="defaults">
    <str name="echoParams">all</str>
  </lst>
  <!-- Invariants cannot be overridden by the request -->
  <lst name="invariants">
    <str name="q">*:*</str>
  </lst>
</requestHandler>
<!--
  Suggest handler: runs only the "suggest" search component and returns up to 10
  suggestions. suggest.dictionary defaults to the suggester configured in that
  component ("authorSuggester"); without it Solr looks for a suggester named
  "default", finds none and rejects the request.
-->
<requestHandler name="/suggest" class="solr.SearchHandler" startup="lazy">
  <lst name="defaults">
    <str name="suggest">true</str>
    <str name="suggest.dictionary">authorSuggester</str>
    <str name="suggest.count">10</str>
  </lst>
  <arr name="components">
    <str>suggest</str>
  </arr>
</requestHandler>
<!-- Search component backing /suggest: a single suggester over author names -->
<searchComponent class="solr.SuggestComponent" name="suggest">
  <lst name="suggester">
    <str name="name">authorSuggester</str>
    <!-- Suggestions come from the name field, weighted by storyCount -->
    <str name="dictionaryImpl">DocumentDictionaryFactory</str>
    <str name="field">name</str>
    <str name="weightField">storyCount</str>
    <!-- Lookup implementation and the field type used to analyse suggestion text -->
    <str name="lookupImpl">AnalyzingInfixLookupFactory</str>
    <str name="suggestAnalyzerFieldType">text_general</str>
    <!-- NOTE(review): both automatic builds are off; presumably the client triggers suggest.build explicitly. Confirm. -->
    <str name="buildOnStartup">false</str>
    <str name="buildOnCommit">false</str>
  </lst>
</searchComponent>
<!-- JSON response writer with an explicit UTF-8 content type -->
<queryResponseWriter class="solr.JSONResponseWriter" name="json">
  <str name="content-type">application/json; charset=UTF-8</str>
</queryResponseWriter>
</config>

34
solr/authors/conf/stopwords.txt Executable file
View File

@@ -0,0 +1,34 @@
# English stopwords for author search
a
an
and
are
as
at
be
but
by
for
if
in
into
is
it
no
not
of
on
or
such
that
the
their
then
there
these
they
this
to
was
will
with

9
solr/authors/conf/synonyms.txt Executable file
View File

@@ -0,0 +1,9 @@
# Synonyms for author search
# Format: word1,word2,word3 => synonym1,synonym2
writer,author,novelist
pen name,pseudonym,alias
prolific,productive
acclaimed,famous,renowned
bestselling,popular
contemporary,modern
classic,traditional