source: exist/trunk/SetUp/conf.xml @ 2571

Subversion URL: http://proj.badc.rl.ac.uk/svn/ndg/exist/trunk/SetUp/conf.xml@3997
Revision 2571, 21.1 KB checked in by ko23, 12 years ago (diff)
Line 
1<?xml version="1.0" encoding="UTF-8"?><!--
2    This is the central configuration file for the database. If the database
3    is running in a servlet-context, the configuration file will be read from
4    the WEB-INF directory of the web application. Otherwise, the configuration
5    is read from the directory specified by the exist.home system property.
6   
7    Structure of this xml document:
8   
9        exist
10            cluster
11            db-connection
12                default-permissions
13                pool
14                recovery
15                security
16                system-task
17                watchdog
18            indexer
19            serializer
20            transformer
21            xquery
22            xupdate
23   
24   
25    For detailed and latest information please consult the eXist documentation:
26   
27        - http://exist-db.org/configuration.html
28        - http://exist-db.org/cluster.html
29        - http://exist-db.org/documentation.html
30        - http://wiki.exist-db.org/
31--><exist>
32
33    <!--
34        Configures the cluster configuration
35       
36        - dbaUser:
37            defines the user used by the cluster for the replica.
38       
39        - dbaPassword:
40            defines the user password used by the cluster for the replica.
41       
42        - exclude:
43            some collections should be marked as non-replicated, i.e. during
44            the replica phase the eXist cluster environment doesn't send events
45            for these collections. Multiple collections are listed with a comma
46            separator in the value below (this note used to say colon - confirm).
47       
48        - journalDir
49            specifies the folder where the eXist cluster node saves its journal
50            file. Directory is automatically created if it doesn't exist. If no
51            journaldir is specified, journal is disabled.
52       
53        - protocol:
54            defines the protocol stack. Refers to the JGroups protocol stack.
55   -->
56    <cluster dbaUser="admin"
               dbaPassword=""
               exclude="/db/system,/db/system/config"
               journalDir="data/journal"
               protocol="UDP(mcast_addr=228.1.2.3;mcast_port=45566;ip_ttl=8;ip_mcast=true;mcast_send_buf_size=800000;mcast_recv_buf_size=150000;ucast_send_buf_size=800000;ucast_recv_buf_size=150000;loopback=true):PING(timeout=2000;num_initial_members=3;up_thread=true;down_thread=true):MERGE2(min_interval=10000;max_interval=20000):FD(shun=true;up_thread=true;down_thread=true;timeout=2500;max_tries=5):VERIFY_SUSPECT(timeout=3000;num_msgs=3;up_thread=true;down_thread=true):pbcast.NAKACK(gc_lag=50;retransmit_timeout=300,600,1200,2400,4800;max_xmit_size=8192;up_thread=true;down_thread=true):UNICAST(timeout=300,600,1200,2400,4800;window_size=100;min_threshold=10;down_thread=true):pbcast.STABLE(desired_avg_gossip=20000;up_thread=true;down_thread=true):FRAG(frag_size=8192;down_thread=true;up_thread=true):pbcast.GMS(join_timeout=5000;join_retry_timeout=2000;shun=true;print_local_addr=true)"/>
57
58    <!--
59        Configures the database backend:
60       
61        - cacheSize:
62            the maximum amount of memory to use for database page buffers.
63            Each database file has an associated page buffer for B+-tree and
64            data pages. However, the memory specified via cacheSize is shared
65            between all page buffers. It represents an absolute maximum, which
66            would be occupied if all page buffers were completely full.
67           
68            The cacheSize should not be more than half of the size of the JVM
69            heap size (set by the JVM -Xmx parameter).
70            KDO note: I believe the heap size in question is Tomcat's JVM setting - mine's set to 786M
71
72                Set to 378M from 48M - KDO
73       
74        - collectionCacheSize:
75            not documented.
76       
77        - database:
78            selects a database backend. Currently, "native" and "native_cluster"
79            are the only valid settings.
80       
81        - files:
82            path to the directory where database files are stored.
83       
84        - free_mem_min:
85            minimum amount of free memory in percent during indexing. The
86            database will start to flush its internal buffers whenever
87            free memory drops below the specified limit.
88       
89        - pageSize:
90            the size of one page on the disk. This is the smallest unit
91            transferred from and to the database files. Should be a multiple of
92            the operating system's file system page size (usually 4096).
93
94        =====================================================================
95       
96        The stock settings are very conservative to avoid out-of-memory
97        exceptions on machines with limited memory (256MB); cacheSize is the
98        exception here, having been raised from 48M to 378M (see above).
99        Increase the buffer settings for elements_buffers and words_buffers if
100        you have some more memory to waste. If you deal with lots of
101        collections, you can also increase the collectionCacheSize value
102    -->
103    <db-connection cacheSize="378M" collectionCacheSize="128" database="native" files="data" free_mem_min="5" pageSize="4096">
104       
105        <!--
106            Specifies the default permissions for all resources and collections
107            in eXist (see  User Authentication and Access Control). When this is
108            not configured, the default "mod" (similar to the Unix "chmod"
109            command) is set to 0775 in the resources and collections attributes.
110            A different default value may be set for a database instance, and
111            local overrides are also possible.
112        -->
113        <!--default-permissions collection="0775" resource="0775" /-->
114       
115        <!--
116            Settings for the database connection pool:
117           
118            - min:
119                minimum number of connections to keep alive.
120           
121            - max:
122                maximum number of connections allowed.
123           
124            - sync-period:
125                defines how often the database will flush its
126                internal buffers to disk. The sync thread will interrupt
127                normal database operation after the specified number of
128                milliseconds and write all dirty pages to disk.
129           
130            - wait-before-shutdown:
131                defines how long the database instance will wait for running
132                operations to complete before it forces a shutdown.
133        -->
134        <pool min="1"
                 max="20"
                 sync-period="120000"
                 wait-before-shutdown="120000"/>
135
136        <!--
137            Settings for the journaling and recovery of the database. With
138            recovery enabled, the database is able to recover from an unclean
139            database shutdown due to, for example, power failures, OS reboots,
140            and hanging processes. For this to work correctly, all database
141            operations must be logged to a journal file.
142                     
143            - enabled:
144                if this attribute is set to yes, automatic recovery is enabled.
145           
146            - group-commit:
147                not documented.
148           
149            - journal-dir:     
150                this attribute sets the directory where journal files are to be
151                written. If no directory is specified, the default path is to
152                the data directory.
153           
154            - size:
155                this attribute sets the maximum allowed size of the journal
156                file. Once the journal reaches this limit, a checkpoint will be
157                triggered and the journal will be cleaned. However, the database
158                waits for running transactions to return before processing this
159                checkpoint. In the event one of these transactions writes a lot
160                of data to the journal file, the file will grow until the
161                transaction has completed. Hence, the size limit is not enforced
162                in all cases.
163           
164            - sync-on-commit:
165                this attribute determines whether or not to protect the journal
166                during operating system failures. That is, it determines whether
167                the database forces a file-sync on the journal after every
168                commit.
169                If this attribute is set to "yes", the journal is protected
170                against operating system failures. However, this will slow
171                performance - especially on Windows systems.
172                If set to "no", eXist will rely on the operating system to flush
173                out the journal contents to disk. In the worst case scenario,
174                in which there is a complete system failure, some committed
175                transactions might not have yet been written to the journal,
176                and so will be rolled back.
177        -->
178        <recovery enabled="yes"
                     journal-dir="data"
                     size="100M"
                     group-commit="no"
                     sync-on-commit="no"/>
179       
180        <!--
181            The <security> element in the <db-connection> node is used to select
182            the security manager Class and control the database of users and
183            groups.
184           
185            - class:
186                            this attribute is required, and specifies a Java class name used
187                            to implement the org.exist.security.SecurityManager interface,
188                            as in the following example:
189           
190            Example: <security> class Attribute (LDAP)
191            <security class="org.exist.security.LDAPSecurityManager" />
192           
193            eXist is distributed with the following built-in security manager
194            implementations:
195           
196            - org.exist.security.XMLSecurityManager
197           
198                stores the user information in the database. This is the
199                default manager if the <security> element is not included
200                in <db-connection>.
201           
202            - org.exist.security.LDAPSecurityManager
203           
204                retrieves the user and groups from the LDAP database. This
205                requires additional configuration parameters which are
206                described in the LDAP Security Manager documentation.
207           
208            - password-encoding:
209                password encoding can be set to one of the following types:
210           
211                - plain:
212                    stores passwords as plain text (no encryption).
213           
214                - md5: (default)
215                    applies the MD5 algorithm to encrypt passwords.
216           
217                - simple-md5:
218                    applies a simplified MD5 algorithm to encrypt passwords.
219           
220            - password-realm:
221                the realm to use for basic auth or http-digest password
222                challenges.
223        -->
224        <!-- security class="org.exist.security.LDAPSecurityManager" /-->
225       
226        <!--
227            Tasks used for system maintenance. System tasks require the database
228            to be in a consistent state. All database operations will be stopped
229            until the task's method returns or throws an exception. Any exception
230            will be caught and a warning written to the log.
231           
232            Currently only the databackup task is available.
233        -->
234        <!--system-task class="org.exist.storage.DataBackup" period="120000">
235            <parameter name="output-dir" value="backup" />
236        </system-task-->
237
238        <!-- 
239            This is the global configuration for the query watchdog. The
240            watchdog monitors all query processes, and can terminate any
241            long-running queries if they exceed one of the predefined limits.
242            These limits are as follows:
243           
244            - output-size-limit
245                this attribute limits the size of XML fragments constructed
246                using XQuery, and thus sets the maximum amount of main memory a
247                query is allowed to use. This limit is expressed as the maximum
248                number of nodes allowed for an in-memory DOM tree. The purpose
249                of this option is to avoid memory shortages on the server in
250                cases where users are allowed to run queries that produce very
251                large output fragments.
252
253                        Watchdog set to -1 from 10000 (i.e. disabled) - KDO
254           
255            - query-timeout
256                this attribute sets the maximum amount of time (expressed in
257                milliseconds) that the query can take before it is killed.
258           
259        -->
260        <watchdog output-size-limit="-1"
                     query-timeout="-1"/>
261       
262    </db-connection>
263   
264    <!--
265        Settings for the indexer:
266       
267        - caseSensitive
268            should equality comparisons between strings be case-sensitive or
269            insensitive: "yes" or "no".
270       
271        - index-depth
272            defines the maximum nesting depth of nodes which will be indexed
273            in the DOM index. Nodes below the specified nesting depth will
274            not be indexed in the DOM file. This has only an effect when
275            retrieving query results or for some types of XPath subexpressions,
276            like equality comparisons.
277       
278        - stemming
279            eXist includes a very simple English language stemmer, based on
280            Porter's algorithm. Set the "stemming" option to "yes" if you
281            would like to use stemming. This only works for English.
282       
283        - suppress-whitespace
284            should leading or trailing whitespace be removed from a text node?
285            Set to "leading", "trailing", "both" or "none".
286            Changing the parameter will only have an effect on newly loaded
287            files, not old ones.
288       
289        - preserve-whitespace-mixed-content
290            preserve the white space inside a mixed content node: "yes" or "no".
291       
292        - tokenizer:
293            this attribute invokes the Java class used to tokenize a string into
294            a sequence of single words or tokens, which are stored to the
295            fulltext index. Currently only the SimpleTokenizer is available.
296       
297        - track-term-freq:
298            not documented.
299       
300        - validation:
301            should XML source files be validated against a schema or DTD before
302            storing them? The setting is passed to the XML parser. The actual
303            effects depend on the parser you use. eXist comes with Xerces which
304            can validate against both: schemas and DTDs.
305            Possible values: "yes", "no", "auto". "auto" will leave validation
306            to the parser.       
307
308        Validation set to "no" from "auto" - KDO
309    -->
310    <indexer caseSensitive="yes"
                index-depth="5"
                preserve-whitespace-mixed-content="no"
                stemming="no"
                suppress-whitespace="both"
                tokenizer="org.exist.storage.analysis.SimpleTokenizer"
                track-term-freq="yes"
                validation="no">
311       
312        <!--
313            Names the file that holds the list of stopwords. Stopwords
314            are left out of the fulltext index.
315        -->
316        <stopwords file="stopword"/>
317       
318        <!--
319            Index defaults, used for any collection that has no
320            collection-specific configuration of its own.
321        -->
322        <index>
323            <fulltext default="all" attributes="true">
324                <exclude path="/auth"/>
325            </fulltext>
326        </index>
327       
328        <!--
329            Location of the catalog file(s) consulted when resolving
330            external entities in XML documents.
331        -->
332        <entity-resolver>
333            <catalog file="catalog.xml"/>
334        </entity-resolver>
335    </indexer>
336       
337    <!--
338        Default settings for the serializer. Most of these can be changed
339        by client code:
340
341        - add-exist-id:
342            for debugging: add an exist:id attribute to every element, showing
343            the internal node identifier (as a long int) assigned to this node.
344            Possible values are: "none", "element", "all". "all" displays the
345            node of every element node; "element" displays the id only for the
346            root nodes of the returned XML fragments.
347           
348       - compress-output:
349           should the output be compressed when serializing documents?
350           Sometimes useful with remote clients.
351           Remember to add a statement like this to your client code:
352           service.setProperty("compress-output", "yes");
353           to uncompress the retrieved result in the client too.
354       
355        - enable-xinclude:
356            should the database expand XInclude tags by default?
357       
358        - enable-xsl:
359            should the database evaluate XSL processing instructions
360            when serializing documents?
361       
362        - indent:
363            should the serializer pretty-print (indent) XML?
364       
365        - match-tagging-attributes:
366            matches for attribute values can also be tagged using the character
367            sequence "||" to demarcate the matching text string. Since this
368            changes the content of the attribute value, the feature is disabled
369            by default.
370       
371        - match-tagging-elements:
372            the database can highlight matches in the text content of a node by
373            tagging the matching text string with <exist:match> . Clearly, this
374            only works for XPath expressions using the fulltext index.
375           
376            Set the parameter to "yes" to enable this feature, or "no" to disable it.
377
378    -->
379    <serializer add-exist-id="none"
                   compress-output="no"
                   enable-xinclude="yes"
                   enable-xsl="no"
                   indent="yes"
                   match-tagging-attributes="no"
                   match-tagging-elements="yes"/>
380
381        <!--
382        Default settings for the XSLT Transformer. Allows for a choice of
383        implementation:
384       
385        - class:
386            the name of the class that implements javax.xml.transform.TransformerFactory
387           
388            for Saxon (XSLT 2.0 support) - net.sf.saxon.TransformerFactoryImpl
389                You will need to copy saxon8.jar saxon8-dom.jar and saxon8-xpath.jar
390                into lib/endorsed.
391                You can get these from http://sourceforge.net/projects/saxon
392           
393            for Xalan (XSLT 1.0 support) - org.apache.xalan.processor.TransformerFactoryImpl
394           
395           
396        For further details see - http://wiki.exist-db.org/space/Howtos/Adding+XSLT+2.0+%28Saxon%29
397       
398    -->
399    <transformer class="org.apache.xalan.processor.TransformerFactoryImpl"/> <!-- Xalan is selected here (XSLT 1.0 support) -->
400   
401   
402    <!--
403        Define modules that contain XQuery functions.
404       
405            - enable-java-binding:
406                eXist supports calls to arbitrary Java methods from within
407                XQuery. Setting to "yes" might introduce a security risk. 
408    -->
409    <xquery enable-java-binding="no">
410        <builtin-modules>
411            <module uri="http://exist-db.org/xquery/util"
                       class="org.exist.xquery.functions.util.UtilModule"/>
412            <module uri="http://exist-db.org/xquery/transform"
                       class="org.exist.xquery.functions.transform.TransformModule"/>
413            <module uri="http://exist-db.org/xquery/xmldb"
                       class="org.exist.xquery.functions.xmldb.XMLDBModule"/>
414            <module uri="http://exist-db.org/xquery/request"
                       class="org.exist.xquery.functions.request.RequestModule"/>
415            <module uri="http://exist-db.org/xquery/response"
                       class="org.exist.xquery.functions.response.ResponseModule"/>
416            <module uri="http://exist-db.org/xquery/session"
                       class="org.exist.xquery.functions.session.SessionModule"/>
417            <module uri="http://exist-db.org/xquery/text"
                       class="org.exist.xquery.functions.text.TextModule"/>
418            <module uri="http://exist-db.org/xquery/examples"
                       class="org.exist.xquery.modules.example.ExampleModule"/>
419            <module uri="http://exist-db.org/xquery/validation"
                       class="org.exist.xquery.functions.validation.ValidationModule"/>
420            <module uri="http://exist-db.org/xquery/system"
                       class="org.exist.xquery.functions.system.SystemModule"/>
421        </builtin-modules>
422    </xquery>
423
424
425    <!--
426        During XUpdates, the database needs to do a partial reindex of the
427      document whenever the internal node-id structure has changed. Reindex
428      runs can occur quite frequently and slow down the XUpdate process.
429   
430      Frequent reindex runs can be avoided by leaving some space between the
431      numeric identifiers assigned to every node. Future insertions will first
432      fill up these spare identifiers, so no reindex is required.
433
434        - allowed-fragmentation:
435            defines the maximum number of page splits allowed within a document
436            before a defragmentation run will be triggered.
437           
438      - enable-consistency-checks:
439            for debugging only. If the parameter is set to "yes", a consistency
440            check will be run on every modified document after every XUpdate
441            request. It checks if the persistent DOM is complete and all
442            pointers in the structural index point to valid storage addresses
443            containing valid nodes.
444       
445        - growth-factor:
446            determines the number of spare ids to be inserted whenever the node
447            id scheme is recomputed after an XUpdate.
448            Increase the setting to have fewer reindex runs. However, you have
449            to be aware that leaving spare ids also limits the maximum size of
450            a document that can be indexed.
451       
452        -->
453    <xupdate allowed-fragmentation="5"
                enable-consistency-checks="no"
                growth-factor="20"/>
454
455</exist>
Note: See TracBrowser for help on using the repository browser.