de.unidu.is.retrieval.lemur
Class Lemur

java.lang.Object
  extended by de.unidu.is.retrieval.lemur.Lemur

public class Lemur
extends java.lang.Object

A wrapper class for the Lemur (distributed) retrieval system.

Since:
2004-02-18
Version:
$Revision: 1.10 $, $Date: 2005/03/14 17:33:14 $
Author:
Henrik Nottelmann

Field Summary
static int inquery
          The ID for the INQUERY ranking method.
static int kl
          The ID for the KL-Divergence ranking method.
static int okapi
          The ID for the OKAPI ranking method.
static int tfidf
          The ID for the tf.idf ranking method.
 
Constructor Summary
Lemur(java.io.File dir, boolean useSample)
          Creates a new instance and initializes the directories.
 
Method Summary
 java.io.File createTextFile(java.lang.String coll, java.util.Map termMap)
          Creates a text file (with the terms in it) in TREC format for the specified collection from the database.
 void createTopicFile(java.io.File topicFile, Query query, boolean append)
          Creates a topic file (with the terms in it) in TREC format for the specified query.
 void createTopicFile(java.lang.String topicsName, Query query, boolean append)
          Creates a topic file (with the terms in it) in TREC format for the specified query.
 void evaluate(java.lang.String coll, java.io.File topicsFile, java.io.File resultFile, int mode, int numDocs, boolean doLog)
          Evaluates the given topics on the specified collection.
 void evaluate(java.lang.String coll, java.lang.String topicsName, int mode, int numDocs, boolean doLog)
          Evaluates the given topics on the specified collection.
 void evaluateCORI(java.lang.String[] colls, java.io.File topicsFile, java.io.File ranksFile, java.io.File resultFile, int numDocs, int numDLs, boolean doLog)
          Evaluates the given topics on the specified collections, employing resource selection with CORI.
 void evaluateCORI(java.lang.String[] colls, java.lang.String topicsName, int numDocs, int numDLs, boolean doLog)
          Evaluates the given topics on the specified collections, employing resource selection with CORI.
 void generateSmooth(java.lang.String coll, boolean doLog)
          Generates a smoothing file for the collection.
 java.util.List[] getCORIResult(java.lang.String[] colls, Query query, int numDocs, int numDLs, boolean doLog)
          Returns the result for the specified query after resource selection with CORI.
 java.io.File getDir()
          Returns the base directory.
 java.io.File getIndexCORIDir()
          Returns the CORI index directory.
 java.io.File getIndexDir()
          Returns the index directory.
 java.io.File getRanksCORIFile(java.lang.String topicsName)
          Returns the file name for the ranks of the CORI evaluation.
 java.util.List getResult(java.lang.String coll, Query query, int mode, int numDocs, boolean doLog)
          Returns the result for the specified query.
 java.io.File getResultCORIFile(java.lang.String topicsName)
          Returns the file name for the results of the CORI evaluation.
 java.io.File getResultDir()
          Returns the result directory.
 java.io.File getResultFile(java.lang.String topicsName, java.lang.String coll)
          Returns the file name for the evaluation results.
 java.io.File getSampleDir()
          Returns the sample directory.
 java.io.File getTextDir()
          Returns the text directory.
 java.io.File getTopicDir()
          Returns the topics directory.
 void index(java.lang.String coll, boolean doLog)
          Indexes the collection.
 void indexCORI(java.lang.String[] colls, boolean doLog)
          Computes a CORI index and creates a parameter file for each collection; that file is used for retrieval if the collection is selected.
 void setDir(java.io.File file)
          Sets the base directory.
 void setDir(java.io.File file, boolean useSample)
          Sets the base directory.
 void setIndexCORIDir(java.io.File file)
          Sets the CORI index directory.
 void setIndexDir(java.io.File file)
          Sets the index directory.
 void setResultDir(java.io.File file)
          Sets the result directory.
 void setSampleDir(java.io.File file)
          Sets the sample directory.
 void setTextDir(java.io.File file)
          Sets the text directory.
 void setTopicDir(java.io.File file)
          Sets the topic directory.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

tfidf

public static final int tfidf
The ID for the tf.idf ranking method.

See Also:
Constant Field Values

okapi

public static final int okapi
The ID for the OKAPI ranking method.

See Also:
Constant Field Values

kl

public static final int kl
The ID for the KL-Divergence ranking method.

See Also:
Constant Field Values

inquery

public static final int inquery
The ID for the INQUERY ranking method.

See Also:
Constant Field Values
Constructor Detail

Lemur

public Lemur(java.io.File dir,
             boolean useSample)
Creates a new instance and initializes the directories.

Parameters:
dir - base directory
useSample - if true, the sample will be used for the CORI index
Method Detail

createTextFile

public java.io.File createTextFile(java.lang.String coll,
                                   java.util.Map termMap)
Creates a text file (with the terms in it) in TREC format for the specified collection from the database.

Parameters:
coll - collection name
termMap - document-termlist map

index

public void index(java.lang.String coll,
                  boolean doLog)
Indexes the collection.

Parameters:
coll - collection name
doLog - if true, the output of the Lemur program is logged

generateSmooth

public void generateSmooth(java.lang.String coll,
                           boolean doLog)
Generates a smoothing file for the collection.

Parameters:
coll - collection name
doLog - if true, the output of the Lemur program is logged

createTopicFile

public void createTopicFile(java.lang.String topicsName,
                            Query query,
                            boolean append)
Creates a topic file (with the terms in it) in TREC format for the specified query.

Parameters:
topicsName - name for the topic
query - query
append - if true, appends to that file

createTopicFile

public void createTopicFile(java.io.File topicFile,
                            Query query,
                            boolean append)
Creates a topic file (with the terms in it) in TREC format for the specified query.

Parameters:
topicFile - file
query - query
append - if true, appends to that file

getResult

public java.util.List getResult(java.lang.String coll,
                                Query query,
                                int mode,
                                int numDocs,
                                boolean doLog)
Returns the result for the specified query.

Parameters:
coll - collection name
query - query
mode - Lemur mode
numDocs - number of documents
doLog - if true, the output of the Lemur program is logged
Returns:
list of ProbDoc instances

evaluate

public void evaluate(java.lang.String coll,
                     java.lang.String topicsName,
                     int mode,
                     int numDocs,
                     boolean doLog)
Evaluates the given topics on the specified collection.

Parameters:
coll - collection name
topicsName - name of the topics (for the topic and the result file)
mode - Lemur mode
numDocs - number of documents
doLog - if true, the output of the Lemur program is logged

evaluate

public void evaluate(java.lang.String coll,
                     java.io.File topicsFile,
                     java.io.File resultFile,
                     int mode,
                     int numDocs,
                     boolean doLog)
Evaluates the given topics on the specified collection.

Parameters:
coll - collection name
topicsFile - topics file
resultFile - file for the results
mode - Lemur mode
numDocs - number of documents
doLog - if true, the output of the Lemur program is logged

indexCORI

public void indexCORI(java.lang.String[] colls,
                      boolean doLog)
Computes a CORI index and creates a parameter file for each collection; that file is used for retrieval if the collection is selected.

Parameters:
colls - collections to be used for the index
doLog - if true, the output of the Lemur program is logged

getCORIResult

public java.util.List[] getCORIResult(java.lang.String[] colls,
                                      Query query,
                                      int numDocs,
                                      int numDLs,
                                      boolean doLog)
Returns the result for the specified query after resource selection with CORI.

Parameters:
colls - collection names
query - query
numDocs - number of documents
numDLs - number of DLs to be selected
doLog - if true, the output of the Lemur program is logged
Returns:
array of two lists of ProbDoc instances (0: result, 1: ranks)

evaluateCORI

public void evaluateCORI(java.lang.String[] colls,
                         java.lang.String topicsName,
                         int numDocs,
                         int numDLs,
                         boolean doLog)
Evaluates the given topics on the specified collections, employing resource selection with CORI.

Parameters:
colls - collection names
topicsName - name of the topics (for the topic and the result file)
numDocs - number of documents
numDLs - number of DLs to be selected
doLog - if true, the output of the Lemur program is logged

evaluateCORI

public void evaluateCORI(java.lang.String[] colls,
                         java.io.File topicsFile,
                         java.io.File ranksFile,
                         java.io.File resultFile,
                         int numDocs,
                         int numDLs,
                         boolean doLog)
Evaluates the given topics on the specified collections, employing resource selection with CORI.

Parameters:
colls - collection names
topicsFile - topics file
ranksFile - file for the ranks
resultFile - file for the results
numDocs - number of documents
numDLs - number of DLs to be selected
doLog - if true, the output of the Lemur program is logged

getResultFile

public java.io.File getResultFile(java.lang.String topicsName,
                                  java.lang.String coll)
Returns the file name for the evaluation results.

Parameters:
topicsName - name of the topics (for the topic and the result file)
coll - collection name
Returns:
file name for the evaluation results

getRanksCORIFile

public java.io.File getRanksCORIFile(java.lang.String topicsName)
Returns the file name for the ranks of the CORI evaluation.

Parameters:
topicsName - name of the topics (for the topic and the result file)
Returns:
file name for the evaluation ranks

getResultCORIFile

public java.io.File getResultCORIFile(java.lang.String topicsName)
Returns the file name for the results of the CORI evaluation.

Parameters:
topicsName - name of the topics (for the topic and the result file)
Returns:
file name for the evaluation results

getDir

public java.io.File getDir()
Returns the base directory.

Returns:
base directory

getIndexCORIDir

public java.io.File getIndexCORIDir()
Returns the CORI index directory.

Returns:
directory

getIndexDir

public java.io.File getIndexDir()
Returns the index directory.

Returns:
directory

getResultDir

public java.io.File getResultDir()
Returns the result directory.

Returns:
directory

getSampleDir

public java.io.File getSampleDir()
Returns the sample directory.

Returns:
directory

getTextDir

public java.io.File getTextDir()
Returns the text directory.

Returns:
directory

getTopicDir

public java.io.File getTopicDir()
Returns the topics directory.

Returns:
directory

setDir

public void setDir(java.io.File file)
Sets the base directory.

Parameters:
file - base directory

setDir

public void setDir(java.io.File file,
                   boolean useSample)
Sets the base directory.

Parameters:
file - base directory
useSample - if true, uses samples instead of full text

setIndexCORIDir

public void setIndexCORIDir(java.io.File file)
Sets the CORI index directory.

Parameters:
file - directory

setIndexDir

public void setIndexDir(java.io.File file)
Sets the index directory.

Parameters:
file - directory

setResultDir

public void setResultDir(java.io.File file)
Sets the result directory.

Parameters:
file - directory

setSampleDir

public void setSampleDir(java.io.File file)
Sets the sample directory.

Parameters:
file - directory

setTextDir

public void setTextDir(java.io.File file)
Sets the text directory.

Parameters:
file - directory

setTopicDir

public void setTopicDir(java.io.File file)
Sets the topic directory.

Parameters:
file - directory