// in solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java
public List<String> getLines(String resource) throws IOException {
return Arrays.asList(text.split("\n"));
}
// in solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java
public InputStream openResource(String resource) throws IOException {
return new ByteArrayInputStream(text.getBytes("UTF-8"));
}
// in solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
protected void indexDoc(SolrInputDocument doc) throws IOException, SolrServerException {
controlClient.add(doc);
int which = (doc.getField(id).toString().hashCode() & 0x7fffffff) % clients.size();
SolrServer client = clients.get(which);
client.add(doc);
}
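// --- Illustrative sketch (not from the Solr sources) ---
// The indexDoc method above routes each document to one shard client by
// hashing its id; the 0x7fffffff mask keeps the hash non-negative before the
// modulo. Class name and shard count below are assumptions for the example.
public class ShardRoutingSketch {
    public static void main(String[] args) {
        int numShards = 3; // assumed number of shard clients
        for (String id : new String[] {"doc-1", "doc-2", "doc-3"}) {
            int which = (id.hashCode() & 0x7fffffff) % numShards;
            System.out.println(id + " -> shard client " + which);
        }
    }
}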
// in solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
protected UpdateResponse add(SolrServer server, SolrParams params, SolrInputDocument... sdocs) throws IOException, SolrServerException {
UpdateRequest ureq = new UpdateRequest();
ureq.setParams(new ModifiableSolrParams(params));
for (SolrInputDocument sdoc : sdocs) {
ureq.add(sdoc);
}
return ureq.process(server);
}
// in solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
protected UpdateResponse del(SolrServer server, SolrParams params, Object... ids) throws IOException, SolrServerException {
UpdateRequest ureq = new UpdateRequest();
ureq.setParams(new ModifiableSolrParams(params));
for (Object id: ids) {
ureq.deleteById(id.toString());
}
return ureq.process(server);
}
// in solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java
protected UpdateResponse delQ(SolrServer server, SolrParams params, String... queries) throws IOException, SolrServerException {
UpdateRequest ureq = new UpdateRequest();
ureq.setParams(new ModifiableSolrParams(params));
for (String q: queries) {
ureq.deleteByQuery(q);
}
return ureq.process(server);
}
// in solr/test-framework/src/java/org/apache/solr/update/processor/BufferingRequestProcessor.java
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
addCommands.add( cmd );
}
// in solr/test-framework/src/java/org/apache/solr/update/processor/BufferingRequestProcessor.java
@Override
public void processDelete(DeleteUpdateCommand cmd) throws IOException {
deleteCommands.add( cmd );
}
// in solr/test-framework/src/java/org/apache/solr/update/processor/BufferingRequestProcessor.java
@Override
public void processCommit(CommitUpdateCommand cmd) throws IOException {
commitCommands.add( cmd );
}
// in solr/test-framework/src/java/org/apache/solr/update/processor/BufferingRequestProcessor.java
@Override
public void processRollback(RollbackUpdateCommand cmd) throws IOException
{
rollbackCommands.add( cmd );
}
// in solr/test-framework/src/java/org/apache/solr/update/processor/BufferingRequestProcessor.java
@Override
public void finish() throws IOException {
// nothing?
}
// in solr/test-framework/src/java/org/apache/solr/core/MockDirectoryFactory.java
@Override
protected Directory create(String path) throws IOException {
MockDirectoryWrapper dir = LuceneTestCase.newDirectory();
// Somehow removing unref'd files in Solr tests causes
// problems... there's some interaction w/
// CachingDirectoryFactory. Once we track down where Solr
// isn't closing an IW, we can re-enable this:
dir.setAssertNoUnrefencedFilesOnClose(false);
return dir;
}
// in solr/test-framework/src/java/org/apache/solr/core/MockFSDirectoryFactory.java
@Override
public Directory create(String path) throws IOException {
MockDirectoryWrapper dir = LuceneTestCase.newFSDirectory(new File(path));
// Somehow removing unref'd files in Solr tests causes
// problems... there's some interaction w/
// CachingDirectoryFactory. Once we track down where Solr
// isn't closing an IW, we can re-enable this:
dir.setAssertNoUnrefencedFilesOnClose(false);
return dir;
}
// in solr/test-framework/src/java/org/apache/solr/util/TestHarness.java
public String validateQuery(SolrQueryRequest req, String... tests)
throws IOException, Exception {
String res = query(req);
return validateXPath(res, tests);
}
// in solr/test-framework/src/java/org/apache/solr/util/TestHarness.java
public String query(SolrQueryRequest req) throws IOException, Exception {
return query(req.getParams().get(CommonParams.QT), req);
}
// in solr/test-framework/src/java/org/apache/solr/util/TestHarness.java
public String query(String handler, SolrQueryRequest req) throws IOException, Exception {
try {
SolrQueryResponse rsp = new SolrQueryResponse();
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
core.execute(core.getRequestHandler(handler),req,rsp);
if (rsp.getException() != null) {
throw rsp.getException();
}
StringWriter sw = new StringWriter(32000);
QueryResponseWriter responseWriter = core.getQueryResponseWriter(req);
responseWriter.write(sw,req,rsp);
req.close();
return sw.toString();
} finally {
req.close();
SolrRequestInfo.clearRequestInfo();
}
}
// in solr/test-framework/src/java/org/apache/solr/util/AbstractSolrTestCase.java
public static File getFile(String name) throws IOException {
return SolrTestCaseJ4.getFile(name);
}
// in solr/solrj/src/java/org/apache/solr/common/cloud/DefaultConnectionStrategy.java
@Override
public void connect(String serverAddress, int timeout, Watcher watcher, ZkUpdate updater) throws IOException, InterruptedException, TimeoutException {
updater.update(new SolrZooKeeper(serverAddress, timeout, watcher));
}
// in solr/solrj/src/java/org/apache/solr/common/cloud/DefaultConnectionStrategy.java
@Override
public void reconnect(final String serverAddress, final int zkClientTimeout,
final Watcher watcher, final ZkUpdate updater) throws IOException {
log.info("Connection expired - starting a new one...");
try {
updater
.update(new SolrZooKeeper(serverAddress, zkClientTimeout, watcher));
log.info("Reconnected to ZooKeeper");
} catch (Exception e) {
SolrException.log(log, "Reconnect to ZooKeeper failed", e);
log.info("Reconnect to ZooKeeper failed");
}
}
// in solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
public void makePath(String path, File file, boolean failOnExists, boolean retryOnConnLoss)
throws IOException, KeeperException, InterruptedException {
makePath(path, FileUtils.readFileToString(file).getBytes("UTF-8"),
CreateMode.PERSISTENT, null, failOnExists, retryOnConnLoss);
}
// in solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
public void makePath(String path, File file, boolean retryOnConnLoss) throws IOException,
KeeperException, InterruptedException {
makePath(path, FileUtils.readFileToString(file).getBytes("UTF-8"), retryOnConnLoss);
}
// in solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
public void setData(String path, File file, boolean retryOnConnLoss) throws IOException,
KeeperException, InterruptedException {
if (log.isInfoEnabled()) {
log.info("Write to ZooKeepeer " + file.getAbsolutePath() + " to " + path);
}
String data = FileUtils.readFileToString(file);
setData(path, data.getBytes("UTF-8"), retryOnConnLoss);
}
// in solr/solrj/src/java/org/apache/solr/common/cloud/ConnectionManager.java
public synchronized void process(WatchedEvent event) {
if (log.isInfoEnabled()) {
log.info("Watcher " + this + " name:" + name + " got event " + event
+ " path:" + event.getPath() + " type:" + event.getType());
}
state = event.getState();
if (state == KeeperState.SyncConnected) {
connected = true;
clientConnected.countDown();
} else if (state == KeeperState.Expired) {
connected = false;
log.info("Attempting to reconnect to recover relationship with ZooKeeper...");
try {
connectionStrategy.reconnect(zkServerAddress, zkClientTimeout, this,
new ZkClientConnectionStrategy.ZkUpdate() {
@Override
public void update(SolrZooKeeper keeper)
throws InterruptedException, TimeoutException, IOException {
synchronized (connectionStrategy) {
waitForConnected(SolrZkClient.DEFAULT_CLIENT_CONNECT_TIMEOUT);
client.updateKeeper(keeper);
if (onReconnect != null) {
onReconnect.command();
}
synchronized (ConnectionManager.this) {
ConnectionManager.this.connected = true;
}
}
}
});
} catch (Exception e) {
SolrException.log(log, "", e);
}
log.info("Connected:" + connected);
}
}
// in solr/solrj/src/java/org/apache/solr/common/cloud/ConnectionManager.java
@Override
public void update(SolrZooKeeper keeper)
throws InterruptedException, TimeoutException, IOException {
synchronized (connectionStrategy) {
waitForConnected(SolrZkClient.DEFAULT_CLIENT_CONNECT_TIMEOUT);
client.updateKeeper(keeper);
if (onReconnect != null) {
onReconnect.command();
}
synchronized (ConnectionManager.this) {
ConnectionManager.this.connected = true;
}
}
}
// in solr/solrj/src/java/org/apache/solr/common/cloud/ConnectionManager.java
public synchronized void waitForConnected(long waitForConnection)
throws InterruptedException, TimeoutException, IOException {
long expire = System.currentTimeMillis() + waitForConnection;
long left = waitForConnection;
while (!connected && left > 0) {
wait(left);
left = expire - System.currentTimeMillis();
}
if (!connected) {
throw new TimeoutException("Could not connect to ZooKeeper " + zkServerAddress + " within " + waitForConnection + " ms");
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/StrUtils.java
public static void partialURLEncodeVal(Appendable dest, String val) throws IOException {
for (int i=0; i<val.length(); i++) {
char ch = val.charAt(i);
if (ch < 32) {
dest.append('%');
if (ch < 0x10) dest.append('0');
dest.append(Integer.toHexString(ch));
} else {
switch (ch) {
case ' ': dest.append('+'); break;
case '&': dest.append("%26"); break;
case '%': dest.append("%25"); break;
case '=': dest.append("%3D"); break;
case '+': dest.append("%2B"); break;
default : dest.append(ch); break;
}
}
}
}
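// --- Illustrative sketch (not from the Solr sources) ---
// Rough usage of partialURLEncodeVal above: spaces become '+', the characters
// '&', '%', '=', '+' are percent-encoded, control characters come out as %0X,
// and everything else passes through unchanged. Class name is an assumption.
import org.apache.solr.common.util.StrUtils;

public class PartialEncodeSketch {
    public static void main(String[] args) throws Exception {
        StringBuilder sb = new StringBuilder();
        StrUtils.partialURLEncodeVal(sb, "q=title:foo & bar+baz");
        System.out.println(sb); // expected: q%3Dtitle:foo+%26+bar%2Bbaz
    }
}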
// in solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java
public InputStream getStream() throws IOException {
URLConnection conn = this.url.openConnection();
contentType = conn.getContentType();
name = url.toExternalForm();
size = new Long( conn.getContentLength() );
return conn.getInputStream();
}
// in solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java
public InputStream getStream() throws IOException {
return new FileInputStream( file );
}
// in solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java
@Override
public Reader getReader() throws IOException {
String charset = getCharsetFromContentType( contentType );
return charset == null
? new FileReader( file )
: new InputStreamReader( getStream(), charset );
}
// in solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java
public InputStream getStream() throws IOException {
return new ByteArrayInputStream( str.getBytes(DEFAULT_CHARSET) );
}
// in solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java
@Override
public Reader getReader() throws IOException {
String charset = getCharsetFromContentType( contentType );
return charset == null
? new StringReader( str )
: new InputStreamReader( getStream(), charset );
}
// in solr/solrj/src/java/org/apache/solr/common/util/ContentStreamBase.java
public Reader getReader() throws IOException {
String charset = getCharsetFromContentType( getContentType() );
return charset == null
? new InputStreamReader( getStream(), DEFAULT_CHARSET )
: new InputStreamReader( getStream(), charset );
}
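// --- Illustrative sketch (not from the Solr sources) ---
// Rough usage of the StringStream variant above: when the content type carries
// a charset it is used for getReader(), otherwise the reader falls back as the
// code shows. Class name is an assumption.
import java.io.Reader;
import org.apache.solr.common.util.ContentStreamBase;

public class ContentStreamSketch {
    public static void main(String[] args) throws Exception {
        ContentStreamBase.StringStream cs = new ContentStreamBase.StringStream("hello solr");
        cs.setContentType("text/plain; charset=UTF-8");
        Reader r = cs.getReader(); // charset parsed from the content type
        StringBuilder sb = new StringBuilder();
        int ch;
        while ((ch = r.read()) != -1) sb.append((char) ch);
        System.out.println(sb); // hello solr
    }
}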
// in solr/solrj/src/java/org/apache/solr/common/util/XML.java
public static void escapeCharData(String str, Writer out) throws IOException {
escape(str, out, chardata_escapes);
}
// in solr/solrj/src/java/org/apache/solr/common/util/XML.java
public static void escapeAttributeValue(String str, Writer out) throws IOException {
escape(str, out, attribute_escapes);
}
// in solr/solrj/src/java/org/apache/solr/common/util/XML.java
public static void escapeAttributeValue(char [] chars, int start, int length, Writer out) throws IOException {
escape(chars, start, length, out, attribute_escapes);
}
// in solr/solrj/src/java/org/apache/solr/common/util/XML.java
public final static void writeXML(Writer out, String tag, String val) throws IOException {
out.write('<');
out.write(tag);
if (val == null) {
out.write('/');
out.write('>');
} else {
out.write('>');
escapeCharData(val,out);
out.write('<');
out.write('/');
out.write(tag);
out.write('>');
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/XML.java
public final static void writeUnescapedXML(Writer out, String tag, String val, Object... attrs) throws IOException {
out.write('<');
out.write(tag);
for (int i=0; i<attrs.length; i++) {
out.write(' ');
out.write(attrs[i++].toString());
out.write('=');
out.write('"');
out.write(attrs[i].toString());
out.write('"');
}
if (val == null) {
out.write('/');
out.write('>');
} else {
out.write('>');
out.write(val);
out.write('<');
out.write('/');
out.write(tag);
out.write('>');
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/XML.java
public final static void writeXML(Writer out, String tag, String val, Object... attrs) throws IOException {
out.write('<');
out.write(tag);
for (int i=0; i<attrs.length; i++) {
out.write(' ');
out.write(attrs[i++].toString());
out.write('=');
out.write('"');
escapeAttributeValue(attrs[i].toString(), out);
out.write('"');
}
if (val == null) {
out.write('/');
out.write('>');
} else {
out.write('>');
escapeCharData(val,out);
out.write('<');
out.write('/');
out.write(tag);
out.write('>');
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/XML.java
public static void writeXML(Writer out, String tag, String val, Map<String, String> attrs) throws IOException {
out.write('<');
out.write(tag);
for (Map.Entry<String, String> entry : attrs.entrySet()) {
out.write(' ');
out.write(entry.getKey());
out.write('=');
out.write('"');
escapeAttributeValue(entry.getValue(), out);
out.write('"');
}
if (val == null) {
out.write('/');
out.write('>');
} else {
out.write('>');
escapeCharData(val,out);
out.write('<');
out.write('/');
out.write(tag);
out.write('>');
}
}
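// --- Illustrative sketch (not from the Solr sources) ---
// Rough usage of the writeXML overloads above: attribute values and character
// data are escaped, and a null value yields a self-closing element. Class name
// is an assumption; the exact escape tables live in XML.java.
import java.io.StringWriter;
import org.apache.solr.common.util.XML;

public class XmlWriteSketch {
    public static void main(String[] args) throws Exception {
        StringWriter sw = new StringWriter();
        XML.writeXML(sw, "field", "a < b & c", "name", "title");
        XML.writeXML(sw, "field", null, "name", "empty");
        System.out.println(sw);
        // roughly: <field name="title">a &lt; b &amp; c</field><field name="empty"/>
    }
}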
// in solr/solrj/src/java/org/apache/solr/common/util/XML.java
private static void escape(char [] chars, int offset, int length, Writer out, String [] escapes) throws IOException{
for (int i=offset; i<length; i++) {
char ch = chars[i];
if (ch<escapes.length) {
String replacement = escapes[ch];
if (replacement != null) {
out.write(replacement);
continue;
}
}
out.write(ch);
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/XML.java
private static void escape(String str, Writer out, String[] escapes) throws IOException {
for (int i=0; i<str.length(); i++) {
char ch = str.charAt(i);
if (ch<escapes.length) {
String replacement = escapes[ch];
if (replacement != null) {
out.write(replacement);
continue;
}
}
out.write(ch);
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
@Override
public int read() throws IOException {
if (pos >= end) {
refill();
if (pos >= end) return -1;
}
return buf[pos++] & 0xff;
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public int peek() throws IOException {
if (pos >= end) {
refill();
if (pos >= end) return -1;
}
return buf[pos] & 0xff;
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public int readUnsignedByte() throws IOException {
if (pos >= end) {
refill();
if (pos >= end) {
throw new EOFException();
}
}
return buf[pos++] & 0xff;
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public int readWrappedStream(byte[] target, int offset, int len) throws IOException {
return in.read(target, offset, len);
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public void refill() throws IOException {
// this will set end to -1 at EOF
end = readWrappedStream(buf, 0, buf.length);
if (end > 0) readFromStream += end;
pos = 0;
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
@Override
public int available() throws IOException {
return end - pos;
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
@Override
public int read(byte b[], int off, int len) throws IOException {
int r=0; // number of bytes we have read
// first read from our buffer;
if (end-pos > 0) {
r = Math.min(end-pos, len);
System.arraycopy(buf, pos, b, off, r);
pos += r;
}
if (r == len) return r;
// amount left to read is >= buffer size
if (len-r >= buf.length) {
int ret = readWrappedStream(b, off+r, len-r);
if (ret >= 0) {
readFromStream += ret;
r += ret;
return r;
} else {
// negative return code
return r > 0 ? r : -1;
}
}
refill();
// read rest from our buffer
if (end-pos > 0) {
int toRead = Math.min(end-pos, len-r);
System.arraycopy(buf, pos, b, off+r, toRead);
pos += toRead;
r += toRead;
return r;
}
return r > 0 ? r : -1;
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
@Override
public void close() throws IOException {
in.close();
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public void readFully(byte b[]) throws IOException {
readFully(b, 0, b.length);
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public void readFully(byte b[], int off, int len) throws IOException {
while (len>0) {
int ret = read(b, off, len);
if (ret==-1) {
throw new EOFException();
}
off += ret;
len -= ret;
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public int skipBytes(int n) throws IOException {
if (end-pos >= n) {
pos += n;
return n;
}
if (end-pos<0) return -1;
int r = end-pos;
pos = end;
while (r < n) {
refill();
if (end-pos <= 0) return r;
int toRead = Math.min(end-pos, n-r);
r += toRead;
pos += toRead;
}
return r;
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public boolean readBoolean() throws IOException {
return readByte()==1;
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public byte readByte() throws IOException {
if (pos >= end) {
refill();
if (pos >= end) throw new EOFException();
}
return buf[pos++];
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public short readShort() throws IOException {
return (short)((readUnsignedByte() << 8) | readUnsignedByte());
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public int readUnsignedShort() throws IOException {
return (readUnsignedByte() << 8) | readUnsignedByte();
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public char readChar() throws IOException {
return (char)((readUnsignedByte() << 8) | readUnsignedByte());
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public int readInt() throws IOException {
return ((readUnsignedByte() << 24)
|(readUnsignedByte() << 16)
|(readUnsignedByte() << 8)
| readUnsignedByte());
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public long readLong() throws IOException {
return (((long)readUnsignedByte()) << 56)
| (((long)readUnsignedByte()) << 48)
| (((long)readUnsignedByte()) << 40)
| (((long)readUnsignedByte()) << 32)
| (((long)readUnsignedByte()) << 24)
| (readUnsignedByte() << 16)
| (readUnsignedByte() << 8)
| (readUnsignedByte());
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public float readFloat() throws IOException {
return Float.intBitsToFloat(readInt());
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public double readDouble() throws IOException {
return Double.longBitsToDouble(readLong());
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public String readLine() throws IOException {
return new DataInputStream(this).readLine();
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastInputStream.java
public String readUTF() throws IOException {
return new DataInputStream(this).readUTF();
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
@Override
public void write(int b) throws IOException {
write((byte)b);
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
@Override
public void write(byte b[]) throws IOException {
write(b,0,b.length);
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
public void write(byte b) throws IOException {
if (pos >= buf.length) {
out.write(buf);
written += pos;
pos=0;
}
buf[pos++] = b;
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
@Override
public void write(byte arr[], int off, int len) throws IOException {
int space = buf.length - pos;
if (len < space) {
System.arraycopy(arr, off, buf, pos, len);
pos += len;
} else if (len<buf.length) {
// if the data to write is small enough, buffer it.
System.arraycopy(arr, off, buf, pos, space);
out.write(buf);
written += buf.length;
pos = len-space;
System.arraycopy(arr, off+space, buf, 0, pos);
} else {
if (pos>0) {
out.write(buf,0,pos); // flush
written += pos;
pos=0;
}
// don't buffer, just write to sink
out.write(arr, off, len);
written += len;
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
public void reserve(int len) throws IOException {
if (len > (buf.length - pos))
flushBuffer();
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
public void writeBoolean(boolean v) throws IOException {
write(v ? 1:0);
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
public void writeByte(int v) throws IOException {
write((byte)v);
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
public void writeShort(int v) throws IOException {
write((byte)(v >>> 8));
write((byte)v);
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
public void writeChar(int v) throws IOException {
writeShort(v);
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
public void writeInt(int v) throws IOException {
reserve(4);
buf[pos] = (byte)(v>>>24);
buf[pos+1] = (byte)(v>>>16);
buf[pos+2] = (byte)(v>>>8);
buf[pos+3] = (byte)(v);
pos+=4;
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
public void writeLong(long v) throws IOException {
reserve(8);
buf[pos] = (byte)(v>>>56);
buf[pos+1] = (byte)(v>>>48);
buf[pos+2] = (byte)(v>>>40);
buf[pos+3] = (byte)(v>>>32);
buf[pos+4] = (byte)(v>>>24);
buf[pos+5] = (byte)(v>>>16);
buf[pos+6] = (byte)(v>>>8);
buf[pos+7] = (byte)(v);
pos+=8;
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
public void writeFloat(float v) throws IOException {
writeInt(Float.floatToRawIntBits(v));
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
public void writeDouble(double v) throws IOException {
writeLong(Double.doubleToRawLongBits(v));
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
public void writeBytes(String s) throws IOException {
// non-optimized version, but this shouldn't be used anyway
for (int i=0; i<s.length(); i++)
write((byte)s.charAt(i));
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
public void writeChars(String s) throws IOException {
// non-optimized version
for (int i=0; i<s.length(); i++)
writeChar(s.charAt(i));
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
public void writeUTF(String s) throws IOException {
// non-optimized version, but this shouldn't be used anyway
DataOutputStream daos = new DataOutputStream(this);
daos.writeUTF(s);
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
@Override
public void flush() throws IOException {
flushBuffer();
out.flush();
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
@Override
public void close() throws IOException {
flushBuffer();
out.close();
}
// in solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java
public void flushBuffer() throws IOException {
if (pos > 0) {
out.write(buf, 0, pos);
written += pos;
pos=0;
}
}
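// --- Illustrative sketch (not from the Solr sources) ---
// Round-trips a few primitives through the buffered FastOutputStream /
// FastInputStream pair shown above, using the wrap(...) factories that
// JavaBinCodec below also uses. Class name is an assumption.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.solr.common.util.FastInputStream;
import org.apache.solr.common.util.FastOutputStream;

public class FastStreamSketch {
    public static void main(String[] args) throws Exception {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        FastOutputStream out = FastOutputStream.wrap(bos);
        out.writeInt(42);
        out.writeLong(1L << 40);
        out.writeBoolean(true);
        out.flush(); // pushes the internal buffer to the underlying stream
        FastInputStream in = FastInputStream.wrap(new ByteArrayInputStream(bos.toByteArray()));
        System.out.println(in.readInt());     // 42
        System.out.println(in.readLong());    // 1099511627776
        System.out.println(in.readBoolean()); // true
    }
}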
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void marshal(Object nl, OutputStream os) throws IOException {
init(FastOutputStream.wrap(os));
try {
daos.writeByte(VERSION);
writeVal(nl);
} finally {
daos.flushBuffer();
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public Object unmarshal(InputStream is) throws IOException {
FastInputStream dis = FastInputStream.wrap(is);
version = dis.readByte();
if (version != VERSION) {
throw new RuntimeException("Invalid version (expected " + VERSION +
", but " + version + ") or the data in not in 'javabin' format");
}
return readVal(dis);
}
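// --- Illustrative sketch (not from the Solr sources) ---
// Round-trips a NamedList through marshal/unmarshal above: marshal writes the
// VERSION byte and then the value, unmarshal checks the byte and reads the
// value back. Class name is an assumption.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.solr.common.util.JavaBinCodec;
import org.apache.solr.common.util.NamedList;

public class JavaBinSketch {
    public static void main(String[] args) throws Exception {
        NamedList<Object> nl = new NamedList<Object>();
        nl.add("status", 0);
        nl.add("msg", "ok");
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        new JavaBinCodec().marshal(nl, bos);
        Object back = new JavaBinCodec().unmarshal(new ByteArrayInputStream(bos.toByteArray()));
        System.out.println(back); // a NamedList with the same entries
    }
}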
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public SimpleOrderedMap<Object> readOrderedMap(FastInputStream dis) throws IOException {
int sz = readSize(dis);
SimpleOrderedMap<Object> nl = new SimpleOrderedMap<Object>();
for (int i = 0; i < sz; i++) {
String name = (String) readVal(dis);
Object val = readVal(dis);
nl.add(name, val);
}
return nl;
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public NamedList<Object> readNamedList(FastInputStream dis) throws IOException {
int sz = readSize(dis);
NamedList<Object> nl = new NamedList<Object>();
for (int i = 0; i < sz; i++) {
String name = (String) readVal(dis);
Object val = readVal(dis);
nl.add(name, val);
}
return nl;
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeNamedList(NamedList<?> nl) throws IOException {
writeTag(nl instanceof SimpleOrderedMap ? ORDERED_MAP : NAMED_LST, nl.size());
for (int i = 0; i < nl.size(); i++) {
String name = nl.getName(i);
writeExternString(name);
Object val = nl.getVal(i);
writeVal(val);
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeVal(Object val) throws IOException {
if (writeKnownType(val)) {
return;
} else {
Object tmpVal = val;
if (resolver != null) {
tmpVal = resolver.resolve(val, this);
if (tmpVal == null) return; // null means the resolver took care of it fully
if (writeKnownType(tmpVal)) return;
}
}
writeVal(val.getClass().getName() + ':' + val.toString());
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public Object readVal(FastInputStream dis) throws IOException {
tagByte = dis.readByte();
// if ((tagByte & 0xe0) == 0) {
// if top 3 bits are clear, this is a normal tag
// OK, try type + size in single byte
switch (tagByte >>> 5) {
case STR >>> 5:
return readStr(dis);
case SINT >>> 5:
return readSmallInt(dis);
case SLONG >>> 5:
return readSmallLong(dis);
case ARR >>> 5:
return readArray(dis);
case ORDERED_MAP >>> 5:
return readOrderedMap(dis);
case NAMED_LST >>> 5:
return readNamedList(dis);
case EXTERN_STRING >>> 5:
return readExternString(dis);
}
switch (tagByte) {
case NULL:
return null;
case DATE:
return new Date(dis.readLong());
case INT:
return dis.readInt();
case BOOL_TRUE:
return Boolean.TRUE;
case BOOL_FALSE:
return Boolean.FALSE;
case FLOAT:
return dis.readFloat();
case DOUBLE:
return dis.readDouble();
case LONG:
return dis.readLong();
case BYTE:
return dis.readByte();
case SHORT:
return dis.readShort();
case MAP:
return readMap(dis);
case SOLRDOC:
return readSolrDocument(dis);
case SOLRDOCLST:
return readSolrDocumentList(dis);
case BYTEARR:
return readByteArray(dis);
case ITERATOR:
return readIterator(dis);
case END:
return END_OBJ;
case SOLRINPUTDOC:
return readSolrInputDocument(dis);
}
throw new RuntimeException("Unknown type " + tagByte);
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public boolean writeKnownType(Object val) throws IOException {
if (writePrimitive(val)) return true;
if (val instanceof NamedList) {
writeNamedList((NamedList<?>) val);
return true;
}
if (val instanceof SolrDocumentList) { // SolrDocumentList is a List, so must come before List check
writeSolrDocumentList((SolrDocumentList) val);
return true;
}
if (val instanceof Collection) {
writeArray((Collection) val);
return true;
}
if (val instanceof Object[]) {
writeArray((Object[]) val);
return true;
}
if (val instanceof SolrDocument) {
//this needs special treatment to know which fields are to be written
if (resolver == null) {
writeSolrDocument((SolrDocument) val);
} else {
Object retVal = resolver.resolve(val, this);
if (retVal != null) {
if (retVal instanceof SolrDocument) {
writeSolrDocument((SolrDocument) retVal);
} else {
writeVal(retVal);
}
}
}
return true;
}
if (val instanceof SolrInputDocument) {
writeSolrInputDocument((SolrInputDocument)val);
return true;
}
if (val instanceof Map) {
writeMap((Map) val);
return true;
}
if (val instanceof Iterator) {
writeIterator((Iterator) val);
return true;
}
if (val instanceof Iterable) {
writeIterator(((Iterable) val).iterator());
return true;
}
return false;
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeTag(byte tag) throws IOException {
daos.writeByte(tag);
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeTag(byte tag, int size) throws IOException {
if ((tag & 0xe0) != 0) {
if (size < 0x1f) {
daos.writeByte(tag | size);
} else {
daos.writeByte(tag | 0x1f);
writeVInt(size - 0x1f, daos);
}
} else {
daos.writeByte(tag);
writeVInt(size, daos);
}
}
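// --- Illustrative sketch (not from the Solr sources) ---
// How writeTag(byte, int) above packs a type and a small size into one byte:
// the top 3 bits carry the type (matching the "tagByte >>> 5" dispatch in
// readVal) and the low 5 bits carry the size, with 0x1f meaning the size
// continues as a vint. The STR value and class name are assumptions.
public class TagByteSketch {
    static final int STR = 1 << 5; // assumed 3-bit type code for strings
    public static void main(String[] args) {
        int size = 11;
        int tag = (size < 0x1f) ? (STR | size) : (STR | 0x1f);
        System.out.printf("tag=0x%02x type=%d size=%d%n", tag, tag >>> 5, tag & 0x1f);
        // prints: tag=0x2b type=1 size=11
    }
}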
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeByteArray(byte[] arr, int offset, int len) throws IOException {
writeTag(BYTEARR, len);
daos.write(arr, offset, len);
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public byte[] readByteArray(FastInputStream dis) throws IOException {
byte[] arr = new byte[readVInt(dis)];
dis.readFully(arr);
return arr;
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeSolrDocument(SolrDocument doc) throws IOException {
writeTag(SOLRDOC);
writeTag(ORDERED_MAP, doc.size());
for (Map.Entry<String, Object> entry : doc) {
String name = entry.getKey();
writeExternString(name);
Object val = entry.getValue();
writeVal(val);
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public SolrDocument readSolrDocument(FastInputStream dis) throws IOException {
NamedList nl = (NamedList) readVal(dis);
SolrDocument doc = new SolrDocument();
for (int i = 0; i < nl.size(); i++) {
String name = nl.getName(i);
Object val = nl.getVal(i);
doc.setField(name, val);
}
return doc;
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public SolrDocumentList readSolrDocumentList(FastInputStream dis) throws IOException {
SolrDocumentList solrDocs = new SolrDocumentList();
List list = (List) readVal(dis);
solrDocs.setNumFound((Long) list.get(0));
solrDocs.setStart((Long) list.get(1));
solrDocs.setMaxScore((Float) list.get(2));
@SuppressWarnings("unchecked")
List<SolrDocument> l = (List<SolrDocument>) readVal(dis);
solrDocs.addAll(l);
return solrDocs;
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeSolrDocumentList(SolrDocumentList docs)
throws IOException {
writeTag(SOLRDOCLST);
List<Number> l = new ArrayList<Number>(3);
l.add(docs.getNumFound());
l.add(docs.getStart());
l.add(docs.getMaxScore());
writeArray(l);
writeArray(docs);
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public SolrInputDocument readSolrInputDocument(FastInputStream dis) throws IOException {
int sz = readVInt(dis);
float docBoost = (Float)readVal(dis);
SolrInputDocument sdoc = new SolrInputDocument();
sdoc.setDocumentBoost(docBoost);
for (int i = 0; i < sz; i++) {
float boost = 1.0f;
String fieldName;
Object boostOrFieldName = readVal(dis);
if (boostOrFieldName instanceof Float) {
boost = (Float)boostOrFieldName;
fieldName = (String)readVal(dis);
} else {
fieldName = (String)boostOrFieldName;
}
Object fieldVal = readVal(dis);
sdoc.setField(fieldName, fieldVal, boost);
}
return sdoc;
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeSolrInputDocument(SolrInputDocument sdoc) throws IOException {
writeTag(SOLRINPUTDOC, sdoc.size());
writeFloat(sdoc.getDocumentBoost());
for (SolrInputField inputField : sdoc.values()) {
if (inputField.getBoost() != 1.0f) {
writeFloat(inputField.getBoost());
}
writeExternString(inputField.getName());
writeVal(inputField.getValue());
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public Map<Object,Object> readMap(FastInputStream dis)
throws IOException {
int sz = readVInt(dis);
Map<Object,Object> m = new LinkedHashMap<Object,Object>();
for (int i = 0; i < sz; i++) {
Object key = readVal(dis);
Object val = readVal(dis);
m.put(key, val);
}
return m;
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeIterator(Iterator iter) throws IOException {
writeTag(ITERATOR);
while (iter.hasNext()) {
writeVal(iter.next());
}
writeVal(END_OBJ);
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public List<Object> readIterator(FastInputStream fis) throws IOException {
ArrayList<Object> l = new ArrayList<Object>();
while (true) {
Object o = readVal(fis);
if (o == END_OBJ) break;
l.add(o);
}
return l;
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeArray(List l) throws IOException {
writeTag(ARR, l.size());
for (int i = 0; i < l.size(); i++) {
writeVal(l.get(i));
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeArray(Collection coll) throws IOException {
writeTag(ARR, coll.size());
for (Object o : coll) {
writeVal(o);
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeArray(Object[] arr) throws IOException {
writeTag(ARR, arr.length);
for (int i = 0; i < arr.length; i++) {
Object o = arr[i];
writeVal(o);
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public List<Object> readArray(FastInputStream dis) throws IOException {
int sz = readSize(dis);
ArrayList<Object> l = new ArrayList<Object>(sz);
for (int i = 0; i < sz; i++) {
l.add(readVal(dis));
}
return l;
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeStr(String s) throws IOException {
if (s == null) {
writeTag(NULL);
return;
}
int end = s.length();
int maxSize = end * 4;
if (bytes == null || bytes.length < maxSize) bytes = new byte[maxSize];
int sz = ByteUtils.UTF16toUTF8(s, 0, end, bytes, 0);
writeTag(STR, sz);
daos.write(bytes, 0, sz);
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public String readStr(FastInputStream dis) throws IOException {
int sz = readSize(dis);
if (bytes == null || bytes.length < sz) bytes = new byte[sz];
dis.readFully(bytes, 0, sz);
arr.reset();
ByteUtils.UTF8toUTF16(bytes, 0, sz, arr);
return arr.toString();
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeInt(int val) throws IOException {
if (val > 0) {
int b = SINT | (val & 0x0f);
if (val >= 0x0f) {
b |= 0x10;
daos.writeByte(b);
writeVInt(val >>> 4, daos);
} else {
daos.writeByte(b);
}
} else {
daos.writeByte(INT);
daos.writeInt(val);
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public int readSmallInt(FastInputStream dis) throws IOException {
int v = tagByte & 0x0F;
if ((tagByte & 0x10) != 0)
v = (readVInt(dis) << 4) | v;
return v;
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeLong(long val) throws IOException {
if ((val & 0xff00000000000000L) == 0) {
int b = SLONG | ((int) val & 0x0f);
if (val >= 0x0f) {
b |= 0x10;
daos.writeByte(b);
writeVLong(val >>> 4, daos);
} else {
daos.writeByte(b);
}
} else {
daos.writeByte(LONG);
daos.writeLong(val);
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public long readSmallLong(FastInputStream dis) throws IOException {
long v = tagByte & 0x0F;
if ((tagByte & 0x10) != 0)
v = (readVLong(dis) << 4) | v;
return v;
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeFloat(float val) throws IOException {
daos.writeByte(FLOAT);
daos.writeFloat(val);
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public boolean writePrimitive(Object val) throws IOException {
if (val == null) {
daos.writeByte(NULL);
return true;
} else if (val instanceof String) {
writeStr((String) val);
return true;
} else if (val instanceof Number) {
if (val instanceof Integer) {
writeInt(((Integer) val).intValue());
return true;
} else if (val instanceof Long) {
writeLong(((Long) val).longValue());
return true;
} else if (val instanceof Float) {
writeFloat(((Float) val).floatValue());
return true;
} else if (val instanceof Double) {
daos.writeByte(DOUBLE);
daos.writeDouble(((Double) val).doubleValue());
return true;
} else if (val instanceof Byte) {
daos.writeByte(BYTE);
daos.writeByte(((Byte) val).intValue());
return true;
} else if (val instanceof Short) {
daos.writeByte(SHORT);
daos.writeShort(((Short) val).intValue());
return true;
}
return false;
} else if (val instanceof Date) {
daos.writeByte(DATE);
daos.writeLong(((Date) val).getTime());
return true;
} else if (val instanceof Boolean) {
if ((Boolean) val) daos.writeByte(BOOL_TRUE);
else daos.writeByte(BOOL_FALSE);
return true;
} else if (val instanceof byte[]) {
writeByteArray((byte[]) val, 0, ((byte[]) val).length);
return true;
} else if (val instanceof ByteBuffer) {
ByteBuffer buf = (ByteBuffer) val;
writeByteArray(buf.array(),buf.position(),buf.limit() - buf.position());
return true;
} else if (val == END_OBJ) {
writeTag(END);
return true;
}
return false;
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeMap(Map<?,?> val) throws IOException {
writeTag(MAP, val.size());
for (Map.Entry<?,?> entry : val.entrySet()) {
Object key = entry.getKey();
if (key instanceof String) {
writeExternString((String) key);
} else {
writeVal(key);
}
writeVal(entry.getValue());
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public int readSize(FastInputStream in) throws IOException {
int sz = tagByte & 0x1f;
if (sz == 0x1f) sz += readVInt(in);
return sz;
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public static void writeVInt(int i, FastOutputStream out) throws IOException {
while ((i & ~0x7F) != 0) {
out.writeByte((byte) ((i & 0x7f) | 0x80));
i >>>= 7;
}
out.writeByte((byte) i);
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public static int readVInt(FastInputStream in) throws IOException {
byte b = in.readByte();
int i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
b = in.readByte();
i |= (b & 0x7F) << shift;
}
return i;
}
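// --- Illustrative sketch (not from the Solr sources) ---
// Checks the 7-bit variable-length encoding of writeVInt/readVInt above:
// 300 encodes as the two bytes 0xAC 0x02 and decodes back to 300. Class name
// is an assumption.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.solr.common.util.FastInputStream;
import org.apache.solr.common.util.FastOutputStream;
import org.apache.solr.common.util.JavaBinCodec;

public class VIntSketch {
    public static void main(String[] args) throws Exception {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        FastOutputStream out = FastOutputStream.wrap(bos);
        JavaBinCodec.writeVInt(300, out);
        out.flush();
        FastInputStream in = FastInputStream.wrap(new ByteArrayInputStream(bos.toByteArray()));
        System.out.println(JavaBinCodec.readVInt(in)); // 300
    }
}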
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public static void writeVLong(long i, FastOutputStream out) throws IOException {
while ((i & ~0x7F) != 0) {
out.writeByte((byte) ((i & 0x7f) | 0x80));
i >>>= 7;
}
out.writeByte((byte) i);
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public static long readVLong(FastInputStream in) throws IOException {
byte b = in.readByte();
long i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
b = in.readByte();
i |= (long) (b & 0x7F) << shift;
}
return i;
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public void writeExternString(String s) throws IOException {
if (s == null) {
writeTag(NULL);
return;
}
Integer idx = stringsMap == null ? null : stringsMap.get(s);
if (idx == null) idx = 0;
writeTag(EXTERN_STRING, idx);
if (idx == 0) {
writeStr(s);
if (stringsMap == null) stringsMap = new HashMap<String, Integer>();
stringsMap.put(s, ++stringsCount);
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
public String readExternString(FastInputStream fis) throws IOException {
int idx = readSize(fis);
if (idx != 0) {// idx != 0 is the index of the extern string
return stringsList.get(idx - 1);
} else {// idx == 0 means it has a string value
String s = (String) readVal(fis);
if (stringsList == null) stringsList = new ArrayList<String>();
stringsList.add(s);
return s;
}
}
// in solr/solrj/src/java/org/apache/solr/common/util/DateUtil.java
public static Calendar formatDate(Date date, Calendar cal, Appendable out) throws IOException {
// using a stringBuilder for numbers can be nice since
// a temporary string isn't used (it's added directly to the
// builder's buffer.
StringBuilder sb = out instanceof StringBuilder ? (StringBuilder)out : new StringBuilder();
if (cal==null) cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.US);
cal.setTime(date);
int i = cal.get(Calendar.YEAR);
sb.append(i);
sb.append('-');
i = cal.get(Calendar.MONTH) + 1; // 0 based, so add 1
if (i<10) sb.append('0');
sb.append(i);
sb.append('-');
i=cal.get(Calendar.DAY_OF_MONTH);
if (i<10) sb.append('0');
sb.append(i);
sb.append('T');
i=cal.get(Calendar.HOUR_OF_DAY); // 24 hour time format
if (i<10) sb.append('0');
sb.append(i);
sb.append(':');
i=cal.get(Calendar.MINUTE);
if (i<10) sb.append('0');
sb.append(i);
sb.append(':');
i=cal.get(Calendar.SECOND);
if (i<10) sb.append('0');
sb.append(i);
i=cal.get(Calendar.MILLISECOND);
if (i != 0) {
sb.append('.');
if (i<100) sb.append('0');
if (i<10) sb.append('0');
sb.append(i);
// handle canonical format specifying fractional
// seconds shall not end in '0'. Given the slowness of
// integer div/mod, simply checking the last character
// is probably the fastest way to check.
int lastIdx = sb.length()-1;
if (sb.charAt(lastIdx)=='0') {
lastIdx--;
if (sb.charAt(lastIdx)=='0') {
lastIdx--;
}
sb.setLength(lastIdx+1);
}
}
sb.append('Z');
if (out != sb)
out.append(sb);
return cal;
}
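// --- Illustrative sketch (not from the Solr sources) ---
// Rough usage of formatDate above: with a null Calendar it formats in GMT and,
// per the trailing-zero handling at the end of the method, trims zeros from
// the fractional seconds. Class name and sample instant are assumptions.
import java.util.Date;
import org.apache.solr.common.util.DateUtil;

public class DateFormatSketch {
    public static void main(String[] args) throws Exception {
        StringBuilder sb = new StringBuilder();
        DateUtil.formatDate(new Date(1335867630500L), null, sb);
        System.out.println(sb); // 2012-05-01T10:20:30.5Z
    }
}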
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/DirectXmlRequest.java
@Override
public UpdateResponse process( SolrServer server ) throws SolrServerException, IOException
{
long startTime = System.currentTimeMillis();
UpdateResponse res = new UpdateResponse();
res.setResponse( server.request( this ) );
res.setElapsedTime( System.currentTimeMillis()-startTime );
return res;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/SolrPing.java
@Override
public SolrPingResponse process( SolrServer server ) throws SolrServerException, IOException
{
long startTime = System.currentTimeMillis();
SolrPingResponse res = new SolrPingResponse();
res.setResponse( server.request( this ) );
res.setElapsedTime( System.currentTimeMillis()-startTime );
return res;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/AbstractUpdateRequest.java
@Override
public UpdateResponse process( SolrServer server ) throws SolrServerException, IOException
{
long startTime = System.currentTimeMillis();
UpdateResponse res = new UpdateResponse();
res.setResponse( server.request( this ) );
res.setElapsedTime( System.currentTimeMillis()-startTime );
return res;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/ContentStreamUpdateRequest.java
@Override
public Collection<ContentStream> getContentStreams() throws IOException {
return contentStreams;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/ContentStreamUpdateRequest.java
public void addFile(File file, String contentType) throws IOException {
ContentStreamBase cs = new ContentStreamBase.FileStream(file);
cs.setContentType(contentType);
addContentStream(cs);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/FieldAnalysisRequest.java
@Override
public Collection<ContentStream> getContentStreams() throws IOException {
return null;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/FieldAnalysisRequest.java
@Override
public FieldAnalysisResponse process(SolrServer server) throws SolrServerException, IOException {
if (fieldTypes == null && fieldNames == null) {
throw new IllegalStateException("At least one field type or field name need to be specified");
}
if (fieldValue == null) {
throw new IllegalStateException("The field value must be set");
}
long startTime = System.currentTimeMillis();
FieldAnalysisResponse res = new FieldAnalysisResponse();
res.setResponse(server.request(this));
res.setElapsedTime(System.currentTimeMillis() - startTime);
return res;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/RequestWriter.java
public Collection<ContentStream> getContentStreams(SolrRequest req) throws IOException {
if (req instanceof UpdateRequest) {
UpdateRequest updateRequest = (UpdateRequest) req;
if (isEmpty(updateRequest)) return null;
List<ContentStream> l = new ArrayList<ContentStream>();
l.add(new LazyContentStream(updateRequest));
return l;
}
return req.getContentStreams();
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/RequestWriter.java
public ContentStream getContentStream(UpdateRequest req) throws IOException {
return new ContentStreamBase.StringStream(req.getXML());
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/RequestWriter.java
public void write(SolrRequest request, OutputStream os) throws IOException {
if (request instanceof UpdateRequest) {
UpdateRequest updateRequest = (UpdateRequest) request;
OutputStreamWriter writer = new OutputStreamWriter(os, UTF_8);
updateRequest.writeXML(writer);
writer.flush();
}
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/RequestWriter.java
public InputStream getStream() throws IOException {
return getDelegate().getStream();
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/RequestWriter.java
public Reader getReader() throws IOException {
return getDelegate().getReader();
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/RequestWriter.java
public void writeTo(OutputStream os) throws IOException {
write(req, os);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/UpdateRequest.java
@Override
public Collection<ContentStream> getContentStreams() throws IOException {
return ClientUtils.toContentStreams( getXML(), ClientUtils.TEXT_XML );
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/UpdateRequest.java
public String getXML() throws IOException {
StringWriter writer = new StringWriter();
writeXML( writer );
writer.flush();
// If action is COMMIT or OPTIMIZE, it is sent with params
String xml = writer.toString();
//System.out.println( "SEND:"+xml );
return (xml.length() > 0) ? xml : null;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/UpdateRequest.java
public void writeXML( Writer writer ) throws IOException {
if( (documents != null && documents.size() > 0) || docIterator != null) {
if( commitWithin > 0 ) {
writer.write("<add commitWithin=\""+commitWithin+"\">");
}
else {
writer.write("<add>");
}
if(documents != null) {
for (SolrInputDocument doc : documents) {
if (doc != null) {
ClientUtils.writeXML(doc, writer);
}
}
}
if (docIterator != null) {
while (docIterator.hasNext()) {
SolrInputDocument doc = docIterator.next();
if (doc != null) {
ClientUtils.writeXML(doc, writer);
}
}
}
writer.write("</add>");
}
// Add the delete commands
boolean deleteI = deleteById != null && deleteById.size() > 0;
boolean deleteQ = deleteQuery != null && deleteQuery.size() > 0;
if( deleteI || deleteQ ) {
if(commitWithin>0) {
writer.append( "<delete commitWithin=\"" + commitWithin + "\">" );
} else {
writer.append( "<delete>" );
}
if( deleteI ) {
for( String id : deleteById ) {
writer.append( "<id>" );
XML.escapeCharData( id, writer );
writer.append( "</id>" );
}
}
if( deleteQ ) {
for( String q : deleteQuery ) {
writer.append( "<query>" );
XML.escapeCharData( q, writer );
writer.append( "</query>" );
}
}
writer.append( "</delete>" );
}
}
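// --- Illustrative sketch (not from the Solr sources) ---
// Rough usage of writeXML/getXML above: an <add> block is emitted for any
// queued documents and a <delete> block for queued ids and queries. Class
// name is an assumption.
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;

public class UpdateXmlSketch {
    public static void main(String[] args) throws Exception {
        UpdateRequest req = new UpdateRequest();
        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("id", "1");
        req.add(doc);
        req.deleteById("2");
        System.out.println(req.getXML());
        // roughly: <add><doc>...</doc></add><delete><id>2</id></delete>
    }
}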
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/LukeRequest.java
@Override
public LukeResponse process( SolrServer server ) throws SolrServerException, IOException
{
long startTime = System.currentTimeMillis();
LukeResponse res = new LukeResponse();
res.setResponse( server.request( this ) );
res.setElapsedTime( System.currentTimeMillis()-startTime );
return res;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/UpdateRequestExt.java
@Override
public Collection<ContentStream> getContentStreams() throws IOException {
return ClientUtils.toContentStreams(getXML(), ClientUtils.TEXT_XML);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/UpdateRequestExt.java
public String getXML() throws IOException {
StringWriter writer = new StringWriter();
writeXML(writer);
writer.flush();
String xml = writer.toString();
return (xml.length() > 0) ? xml : null;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/UpdateRequestExt.java
public void writeXML(Writer writer) throws IOException {
List<List<SolrDoc>> getDocLists = getDocLists(documents);
for (List<SolrDoc> docs : getDocLists) {
if ((docs != null && docs.size() > 0)) {
SolrDoc firstDoc = docs.get(0);
int commitWithin = firstDoc.commitWithin != -1 ? firstDoc.commitWithin : this.commitWithin;
boolean overwrite = firstDoc.overwrite;
if (commitWithin > -1 || overwrite != true) {
writer.write("<add commitWithin=\"" + commitWithin + "\" " + "overwrite=\"" + overwrite + "\">");
} else {
writer.write("<add>");
}
if (documents != null) {
for (SolrDoc doc : documents) {
if (doc != null) {
ClientUtils.writeXML(doc.document, writer);
}
}
}
writer.write("</add>");
}
}
// Add the delete commands
boolean deleteI = deleteById != null && deleteById.size() > 0;
boolean deleteQ = deleteQuery != null && deleteQuery.size() > 0;
if (deleteI || deleteQ) {
writer.append("<delete>");
if (deleteI) {
for (Map.Entry<String,Long> entry : deleteById.entrySet()) {
writer.append("<id");
Long version = entry.getValue();
if (version != null) {
writer.append(" version=\"" + version + "\"");
}
writer.append(">");
XML.escapeCharData(entry.getKey(), writer);
writer.append("</id>");
}
}
if (deleteQ) {
for (String q : deleteQuery) {
writer.append("<query>");
XML.escapeCharData(q, writer);
writer.append("</query>");
}
}
writer.append("</delete>");
}
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/DocumentAnalysisRequest.java
@Override
public Collection<ContentStream> getContentStreams() throws IOException {
return ClientUtils.toContentStreams(getXML(), ClientUtils.TEXT_XML);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/DocumentAnalysisRequest.java
@Override
public DocumentAnalysisResponse process(SolrServer server) throws SolrServerException, IOException {
long startTime = System.currentTimeMillis();
DocumentAnalysisResponse res = new DocumentAnalysisResponse();
res.setResponse(server.request(this));
res.setElapsedTime(System.currentTimeMillis() - startTime);
return res;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/DocumentAnalysisRequest.java
String getXML() throws IOException {
StringWriter writer = new StringWriter();
writer.write("<docs>");
for (SolrInputDocument document : documents) {
ClientUtils.writeXML(document, writer);
}
writer.write("</docs>");
writer.flush();
String xml = writer.toString();
return (xml.length() > 0) ? xml : null;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java
public void marshal(UpdateRequest updateRequest, OutputStream os) throws IOException {
NamedList nl = new NamedList();
NamedList params = solrParamsToNamedList(updateRequest.getParams());
if (updateRequest.getCommitWithin() != -1) {
params.add("commitWithin", updateRequest.getCommitWithin());
}
Iterator<SolrInputDocument> docIter = null;
if (updateRequest.getDocuments() != null) {
docIter = updateRequest.getDocuments().iterator();
}
if(updateRequest.getDocIterator() != null){
docIter = updateRequest.getDocIterator();
}
nl.add("params", params);// 0: params
nl.add("delById", updateRequest.getDeleteById());
nl.add("delByQ", updateRequest.getDeleteQuery());
nl.add("docs", docIter);
JavaBinCodec codec = new JavaBinCodec();
codec.marshal(nl, os);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java
public UpdateRequest unmarshal(InputStream is, final StreamingUpdateHandler handler) throws IOException {
final UpdateRequest updateRequest = new UpdateRequest();
List<List<NamedList>> doclist;
List<String> delById;
List<String> delByQ;
final NamedList[] namedList = new NamedList[1];
JavaBinCodec codec = new JavaBinCodec() {
// NOTE: this only works because this is an anonymous inner class
// which will only ever be used on a single stream -- if this class
// is ever refactored, this will not work.
private boolean seenOuterMostDocIterator = false;
@Override
public NamedList readNamedList(FastInputStream dis) throws IOException {
int sz = readSize(dis);
NamedList nl = new NamedList();
if (namedList[0] == null) {
namedList[0] = nl;
}
for (int i = 0; i < sz; i++) {
String name = (String) readVal(dis);
Object val = readVal(dis);
nl.add(name, val);
}
return nl;
}
@Override
public List readIterator(FastInputStream fis) throws IOException {
// default behavior for reading any regular Iterator in the stream
if (seenOuterMostDocIterator) return super.readIterator(fis);
// special treatment for first outermost Iterator
// (the list of documents)
seenOuterMostDocIterator = true;
return readOuterMostDocIterator(fis);
}
private List readOuterMostDocIterator(FastInputStream fis) throws IOException {
NamedList params = (NamedList) namedList[0].getVal(0);
updateRequest.setParams(new ModifiableSolrParams(SolrParams.toSolrParams(params)));
if (handler == null) return super.readIterator(fis);
while (true) {
Object o = readVal(fis);
if (o == END_OBJ) break;
SolrInputDocument sdoc = null;
if (o instanceof List) {
sdoc = listToSolrInputDocument((List<NamedList>) o);
} else if (o instanceof NamedList) {
UpdateRequest req = new UpdateRequest();
req.setParams(new ModifiableSolrParams(SolrParams.toSolrParams((NamedList) o)));
handler.update(null, req);
} else {
sdoc = (SolrInputDocument) o;
}
handler.update(sdoc, updateRequest);
}
return Collections.EMPTY_LIST;
}
};
codec.unmarshal(is);
// NOTE: if the update request contains only delete commands the params
// must be loaded now
if(updateRequest.getParams()==null) {
NamedList params = (NamedList) namedList[0].get("params");
if(params!=null) {
updateRequest.setParams(new ModifiableSolrParams(SolrParams.toSolrParams(params)));
}
}
delById = (List<String>) namedList[0].get("delById");
delByQ = (List<String>) namedList[0].get("delByQ");
doclist = (List) namedList[0].get("docs");
if (doclist != null && !doclist.isEmpty()) {
List<SolrInputDocument> solrInputDocs = new ArrayList<SolrInputDocument>();
for (Object o : doclist) {
if (o instanceof List) {
solrInputDocs.add(listToSolrInputDocument((List<NamedList>)o));
} else {
solrInputDocs.add((SolrInputDocument)o);
}
}
updateRequest.add(solrInputDocs);
}
if (delById != null) {
for (String s : delById) {
updateRequest.deleteById(s);
}
}
if (delByQ != null) {
for (String s : delByQ) {
updateRequest.deleteByQuery(s);
}
}
return updateRequest;
}
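// --- Illustrative sketch (not from the Solr sources) ---
// Rough round trip through the codec above. Passing a null handler makes
// unmarshal collect the documents into the returned UpdateRequest instead of
// streaming them one by one. Class name is an assumption.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;

public class JavaBinUpdateSketch {
    public static void main(String[] args) throws Exception {
        UpdateRequest req = new UpdateRequest();
        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("id", "1");
        req.add(doc);
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        new JavaBinUpdateRequestCodec().marshal(req, bos);
        UpdateRequest back = new JavaBinUpdateRequestCodec().unmarshal(
                new ByteArrayInputStream(bos.toByteArray()), null);
        System.out.println(back.getDocuments().size()); // 1
    }
}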
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java
@Override
public NamedList readNamedList(FastInputStream dis) throws IOException {
int sz = readSize(dis);
NamedList nl = new NamedList();
if (namedList[0] == null) {
namedList[0] = nl;
}
for (int i = 0; i < sz; i++) {
String name = (String) readVal(dis);
Object val = readVal(dis);
nl.add(name, val);
}
return nl;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java
@Override
public List readIterator(FastInputStream fis) throws IOException {
// default behavior for reading any regular Iterator in the stream
if (seenOuterMostDocIterator) return super.readIterator(fis);
// special treatment for first outermost Iterator
// (the list of documents)
seenOuterMostDocIterator = true;
return readOuterMostDocIterator(fis);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java
private List readOuterMostDocIterator(FastInputStream fis) throws IOException {
NamedList params = (NamedList) namedList[0].getVal(0);
updateRequest.setParams(new ModifiableSolrParams(SolrParams.toSolrParams(params)));
if (handler == null) return super.readIterator(fis);
while (true) {
Object o = readVal(fis);
if (o == END_OBJ) break;
SolrInputDocument sdoc = null;
if (o instanceof List) {
sdoc = listToSolrInputDocument((List<NamedList>) o);
} else if (o instanceof NamedList) {
UpdateRequest req = new UpdateRequest();
req.setParams(new ModifiableSolrParams(SolrParams.toSolrParams((NamedList) o)));
handler.update(null, req);
} else {
sdoc = (SolrInputDocument) o;
}
handler.update(sdoc, updateRequest);
}
return Collections.EMPTY_LIST;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java
@Override
public Collection<ContentStream> getContentStreams() throws IOException {
return null;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java
@Override
public CoreAdminResponse process(SolrServer server) throws SolrServerException, IOException
{
long startTime = System.currentTimeMillis();
CoreAdminResponse res = new CoreAdminResponse();
res.setResponse( server.request( this ) );
res.setElapsedTime( System.currentTimeMillis()-startTime );
return res;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java
public static CoreAdminResponse reloadCore( String name, SolrServer server ) throws SolrServerException, IOException
{
CoreAdminRequest req = new CoreAdminRequest();
req.setCoreName( name );
req.setAction( CoreAdminAction.RELOAD );
return req.process( server );
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java
public static CoreAdminResponse unloadCore( String name, SolrServer server ) throws SolrServerException, IOException
{
return unloadCore(name, false, server);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java
public static CoreAdminResponse unloadCore( String name, boolean deleteIndex, SolrServer server ) throws SolrServerException, IOException
{
Unload req = new Unload(deleteIndex);
req.setCoreName( name );
return req.process( server );
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java
public static CoreAdminResponse renameCore(String coreName, String newName, SolrServer server ) throws SolrServerException, IOException
{
CoreAdminRequest req = new CoreAdminRequest();
req.setCoreName(coreName);
req.setOtherCoreName(newName);
req.setAction( CoreAdminAction.RENAME );
return req.process( server );
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java
public static CoreAdminResponse getStatus( String name, SolrServer server ) throws SolrServerException, IOException
{
CoreAdminRequest req = new CoreAdminRequest();
req.setCoreName( name );
req.setAction( CoreAdminAction.STATUS );
return req.process( server );
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java
public static CoreAdminResponse createCore( String name, String instanceDir, SolrServer server ) throws SolrServerException, IOException
{
return CoreAdminRequest.createCore(name, instanceDir, server, null, null);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java
public static CoreAdminResponse createCore( String name, String instanceDir, SolrServer server, String configFile, String schemaFile ) throws SolrServerException, IOException
{
CoreAdminRequest.Create req = new CoreAdminRequest.Create();
req.setCoreName( name );
req.setInstanceDir(instanceDir);
if(configFile != null){
req.setConfigName(configFile);
}
if(schemaFile != null){
req.setSchemaName(schemaFile);
}
return req.process( server );
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java
public static CoreAdminResponse persist(String fileName, SolrServer server) throws SolrServerException, IOException
{
CoreAdminRequest.Persist req = new CoreAdminRequest.Persist();
req.setFileName(fileName);
return req.process(server);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java
public static CoreAdminResponse mergeIndexes(String name,
String[] indexDirs, String[] srcCores, SolrServer server) throws SolrServerException,
IOException {
CoreAdminRequest.MergeIndexes req = new CoreAdminRequest.MergeIndexes();
req.setCoreName(name);
req.setIndexDirs(Arrays.asList(indexDirs));
req.setSrcCores(Arrays.asList(srcCores));
return req.process(server);
}
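A hedged usage sketch for the CoreAdminRequest helpers above; the base URL and core names are invented, and core-admin requests are assumed to go to the container-level URL rather than a core URL.
static void coreAdminExample() throws Exception {
  SolrServer admin = new HttpSolrServer("http://localhost:8983/solr");
  CoreAdminResponse status = CoreAdminRequest.getStatus("collection1", admin);
  CoreAdminRequest.createCore("newcore", "newcore", admin);  // name, instanceDir
  CoreAdminRequest.reloadCore("newcore", admin);
  CoreAdminRequest.unloadCore("newcore", true, admin);       // also delete the index
}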
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrServer.java
public void run() {
runnerLock.lock();
// info is ok since this should only happen once for each thread
log.info("starting runner: {}", this);
HttpPost method = null;
HttpResponse response = null;
try {
while (!queue.isEmpty()) {
try {
final UpdateRequest updateRequest = queue.poll(250,
TimeUnit.MILLISECONDS);
if (updateRequest == null)
break;
String contentType = server.requestWriter.getUpdateContentType();
final boolean isXml = ClientUtils.TEXT_XML.equals(contentType);
final ModifiableSolrParams origParams = new ModifiableSolrParams(updateRequest.getParams());
EntityTemplate template = new EntityTemplate(new ContentProducer() {
public void writeTo(OutputStream out) throws IOException {
try {
if (isXml) {
out.write("<stream>".getBytes("UTF-8")); // can be anything
}
UpdateRequest req = updateRequest;
while (req != null) {
SolrParams currentParams = new ModifiableSolrParams(req.getParams());
if (!origParams.toNamedList().equals(currentParams.toNamedList())) {
queue.add(req); // params are different, push back to queue
break;
}
server.requestWriter.write(req, out);
if (isXml) {
// check for commit or optimize
SolrParams params = req.getParams();
if (params != null) {
String fmt = null;
if (params.getBool(UpdateParams.OPTIMIZE, false)) {
fmt = "<optimize waitSearcher=\"%s\" waitFlush=\"%s\" />";
} else if (params.getBool(UpdateParams.COMMIT, false)) {
fmt = "<commit waitSearcher=\"%s\" waitFlush=\"%s\" />";
}
if (fmt != null) {
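// NOTE: the format strings above declare two "%s" placeholders (waitSearcher and
// waitFlush) but only one argument appears to be supplied below, so String.format()
// would throw MissingFormatArgumentException when a commit/optimize is streamed as XML.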
byte[] content = String.format(
fmt,
params.getBool(UpdateParams.WAIT_SEARCHER, false)
+ "").getBytes("UTF-8");
out.write(content);
}
}
}
out.flush();
req = queue.poll(250, TimeUnit.MILLISECONDS);
}
if (isXml) {
out.write("</stream>".getBytes("UTF-8"));
}
} catch (InterruptedException e) {
e.printStackTrace();
}
}
});
// The parser 'wt=' and 'version=' params are used instead of the
// original params
ModifiableSolrParams requestParams = new ModifiableSolrParams(origParams);
requestParams.set(CommonParams.WT, server.parser.getWriterType());
requestParams.set(CommonParams.VERSION, server.parser.getVersion());
method = new HttpPost(server.getBaseURL() + "/update"
+ ClientUtils.toQueryString(requestParams, false));
method.setEntity(template);
method.addHeader("User-Agent", HttpSolrServer.AGENT);
method.addHeader("Content-Type", contentType);
response = server.getHttpClient().execute(method);
int statusCode = response.getStatusLine().getStatusCode();
log.info("Status for: "
+ updateRequest.getDocuments().get(0).getFieldValue("id")
+ " is " + statusCode);
if (statusCode != HttpStatus.SC_OK) {
StringBuilder msg = new StringBuilder();
msg.append(response.getStatusLine().getReasonPhrase());
msg.append("\n\n");
msg.append("\n\n");
msg.append("request: ").append(method.getURI());
handleError(new Exception(msg.toString()));
}
} finally {
try {
if (response != null) {
response.getEntity().getContent().close();
}
} catch (Exception ex) {
}
}
}
} catch (Throwable e) {
handleError(e);
} finally {
// remove it from the list of running things unless we are the last
// runner and the queue is full...
// in which case, the next queue.put() would block and there would be no
// runners to handle it.
// This case has been further handled by using offer instead of put, and
// using a retry loop
// to avoid blocking forever (see request()).
synchronized (runners) {
if (runners.size() == 1 && queue.remainingCapacity() == 0) {
// keep this runner alive
scheduler.execute(this);
} else {
runners.remove(this);
}
}
log.info("finished: {}", this);
runnerLock.unlock();
}
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrServer.java
public void writeTo(OutputStream out) throws IOException {
try {
if (isXml) {
out.write("<stream>".getBytes("UTF-8")); // can be anything
}
UpdateRequest req = updateRequest;
while (req != null) {
SolrParams currentParams = new ModifiableSolrParams(req.getParams());
if (!origParams.toNamedList().equals(currentParams.toNamedList())) {
queue.add(req); // params are different, push back to queue
break;
}
server.requestWriter.write(req, out);
if (isXml) {
// check for commit or optimize
SolrParams params = req.getParams();
if (params != null) {
String fmt = null;
if (params.getBool(UpdateParams.OPTIMIZE, false)) {
fmt = "<optimize waitSearcher=\"%s\" waitFlush=\"%s\" />";
} else if (params.getBool(UpdateParams.COMMIT, false)) {
fmt = "<commit waitSearcher=\"%s\" waitFlush=\"%s\" />";
}
if (fmt != null) {
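// NOTE: the format strings above declare two "%s" placeholders (waitSearcher and
// waitFlush) but only one argument appears to be supplied below, so String.format()
// would throw MissingFormatArgumentException when a commit/optimize is streamed as XML.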
byte[] content = String.format(
fmt,
params.getBool(UpdateParams.WAIT_SEARCHER, false)
+ "").getBytes("UTF-8");
out.write(content);
}
}
}
out.flush();
req = queue.poll(250, TimeUnit.MILLISECONDS);
}
if (isXml) {
out.write("</stream>".getBytes("UTF-8"));
}
} catch (InterruptedException e) {
e.printStackTrace();
}
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrServer.java
public NamedList<Object> request(final SolrRequest request)
throws SolrServerException, IOException {
if (!(request instanceof UpdateRequest)) {
return server.request(request);
}
UpdateRequest req = (UpdateRequest) request;
// this happens for commit...
if (req.getDocuments() == null || req.getDocuments().isEmpty()) {
blockUntilFinished();
return server.request(request);
}
SolrParams params = req.getParams();
if (params != null) {
// check if it is waiting for the searcher
if (params.getBool(UpdateParams.WAIT_SEARCHER, false)) {
log.info("blocking for commit/optimize");
blockUntilFinished(); // empty the queue
return server.request(request);
}
}
try {
CountDownLatch tmpLock = lock;
if (tmpLock != null) {
tmpLock.await();
}
boolean success = queue.offer(req);
for (;;) {
synchronized (runners) {
if (runners.isEmpty()
|| (queue.remainingCapacity() < queue.size() // queue is half full and we can add more runners
&& runners.size() < threadCount)) {
// We need more runners, so start a new one.
Runner r = new Runner();
runners.add(r);
scheduler.execute(r);
} else {
// break out of the retry loop if we added the element to the queue
// successfully, *and*
// while we are still holding the runners lock to prevent race
// conditions.
if (success)
break;
}
}
// Retry to add to the queue w/o the runners lock held (else we risk
// temporary deadlock)
// This retry could also fail because
// 1) existing runners were not able to take off any new elements in the
// queue
// 2) the queue was filled back up since our last try
// If we succeed, the queue may have been completely emptied, and all
// runners stopped.
// In all cases, we should loop back to the top to see if we need to
// start more runners.
//
if (!success) {
success = queue.offer(req, 100, TimeUnit.MILLISECONDS);
}
}
} catch (InterruptedException e) {
log.error("interrupted", e);
throw new IOException(e.getLocalizedMessage());
}
// RETURN A DUMMY result
NamedList<Object> dummy = new NamedList<Object>();
dummy.add("NOTE", "the request is processed in a background stream");
return dummy;
}
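A minimal sketch of using ConcurrentUpdateSolrServer from client code, assuming the common (url, queueSize, threadCount) constructor; the URL and sizes are illustrative. Adds are queued and streamed by background Runner threads as shown above, while a commit empties the queue before being sent.
static void concurrentUpdateExample() throws Exception {
  ConcurrentUpdateSolrServer solr =
      new ConcurrentUpdateSolrServer("http://localhost:8983/solr/collection1", 100, 4);
  for (int i = 0; i < 1000; i++) {
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField("id", "doc-" + i);
    solr.add(doc);   // queued, sent by a background runner
  }
  solr.commit();     // flushes the queue, then issues the commit
}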
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java
@Override
public Collection<ContentStream> getContentStreams(SolrRequest req) throws IOException {
if (req instanceof UpdateRequest) {
UpdateRequest updateRequest = (UpdateRequest) req;
if (isNull(updateRequest.getDocuments()) &&
isNull(updateRequest.getDeleteById()) &&
isNull(updateRequest.getDeleteQuery())
&& (updateRequest.getDocIterator() == null) ) {
return null;
}
List<ContentStream> l = new ArrayList<ContentStream>();
l.add(new LazyContentStream(updateRequest));
return l;
} else {
return super.getContentStreams(req);
}
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java
@Override
public ContentStream getContentStream(final UpdateRequest request) throws IOException {
final BAOS baos = new BAOS();
new JavaBinUpdateRequestCodec().marshal(request, baos);
return new ContentStream() {
public String getName() {
return null;
}
public String getSourceInfo() {
return "javabin";
}
public String getContentType() {
return "application/javabin";
}
public Long getSize() // size if we know it, otherwise null
{
return new Long(baos.size());
}
public InputStream getStream() throws IOException {
return new ByteArrayInputStream(baos.getbuf(), 0, baos.size());
}
public Reader getReader() throws IOException {
throw new RuntimeException("No reader available . this is a binarystream");
}
};
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java
public InputStream getStream() throws IOException {
return new ByteArrayInputStream(baos.getbuf(), 0, baos.size());
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java
public Reader getReader() throws IOException {
throw new RuntimeException("No reader available . this is a binarystream");
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java
@Override
public void write(SolrRequest request, OutputStream os) throws IOException {
if (request instanceof UpdateRequest) {
UpdateRequest updateRequest = (UpdateRequest) request;
new JavaBinUpdateRequestCodec().marshal(updateRequest, os);
}
}
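A hedged sketch: install the BinaryRequestWriter above on an HttpSolrServer so updates are sent as javabin rather than XML; the URL and field values are invented.
static void binaryUpdatesExample() throws Exception {
  HttpSolrServer solr = new HttpSolrServer("http://localhost:8983/solr/collection1");
  solr.setRequestWriter(new BinaryRequestWriter()); // updates now use application/javabin
  SolrInputDocument doc = new SolrInputDocument();
  doc.addField("id", "binary-1");
  solr.add(doc);
  solr.commit();
}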
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrServer.java
@Override
public NamedList<Object> request(SolrRequest request) throws SolrServerException, IOException {
connect();
// TODO: if you can hash here, you could favor the shard leader
CloudState cloudState = zkStateReader.getCloudState();
SolrParams reqParams = request.getParams();
if (reqParams == null) {
reqParams = new ModifiableSolrParams();
}
String collection = reqParams.get("collection", defaultCollection);
if (collection == null) {
throw new SolrServerException("No collection param specified on request and no default collection has been set.");
}
// Extract each comma separated collection name and store in a List.
List<String> collectionList = StrUtils.splitSmart(collection, ",", true);
// Retrieve slices from the cloud state and, for each collection specified,
// add it to the Map of slices.
Map<String,Slice> slices = new HashMap<String,Slice>();
for (int i = 0; i < collectionList.size(); i++) {
String coll= collectionList.get(i);
ClientUtils.appendMap(coll, slices, cloudState.getSlices(coll));
}
Set<String> liveNodes = cloudState.getLiveNodes();
// IDEA: have versions on various things... like a global cloudState version
// or shardAddressVersion (which only changes when the shards change)
// to allow caching.
// build a map of unique nodes
// TODO: allow filtering by group, role, etc
Map<String,ZkNodeProps> nodes = new HashMap<String,ZkNodeProps>();
List<String> urlList = new ArrayList<String>();
for (Slice slice : slices.values()) {
for (ZkNodeProps nodeProps : slice.getShards().values()) {
ZkCoreNodeProps coreNodeProps = new ZkCoreNodeProps(nodeProps);
String node = coreNodeProps.getNodeName();
if (!liveNodes.contains(coreNodeProps.getNodeName())
|| !coreNodeProps.getState().equals(
ZkStateReader.ACTIVE)) continue;
if (nodes.put(node, nodeProps) == null) {
String url = coreNodeProps.getCoreUrl();
urlList.add(url);
}
}
}
Collections.shuffle(urlList, rand);
//System.out.println("########################## MAKING REQUEST TO " + urlList);
LBHttpSolrServer.Req req = new LBHttpSolrServer.Req(request, urlList);
LBHttpSolrServer.Rsp rsp = lbServer.request(req);
return rsp.getResponse();
}
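A minimal CloudSolrServer sketch, assuming the single-argument zkHost constructor; the ZooKeeper addresses and collection name are invented. The request() method above resolves live, active replica URLs from the cluster state and delegates to the load-balancing server.
static void cloudExample() throws Exception {
  CloudSolrServer cloud = new CloudSolrServer("zkhost1:2181,zkhost2:2181");
  cloud.setDefaultCollection("collection1");
  SolrInputDocument doc = new SolrInputDocument();
  doc.addField("id", "cloud-1");
  cloud.add(doc);
  cloud.commit();
}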
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/StreamingBinaryResponseParser.java
@Override
public NamedList<Object> processResponse(InputStream body, String encoding) {
try {
JavaBinCodec codec = new JavaBinCodec() {
@Override
public SolrDocument readSolrDocument(FastInputStream dis) throws IOException {
SolrDocument doc = super.readSolrDocument(dis);
callback.streamSolrDocument( doc );
return null;
}
@Override
public SolrDocumentList readSolrDocumentList(FastInputStream dis) throws IOException {
SolrDocumentList solrDocs = new SolrDocumentList();
List list = (List) readVal(dis);
solrDocs.setNumFound((Long) list.get(0));
solrDocs.setStart((Long) list.get(1));
solrDocs.setMaxScore((Float) list.get(2));
callback.streamDocListInfo(
solrDocs.getNumFound(),
solrDocs.getStart(),
solrDocs.getMaxScore() );
// Read the Array
tagByte = dis.readByte();
if( (tagByte >>> 5) != (ARR >>> 5) ) {
throw new RuntimeException( "doclist must have an array" );
}
int sz = readSize(dis);
for (int i = 0; i < sz; i++) {
// must be a SolrDocument
readVal( dis );
}
return solrDocs;
}
};
return (NamedList<Object>) codec.unmarshal(body);
}
catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "parsing error", e);
}
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/StreamingBinaryResponseParser.java
@Override
public SolrDocument readSolrDocument(FastInputStream dis) throws IOException {
SolrDocument doc = super.readSolrDocument(dis);
callback.streamSolrDocument( doc );
return null;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/StreamingBinaryResponseParser.java
@Override
public SolrDocumentList readSolrDocumentList(FastInputStream dis) throws IOException {
SolrDocumentList solrDocs = new SolrDocumentList();
List list = (List) readVal(dis);
solrDocs.setNumFound((Long) list.get(0));
solrDocs.setStart((Long) list.get(1));
solrDocs.setMaxScore((Float) list.get(2));
callback.streamDocListInfo(
solrDocs.getNumFound(),
solrDocs.getStart(),
solrDocs.getMaxScore() );
// Read the Array
tagByte = dis.readByte();
if( (tagByte >>> 5) != (ARR >>> 5) ) {
throw new RuntimeException( "doclist must have an array" );
}
int sz = readSize(dis);
for (int i = 0; i < sz; i++) {
// must be a SolrDocument
readVal( dis );
}
return solrDocs;
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java
@Override
public void process(HttpRequest request, HttpContext context)
throws HttpException, IOException {
if (!request.containsHeader("Accept-Encoding")) {
request.addHeader("Accept-Encoding", "gzip, deflate");
}
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java
public void process(final HttpResponse response, final HttpContext context)
throws HttpException, IOException {
HttpEntity entity = response.getEntity();
Header ceheader = entity.getContentEncoding();
if (ceheader != null) {
HeaderElement[] codecs = ceheader.getElements();
for (int i = 0; i < codecs.length; i++) {
if (codecs[i].getName().equalsIgnoreCase("gzip")) {
response
.setEntity(new GzipDecompressingEntity(response.getEntity()));
return;
}
if (codecs[i].getName().equalsIgnoreCase("deflate")) {
response.setEntity(new DeflateDecompressingEntity(response
.getEntity()));
return;
}
}
}
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java
public InputStream getContent() throws IOException, IllegalStateException {
return new GZIPInputStream(wrappedEntity.getContent());
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpClientUtil.java
public InputStream getContent() throws IOException, IllegalStateException {
return new InflaterInputStream(wrappedEntity.getContent());
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrServer.java
@Override
public NamedList<Object> request(final SolrRequest request)
throws SolrServerException, IOException {
ResponseParser responseParser = request.getResponseParser();
if (responseParser == null) {
responseParser = parser;
}
return request(request, responseParser);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrServer.java
public NamedList<Object> request(final SolrRequest request,
final ResponseParser processor) throws SolrServerException, IOException {
HttpRequestBase method = null;
InputStream is = null;
SolrParams params = request.getParams();
Collection<ContentStream> streams = requestWriter.getContentStreams(request);
String path = requestWriter.getPath(request);
if (path == null || !path.startsWith("/")) {
path = DEFAULT_PATH;
}
ResponseParser parser = request.getResponseParser();
if (parser == null) {
parser = this.parser;
}
// The parser 'wt=' and 'version=' params are used instead of the original
// params
ModifiableSolrParams wparams = new ModifiableSolrParams(params);
wparams.set(CommonParams.WT, parser.getWriterType());
wparams.set(CommonParams.VERSION, parser.getVersion());
if (invariantParams != null) {
wparams.add(invariantParams);
}
params = wparams;
int tries = maxRetries + 1;
try {
while( tries-- > 0 ) {
// Note: since we aren't doing intermittent time keeping
// ourselves, the potential non-timeout latency could be as
// much as tries times the given timeAllowed (plus scheduling
// effects).
try {
if( SolrRequest.METHOD.GET == request.getMethod() ) {
if( streams != null ) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "GET can't send streams!" );
}
method = new HttpGet( baseUrl + path + ClientUtils.toQueryString( params, false ) );
}
else if( SolrRequest.METHOD.POST == request.getMethod() ) {
String url = baseUrl + path;
boolean isMultipart = ( streams != null && streams.size() > 1 );
LinkedList<NameValuePair> postParams = new LinkedList<NameValuePair>();
if (streams == null || isMultipart) {
HttpPost post = new HttpPost(url);
post.setHeader("Content-Charset", "UTF-8");
if (!this.useMultiPartPost && !isMultipart) {
post.addHeader("Content-Type",
"application/x-www-form-urlencoded; charset=UTF-8");
}
List<FormBodyPart> parts = new LinkedList<FormBodyPart>();
Iterator<String> iter = params.getParameterNamesIterator();
while (iter.hasNext()) {
String p = iter.next();
String[] vals = params.getParams(p);
if (vals != null) {
for (String v : vals) {
if (this.useMultiPartPost || isMultipart) {
parts.add(new FormBodyPart(p, new StringBody(v, Charset.forName("UTF-8"))));
} else {
postParams.add(new BasicNameValuePair(p, v));
}
}
}
}
if (isMultipart) {
for (ContentStream content : streams) {
String contentType = content.getContentType();
if(contentType==null) {
contentType = "application/octet-stream"; // default
}
parts.add(new FormBodyPart(content.getName(),
new InputStreamBody(
content.getStream(),
contentType,
content.getName())));
}
}
if (parts.size() > 0) {
MultipartEntity entity = new MultipartEntity(HttpMultipartMode.STRICT);
for(FormBodyPart p: parts) {
entity.addPart(p);
}
post.setEntity(entity);
} else {
//not using multipart
post.setEntity(new UrlEncodedFormEntity(postParams, "UTF-8"));
}
method = post;
}
// If it has one stream, it is the post body; put the params in the URL
else {
String pstr = ClientUtils.toQueryString(params, false);
HttpPost post = new HttpPost(url + pstr);
// Single stream as body
// Using a loop just to get the first one
final ContentStream[] contentStream = new ContentStream[1];
for (ContentStream content : streams) {
contentStream[0] = content;
break;
}
if (contentStream[0] instanceof RequestWriter.LazyContentStream) {
post.setEntity(new InputStreamEntity(contentStream[0].getStream(), -1) {
@Override
public Header getContentType() {
return new BasicHeader("Content-Type", contentStream[0].getContentType());
}
@Override
public boolean isRepeatable() {
return false;
}
});
} else {
post.setEntity(new InputStreamEntity(contentStream[0].getStream(), -1) {
@Override
public Header getContentType() {
return new BasicHeader("Content-Type", contentStream[0].getContentType());
}
@Override
public boolean isRepeatable() {
return false;
}
});
}
method = post;
}
}
else {
throw new SolrServerException("Unsupported method: "+request.getMethod() );
}
}
catch( NoHttpResponseException r ) {
method = null;
if(is != null) {
is.close();
}
// If out of tries then just rethrow (as normal error).
if (tries < 1) {
throw r;
}
}
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrServer.java
public UpdateResponse add(Iterator<SolrInputDocument> docIterator)
throws SolrServerException, IOException {
UpdateRequest req = new UpdateRequest();
req.setDocIterator(docIterator);
return req.process(this);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrServer.java
public UpdateResponse addBeans(final Iterator<?> beanIterator)
throws SolrServerException, IOException {
UpdateRequest req = new UpdateRequest();
req.setDocIterator(new Iterator<SolrInputDocument>() {
public boolean hasNext() {
return beanIterator.hasNext();
}
public SolrInputDocument next() {
Object o = beanIterator.next();
if (o == null) return null;
return getBinder().toSolrInputDocument(o);
}
public void remove() {
beanIterator.remove();
}
});
return req.process(this);
}
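A sketch of the iterator-based add() above: documents are pulled one at a time while the request streams, so a large batch never has to be materialized as a single collection. The ids and field names are invented.
static void streamDocsExample(HttpSolrServer solr) throws Exception {
  final Iterator<Integer> ids = Arrays.asList(1, 2, 3).iterator();
  solr.add(new Iterator<SolrInputDocument>() {
    public boolean hasNext() { return ids.hasNext(); }
    public SolrInputDocument next() {
      SolrInputDocument doc = new SolrInputDocument();
      doc.addField("id", "doc-" + ids.next());
      return doc;
    }
    public void remove() { throw new UnsupportedOperationException(); }
  });
}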
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrServer.java
public Rsp request(Req req) throws SolrServerException, IOException {
Rsp rsp = new Rsp();
Exception ex = null;
List<ServerWrapper> skipped = new ArrayList<ServerWrapper>(req.getNumDeadServersToTry());
for (String serverStr : req.getServers()) {
serverStr = normalize(serverStr);
// if the server is currently a zombie, just skip to the next one
ServerWrapper wrapper = zombieServers.get(serverStr);
if (wrapper != null) {
// System.out.println("ZOMBIE SERVER QUERIED: " + serverStr);
if (skipped.size() < req.getNumDeadServersToTry())
skipped.add(wrapper);
continue;
}
rsp.server = serverStr;
HttpSolrServer server = makeServer(serverStr);
try {
rsp.rsp = server.request(req.getRequest());
return rsp; // SUCCESS
} catch (SolrException e) {
// we retry on 404 or 403 or 503 - you can see this on solr shutdown
if (e.code() == 404 || e.code() == 403 || e.code() == 503 || e.code() == 500) {
ex = addZombie(server, e);
} else {
// Server is alive but the request was likely malformed or invalid
throw e;
}
// TODO: consider using below above - currently does cause a problem with distrib updates:
// seems to match up against a failed forward to leader exception as well...
// || e.getMessage().contains("java.net.SocketException")
// || e.getMessage().contains("java.net.ConnectException")
} catch (SocketException e) {
ex = addZombie(server, e);
} catch (SocketTimeoutException e) {
ex = addZombie(server, e);
} catch (SolrServerException e) {
Throwable rootCause = e.getRootCause();
if (rootCause instanceof IOException) {
ex = addZombie(server, e);
} else {
throw e;
}
} catch (Exception e) {
throw new SolrServerException(e);
}
}
// try the servers we previously skipped
for (ServerWrapper wrapper : skipped) {
try {
rsp.rsp = wrapper.solrServer.request(req.getRequest());
zombieServers.remove(wrapper.getKey());
return rsp; // SUCCESS
} catch (SolrException e) {
// we retry on 404 or 403 or 503 - you can see this on solr shutdown
if (e.code() == 404 || e.code() == 403 || e.code() == 503 || e.code() == 500) {
ex = e;
// already a zombie, no need to re-add
} else {
// Server is alive but the request was malformed or invalid
zombieServers.remove(wrapper.getKey());
throw e;
}
} catch (SocketException e) {
ex = e;
} catch (SocketTimeoutException e) {
ex = e;
} catch (SolrServerException e) {
Throwable rootCause = e.getRootCause();
if (rootCause instanceof IOException) {
ex = e;
// already a zombie, no need to re-add
} else {
throw e;
}
} catch (Exception e) {
throw new SolrServerException(e);
}
}
if (ex == null) {
throw new SolrServerException("No live SolrServers available to handle this request");
} else {
throw new SolrServerException("No live SolrServers available to handle this request:" + zombieServers.keySet(), ex);
}
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrServer.java
@Override
public NamedList<Object> request(final SolrRequest request)
throws SolrServerException, IOException {
Exception ex = null;
ServerWrapper[] serverList = aliveServerList;
int maxTries = serverList.length;
Map<String,ServerWrapper> justFailed = null;
for (int attempts=0; attempts<maxTries; attempts++) {
int count = counter.incrementAndGet();
ServerWrapper wrapper = serverList[count % serverList.length];
wrapper.lastUsed = System.currentTimeMillis();
try {
return wrapper.solrServer.request(request);
} catch (SolrException e) {
// Server is alive but the request was malformed or invalid
throw e;
} catch (SolrServerException e) {
if (e.getRootCause() instanceof IOException) {
ex = e;
moveAliveToDead(wrapper);
if (justFailed == null) justFailed = new HashMap<String,ServerWrapper>();
justFailed.put(wrapper.getKey(), wrapper);
} else {
throw e;
}
} catch (Exception e) {
throw new SolrServerException(e);
}
}
// try other standard servers that we didn't try just now
for (ServerWrapper wrapper : zombieServers.values()) {
if (wrapper.standard==false || justFailed!=null && justFailed.containsKey(wrapper.getKey())) continue;
try {
NamedList<Object> rsp = wrapper.solrServer.request(request);
// remove from zombie list *before* adding to alive to avoid a race that could lose a server
zombieServers.remove(wrapper.getKey());
addToAlive(wrapper);
return rsp;
} catch (SolrException e) {
// Server is alive but the request was malformed or invalid
throw e;
} catch (SolrServerException e) {
if (e.getRootCause() instanceof IOException) {
ex = e;
// still dead
} else {
throw e;
}
} catch (Exception e) {
throw new SolrServerException(e);
}
}
if (ex == null) {
throw new SolrServerException("No live SolrServers available to handle this request");
} else {
throw new SolrServerException("No live SolrServers available to handle this request", ex);
}
}
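A hedged sketch of the load balancer above, assuming the varargs base-URL constructor; the host names and query are invented. Failed servers are moved to the zombie list and retried as shown in request().
static void lbExample() throws Exception {
  LBHttpSolrServer lb = new LBHttpSolrServer(
      "http://host1:8983/solr/collection1", "http://host2:8983/solr/collection1");
  QueryResponse rsp = lb.query(new SolrQuery("*:*"));
  System.out.println("hits: " + rsp.getResults().getNumFound());
}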
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse add(Collection<SolrInputDocument> docs) throws SolrServerException, IOException {
return add(docs, -1);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse add(Collection<SolrInputDocument> docs, int commitWithinMs) throws SolrServerException, IOException {
UpdateRequest req = new UpdateRequest();
req.add(docs);
req.setCommitWithin(commitWithinMs);
return req.process(this);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse addBeans(Collection<?> beans ) throws SolrServerException, IOException {
return addBeans(beans, -1);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse addBeans(Collection<?> beans, int commitWithinMs) throws SolrServerException, IOException {
DocumentObjectBinder binder = this.getBinder();
ArrayList<SolrInputDocument> docs = new ArrayList<SolrInputDocument>(beans.size());
for (Object bean : beans) {
docs.add(binder.toSolrInputDocument(bean));
}
return add(docs, commitWithinMs);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse add(SolrInputDocument doc ) throws SolrServerException, IOException {
return add(doc, -1);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse add(SolrInputDocument doc, int commitWithinMs) throws SolrServerException, IOException {
UpdateRequest req = new UpdateRequest();
req.add(doc);
req.setCommitWithin(commitWithinMs);
return req.process(this);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse addBean(Object obj) throws IOException, SolrServerException {
return addBean(obj, -1);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse addBean(Object obj, int commitWithinMs) throws IOException, SolrServerException {
return add(getBinder().toSolrInputDocument(obj),commitWithinMs);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse commit( ) throws SolrServerException, IOException {
return commit(true, true);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse optimize( ) throws SolrServerException, IOException {
return optimize(true, true, 1);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse commit( boolean waitFlush, boolean waitSearcher ) throws SolrServerException, IOException {
return new UpdateRequest().setAction( UpdateRequest.ACTION.COMMIT, waitFlush, waitSearcher ).process( this );
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse commit( boolean waitFlush, boolean waitSearcher, boolean softCommit ) throws SolrServerException, IOException {
return new UpdateRequest().setAction( UpdateRequest.ACTION.COMMIT, waitFlush, waitSearcher, softCommit ).process( this );
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse optimize( boolean waitFlush, boolean waitSearcher ) throws SolrServerException, IOException {
return optimize(waitFlush, waitSearcher, 1);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse optimize(boolean waitFlush, boolean waitSearcher, int maxSegments ) throws SolrServerException, IOException {
return new UpdateRequest().setAction( UpdateRequest.ACTION.OPTIMIZE, waitFlush, waitSearcher, maxSegments ).process( this );
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse rollback() throws SolrServerException, IOException {
return new UpdateRequest().rollback().process( this );
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse deleteById(String id) throws SolrServerException, IOException {
return deleteById(id, -1);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse deleteById(String id, int commitWithinMs) throws SolrServerException, IOException {
UpdateRequest req = new UpdateRequest();
req.deleteById(id);
req.setCommitWithin(commitWithinMs);
return req.process(this);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse deleteById(List<String> ids) throws SolrServerException, IOException {
return deleteById(ids, -1);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse deleteById(List<String> ids, int commitWithinMs) throws SolrServerException, IOException {
UpdateRequest req = new UpdateRequest();
req.deleteById(ids);
req.setCommitWithin(commitWithinMs);
return req.process(this);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse deleteByQuery(String query) throws SolrServerException, IOException {
return deleteByQuery(query, -1);
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public UpdateResponse deleteByQuery(String query, int commitWithinMs) throws SolrServerException, IOException {
UpdateRequest req = new UpdateRequest();
req.deleteByQuery(query);
req.setCommitWithin(commitWithinMs);
return req.process(this);
}
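A short sketch of the typical indexing flow built from the convenience methods above; field names, values, and the commitWithin interval are illustrative.
static void indexExample(SolrServer solr) throws Exception {
  SolrInputDocument doc = new SolrInputDocument();
  doc.addField("id", "42");
  doc.addField("title", "hello");
  solr.add(doc, 10000);              // commitWithin 10 seconds
  solr.commit();                     // or rely on commitWithin instead
  solr.deleteByQuery("title:hello");
  solr.commit();
}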
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public SolrPingResponse ping() throws SolrServerException, IOException {
return new SolrPing().process( this );
}
// in solr/solrj/src/java/org/apache/solr/client/solrj/SolrServer.java
public QueryResponse queryAndStreamResponse( SolrParams params, StreamingResponseCallback callback ) throws SolrServerException, IOException
{
ResponseParser parser = new StreamingBinaryResponseParser( callback );
QueryRequest req = new QueryRequest( params );
req.setStreamingResponseCallback( callback );
req.setResponseParser( parser );
return req.process(this);
}
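A hedged sketch of queryAndStreamResponse() above: results are pushed to a StreamingResponseCallback as they are parsed instead of being collected into a SolrDocumentList. The callback method signatures are inferred from the streamDocListInfo/streamSolrDocument calls elsewhere in this listing, and the query is invented.
static void streamQueryExample(SolrServer solr) throws Exception {
  solr.queryAndStreamResponse(new SolrQuery("*:*"), new StreamingResponseCallback() {
    @Override
    public void streamDocListInfo(long numFound, long start, Float maxScore) {
      System.out.println("numFound=" + numFound);
    }
    @Override
    public void streamSolrDocument(SolrDocument doc) {
      System.out.println("doc id=" + doc.getFieldValue("id"));
    }
  });
}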
// in solr/solrj/src/java/org/apache/solr/client/solrj/util/ClientUtils.java
public static void writeXML( SolrInputDocument doc, Writer writer ) throws IOException
{
writer.write("<doc boost=\""+doc.getDocumentBoost()+"\">");
for( SolrInputField field : doc ) {
float boost = field.getBoost();
String name = field.getName();
for( Object v : field ) {
String update = null;
if (v instanceof Map) {
// currently only supports a single value
for (Entry<Object,Object> entry : ((Map<Object,Object>)v).entrySet()) {
update = entry.getKey().toString();
Object fieldVal = entry.getValue();
v = fieldVal;
}
}
if (v instanceof Date) {
v = DateUtil.getThreadLocalDateFormat().format( (Date)v );
} else if (v instanceof byte[]) {
byte[] bytes = (byte[]) v;
v = Base64.byteArrayToBase64(bytes, 0,bytes.length);
} else if (v instanceof ByteBuffer) {
ByteBuffer bytes = (ByteBuffer) v;
v = Base64.byteArrayToBase64(bytes.array(), bytes.position(),bytes.limit() - bytes.position());
}
if (update == null) {
if( boost != 1.0f ) {
XML.writeXML(writer, "field", v.toString(), "name", name, "boost", boost );
} else if (v != null) {
XML.writeXML(writer, "field", v.toString(), "name", name );
}
} else {
if( boost != 1.0f ) {
XML.writeXML(writer, "field", v.toString(), "name", name, "boost", boost, "update", update);
} else if (v != null) {
XML.writeXML(writer, "field", v.toString(), "name", name, "update", update);
}
}
// only write the boost for the first multi-valued field
// otherwise, the used boost is the product of all the boost values
boost = 1.0f;
}
}
writer.write("</doc>");
}
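A small sketch: render a SolrInputDocument to Solr's <doc> update XML using the writeXML() helper above; the document contents are invented.
static String toUpdateXml(SolrInputDocument doc) throws IOException {
  StringWriter writer = new StringWriter();
  ClientUtils.writeXML(doc, writer);
  return writer.toString(); // e.g. <doc boost="1.0"><field name="id">42</field></doc>
}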
// in solr/solrj/src/java/org/apache/noggit/CharArr.java
public int read() throws IOException {
if (start>=end) return -1;
return buf[start++];
}
// in solr/solrj/src/java/org/apache/noggit/CharArr.java
public int read(CharBuffer cb) throws IOException {
/***
int sz = size();
if (sz<=0) return -1;
if (sz>0) cb.put(buf, start, sz);
return -1;
***/
int sz = size();
if (sz>0) cb.put(buf, start, sz);
start=end;
while (true) {
fill();
int s = size();
if (s==0) return sz==0 ? -1 : sz;
sz += s;
cb.put(buf, start, s);
}
}
// in solr/solrj/src/java/org/apache/noggit/CharArr.java
public int fill() throws IOException {
return 0; // or -1?
}
// in solr/solrj/src/java/org/apache/noggit/CharArr.java
public final Appendable append(CharSequence csq) throws IOException {
return append(csq, 0, csq.length());
}
// in solr/solrj/src/java/org/apache/noggit/CharArr.java
public Appendable append(CharSequence csq, int start, int end) throws IOException {
write(csq.subSequence(start, end).toString());
return null;
}
// in solr/solrj/src/java/org/apache/noggit/CharArr.java
public final Appendable append(char c) throws IOException {
write(c);
return this;
}
// in solr/solrj/src/java/org/apache/noggit/CharArr.java
public Appendable append(CharSequence csq, int start, int end) throws IOException {
return this;
}
// in solr/solrj/src/java/org/apache/noggit/CharArr.java
public int read() throws IOException {
if (start>=end) fill();
return start>=end ? -1 : buf[start++];
}
// in solr/solrj/src/java/org/apache/noggit/CharArr.java
public int read(CharBuffer cb) throws IOException {
// empty the buffer and then read direct
int sz = size();
if (sz>0) cb.put(buf,start,end);
int sz2 = in.read(cb);
if (sz2>=0) return sz+sz2;
return sz>0 ? sz : -1;
}
// in solr/solrj/src/java/org/apache/noggit/CharArr.java
public int fill() throws IOException {
if (start>=end) {
reset();
} else if (start>0) {
System.arraycopy(buf, start, buf, 0, size());
end=size(); start=0;
}
/***
// fill fully or not???
do {
int sz = in.read(buf,end,buf.length-end);
if (sz==-1) return;
end+=sz;
} while (end < buf.length);
***/
int sz = in.read(buf,end,buf.length-end);
if (sz>0) end+=sz;
return sz;
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
protected void fill() throws IOException {
if (in!=null) {
gpos += end;
start=0;
int num = in.read(buf,0,buf.length);
end = num>=0 ? num : 0;
}
if (start>=end) eof=true;
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
private void getMore() throws IOException {
fill();
if (start>=end) {
throw err(null);
}
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
protected int getChar() throws IOException {
if (start>=end) {
fill();
if (start>=end) return -1;
}
return buf[start++];
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
private int getCharNWS() throws IOException {
for (;;) {
int ch = getChar();
if (!(ch==' ' || ch=='\t' || ch=='\n' || ch=='\r')) return ch;
}
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
private void expect(char[] arr) throws IOException {
for (int i=1; i<arr.length; i++) {
int ch = getChar();
if (ch != arr[i]) {
throw err("Expected " + new String(arr));
}
}
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
private long readNumber(int firstChar, boolean isNeg) throws IOException {
out.unsafeWrite(firstChar); // unsafe OK since we know output is big enough
// We build up the number in the negative plane since it's larger (by one) than
// the positive plane.
long v = '0' - firstChar;
// can't overflow a long in 18 decimal digits (i.e. 17 additional after the first).
// we also need 22 additional to handle double so we'll handle in 2 separate loops.
int i;
for (i=0; i<17; i++) {
int ch = getChar();
// TODO: is this switch faster as an if-then-else?
switch(ch) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
v = v*10 - (ch-'0');
out.unsafeWrite(ch);
continue;
case '.':
out.unsafeWrite('.');
valstate = readFrac(out,22-i);
return 0;
case 'e':
case 'E':
out.unsafeWrite(ch);
nstate=0;
valstate = readExp(out,22-i);
return 0;
default:
// return the number, relying on nextEvent() to return an error
// for invalid chars following the number.
if (ch!=-1) --start; // push back last char if not EOF
valstate = LONG;
return isNeg ? v : -v;
}
}
// after this, we could overflow a long and need to do extra checking
boolean overflow = false;
long maxval = isNeg ? Long.MIN_VALUE : -Long.MAX_VALUE;
for (; i<22; i++) {
int ch = getChar();
switch(ch) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
if (v < (0x8000000000000000L/10)) overflow=true; // can't multiply by 10 w/o overflowing
v *= 10;
int digit = ch - '0';
if (v < maxval + digit) overflow=true; // can't add digit w/o overflowing
v -= digit;
out.unsafeWrite(ch);
continue;
case '.':
out.unsafeWrite('.');
valstate = readFrac(out,22-i);
return 0;
case 'e':
case 'E':
out.unsafeWrite(ch);
nstate=0;
valstate = readExp(out,22-i);
return 0;
default:
// return the number, relying on nextEvent() to return an error
// for invalid chars following the number.
if (ch!=-1) --start; // push back last char if not EOF
valstate = overflow ? BIGNUMBER : LONG;
return isNeg ? v : -v;
}
}
nstate=0;
valstate = BIGNUMBER;
return 0;
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
private int readFrac(CharArr arr, int lim) throws IOException {
nstate = HAS_FRACTION; // deliberate set instead of '|'
while(--lim>=0) {
int ch = getChar();
if (ch>='0' && ch<='9') {
arr.write(ch);
} else if (ch=='e' || ch=='E') {
arr.write(ch);
return readExp(arr,lim);
} else {
if (ch!=-1) start--; // back up
return NUMBER;
}
}
return BIGNUMBER;
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
private int readExp(CharArr arr, int lim) throws IOException {
nstate |= HAS_EXPONENT;
int ch = getChar(); lim--;
if (ch=='+' || ch=='-') {
arr.write(ch);
ch = getChar(); lim--;
}
// make sure at least one digit is read.
if (ch<'0' || ch>'9') {
throw err("missing exponent number");
}
arr.write(ch);
return readExpDigits(arr,lim);
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
private int readExpDigits(CharArr arr, int lim) throws IOException {
while (--lim>=0) {
int ch = getChar();
if (ch>='0' && ch<='9') {
arr.write(ch);
} else {
if (ch!=-1) start--; // back up
return NUMBER;
}
}
return BIGNUMBER;
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
private void continueNumber(CharArr arr) throws IOException {
if (arr != out) arr.write(out);
if ((nstate & HAS_EXPONENT)!=0){
readExpDigits(arr, Integer.MAX_VALUE);
return;
}
if (nstate != 0) {
readFrac(arr, Integer.MAX_VALUE);
return;
}
for(;;) {
int ch = getChar();
if (ch>='0' && ch <='9') {
arr.write(ch);
} else if (ch=='.') {
arr.write(ch);
readFrac(arr,Integer.MAX_VALUE);
return;
} else if (ch=='e' || ch=='E') {
arr.write(ch);
readExp(arr,Integer.MAX_VALUE);
return;
} else {
if (ch!=-1) start--;
return;
}
}
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
private char readEscapedChar() throws IOException {
switch (getChar()) {
case '"' : return '"';
case '\\' : return '\\';
case '/' : return '/';
case 'n' : return '\n';
case 'r' : return '\r';
case 't' : return '\t';
case 'f' : return '\f';
case 'b' : return '\b';
case 'u' :
return (char)(
(hexval(getChar()) << 12)
| (hexval(getChar()) << 8)
| (hexval(getChar()) << 4)
| (hexval(getChar())));
}
throw err("Invalid character escape in string");
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
private CharArr readStringChars() throws IOException {
char c=0;
int i;
for (i=start; i<end; i++) {
c = buf[i];
if (c=='"') {
tmp.set(buf,start,i); // directly use input buffer
start=i+1; // advance past last '"'
return tmp;
} else if (c=='\\') {
break;
}
}
out.reset();
readStringChars2(out, i);
return out;
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
private void readStringChars2(CharArr arr, int middle) throws IOException {
for (;;) {
if (middle>=end) {
arr.write(buf,start,middle-start);
start=middle;
getMore();
middle=start;
}
int ch = buf[middle++];
if (ch=='"') {
int len = middle-start-1;
if (len>0) arr.write(buf,start,len);
start=middle;
return;
} else if (ch=='\\') {
int len = middle-start-1;
if (len>0) arr.write(buf,start,len);
start=middle;
arr.write(readEscapedChar());
middle=start;
}
}
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
private int next(int ch) throws IOException {
for(;;) {
switch (ch) {
case ' ':
case '\t': break;
case '\r':
case '\n': break; // try and keep track of linecounts?
case '"' :
valstate = STRING;
return STRING;
case '{' :
push();
state= DID_OBJSTART;
return OBJECT_START;
case '[':
push();
state=DID_ARRSTART;
return ARRAY_START;
case '0' :
out.reset();
//special case '0'? If next char isn't '.' val=0
ch=getChar();
if (ch=='.') {
start--; ch='0';
readNumber('0',false);
return valstate;
} else if (ch>'9' || ch<'0') {
out.unsafeWrite('0');
if (ch!=-1) start--;
lval = 0;
valstate=LONG;
return LONG;
} else {
throw err("Leading zeros not allowed");
}
case '1' :
case '2' :
case '3' :
case '4' :
case '5' :
case '6' :
case '7' :
case '8' :
case '9' :
out.reset();
lval = readNumber(ch,false);
return valstate;
case '-' :
out.reset();
out.unsafeWrite('-');
ch = getChar();
if (ch<'0' || ch>'9') throw err("expected digit after '-'");
lval = readNumber(ch,true);
return valstate;
case 't':
valstate=BOOLEAN;
// TODO: test performance of this non-branching inline version.
// if ((('r'-getChar())|('u'-getChar())|('e'-getChar())) != 0) err("");
expect(JSONUtil.TRUE_CHARS);
bool=true;
return BOOLEAN;
case 'f':
valstate=BOOLEAN;
expect(JSONUtil.FALSE_CHARS);
bool=false;
return BOOLEAN;
case 'n':
valstate=NULL;
expect(JSONUtil.NULL_CHARS);
return NULL;
case -1:
if (getLevel()>0) throw err("Premature EOF");
return EOF;
default: throw err(null);
}
ch = getChar();
}
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
public int nextEvent() throws IOException {
if (valstate==STRING) {
readStringChars2(devNull,start);
}
else if (valstate==BIGNUMBER) {
continueNumber(devNull);
}
valstate=0;
int ch; // TODO: factor out getCharNWS() to here and check speed
switch (state) {
case 0:
return event = next(getCharNWS());
case DID_OBJSTART:
ch = getCharNWS();
if (ch=='}') {
pop();
return event = OBJECT_END;
}
if (ch != '"') {
throw err("Expected string");
}
state = DID_MEMNAME;
valstate = STRING;
return event = STRING;
case DID_MEMNAME:
ch = getCharNWS();
if (ch!=':') {
throw err("Expected key,value separator ':'");
}
state = DID_MEMVAL; // set state first because it might be pushed...
return event = next(getChar());
case DID_MEMVAL:
ch = getCharNWS();
if (ch=='}') {
pop();
return event = OBJECT_END;
} else if (ch!=',') {
throw err("Expected ',' or '}'");
}
ch = getCharNWS();
if (ch != '"') {
throw err("Expected string");
}
state = DID_MEMNAME;
valstate = STRING;
return event = STRING;
case DID_ARRSTART:
ch = getCharNWS();
if (ch==']') {
pop();
return event = ARRAY_END;
}
state = DID_ARRELEM; // set state first, might be pushed...
return event = next(ch);
case DID_ARRELEM:
ch = getCharNWS();
if (ch==']') {
pop();
return event = ARRAY_END;
} else if (ch!=',') {
throw err("Expected ',' or ']'");
}
// state = DID_ARRELEM;
return event = next(getChar());
}
return 0;
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
private void goTo(int what) throws IOException {
if (valstate==what) { valstate=0; return; }
if (valstate==0) {
int ev = nextEvent(); // TODO
if (valstate!=what) {
throw err("type mismatch");
}
valstate=0;
}
else {
throw err("type mismatch");
}
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
public String getString() throws IOException {
return getStringChars().toString();
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
public CharArr getStringChars() throws IOException {
goTo(STRING);
return readStringChars();
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
public void getString(CharArr output) throws IOException {
goTo(STRING);
readStringChars2(output,start);
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
public long getLong() throws IOException {
goTo(LONG);
return lval;
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
public double getDouble() throws IOException {
return Double.parseDouble(getNumberChars().toString());
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
public CharArr getNumberChars() throws IOException {
int ev=0;
if (valstate==0) ev = nextEvent();
if (valstate == LONG || valstate == NUMBER) {
valstate=0;
return out;
}
else if (valstate==BIGNUMBER) {
continueNumber(out);
valstate=0;
return out;
} else {
throw err("Unexpected " + ev);
}
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
public void getNumberChars(CharArr output) throws IOException {
int ev=0;
if (valstate==0) ev=nextEvent();
if (valstate == LONG || valstate == NUMBER) output.write(this.out);
else if (valstate==BIGNUMBER) {
continueNumber(output);
} else {
throw err("Unexpected " + ev);
}
valstate=0;
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
public boolean getBoolean() throws IOException {
goTo(BOOLEAN);
return bool;
}
// in solr/solrj/src/java/org/apache/noggit/JSONParser.java
public void getNull() throws IOException {
goTo(NULL);
}
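A small sketch of driving the JSONParser event loop directly, using only the events and getters shown in this listing; the JSON literal is invented.
static void parseEventsExample() throws IOException {
  JSONParser p = new JSONParser("{\"name\":\"solr\",\"count\":3}");
  for (int ev = p.nextEvent(); ev != JSONParser.EOF; ev = p.nextEvent()) {
    if (ev == JSONParser.STRING) {
      System.out.println("string: " + p.getString()); // member names and string values
    } else if (ev == JSONParser.LONG) {
      System.out.println("long: " + p.getLong());
    }
    // OBJECT_START/OBJECT_END and the other structural events carry no value to read
  }
}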
// in solr/solrj/src/java/org/apache/noggit/ObjectBuilder.java
public static Object fromJSON(String json) throws IOException {
JSONParser p = new JSONParser(json);
return getVal(p);
}
// in solr/solrj/src/java/org/apache/noggit/ObjectBuilder.java
public static Object getVal(JSONParser parser) throws IOException {
return new ObjectBuilder(parser).getVal();
}
// in solr/solrj/src/java/org/apache/noggit/ObjectBuilder.java
public Object getVal() throws IOException {
int ev = parser.lastEvent();
switch(ev) {
case JSONParser.STRING: return getString();
case JSONParser.LONG: return getLong();
case JSONParser.NUMBER: return getNumber();
case JSONParser.BIGNUMBER: return getBigNumber();
case JSONParser.BOOLEAN: return getBoolean();
case JSONParser.NULL: return getNull();
case JSONParser.OBJECT_START: return getObject();
case JSONParser.OBJECT_END: return null; // OR ERROR?
case JSONParser.ARRAY_START: return getArray();
case JSONParser.ARRAY_END: return null; // OR ERROR?
case JSONParser.EOF: return null; // OR ERROR?
default: return null; // OR ERROR?
}
}
// in solr/solrj/src/java/org/apache/noggit/ObjectBuilder.java
public Object getString() throws IOException {
return parser.getString();
}
// in solr/solrj/src/java/org/apache/noggit/ObjectBuilder.java
public Object getLong() throws IOException {
return Long.valueOf(parser.getLong());
}
// in solr/solrj/src/java/org/apache/noggit/ObjectBuilder.java
public Object getNumber() throws IOException {
CharArr num = parser.getNumberChars();
String numstr = num.toString();
double d = Double.parseDouble(numstr);
if (!Double.isInfinite(d)) return Double.valueOf(d);
// TODO: use more efficient constructor in Java5
return new BigDecimal(numstr);
}
// in solr/solrj/src/java/org/apache/noggit/ObjectBuilder.java
public Object getBigNumber() throws IOException {
CharArr num = parser.getNumberChars();
String numstr = num.toString();
for(int ch; (ch=num.read())!=-1;) {
if (ch=='.' || ch=='e' || ch=='E') return new BigDecimal(numstr);
}
// assumed completion of the truncated listing: no '.', 'e' or 'E' was seen,
// so the value is integral; noggit falls back to a BigInteger here
return new BigInteger(numstr);
}
// in solr/solrj/src/java/org/apache/noggit/ObjectBuilder.java
public Object getBoolean() throws IOException {
return parser.getBoolean();
}
// in solr/solrj/src/java/org/apache/noggit/ObjectBuilder.java
public Object getNull() throws IOException {
parser.getNull();
return null;
}
// in solr/solrj/src/java/org/apache/noggit/ObjectBuilder.java
public Object newObject() throws IOException {
return new LinkedHashMap();
}
// in solr/solrj/src/java/org/apache/noggit/ObjectBuilder.java
public Object getKey() throws IOException {
return parser.getString();
}
// in solr/solrj/src/java/org/apache/noggit/ObjectBuilder.java
public void addKeyVal(Object map, Object key, Object val) throws IOException {
Object prev = ((Map)map).put(key,val);
// TODO: test for repeated value?
}
// in solr/solrj/src/java/org/apache/noggit/ObjectBuilder.java
public Object getObject() throws IOException {
Object m = newObject();
for(;;) {
int ev = parser.nextEvent();
if (ev==JSONParser.OBJECT_END) return objectEnd(m);
Object key = getKey();
ev = parser.nextEvent();
Object val = getVal();
addKeyVal(m, key, val);
}
}
// in solr/solrj/src/java/org/apache/noggit/ObjectBuilder.java
public void addArrayVal(Object arr, Object val) throws IOException {
((List)arr).add(val);
}
// in solr/solrj/src/java/org/apache/noggit/ObjectBuilder.java
public Object getArray() throws IOException {
Object arr = newArray();
for(;;) {
int ev = parser.nextEvent();
if (ev==JSONParser.ARRAY_END) return endArray(arr);
Object val = getVal();
addArrayVal(arr, val);
}
}
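A hedged sketch of the ObjectBuilder above: fromJSON() maps JSON objects and arrays onto plain Java maps and lists; the input string is invented.
static void fromJsonExample() throws IOException {
  Map<?, ?> obj = (Map<?, ?>) ObjectBuilder.fromJSON("{\"a\":[1,2,3],\"b\":\"x\"}");
  List<?> a = (List<?>) obj.get("a");
  System.out.println(a.get(0));      // 1, decoded as a Long
  System.out.println(obj.get("b"));  // "x"
}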
// in solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
void doAdd(SolrContentHandler handler, AddUpdateCommand template)
throws IOException {
template.solrDoc = handler.newDocument();
processor.processAdd(template);
}
// in solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
void addDoc(SolrContentHandler handler) throws IOException {
templateAdd.clear();
doAdd(handler, templateAdd);
}
// in solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java
private TikaConfig getDefaultConfig(ClassLoader classLoader) throws MimeTypeException, IOException {
return new TikaConfig(classLoader);
}
// in solr/contrib/uima/src/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
String text = null;
try {
/* get Solr document */
SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
/* get the fields to analyze */
String[] texts = getTextsToAnalyze(solrInputDocument);
for (int i = 0; i < texts.length; i++) {
text = texts[i];
if (text != null && text.length()>0) {
/* process the text value */
JCas jcas = processText(text);
UIMAToSolrMapper uimaToSolrMapper = new UIMAToSolrMapper(solrInputDocument, jcas);
/* get field mapping from config */
Map<String, Map<String, MapField>> typesAndFeaturesFieldsMap = solrUIMAConfiguration
.getTypesFeaturesFieldsMapping();
/* map type features on fields */
for (String typeFQN : typesAndFeaturesFieldsMap.keySet()) {
uimaToSolrMapper.map(typeFQN, typesAndFeaturesFieldsMap.get(typeFQN));
}
}
}
} catch (Exception e) {
String logField = solrUIMAConfiguration.getLogField();
if(logField == null){
SchemaField uniqueKeyField = solrCore.getSchema().getUniqueKeyField();
if(uniqueKeyField != null){
logField = uniqueKeyField.getName();
}
}
String optionalFieldInfo = logField == null ? "." :
new StringBuilder(". ").append(logField).append("=")
.append((String)cmd.getSolrInputDocument().getField(logField).getValue())
.append(", ").toString();
int len = Math.min(text.length(), 100);
if (solrUIMAConfiguration.isIgnoreErrors()) {
log.warn(new StringBuilder("skip the text processing due to ")
.append(e.getLocalizedMessage()).append(optionalFieldInfo)
.append(" text=\"").append(text.substring(0, len)).append("...\"").toString());
} else {
throw new SolrException(ErrorCode.SERVER_ERROR,
new StringBuilder("processing error: ")
.append(e.getLocalizedMessage()).append(optionalFieldInfo)
.append(" text=\"").append(text.substring(0, len)).append("...\"").toString(), e);
}
}
super.processAdd(cmd);
}
// in solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java
public short nextToken() throws IOException {
final boolean hasNextToken = wordTokenFilter.incrementToken();
if (hasNextToken) {
short flags = 0;
final char[] image = term.buffer();
final int length = term.length();
tempCharSequence.reset(image, 0, length);
if (length == 1 && image[0] == ',') {
// ChineseTokenizer seems to convert all punctuation to ','
// characters
flags = ITokenizer.TT_PUNCTUATION;
} else if (numeric.matcher(tempCharSequence).matches()) {
flags = ITokenizer.TT_NUMERIC;
} else {
flags = ITokenizer.TT_TERM;
}
return flags;
}
return ITokenizer.TT_EOF;
}
// in solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/LuceneCarrot2TokenizerFactory.java
public void reset(Reader input) throws IOException {
try {
sentenceTokenizer.reset(input);
wordTokenFilter = (TokenStream) tokenFilterClass.getConstructor(
TokenStream.class).newInstance(sentenceTokenizer);
term = wordTokenFilter.addAttribute(CharTermAttribute.class);
} catch (Exception e) {
throw ExceptionUtils.wrapAsRuntimeException(e);
}
}
// in solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
@Override
public IResource[] getAll(final String resource) {
final String resourceName = carrot2ResourcesDir + "/" + resource;
log.debug("Looking for Solr resource: " + resourceName);
InputStream resourceStream = null;
final byte [] asBytes;
try {
resourceStream = resourceLoader.openResource(resourceName);
asBytes = IOUtils.toByteArray(resourceStream);
} catch (RuntimeException e) {
log.debug("Resource not found in Solr's config: " + resourceName
+ ". Using the default " + resource + " from Carrot JAR.");
return new IResource[] {};
} catch (IOException e) {
log.warn("Could not read Solr resource " + resourceName);
return new IResource[] {};
} finally {
if (resourceStream != null) Closeables.closeQuietly(resourceStream);
}
log.info("Loaded Solr resource: " + resourceName);
final IResource foundResource = new IResource() {
@Override
public InputStream open() throws IOException {
return new ByteArrayInputStream(asBytes);
}
@Override
public int hashCode() {
// In case multiple resources are found they will be deduped, but we don't use it in Solr,
// so simply rely on instance equivalence.
return super.hashCode();
}
@Override
public boolean equals(Object obj) {
// In case multiple resources are found they will be deduped, but we don't use it in Solr,
// so simply rely on instance equivalence.
return super.equals(obj);
}
@Override
public String toString() {
return "Solr config resource: " + resourceName;
}
};
return new IResource[] { foundResource };
}
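// Example (illustrative, not from the Solr codebase): IOUtils.toByteArray(), used by getAll() above,
// can be approximated with the JDK alone. A minimal sketch that drains an InputStream into a byte[]
// and always closes the stream; the class name is hypothetical.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;

public final class StreamBytes {
  private StreamBytes() {}

  public static byte[] toByteArray(InputStream in) throws IOException {
    try {
      ByteArrayOutputStream out = new ByteArrayOutputStream();
      byte[] buf = new byte[8192];
      int n;
      while ((n = in.read(buf)) != -1) {
        out.write(buf, 0, n);
      }
      return out.toByteArray();
    } finally {
      in.close();
    }
  }

  public static void main(String[] args) throws IOException {
    byte[] data = toByteArray(new ByteArrayInputStream("hello".getBytes("UTF-8")));
    System.out.println(data.length); // 5
  }
}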
// in solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
@Override
public InputStream open() throws IOException {
return new ByteArrayInputStream(asBytes);
}
// in solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
private List<Document> getDocuments(SolrDocumentList solrDocList, Map<SolrDocument, Integer> docIds,
Query query, final SolrQueryRequest sreq) throws IOException {
SolrHighlighter highlighter = null;
SolrParams solrParams = sreq.getParams();
SolrCore core = sreq.getCore();
String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
String titleFieldSpec = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
String snippetFieldSpec = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME, titleFieldSpec);
String languageField = solrParams.get(CarrotParams.LANGUAGE_FIELD_NAME, null);
// Maps Solr field names to Carrot2 custom field names
Map<String, String> customFields = getCustomFieldsMap(solrParams);
// Parse language code map string into a map
Map<String, String> languageCodeMap = Maps.newHashMap();
if (StringUtils.isNotBlank(languageField)) {
for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "")
.split("[, ]")) {
final String[] split = pair.split(":");
if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) {
languageCodeMap.put(split[0], split[1]);
} else {
log.warn("Unsupported format for " + CarrotParams.LANGUAGE_CODE_MAP
+ ": '" + pair + "'. Skipping this mapping.");
}
}
}
// Get the documents
boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY, false);
SolrQueryRequest req = null;
String[] snippetFieldAry = null;
if (produceSummary) {
highlighter = HighlightComponent.getHighlighter(core);
if (highlighter != null){
Map<String, Object> args = Maps.newHashMap();
snippetFieldAry = snippetFieldSpec.split("[, ]");
args.put(HighlightParams.FIELDS, snippetFieldAry);
args.put(HighlightParams.HIGHLIGHT, "true");
args.put(HighlightParams.SIMPLE_PRE, ""); //we don't care about actually highlighting the area
args.put(HighlightParams.SIMPLE_POST, "");
args.put(HighlightParams.FRAGSIZE, solrParams.getInt(CarrotParams.SUMMARY_FRAGSIZE, solrParams.getInt(HighlightParams.FRAGSIZE, 100)));
args.put(HighlightParams.SNIPPETS, solrParams.getInt(CarrotParams.SUMMARY_SNIPPETS, solrParams.getInt(HighlightParams.SNIPPETS, 1)));
req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
@Override
public SolrIndexSearcher getSearcher() {
return sreq.getSearcher();
}
};
} else {
log.warn("No highlighter configured, cannot produce summary");
produceSummary = false;
}
}
Iterator<SolrDocument> docsIter = solrDocList.iterator();
List<Document> result = new ArrayList<Document>(solrDocList.size());
float[] scores = {1.0f};
int[] docsHolder = new int[1];
Query theQuery = query;
while (docsIter.hasNext()) {
SolrDocument sdoc = docsIter.next();
String snippet = null;
// TODO: docIds will be null when running distributed search.
// See comment in ClusteringComponent#finishStage().
if (produceSummary && docIds != null) {
docsHolder[0] = docIds.get(sdoc).intValue();
DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
NamedList<Object> highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
if (highlights != null && highlights.size() == 1) {//should only be one value given our setup
//should only be one document
@SuppressWarnings("unchecked")
NamedList<String []> tmp = (NamedList<String[]>) highlights.getVal(0);
final StringBuilder sb = new StringBuilder();
for (int j = 0; j < snippetFieldAry.length; j++) {
// Join fragments with a period, so that Carrot2 does not create
// cross-fragment phrases; such phrases rarely make sense.
String [] highlt = tmp.get(snippetFieldAry[j]);
if (highlt != null && highlt.length > 0) {
for (int i = 0; i < highlt.length; i++) {
sb.append(highlt[i]);
sb.append(" . ");
}
}
}
snippet = sb.toString();
}
}
// If summaries not enabled or summary generation failed, use full content.
if (snippet == null) {
snippet = getConcatenated(sdoc, snippetFieldSpec);
}
// Create a Carrot2 document
Document carrotDocument = new Document(getConcatenated(sdoc, titleFieldSpec),
snippet, ObjectUtils.toString(sdoc.getFieldValue(urlField), ""));
// Store Solr id of the document, we need it to map document instances
// found in clusters back to identifiers.
carrotDocument.setField(SOLR_DOCUMENT_ID, sdoc.getFieldValue(idFieldName));
// Set language
if (StringUtils.isNotBlank(languageField)) {
Collection<Object> languages = sdoc.getFieldValues(languageField);
if (languages != null) {
// Use the first Carrot2-supported language
for (Object l : languages) {
String lang = ObjectUtils.toString(l, "");
if (languageCodeMap.containsKey(lang)) {
lang = languageCodeMap.get(lang);
}
// Language detection Library for Java uses dashes to separate
// language variants, such as 'zh-cn', but Carrot2 uses underscores.
if (lang.indexOf('-') > 0) {
lang = lang.replace('-', '_');
}
// If the language is supported by Carrot2, we'll get a non-null value
final LanguageCode carrot2Language = LanguageCode.forISOCode(lang);
if (carrot2Language != null) {
carrotDocument.setLanguage(carrot2Language);
break;
}
}
}
}
// Add custom fields
if (customFields != null) {
for (Entry<String, String> entry : customFields.entrySet()) {
carrotDocument.setField(entry.getValue(), sdoc.getFieldValue(entry.getKey()));
}
}
result.add(carrotDocument);
}
return result;
}
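// Example (illustrative, not from the Solr codebase): the LANGUAGE_CODE_MAP parameter handled above is
// a comma- or space-separated list of "solrValue:carrot2Code" pairs. A standalone sketch of the same
// parsing against the JDK only; unlike the handler above, it silently skips malformed pairs instead of
// logging a warning. The class name is hypothetical.
import java.util.HashMap;
import java.util.Map;

public final class LanguageCodeMapParser {
  private LanguageCodeMapParser() {}

  public static Map<String, String> parse(String spec) {
    Map<String, String> map = new HashMap<String, String>();
    if (spec == null) {
      return map;
    }
    for (String pair : spec.split("[, ]")) {
      String[] split = pair.split(":");
      if (split.length == 2 && !split[0].isEmpty() && !split[1].isEmpty()) {
        map.put(split[0], split[1]);
      }
    }
    return map;
  }

  public static void main(String[] args) {
    // prints {chinese=CHINESE, english=ENGLISH} (HashMap iteration order is unspecified)
    System.out.println(parse("english:ENGLISH,chinese:CHINESE"));
  }
}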
// in solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java
@Override
public void prepare(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
if (!params.getBool(COMPONENT_NAME, false)) {
return;
}
}
// in solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java
@Override
public void process(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
if (!params.getBool(COMPONENT_NAME, false)) {
return;
}
String name = getClusteringEngineName(rb);
boolean useResults = params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false);
if (useResults) {
SearchClusteringEngine engine = getSearchClusteringEngine(rb);
if (engine != null) {
DocListAndSet results = rb.getResults();
Map<SolrDocument,Integer> docIds = new HashMap<SolrDocument, Integer>(results.docList.size());
SolrDocumentList solrDocList = engine.getSolrDocumentList(results.docList, rb.req, docIds);
Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
rb.rsp.add("clusters", clusters);
} else {
log.warn("No engine for: " + name);
}
}
boolean useCollection = params.getBool(ClusteringParams.USE_COLLECTION, false);
if (useCollection) {
DocumentClusteringEngine engine = documentClusteringEngines.get(name);
if (engine != null) {
boolean useDocSet = params.getBool(ClusteringParams.USE_DOC_SET, false);
NamedList nl = null;
//TODO: This likely needs to be made into a background task that runs in an executor
if (useDocSet) {
nl = engine.cluster(rb.getResults().docSet, params);
} else {
nl = engine.cluster(params);
}
rb.rsp.add("clusters", nl);
} else {
log.warn("No engine for " + name);
}
}
}
// in solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java
public SolrDocumentList getSolrDocumentList(DocList docList, SolrQueryRequest sreq,
Map<SolrDocument, Integer> docIds) throws IOException{
return SolrPluginUtils.docListToSolrDocumentList(
docList, sreq.getSearcher(), getFieldsToLoad(sreq), docIds);
}
// in solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
if (isEnabled()) {
process(cmd.getSolrInputDocument());
} else {
log.debug("Processor not enabled, not running");
}
super.processAdd(cmd);
}
// in solr/contrib/langid/src/java/org/apache/solr/update/processor/LangDetectLanguageIdentifierUpdateProcessorFactory.java
public static synchronized void loadData() throws IOException, LangDetectException {
if (loaded) {
return;
}
loaded = true;
List<String> profileData = new ArrayList<String>();
Charset encoding = Charset.forName("UTF-8");
for (String language : languages) {
InputStream stream = LangDetectLanguageIdentifierUpdateProcessor.class.getResourceAsStream("langdetect-profiles/" + language);
BufferedReader reader = new BufferedReader(new InputStreamReader(stream, encoding));
profileData.add(new String(IOUtils.toCharArray(reader)));
reader.close();
}
DetectorFactory.loadProfile(profileData);
DetectorFactory.setSeed(0);
}
// in solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java
@Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
writer.writeStr(name, f.stringValue(), true);
}
// in solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/SolrWriter.java
static String getResourceAsString(InputStream in) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
byte[] buf = new byte[1024];
int sz = 0;
try {
while ((sz = in.read(buf)) != -1) {
baos.write(buf, 0, sz);
}
} finally {
try {
in.close();
} catch (Exception e) {
}
}
return new String(baos.toByteArray(), "UTF-8");
}
// in solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
private void parse(XMLStreamReader parser,
Handler handler,
Map<String, Object> values,
Stack<Set<String>> stack, // lists of values to purge
boolean recordStarted
) throws IOException, XMLStreamException {
Set<String> valuesAddedinThisFrame = null;
if (isRecord) {
// This Node is a match for an XPATH from a forEach attribute,
// prepare for the clean-up that will occur when the record
// is emitted after its END_ELEMENT is matched
recordStarted = true;
valuesAddedinThisFrame = new HashSet<String>();
stack.push(valuesAddedinThisFrame);
} else if (recordStarted) {
// This node is a child of some parent which matched against forEach
// attribute. Continue to add values to an existing record.
valuesAddedinThisFrame = stack.peek();
}
try {
/* The input stream has deposited us at this Node in our tree of
* interesting nodes. Depending on why this node is of interest,
* process further tokens from the input stream and decide what
* to do next.
*/
if (attributes != null) {
// we are interested in storing attributes from the input stream
for (Node node : attributes) {
String value = parser.getAttributeValue(null, node.name);
if (value != null || (recordStarted && !isRecord)) {
putText(values, value, node.fieldName, node.multiValued);
valuesAddedinThisFrame.add(node.fieldName);
}
}
}
Set<Node> childrenFound = new HashSet<Node>();
int event = -1;
int flattenedStarts=0; // our tag depth when flattening elements
StringBuilder text = new StringBuilder();
while (true) {
event = parser.next();
if (event == END_ELEMENT) {
if (flattenedStarts > 0) flattenedStarts--;
else {
if (hasText && valuesAddedinThisFrame != null) {
valuesAddedinThisFrame.add(fieldName);
putText(values, text.toString(), fieldName, multiValued);
}
if (isRecord) handler.handle(getDeepCopy(values), forEachPath);
if (childNodes != null && recordStarted && !isRecord && !childrenFound.containsAll(childNodes)) {
// non-record nodes where we have not collected text for ALL
// the child nodes.
for (Node n : childNodes) {
// For the multivalue child nodes where we could have, but
// didn't, collect text. Push a null string into values.
if (!childrenFound.contains(n)) n.putNulls(values);
}
}
return;
}
}
else if (hasText && (event==CDATA || event==CHARACTERS || event==SPACE)) {
text.append(parser.getText());
}
else if (event == START_ELEMENT) {
if ( flatten )
flattenedStarts++;
else
handleStartElement(parser, childrenFound, handler, values, stack, recordStarted);
}
// END_DOCUMENT is least likely to appear and should be
// last in if-then-else skip chain
else if (event == END_DOCUMENT) return;
}
}finally {
if ((isRecord || !recordStarted) && !stack.empty()) {
Set<String> cleanThis = stack.pop();
if (cleanThis != null) {
for (String fld : cleanThis) values.remove(fld);
}
}
}
}
// in solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/XPathRecordReader.java
private void handleStartElement(XMLStreamReader parser, Set<Node> childrenFound,
Handler handler, Map<String, Object> values,
Stack<Set<String>> stack, boolean recordStarted)
throws IOException, XMLStreamException {
Node n = getMatchingNode(parser,childNodes);
Map<String, Object> decends=new HashMap<String, Object>();
if (n != null) {
childrenFound.add(n);
n.parse(parser, handler, values, stack, recordStarted);
return;
}
// The stream has diverged from the tree of interesting elements, but
// are there any wildCardNodes ... anywhere in our path from the root?
Node dn = this; // checking our Node first!
do {
if (dn.wildCardNodes != null) {
// Check to see if the stream's tag matches one of the "//" all-
// descendants type expressions for this node.
n = getMatchingNode(parser, dn.wildCardNodes);
if (n != null) {
childrenFound.add(n);
n.parse(parser, handler, values, stack, recordStarted);
break;
}
// add the list of this node's wild descendants to the cache
for (Node nn : dn.wildCardNodes) decends.put(nn.name, nn);
}
dn = dn.wildAncestor; // leap back along the tree toward root
} while (dn != null) ;
if (n == null) {
// we have a START_ELEMENT which is not within the tree of
// interesting nodes. Skip over the contents of this element
// but recursively repeat the above for any START_ELEMENTs
// found within this element.
int count = 1; // we have had our first START_ELEMENT
while (count != 0) {
int token = parser.next();
if (token == START_ELEMENT) {
Node nn = (Node) decends.get(parser.getLocalName());
if (nn != null) {
// We have a //Node which matches the stream's parser.localName
childrenFound.add(nn);
// Parse the contents of this stream element
nn.parse(parser, handler, values, stack, recordStarted);
}
else count++;
}
else if (token == END_ELEMENT) count--;
}
}
}
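// Example (illustrative, not from the Solr codebase): handleStartElement() above skips the body of an
// uninteresting element by counting START_ELEMENT and END_ELEMENT events until the depth returns to
// zero. The same pattern in isolation against the javax.xml.stream API; the class and method names are
// hypothetical.
import java.io.StringReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

public final class StaxSkip {
  private StaxSkip() {}

  /** Call immediately after consuming a START_ELEMENT to skip its entire subtree. */
  public static void skipElement(XMLStreamReader parser) throws XMLStreamException {
    int depth = 1; // the opening START_ELEMENT has already been consumed
    while (depth != 0) {
      int event = parser.next();
      if (event == XMLStreamConstants.START_ELEMENT) {
        depth++;
      } else if (event == XMLStreamConstants.END_ELEMENT) {
        depth--;
      }
    }
  }

  public static void main(String[] args) throws XMLStreamException {
    XMLStreamReader parser = XMLInputFactory.newInstance()
        .createXMLStreamReader(new StringReader("<root><skip><a/><b>x</b></skip><keep/></root>"));
    parser.nextTag();    // <root>
    parser.nextTag();    // <skip>
    skipElement(parser); // consumes everything up to and including </skip>
    parser.nextTag();    // now positioned at <keep>
    System.out.println(parser.getLocalName()); // keep
  }
}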
// in solr/contrib/velocity/src/java/org/apache/solr/response/VelocityResponseWriter.java
public void write(Writer writer, SolrQueryRequest request, SolrQueryResponse response) throws IOException {
VelocityEngine engine = getEngine(request); // TODO: have HTTP headers available for configuring engine
Template template = getTemplate(engine, request);
VelocityContext context = new VelocityContext();
context.put("request", request);
// Turn the SolrQueryResponse into a SolrResponse.
// QueryResponse has lots of conveniences suitable for a view
// Problem is, which SolrResponse class to use?
// One patch to SOLR-620 solved this by passing in a class name as
// a parameter and using reflection and Solr's class loader to
// create a new instance. But for now the implementation simply
// uses QueryResponse, and if it chokes in a known way, falls back
// to a bare-bones SolrResponseBase.
// TODO: Can this writer know what the handler class is? With echoHandler=true it can get its string name at least
SolrResponse rsp = new QueryResponse();
NamedList<Object> parsedResponse = BinaryResponseWriter.getParsedResponse(request, response);
try {
rsp.setResponse(parsedResponse);
// page only injected if QueryResponse works
context.put("page", new PageTool(request, response)); // page tool only makes sense for a SearchHandler request... *sigh*
} catch (ClassCastException e) {
// known edge case where QueryResponse's extraction assumes "response" is a SolrDocumentList
// (AnalysisRequestHandler emits a "response")
e.printStackTrace();
rsp = new SolrResponseBase();
rsp.setResponse(parsedResponse);
}
context.put("response", rsp);
// Velocity context tools - TODO: make these pluggable
context.put("esc", new EscapeTool());
context.put("date", new ComparisonDateTool());
context.put("list", new ListTool());
context.put("math", new MathTool());
context.put("number", new NumberTool());
context.put("sort", new SortTool());
context.put("engine", engine); // for $engine.resourceExists(...)
String layout_template = request.getParams().get("v.layout");
String json_wrapper = request.getParams().get("v.json");
boolean wrap_response = (layout_template != null) || (json_wrapper != null);
// create output, optionally wrap it into a json object
if (wrap_response) {
StringWriter stringWriter = new StringWriter();
template.merge(context, stringWriter);
if (layout_template != null) {
context.put("content", stringWriter.toString());
stringWriter = new StringWriter();
try {
engine.getTemplate(layout_template + ".vm").merge(context, stringWriter);
} catch (Exception e) {
throw new IOException(e.getMessage());
}
}
if (json_wrapper != null) {
writer.write(request.getParams().get("v.json") + "(");
writer.write(getJSONWrap(stringWriter.toString()));
writer.write(')');
} else { // using a layout, but not JSON wrapping
writer.write(stringWriter.toString());
}
} else {
template.merge(context, writer);
}
}
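// Example (illustrative, not from the Solr codebase): getJSONWrap() is not shown in this listing. A
// hedged sketch of what wrapping rendered template output into a single JSON string field might look
// like, so that the "v.json" callback above receives valid JSON. The field name "result" and the class
// name are assumptions, not the writer's actual contract.
public final class JsonWrap {
  private JsonWrap() {}

  public static String wrap(String content) {
    StringBuilder sb = new StringBuilder("{\"result\":\"");
    for (int i = 0; i < content.length(); i++) {
      char c = content.charAt(i);
      switch (c) {
        case '"':  sb.append("\\\""); break;
        case '\\': sb.append("\\\\"); break;
        case '\n': sb.append("\\n");  break;
        case '\r': sb.append("\\r");  break;
        case '\t': sb.append("\\t");  break;
        default:
          if (c < 0x20) {
            sb.append(String.format("\\u%04x", (int) c)); // escape remaining control characters
          } else {
            sb.append(c);
          }
      }
    }
    return sb.append("\"}").toString();
  }

  public static void main(String[] args) {
    System.out.println(wrap("<b>\"hit\"</b>\n")); // {"result":"<b>\"hit\"</b>\n"}
  }
}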
// in solr/contrib/velocity/src/java/org/apache/solr/response/VelocityResponseWriter.java
private Template getTemplate(VelocityEngine engine, SolrQueryRequest request) throws IOException {
Template template;
String template_name = request.getParams().get("v.template");
String qt = request.getParams().get("qt");
String path = (String) request.getContext().get("path");
if (template_name == null && path != null) {
template_name = path;
} // TODO: path is never null, so qt won't get picked up; maybe special-case '/select' to use qt, otherwise use path?
if (template_name == null && qt != null) {
template_name = qt;
}
if (template_name == null) template_name = "index";
try {
template = engine.getTemplate(template_name + ".vm");
} catch (Exception e) {
throw new IOException(e.getMessage());
}
return template;
}
// in solr/core/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
DocumentAnalysisRequest resolveAnalysisRequest(SolrQueryRequest req) throws IOException, XMLStreamException {
DocumentAnalysisRequest request = new DocumentAnalysisRequest();
SolrParams params = req.getParams();
String query = params.get(AnalysisParams.QUERY, params.get(CommonParams.Q, null));
request.setQuery(query);
boolean showMatch = params.getBool(AnalysisParams.SHOW_MATCH, false);
request.setShowMatch(showMatch);
ContentStream stream = extractSingleContentStream(req);
InputStream is = null;
XMLStreamReader parser = null;
try {
is = stream.getStream();
final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
parser = (charset == null) ?
inputFactory.createXMLStreamReader(is) : inputFactory.createXMLStreamReader(is, charset);
while (true) {
int event = parser.next();
switch (event) {
case XMLStreamConstants.END_DOCUMENT: {
parser.close();
return request;
}
case XMLStreamConstants.START_ELEMENT: {
String currTag = parser.getLocalName();
if ("doc".equals(currTag)) {
log.trace("Reading doc...");
SolrInputDocument document = readDocument(parser, req.getSchema());
request.addDocument(document);
}
break;
}
}
}
} finally {
if (parser != null) parser.close();
IOUtils.closeQuietly(is);
}
}
// in solr/core/src/java/org/apache/solr/handler/DumpRequestHandler.java
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException
{
// Show params
rsp.add( "params", req.getParams().toNamedList() );
// Write the streams...
if( req.getContentStreams() != null ) {
ArrayList<NamedList<Object>> streams = new ArrayList<NamedList<Object>>();
// Cycle through each stream
for( ContentStream content : req.getContentStreams() ) {
NamedList<Object> stream = new SimpleOrderedMap<Object>();
stream.add( "name", content.getName() );
stream.add( "sourceInfo", content.getSourceInfo() );
stream.add( "size", content.getSize() );
stream.add( "contentType", content.getContentType() );
Reader reader = content.getReader();
try {
stream.add( "stream", IOUtils.toString(reader) );
} finally {
reader.close();
}
streams.add( stream );
}
rsp.add( "streams", streams );
}
rsp.add("context", req.getContext());
}
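// Example (illustrative, not from the Solr codebase): the IOUtils.toString(reader) call above can be
// reproduced with the JDK alone. A small sketch that drains a Reader into a String and, like
// handleRequestBody(), leaves closing the Reader to the caller; the class name is hypothetical.
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;

public final class ReaderText {
  private ReaderText() {}

  public static String toString(Reader reader) throws IOException {
    StringBuilder sb = new StringBuilder();
    char[] buf = new char[4096];
    int n;
    while ((n = reader.read(buf)) != -1) {
      sb.append(buf, 0, n);
    }
    return sb.toString();
  }

  public static void main(String[] args) throws IOException {
    System.out.println(toString(new StringReader("stream contents"))); // stream contents
  }
}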
// in solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
@Override
public boolean incrementToken() throws IOException {
if (tokenIterator.hasNext()) {
clearAttributes();
AttributeSource next = tokenIterator.next();
Iterator<Class<? extends Attribute>> atts = next.getAttributeClassesIterator();
while (atts.hasNext()) // make sure all att impls in the token exist here
addAttribute(atts.next());
next.copyTo(this);
return true;
} else {
return false;
}
}
// in solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
@Override
public void reset() throws IOException {
super.reset();
tokenIterator = tokens.iterator();
}
// in solr/core/src/java/org/apache/solr/handler/SnapPuller.java
NamedList getCommandResponse(NamedList<String> commands) throws IOException {
HttpPost post = new HttpPost(masterUrl);
List<BasicNameValuePair> formparams = new ArrayList<BasicNameValuePair>();
formparams.add(new BasicNameValuePair("wt", "javabin"));
for (Map.Entry<String, String> c : commands) {
formparams.add(new BasicNameValuePair(c.getKey(), c.getValue()));
}
UrlEncodedFormEntity entity = new UrlEncodedFormEntity(formparams, "UTF-8");
post.setEntity(entity);
return getNamedListResponse(post);
}
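// Example (illustrative, not from the Solr codebase): getCommandResponse() above posts its commands as
// an application/x-www-form-urlencoded body via UrlEncodedFormEntity. Roughly the same encoding can be
// produced with the JDK alone; a minimal sketch with a hypothetical class name and example values.
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.LinkedHashMap;
import java.util.Map;

public final class FormEncoder {
  private FormEncoder() {}

  public static String encode(Map<String, String> params) throws UnsupportedEncodingException {
    StringBuilder sb = new StringBuilder();
    for (Map.Entry<String, String> e : params.entrySet()) {
      if (sb.length() > 0) {
        sb.append('&');
      }
      sb.append(URLEncoder.encode(e.getKey(), "UTF-8"))
        .append('=')
        .append(URLEncoder.encode(e.getValue(), "UTF-8"));
    }
    return sb.toString();
  }

  public static void main(String[] args) throws UnsupportedEncodingException {
    Map<String, String> params = new LinkedHashMap<String, String>();
    params.put("wt", "javabin");
    params.put("command", "indexversion");
    System.out.println(encode(params)); // wt=javabin&command=indexversion
  }
}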
// in solr/core/src/java/org/apache/solr/handler/SnapPuller.java
private NamedList<?> getNamedListResponse(HttpPost method) throws IOException {
InputStream input = null;
NamedList<?> result = null;
try {
HttpResponse response = myHttpClient.execute(method);
int status = response.getStatusLine().getStatusCode();
if (status != HttpStatus.SC_OK) {
throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
"Request failed for the url " + method);
}
input = response.getEntity().getContent();
result = (NamedList<?>)new JavaBinCodec().unmarshal(input);
} finally {
try {
if (input != null) {
input.close();
}
} catch (Exception e) {
}
}
return result;
}
// in solr/core/src/java/org/apache/solr/handler/SnapPuller.java
void fetchFileList(long gen) throws IOException {
HttpPost post = new HttpPost(masterUrl);
List<BasicNameValuePair> formparams = new ArrayList<BasicNameValuePair>();
formparams.add(new BasicNameValuePair("wt", "javabin"));
formparams.add(new BasicNameValuePair(COMMAND, CMD_GET_FILE_LIST));
formparams.add(new BasicNameValuePair(GENERATION, String.valueOf(gen)));
UrlEncodedFormEntity entity = new UrlEncodedFormEntity(formparams, "UTF-8");
post.setEntity(entity);
@SuppressWarnings("unchecked")
NamedList<List<Map<String, Object>>> nl
= (NamedList<List<Map<String, Object>>>) getNamedListResponse(post);
List<Map<String, Object>> f = nl.get(CMD_GET_FILE_LIST);
if (f != null)
filesToDownload = Collections.synchronizedList(f);
else {
filesToDownload = Collections.emptyList();
LOG.error("No files to download for index generation: "+ gen);
}
f = nl.get(CONF_FILES);
if (f != null)
confFilesToDownload = Collections.synchronizedList(f);
}
// in solr/core/src/java/org/apache/solr/handler/SnapPuller.java
boolean fetchLatestIndex(SolrCore core, boolean force) throws IOException, InterruptedException {
successfulInstall = false;
replicationStartTime = System.currentTimeMillis();
try {
//get the current 'replicateable' index version in the master
NamedList response = null;
try {
response = getLatestVersion();
} catch (Exception e) {
LOG.error("Master at: " + masterUrl + " is not available. Index fetch failed. Exception: " + e.getMessage());
return false;
}
long latestVersion = (Long) response.get(CMD_INDEX_VERSION);
long latestGeneration = (Long) response.get(GENERATION);
IndexCommit commit;
RefCounted<SolrIndexSearcher> searcherRefCounted = null;
try {
searcherRefCounted = core.getNewestSearcher(false);
if (searcherRefCounted == null) {
SolrException.log(LOG, "No open searcher found - fetch aborted");
return false;
}
commit = searcherRefCounted.get().getIndexReader().getIndexCommit();
} finally {
if (searcherRefCounted != null)
searcherRefCounted.decref();
}
if (latestVersion == 0L) {
if (force && commit.getGeneration() != 0) {
// since we won't get the files for an empty index,
// we just clear ours and commit
core.getUpdateHandler().getSolrCoreState().getIndexWriter(core).deleteAll();
SolrQueryRequest req = new LocalSolrQueryRequest(core,
new ModifiableSolrParams());
core.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
}
//there is nothing to be replicated
successfulInstall = true;
return true;
}
if (!force && IndexDeletionPolicyWrapper.getCommitTimestamp(commit) == latestVersion) {
//master and slave are already in sync just return
LOG.info("Slave in sync with master.");
successfulInstall = true;
return true;
}
LOG.info("Master's generation: " + latestGeneration);
LOG.info("Slave's generation: " + commit.getGeneration());
LOG.info("Starting replication process");
// get the list of files first
fetchFileList(latestGeneration);
// this can happen if the commit point is deleted before we fetch the file list.
if(filesToDownload.isEmpty()) return false;
LOG.info("Number of files in latest index in master: " + filesToDownload.size());
// Create the sync service
fsyncService = Executors.newSingleThreadExecutor();
// use a synchronized list because the list is read by other threads (to show details)
filesDownloaded = Collections.synchronizedList(new ArrayList<Map<String, Object>>());
// if the generation of the master is older than that of the slave, the indexes are not compatible
// to be copied in place; a new index directory must be created and all the files copied over
boolean isFullCopyNeeded = IndexDeletionPolicyWrapper.getCommitTimestamp(commit) >= latestVersion || force;
File tmpIndexDir = createTempindexDir(core);
if (isIndexStale())
isFullCopyNeeded = true;
successfulInstall = false;
boolean deleteTmpIdxDir = true;
File indexDir = null ;
try {
indexDir = new File(core.getIndexDir());
downloadIndexFiles(isFullCopyNeeded, tmpIndexDir, latestGeneration);
LOG.info("Total time taken for download : " + ((System.currentTimeMillis() - replicationStartTime) / 1000) + " secs");
Collection<Map<String, Object>> modifiedConfFiles = getModifiedConfFiles(confFilesToDownload);
if (!modifiedConfFiles.isEmpty()) {
downloadConfFiles(confFilesToDownload, latestGeneration);
if (isFullCopyNeeded) {
successfulInstall = modifyIndexProps(tmpIndexDir.getName());
deleteTmpIdxDir = false;
} else {
successfulInstall = copyIndexFiles(tmpIndexDir, indexDir);
}
if (successfulInstall) {
LOG.info("Configuration files are modified, core will be reloaded");
logReplicationTimeAndConfFiles(modifiedConfFiles, successfulInstall);//write to a file time of replication and conf files.
reloadCore();
}
} else {
terminateAndWaitFsyncService();
if (isFullCopyNeeded) {
successfulInstall = modifyIndexProps(tmpIndexDir.getName());
deleteTmpIdxDir = false;
} else {
successfulInstall = copyIndexFiles(tmpIndexDir, indexDir);
}
if (successfulInstall) {
logReplicationTimeAndConfFiles(modifiedConfFiles, successfulInstall);
doCommit();
}
}
replicationStartTime = 0;
return successfulInstall;
} catch (ReplicationHandlerException e) {
LOG.error("User aborted Replication");
return false;
} catch (SolrException e) {
throw e;
} catch (InterruptedException e) {
throw new InterruptedException("Index fetch interrupted");
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Index fetch failed : ", e);
} finally {
if (deleteTmpIdxDir) delTree(tmpIndexDir);
else delTree(indexDir);
}
} finally {
if (!successfulInstall) {
logReplicationTimeAndConfFiles(null, successfulInstall);
}
filesToDownload = filesDownloaded = confFilesDownloaded = confFilesToDownload = null;
replicationStartTime = 0;
fileFetcher = null;
if (fsyncService != null && !fsyncService.isShutdown()) fsyncService.shutdownNow();
fsyncService = null;
stop = false;
fsyncException = null;
}
}
// in solr/core/src/java/org/apache/solr/handler/SnapPuller.java
private void doCommit() throws IOException {
SolrQueryRequest req = new LocalSolrQueryRequest(solrCore,
new ModifiableSolrParams());
// reboot the writer on the new index and get a new searcher
solrCore.getUpdateHandler().newIndexWriter();
try {
// first try to open an NRT searcher so that the new
// IndexWriter is registered with the reader
Future[] waitSearcher = new Future[1];
solrCore.getSearcher(true, false, waitSearcher, true);
if (waitSearcher[0] != null) {
try {
waitSearcher[0].get();
} catch (InterruptedException e) {
SolrException.log(LOG,e);
} catch (ExecutionException e) {
SolrException.log(LOG,e);
}
}
// update our commit point to the right dir
solrCore.getUpdateHandler().commit(new CommitUpdateCommand(req, false));
} finally {
req.close();
}
}
// in solr/core/src/java/org/apache/solr/handler/SnapPuller.java
private boolean copyIndexFiles(File tmpIdxDir, File indexDir) throws IOException {
String segmentsFile = null;
List<String> copiedfiles = new ArrayList<String>();
for (Map<String, Object> f : filesDownloaded) {
String fname = (String) f.get(NAME);
// the segments file must be copied last;
// otherwise, if there is a failure partway through,
// the index ends up corrupted
if (fname.startsWith("segments_")) {
segmentsFile = fname;
continue;
}
if (!copyAFile(tmpIdxDir, indexDir, fname, copiedfiles)) return false;
copiedfiles.add(fname);
}
//copy the segments file last
if (segmentsFile != null) {
if (!copyAFile(tmpIdxDir, indexDir, segmentsFile, copiedfiles)) return false;
}
return true;
}
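// Example (illustrative, not from the Solr codebase): copyIndexFiles() above defers the segments_N file
// to the very end so that a failure partway through cannot leave a commit point referring to files that
// were never copied. The same ordering sketched with java.nio.file standing in for the copyAFile helper;
// the class name and arguments are hypothetical.
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.List;

public final class OrderedIndexCopy {
  private OrderedIndexCopy() {}

  public static void copy(Path srcDir, Path dstDir, List<String> fileNames) throws IOException {
    List<String> deferred = new ArrayList<String>();
    for (String name : fileNames) {
      if (name.startsWith("segments_")) {
        deferred.add(name); // copy the commit point last
      } else {
        Files.copy(srcDir.resolve(name), dstDir.resolve(name), StandardCopyOption.REPLACE_EXISTING);
      }
    }
    for (String name : deferred) {
      Files.copy(srcDir.resolve(name), dstDir.resolve(name), StandardCopyOption.REPLACE_EXISTING);
    }
  }
}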
// in solr/core/src/java/org/apache/solr/handler/SnapPuller.java
private void copyTmpConfFiles2Conf(File tmpconfDir) throws IOException {
File confDir = new File(solrCore.getResourceLoader().getConfigDir());
for (File file : tmpconfDir.listFiles()) {
File oldFile = new File(confDir, file.getName());
if (oldFile.exists()) {
File backupFile = new File(confDir, oldFile.getName() + "." + getDateAsStr(new Date(oldFile.lastModified())));
boolean status = oldFile.renameTo(backupFile);
if (!status) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Unable to rename: " + oldFile + " to: " + backupFile);
}
}
boolean status = file.renameTo(oldFile);
if (!status) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Unable to rename: " + file + " to: " + oldFile);
}
}
}
// in solr/core/src/java/org/apache/solr/handler/SnapPuller.java
FastInputStream getStream() throws IOException {
post = new HttpPost(masterUrl);
//the method is command=filecontent
List<BasicNameValuePair> formparams = new ArrayList<BasicNameValuePair>();
formparams.add(new BasicNameValuePair(COMMAND, CMD_GET_FILE));
//add the version to download. This is used to reserve the download
formparams.add(new BasicNameValuePair(GENERATION, indexGen.toString()));
if (isConf) {
//set cf instead of file for config file
formparams.add(new BasicNameValuePair(CONF_FILE_SHORT, fileName));
} else {
formparams.add(new BasicNameValuePair(FILE, fileName));
}
if (useInternal) {
formparams.add(new BasicNameValuePair(COMPRESSION, "true"));
}
if (useExternal) {
formparams.add(new BasicNameValuePair("Accept-Encoding", "gzip,deflate"));
}
//use checksum
if (this.includeChecksum)
formparams.add(new BasicNameValuePair(CHECKSUM, "true"));
//wt=filestream this is a custom protocol
formparams.add(new BasicNameValuePair("wt", FILE_STREAM));
// If there was a failure, there will be a retry; offset=<sizedownloaded> ensures that
// the server resumes from that offset
if (bytesDownloaded > 0) {
formparams.add(new BasicNameValuePair(OFFSET, "" + bytesDownloaded));
}
UrlEncodedFormEntity entity = new UrlEncodedFormEntity(formparams, "UTF-8");
post.setEntity(entity);
HttpResponse response = myHttpClient.execute(post);
InputStream is = response.getEntity().getContent();
//wrap it using FastInputStream
if (useInternal) {
is = new InflaterInputStream(is);
} else if (useExternal) {
is = checkCompressed(post, is);
}
return new FastInputStream(is);
}
// in solr/core/src/java/org/apache/solr/handler/SnapPuller.java
private InputStream checkCompressed(AbstractHttpMessage method, InputStream respBody) throws IOException {
Header contentEncodingHeader = method.getFirstHeader("Content-Encoding");
if (contentEncodingHeader != null) {
String contentEncoding = contentEncodingHeader.getValue();
if (contentEncoding.contains("gzip")) {
respBody = new GZIPInputStream(respBody);
} else if (contentEncoding.contains("deflate")) {
respBody = new InflaterInputStream(respBody);
}
} else {
Header contentTypeHeader = method.getFirstHeader("Content-Type");
if (contentTypeHeader != null) {
String contentType = contentTypeHeader.getValue();
if (contentType != null) {
if (contentType.startsWith("application/x-gzip-compressed")) {
respBody = new GZIPInputStream(respBody);
} else if (contentType.startsWith("application/x-deflate")) {
respBody = new InflaterInputStream(respBody);
}
}
}
}
return respBody;
}
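// Example (illustrative, not from the Solr codebase): checkCompressed() above chooses a decompressing
// wrapper from the Content-Encoding (or Content-Type) value. The same decision shown against the JDK
// stream classes, with the header value passed in as a plain String; the class name is hypothetical.
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;
import java.util.zip.InflaterInputStream;

public final class ContentDecoding {
  private ContentDecoding() {}

  public static InputStream wrap(String contentEncoding, InputStream body) throws IOException {
    if (contentEncoding == null) {
      return body; // no encoding header: use the raw stream
    }
    if (contentEncoding.contains("gzip")) {
      return new GZIPInputStream(body);
    }
    if (contentEncoding.contains("deflate")) {
      return new InflaterInputStream(body);
    }
    return body;
  }
}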
// in solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
Transformer getTransformer(String xslt, SolrQueryRequest request) throws IOException {
// not the cleanest way to achieve this
// no need to synchronize access to context, right?
// Nothing else happens with it at the same time
final Map<Object,Object> ctx = request.getContext();
Transformer result = (Transformer)ctx.get(CONTEXT_TRANSFORMER_KEY);
if(result==null) {
SolrConfig solrConfig = request.getCore().getSolrConfig();
result = TransformerProvider.instance.getTransformer(solrConfig, xslt, xsltCacheLifetimeSeconds);
result.setErrorListener(xmllog);
ctx.put(CONTEXT_TRANSFORMER_KEY,result);
}
return result;
}
// in solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
void processUpdate(SolrQueryRequest req, UpdateRequestProcessor processor, XMLStreamReader parser)
throws XMLStreamException, IOException, FactoryConfigurationError,
InstantiationException, IllegalAccessException,
TransformerConfigurationException {
AddUpdateCommand addCmd = null;
SolrParams params = req.getParams();
while (true) {
int event = parser.next();
switch (event) {
case XMLStreamConstants.END_DOCUMENT:
parser.close();
return;
case XMLStreamConstants.START_ELEMENT:
String currTag = parser.getLocalName();
if (currTag.equals(UpdateRequestHandler.ADD)) {
log.trace("SolrCore.update(add)");
addCmd = new AddUpdateCommand(req);
// First look for the commitWithin parameter on the request; it may be overwritten for individual <add>s
addCmd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1);
addCmd.overwrite = params.getBool(UpdateParams.OVERWRITE, true);
for (int i = 0; i < parser.getAttributeCount(); i++) {
String attrName = parser.getAttributeLocalName(i);
String attrVal = parser.getAttributeValue(i);
if (UpdateRequestHandler.OVERWRITE.equals(attrName)) {
addCmd.overwrite = StrUtils.parseBoolean(attrVal);
} else if (UpdateRequestHandler.COMMIT_WITHIN.equals(attrName)) {
addCmd.commitWithin = Integer.parseInt(attrVal);
} else {
log.warn("Unknown attribute id in add:" + attrName);
}
}
} else if ("doc".equals(currTag)) {
if(addCmd != null) {
log.trace("adding doc...");
addCmd.clear();
addCmd.solrDoc = readDoc(parser);
processor.processAdd(addCmd);
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unexpected <doc> tag without an <add> tag surrounding it.");
}
} else if (UpdateRequestHandler.COMMIT.equals(currTag) || UpdateRequestHandler.OPTIMIZE.equals(currTag)) {
log.trace("parsing " + currTag);
CommitUpdateCommand cmd = new CommitUpdateCommand(req, UpdateRequestHandler.OPTIMIZE.equals(currTag));
ModifiableSolrParams mp = new ModifiableSolrParams();
for (int i = 0; i < parser.getAttributeCount(); i++) {
String attrName = parser.getAttributeLocalName(i);
String attrVal = parser.getAttributeValue(i);
mp.set(attrName, attrVal);
}
RequestHandlerUtils.validateCommitParams(mp);
SolrParams p = SolrParams.wrapDefaults(mp, req.getParams()); // default to the normal request params for commit options
RequestHandlerUtils.updateCommit(cmd, p);
processor.processCommit(cmd);
} // end commit
else if (UpdateRequestHandler.ROLLBACK.equals(currTag)) {
log.trace("parsing " + currTag);
RollbackUpdateCommand cmd = new RollbackUpdateCommand(req);
processor.processRollback(cmd);
} // end rollback
else if (UpdateRequestHandler.DELETE.equals(currTag)) {
log.trace("parsing delete");
processDelete(req, processor, parser);
} // end delete
break;
}
}
}
// in solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
void processDelete(SolrQueryRequest req, UpdateRequestProcessor processor, XMLStreamReader parser) throws XMLStreamException, IOException {
// Parse the command
DeleteUpdateCommand deleteCmd = new DeleteUpdateCommand(req);
// First look for the commitWithin parameter on the request; it may be overwritten for individual <delete>s
SolrParams params = req.getParams();
deleteCmd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1);
for (int i = 0; i < parser.getAttributeCount(); i++) {
String attrName = parser.getAttributeLocalName(i);
String attrVal = parser.getAttributeValue(i);
if ("fromPending".equals(attrName)) {
// deprecated
} else if ("fromCommitted".equals(attrName)) {
// deprecated
} else if (UpdateRequestHandler.COMMIT_WITHIN.equals(attrName)) {
deleteCmd.commitWithin = Integer.parseInt(attrVal);
} else {
log.warn("unexpected attribute delete/@" + attrName);
}
}
StringBuilder text = new StringBuilder();
while (true) {
int event = parser.next();
switch (event) {
case XMLStreamConstants.START_ELEMENT:
String mode = parser.getLocalName();
if (!("id".equals(mode) || "query".equals(mode))) {
log.warn("unexpected XML tag /delete/" + mode);
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"unexpected XML tag /delete/" + mode);
}
text.setLength(0);
if ("id".equals(mode)) {
for (int i = 0; i < parser.getAttributeCount(); i++) {
String attrName = parser.getAttributeLocalName(i);
String attrVal = parser.getAttributeValue(i);
if (UpdateRequestHandler.VERSION.equals(attrName)) {
deleteCmd.setVersion(Long.parseLong(attrVal));
}
}
}
break;
case XMLStreamConstants.END_ELEMENT:
String currTag = parser.getLocalName();
if ("id".equals(currTag)) {
deleteCmd.setId(text.toString());
} else if ("query".equals(currTag)) {
deleteCmd.setQuery(text.toString());
} else if ("delete".equals(currTag)) {
return;
} else {
log.warn("unexpected XML tag /delete/" + currTag);
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"unexpected XML tag /delete/" + currTag);
}
processor.processDelete(deleteCmd);
deleteCmd.clear();
break;
// Add everything to the text
case XMLStreamConstants.SPACE:
case XMLStreamConstants.CDATA:
case XMLStreamConstants.CHARACTERS:
text.append(parser.getText());
break;
}
}
}
// in solr/core/src/java/org/apache/solr/handler/loader/JavabinLoader.java
private void parseAndLoadDocs(final SolrQueryRequest req, SolrQueryResponse rsp, InputStream stream,
final UpdateRequestProcessor processor) throws IOException {
UpdateRequest update = null;
JavaBinUpdateRequestCodec.StreamingUpdateHandler handler = new JavaBinUpdateRequestCodec.StreamingUpdateHandler() {
private AddUpdateCommand addCmd = null;
@Override
public void update(SolrInputDocument document, UpdateRequest updateRequest) {
if (document == null) {
// Perhaps commit from the parameters
try {
RequestHandlerUtils.handleCommit(req, processor, updateRequest.getParams(), false);
RequestHandlerUtils.handleRollback(req, processor, updateRequest.getParams(), false);
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "ERROR handling commit/rollback");
}
return;
}
if (addCmd == null) {
addCmd = getAddCommand(req, updateRequest.getParams());
}
addCmd.solrDoc = document;
try {
processor.processAdd(addCmd);
addCmd.clear();
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "ERROR adding document " + document);
}
}
};
FastInputStream in = FastInputStream.wrap(stream);
for (; ; ) {
try {
update = new JavaBinUpdateRequestCodec().unmarshal(in, handler);
} catch (EOFException e) {
break; // this is expected
} catch (Exception e) {
log.error("Exception while processing update request", e);
break;
}
if (update.getDeleteById() != null || update.getDeleteQuery() != null) {
delete(req, update, processor);
}
}
}
// in solr/core/src/java/org/apache/solr/handler/loader/JavabinLoader.java
private void delete(SolrQueryRequest req, UpdateRequest update, UpdateRequestProcessor processor) throws IOException {
SolrParams params = update.getParams();
DeleteUpdateCommand delcmd = new DeleteUpdateCommand(req);
if(params != null) {
delcmd.commitWithin = params.getInt(UpdateParams.COMMIT_WITHIN, -1);
}
if(update.getDeleteById() != null) {
for (String s : update.getDeleteById()) {
delcmd.id = s;
processor.processDelete(delcmd);
}
delcmd.id = null;
}
if(update.getDeleteQuery() != null) {
for (String s : update.getDeleteQuery()) {
delcmd.query = s;
processor.processDelete(delcmd);
}
}
}
// in solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
DeleteUpdateCommand parseDelete() throws IOException {
assertNextEvent( JSONParser.OBJECT_START );
DeleteUpdateCommand cmd = new DeleteUpdateCommand(req);
cmd.commitWithin = commitWithin;
while( true ) {
int ev = parser.nextEvent();
if( ev == JSONParser.STRING ) {
String key = parser.getString();
if( parser.wasKey() ) {
if( "id".equals( key ) ) {
cmd.setId(parser.getString());
}
else if( "query".equals(key) ) {
cmd.setQuery(parser.getString());
}
else if( "commitWithin".equals(key) ) {
cmd.commitWithin = Integer.parseInt(parser.getString());
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown key: "+key+" ["+parser.getPosition()+"]" );
}
}
else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"invalid string: " + key
+" at ["+parser.getPosition()+"]" );
}
}
else if( ev == JSONParser.OBJECT_END ) {
if( cmd.getId() == null && cmd.getQuery() == null ) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Missing id or query for delete ["+parser.getPosition()+"]" );
}
return cmd;
}
else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Got: "+JSONParser.getEventString( ev )
+" at ["+parser.getPosition()+"]" );
}
}
}
// in solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
RollbackUpdateCommand parseRollback() throws IOException {
assertNextEvent( JSONParser.OBJECT_START );
assertNextEvent( JSONParser.OBJECT_END );
return new RollbackUpdateCommand(req);
}
// in solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
void parseCommitOptions(CommitUpdateCommand cmd ) throws IOException
{
assertNextEvent( JSONParser.OBJECT_START );
final Map<String,Object> map = (Map)ObjectBuilder.getVal(parser);
// SolrParams currently expects string values...
SolrParams p = new SolrParams() {
@Override
public String get(String param) {
Object o = map.get(param);
return o == null ? null : o.toString();
}
@Override
public String[] getParams(String param) {
return new String[]{get(param)};
}
@Override
public Iterator<String> getParameterNamesIterator() {
return map.keySet().iterator();
}
};
RequestHandlerUtils.validateCommitParams(p);
p = SolrParams.wrapDefaults(p, req.getParams()); // default to the normal request params for commit options
RequestHandlerUtils.updateCommit(cmd, p);
}
// in solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
AddUpdateCommand parseAdd() throws IOException
{
AddUpdateCommand cmd = new AddUpdateCommand(req);
cmd.commitWithin = commitWithin;
cmd.overwrite = overwrite;
float boost = 1.0f;
while( true ) {
int ev = parser.nextEvent();
if( ev == JSONParser.STRING ) {
if( parser.wasKey() ) {
String key = parser.getString();
if( "doc".equals( key ) ) {
if( cmd.solrDoc != null ) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "multiple docs in same add command" );
}
ev = assertNextEvent( JSONParser.OBJECT_START );
cmd.solrDoc = parseDoc( ev );
}
else if( UpdateRequestHandler.OVERWRITE.equals( key ) ) {
cmd.overwrite = parser.getBoolean(); // reads next boolean
}
else if( UpdateRequestHandler.COMMIT_WITHIN.equals( key ) ) {
cmd.commitWithin = (int)parser.getLong();
}
else if( "boost".equals( key ) ) {
boost = Float.parseFloat( parser.getNumberChars().toString() );
}
else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown key: "+key+" ["+parser.getPosition()+"]" );
}
}
else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Should be a key "
+" at ["+parser.getPosition()+"]" );
}
}
else if( ev == JSONParser.OBJECT_END ) {
if( cmd.solrDoc == null ) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"missing solr document. "+parser.getPosition() );
}
cmd.solrDoc.setDocumentBoost( boost );
return cmd;
}
else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Got: "+JSONParser.getEventString( ev )
+" at ["+parser.getPosition()+"]" );
}
}
}
// in solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
void handleAdds() throws IOException
{
while( true ) {
AddUpdateCommand cmd = new AddUpdateCommand(req);
cmd.commitWithin = commitWithin;
cmd.overwrite = overwrite;
int ev = parser.nextEvent();
if (ev == JSONParser.ARRAY_END) break;
assertEvent(ev, JSONParser.OBJECT_START);
cmd.solrDoc = parseDoc(ev);
processor.processAdd(cmd);
}
}
// in solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
int assertNextEvent(int expected ) throws IOException
{
int got = parser.nextEvent();
assertEvent(got, expected);
return got;
}
// in solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
private SolrInputDocument parseDoc(int ev) throws IOException {
assert ev == JSONParser.OBJECT_START;
SolrInputDocument sdoc = new SolrInputDocument();
for (;;) {
SolrInputField sif = parseField();
if (sif == null) return sdoc;
SolrInputField prev = sdoc.put(sif.getName(), sif);
if (prev != null) {
// blech - repeated keys
sif.addValue(prev.getValue(), prev.getBoost());
}
}
}
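// Example (illustrative, not from the Solr codebase): parseDoc() above folds a repeated JSON key into
// the existing SolrInputField instead of overwriting it. The same "merge on duplicate key" behaviour
// sketched with a plain Map from field name to a list of values; the class and method names are
// hypothetical.
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public final class MultiValueFields {
  private MultiValueFields() {}

  public static void addValue(Map<String, List<Object>> doc, String field, Object value) {
    List<Object> values = doc.get(field);
    if (values == null) {
      values = new ArrayList<Object>();
      doc.put(field, values);
    }
    values.add(value); // a repeated key appends rather than replaces
  }

  public static void main(String[] args) {
    Map<String, List<Object>> doc = new LinkedHashMap<String, List<Object>>();
    addValue(doc, "cat", "a");
    addValue(doc, "cat", "b"); // same key seen again
    System.out.println(doc);   // {cat=[a, b]}
  }
}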
// in solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
private SolrInputField parseField() throws IOException {
int ev = parser.nextEvent();
if (ev == JSONParser.OBJECT_END) {
return null;
}
String fieldName = parser.getString();
SolrInputField sif = new SolrInputField(fieldName);
parseFieldValue(sif);
return sif;
}
// in solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
private void parseFieldValue(SolrInputField sif) throws IOException {
int ev = parser.nextEvent();
if (ev == JSONParser.OBJECT_START) {
parseExtendedFieldValue(sif, ev);
} else {
Object val = parseNormalFieldValue(ev);
sif.setValue(val, 1.0f);
}
}
// in solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
private void parseExtendedFieldValue(SolrInputField sif, int ev) throws IOException {
assert ev == JSONParser.OBJECT_START;
float boost = 1.0f;
Object normalFieldValue = null;
Map<String, Object> extendedInfo = null;
for (;;) {
ev = parser.nextEvent();
switch (ev) {
case JSONParser.STRING:
String label = parser.getString();
if ("boost".equals(label)) {
ev = parser.nextEvent();
if( ev != JSONParser.NUMBER &&
ev != JSONParser.LONG &&
ev != JSONParser.BIGNUMBER ) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "boost should have number! "+JSONParser.getEventString(ev) );
}
boost = (float)parser.getDouble();
} else if ("value".equals(label)) {
normalFieldValue = parseNormalFieldValue(parser.nextEvent());
} else {
// If we encounter other unknown map keys, then use a map
if (extendedInfo == null) {
extendedInfo = new HashMap<String, Object>(2);
}
// for now, the only extended info will be field values
// we could either store this as an Object or a SolrInputField
Object val = parseNormalFieldValue(parser.nextEvent());
extendedInfo.put(label, val);
}
break;
case JSONParser.OBJECT_END:
if (extendedInfo != null) {
if (normalFieldValue != null) {
extendedInfo.put("value",normalFieldValue);
}
sif.setValue(extendedInfo, boost);
} else {
sif.setValue(normalFieldValue, boost);
}
return;
default:
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing JSON extended field value. Unexpected "+JSONParser.getEventString(ev) );
}
}
}
// in solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
private Object parseNormalFieldValue(int ev) throws IOException {
if (ev == JSONParser.ARRAY_START) {
List<Object> val = parseArrayFieldValue(ev);
return val;
} else {
Object val = parseSingleFieldValue(ev);
return val;
}
}
// in solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
private Object parseSingleFieldValue(int ev) throws IOException {
switch (ev) {
case JSONParser.STRING:
return parser.getString();
case JSONParser.LONG:
case JSONParser.NUMBER:
case JSONParser.BIGNUMBER:
return parser.getNumberChars().toString();
case JSONParser.BOOLEAN:
return Boolean.toString(parser.getBoolean()); // for legacy reasons, single values are expected to be strings
case JSONParser.NULL:
parser.getNull();
return null;
case JSONParser.ARRAY_START:
return parseArrayFieldValue(ev);
default:
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing JSON field value. Unexpected "+JSONParser.getEventString(ev) );
}
}
// in solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
private List<Object> parseArrayFieldValue(int ev) throws IOException {
assert ev == JSONParser.ARRAY_START;
List<Object> lst = new ArrayList<Object>(2);
for (;;) {
ev = parser.nextEvent();
if (ev == JSONParser.ARRAY_END) {
return lst;
}
Object val = parseSingleFieldValue(ev);
lst.add(val);
}
}
// in solr/core/src/java/org/apache/solr/handler/loader/CSVLoader.java
@Override
void addDoc(int line, String[] vals) throws IOException {
templateAdd.clear();
SolrInputDocument doc = new SolrInputDocument();
doAdd(line, vals, doc, templateAdd);
}
// in solr/core/src/java/org/apache/solr/handler/loader/CSVLoaderBase.java
@Override
public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream, UpdateRequestProcessor processor) throws IOException {
errHeader = "CSVLoader: input=" + stream.getSourceInfo();
Reader reader = null;
try {
reader = stream.getReader();
if (skipLines>0) {
if (!(reader instanceof BufferedReader)) {
reader = new BufferedReader(reader);
}
BufferedReader r = (BufferedReader)reader;
for (int i=0; i<skipLines; i++) {
r.readLine();
}
}
CSVParser parser = new CSVParser(reader, strategy);
// parse the fieldnames from the header of the file
if (fieldnames==null) {
fieldnames = parser.getLine();
if (fieldnames==null) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Expected fieldnames in CSV input");
}
prepareFields();
}
// read the rest of the CSV file
for(;;) {
int line = parser.getLineNumber(); // for error reporting in MT mode
String[] vals = null;
try {
vals = parser.getLine();
} catch (IOException e) {
//Catch the exception and rethrow it with more line information
input_err("can't read line: " + line, null, line, e);
}
if (vals==null) break;
if (vals.length != fields.length) {
input_err("expected "+fields.length+" values but got "+vals.length, vals, line);
}
addDoc(line,vals);
}
} finally{
if (reader != null) {
IOUtils.closeQuietly(reader);
}
}
}
// in solr/core/src/java/org/apache/solr/handler/loader/CSVLoaderBase.java
void doAdd(int line, String[] vals, SolrInputDocument doc, AddUpdateCommand template) throws IOException {
// the line number is passed simply for error reporting in MT mode.
// first, create the lucene document
for (int i=0; i<vals.length; i++) {
if (fields[i]==null) continue; // ignore this field
String val = vals[i];
adders[i].add(doc, line, i, val);
}
// add any literals
for (SchemaField sf : literals.keySet()) {
String fn = sf.getName();
String val = literals.get(sf);
doc.addField(fn, val);
}
template.solrDoc = doc;
processor.processAdd(template);
}
// in solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
private void registerFileStreamResponseWriter() {
core.registerResponseWriter(FILE_STREAM, new BinaryQueryResponseWriter() {
public void write(OutputStream out, SolrQueryRequest request, SolrQueryResponse resp) throws IOException {
FileStream stream = (FileStream) resp.getValues().get(FILE_STREAM);
stream.write(out);
}
public void write(Writer writer, SolrQueryRequest request, SolrQueryResponse response) throws IOException {
throw new RuntimeException("This is a binary writer , Cannot write to a characterstream");
}
public String getContentType(SolrQueryRequest request, SolrQueryResponse response) {
return "application/octet-stream";
}
public void init(NamedList args) { /*no op*/ }
});
}
// in solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
public void write(OutputStream out, SolrQueryRequest request, SolrQueryResponse resp) throws IOException {
FileStream stream = (FileStream) resp.getValues().get(FILE_STREAM);
stream.write(out);
}
// in solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
public void write(Writer writer, SolrQueryRequest request, SolrQueryResponse response) throws IOException {
throw new RuntimeException("This is a binary writer , Cannot write to a characterstream");
}
// in solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
public void write(OutputStream out) throws IOException {
String fileName = params.get(FILE);
String cfileName = params.get(CONF_FILE_SHORT);
String sOffset = params.get(OFFSET);
String sLen = params.get(LEN);
String compress = params.get(COMPRESSION);
String sChecksum = params.get(CHECKSUM);
String sGen = params.get(GENERATION);
if (sGen != null) indexGen = Long.parseLong(sGen);
if (Boolean.parseBoolean(compress)) {
fos = new FastOutputStream(new DeflaterOutputStream(out));
} else {
fos = new FastOutputStream(out);
}
FileInputStream inputStream = null;
int packetsWritten = 0;
try {
long offset = -1;
int len = -1;
//check if checksum is requested
boolean useChecksum = Boolean.parseBoolean(sChecksum);
if (sOffset != null)
offset = Long.parseLong(sOffset);
if (sLen != null)
len = Integer.parseInt(sLen);
if (fileName == null && cfileName == null) {
// no file name: write an empty packet and stop
writeNothing();
return;
}
File file = null;
if (cfileName != null) {
// if it is a conf file, read it from the config directory
file = new File(core.getResourceLoader().getConfigDir(), cfileName);
} else {
// else read it from the index directory
file = new File(core.getIndexDir(), fileName);
}
if (file.exists() && file.canRead()) {
inputStream = new FileInputStream(file);
FileChannel channel = inputStream.getChannel();
//if offset is mentioned move the pointer to that point
if (offset != -1)
channel.position(offset);
byte[] buf = new byte[(len == -1 || len > PACKET_SZ) ? PACKET_SZ : len];
Checksum checksum = null;
if (useChecksum)
checksum = new Adler32();
ByteBuffer bb = ByteBuffer.wrap(buf);
while (true) {
bb.clear();
long bytesRead = channel.read(bb);
if (bytesRead <= 0) {
writeNothing();
fos.close();
break;
}
fos.writeInt((int) bytesRead);
if (useChecksum) {
checksum.reset();
checksum.update(buf, 0, (int) bytesRead);
fos.writeLong(checksum.getValue());
}
fos.write(buf, 0, (int) bytesRead);
fos.flush();
if (indexGen != null && (packetsWritten % 5 == 0)) {
// after every 5 packets, reserve the commit point for some time
delPolicy.setReserveDuration(indexGen, reserveCommitDuration);
}
packetsWritten++;
}
} else {
writeNothing();
}
} catch (IOException e) {
LOG.warn("Exception while writing response for params: " + params, e);
} finally {
IOUtils.closeQuietly(inputStream);
}
}
// in solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
private void writeNothing() throws IOException {
fos.writeInt(0);
fos.flush();
}
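The writer above frames the file as length-prefixed packets: a 4-byte packet length, an optional 8-byte Adler32 checksum of the packet bytes, the bytes themselves, and finally a zero length marking end-of-file. A minimal client-side reader could look like the following sketch (this is not the Solr replication client; in, dest and useChecksum are assumptions, and a compressed stream would first need to be wrapped in an InflaterInputStream):
// needs java.io.* and java.util.zip.{Adler32, Checksum}; hypothetical helper, not Solr code
static void readFileStream(InputStream in, OutputStream dest, boolean useChecksum) throws IOException {
  DataInputStream dis = new DataInputStream(in);
  Checksum checksum = new Adler32();
  while (true) {
    int len = dis.readInt();               // packet length written by fos.writeInt(...)
    if (len <= 0) break;                   // writeNothing() signals end-of-file
    long expected = useChecksum ? dis.readLong() : 0L;
    byte[] buf = new byte[len];
    dis.readFully(buf);
    if (useChecksum) {
      checksum.reset();
      checksum.update(buf, 0, len);
      if (checksum.getValue() != expected) throw new IOException("checksum mismatch");
    }
    dest.write(buf, 0, len);
  }
}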
// in solr/core/src/java/org/apache/solr/handler/RequestHandlerUtils.java
public static boolean handleCommit(SolrQueryRequest req, UpdateRequestProcessor processor, SolrParams params, boolean force ) throws IOException
{
if( params == null) {
params = new MapSolrParams( new HashMap<String, String>() );
}
boolean optimize = params.getBool( UpdateParams.OPTIMIZE, false );
boolean commit = params.getBool( UpdateParams.COMMIT, false );
boolean softCommit = params.getBool( UpdateParams.SOFT_COMMIT, false );
boolean prepareCommit = params.getBool( UpdateParams.PREPARE_COMMIT, false );
if( optimize || commit || softCommit || prepareCommit || force ) {
CommitUpdateCommand cmd = new CommitUpdateCommand(req, optimize );
updateCommit(cmd, params);
processor.processCommit( cmd );
return true;
}
return false;
}
// in solr/core/src/java/org/apache/solr/handler/RequestHandlerUtils.java
public static void updateCommit(CommitUpdateCommand cmd, SolrParams params) throws IOException
{
if( params == null ) return;
cmd.openSearcher = params.getBool( UpdateParams.OPEN_SEARCHER, cmd.openSearcher );
cmd.waitSearcher = params.getBool( UpdateParams.WAIT_SEARCHER, cmd.waitSearcher );
cmd.softCommit = params.getBool( UpdateParams.SOFT_COMMIT, cmd.softCommit );
cmd.expungeDeletes = params.getBool( UpdateParams.EXPUNGE_DELETES, cmd.expungeDeletes );
cmd.maxOptimizeSegments = params.getInt( UpdateParams.MAX_OPTIMIZE_SEGMENTS, cmd.maxOptimizeSegments );
cmd.prepareCommit = params.getBool( UpdateParams.PREPARE_COMMIT, cmd.prepareCommit );
}
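A hedged usage sketch for the two methods above, showing how update parameters are folded into the commit command. The params are built by hand here; in a handler they come from req.getParams(), and req and processor are assumed to be in scope:
// needs org.apache.solr.common.params.{ModifiableSolrParams, UpdateParams}
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(UpdateParams.COMMIT, true);
params.set(UpdateParams.SOFT_COMMIT, true);
params.set(UpdateParams.OPEN_SEARCHER, false);
// handleCommit builds a CommitUpdateCommand, updateCommit copies the openSearcher /
// waitSearcher / softCommit / expungeDeletes overrides from the params, and the command
// is handed to the processor chain; returns true because commit=true is present.
boolean committed = RequestHandlerUtils.handleCommit(req, processor, params, false);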
// in solr/core/src/java/org/apache/solr/handler/RequestHandlerUtils.java
public static boolean handleRollback(SolrQueryRequest req, UpdateRequestProcessor processor, SolrParams params, boolean force ) throws IOException
{
if( params == null ) {
params = new MapSolrParams( new HashMap<String, String>() );
}
boolean rollback = params.getBool( UpdateParams.ROLLBACK, false );
if( rollback || force ) {
RollbackUpdateCommand cmd = new RollbackUpdateCommand(req);
processor.processRollback( cmd );
return true;
}
return false;
}
// in solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
public DocListAndSet getMoreLikeThis( int id, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
{
Document doc = reader.document(id);
rawMLTQuery = mlt.like(id);
boostedMLTQuery = getBoostedQuery( rawMLTQuery );
if( terms != null ) {
fillInterestingTermsFromMLTQuery( rawMLTQuery, terms );
}
// exclude current document from results
realMLTQuery = new BooleanQuery();
realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
realMLTQuery.add(
new TermQuery(new Term(uniqueKeyField.getName(), uniqueKeyField.getType().storedToIndexed(doc.getField(uniqueKeyField.getName())))),
BooleanClause.Occur.MUST_NOT);
DocListAndSet results = new DocListAndSet();
if (this.needDocSet) {
results = searcher.getDocListAndSet(realMLTQuery, filters, null, start, rows, flags);
} else {
results.docList = searcher.getDocList(realMLTQuery, filters, null, start, rows, flags);
}
return results;
}
// in solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
public DocListAndSet getMoreLikeThis( Reader reader, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
{
// analyzing with the first field: previous (stupid) behavior
rawMLTQuery = mlt.like(reader, mlt.getFieldNames()[0]);
boostedMLTQuery = getBoostedQuery( rawMLTQuery );
if( terms != null ) {
fillInterestingTermsFromMLTQuery( boostedMLTQuery, terms );
}
DocListAndSet results = new DocListAndSet();
if (this.needDocSet) {
results = searcher.getDocListAndSet( boostedMLTQuery, filters, null, start, rows, flags);
} else {
results.docList = searcher.getDocList( boostedMLTQuery, filters, null, start, rows, flags);
}
return results;
}
// in solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
@Deprecated
public NamedList<DocList> getMoreLikeThese( DocList docs, int rows, int flags ) throws IOException
{
IndexSchema schema = searcher.getSchema();
NamedList<DocList> mlt = new SimpleOrderedMap<DocList>();
DocIterator iterator = docs.iterator();
while( iterator.hasNext() ) {
int id = iterator.nextDoc();
DocListAndSet sim = getMoreLikeThis( id, 0, rows, null, null, flags );
String name = schema.printableUniqueKey( reader.document( id ) );
mlt.add(name, sim.docList);
}
return mlt;
}
// in solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
@Override
public void prepare(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
if (params.getBool(TermsParams.TERMS, false)) {
rb.doTerms = true;
}
// TODO: temporary... this should go in a different component.
String shards = params.get(ShardParams.SHARDS);
if (shards != null) {
rb.isDistrib = true;
if (params.get(ShardParams.SHARDS_QT) == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No shards.qt parameter specified");
}
List<String> lst = StrUtils.splitSmart(shards, ",", true);
rb.shards = lst.toArray(new String[lst.size()]);
}
}
// in solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
@Override
public void process(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
if (!params.getBool(TermsParams.TERMS, false)) return;
String[] fields = params.getParams(TermsParams.TERMS_FIELD);
NamedList<Object> termsResult = new SimpleOrderedMap<Object>();
rb.rsp.add("terms", termsResult);
if (fields == null || fields.length==0) return;
int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
if (limit < 0) {
limit = Integer.MAX_VALUE;
}
String lowerStr = params.get(TermsParams.TERMS_LOWER);
String upperStr = params.get(TermsParams.TERMS_UPPER);
boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(
params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
if (freqmax<0) {
freqmax = Integer.MAX_VALUE;
}
String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;
boolean raw = params.getBool(TermsParams.TERMS_RAW, false);
final AtomicReader indexReader = rb.req.getSearcher().getAtomicReader();
Fields lfields = indexReader.fields();
for (String field : fields) {
NamedList<Integer> fieldTerms = new NamedList<Integer>();
termsResult.add(field, fieldTerms);
Terms terms = lfields == null ? null : lfields.terms(field);
if (terms == null) {
// no terms for this field
continue;
}
FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
if (ft==null) ft = new StrField();
// prefix must currently be text
BytesRef prefixBytes = prefix==null ? null : new BytesRef(prefix);
BytesRef upperBytes = null;
if (upperStr != null) {
upperBytes = new BytesRef();
ft.readableToIndexed(upperStr, upperBytes);
}
BytesRef lowerBytes;
if (lowerStr == null) {
// If no lower bound was specified, use the prefix
lowerBytes = prefixBytes;
} else {
lowerBytes = new BytesRef();
if (raw) {
// TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists
// perhaps we detect if the FieldType is non-character and expect hex if so?
lowerBytes = new BytesRef(lowerStr);
} else {
lowerBytes = new BytesRef();
ft.readableToIndexed(lowerStr, lowerBytes);
}
}
TermsEnum termsEnum = terms.iterator(null);
BytesRef term = null;
if (lowerBytes != null) {
if (termsEnum.seekCeil(lowerBytes, true) == TermsEnum.SeekStatus.END) {
termsEnum = null;
} else {
term = termsEnum.term();
//Only advance the enum if we are excluding the lower bound and the lower Term actually matches
if (lowerIncl == false && term.equals(lowerBytes)) {
term = termsEnum.next();
}
}
} else {
// position termsEnum on first term
term = termsEnum.next();
}
int i = 0;
BoundedTreeSet<CountPair<BytesRef, Integer>> queue = (sort ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit) : null);
CharsRef external = new CharsRef();
while (term != null && (i<limit || sort)) {
boolean externalized = false; // did we fill in "external" yet for this term?
// stop if the prefix doesn't match
if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes)) break;
if (pattern != null) {
// indexed text or external text?
// TODO: support "raw" mode?
ft.indexedToReadable(term, external);
externalized = true;
if (!pattern.matcher(external).matches()) {
term = termsEnum.next();
continue;
}
}
if (upperBytes != null) {
int upperCmp = term.compareTo(upperBytes);
// if we are past the upper term, or equal to it when the upper bound is exclusive, then stop.
if (upperCmp>0 || (upperCmp==0 && !upperIncl)) break;
}
// This is a good term in the range. Check if mincount/maxcount conditions are satisfied.
int docFreq = termsEnum.docFreq();
if (docFreq >= freqmin && docFreq <= freqmax) {
// add the term to the list
if (sort) {
queue.add(new CountPair<BytesRef, Integer>(BytesRef.deepCopyOf(term), docFreq));
} else {
// TODO: handle raw somehow
if (!externalized) {
ft.indexedToReadable(term, external);
}
fieldTerms.add(external.toString(), docFreq);
i++;
}
}
term = termsEnum.next();
}
if (sort) {
for (CountPair<BytesRef, Integer> item : queue) {
if (i >= limit) break;
ft.indexedToReadable(item.key, external);
fieldTerms.add(external.toString(), item.val);
i++;
}
}
}
}
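A hedged sketch of the request parameters the enumeration above reacts to (the field name and values are placeholders; the constants are the ones referenced in the code):
// needs org.apache.solr.common.params.{ModifiableSolrParams, TermsParams}
ModifiableSolrParams p = new ModifiableSolrParams();
p.set(TermsParams.TERMS, true);                  // enables the component in process()
p.set(TermsParams.TERMS_FIELD, "name");          // field(s) to enumerate
p.set(TermsParams.TERMS_PREFIX_STR, "so");       // also used as the lower bound when no terms.lower is given
p.set(TermsParams.TERMS_LIMIT, 20);              // a negative value means no limit (Integer.MAX_VALUE)
p.set(TermsParams.TERMS_MINCOUNT, 2);            // skip terms whose docFreq is below this
p.set(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_INDEX); // index order instead of count order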
// in solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
if (!rb.doTerms) {
return ResponseBuilder.STAGE_DONE;
}
if (rb.stage == ResponseBuilder.STAGE_EXECUTE_QUERY) {
TermsHelper th = rb._termsHelper;
if (th == null) {
th = rb._termsHelper = new TermsHelper();
th.init(rb.req.getParams());
}
ShardRequest sreq = createShardQuery(rb.req.getParams());
rb.addRequest(this, sreq);
}
if (rb.stage < ResponseBuilder.STAGE_EXECUTE_QUERY) {
return ResponseBuilder.STAGE_EXECUTE_QUERY;
} else {
return ResponseBuilder.STAGE_DONE;
}
}
// in solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
private Map<String, ElevationObj> loadElevationMap(Config cfg) throws IOException {
XPath xpath = XPathFactory.newInstance().newXPath();
Map<String, ElevationObj> map = new HashMap<String, ElevationObj>();
NodeList nodes = (NodeList) cfg.evaluate("elevate/query", XPathConstants.NODESET);
for (int i = 0; i < nodes.getLength(); i++) {
Node node = nodes.item(i);
String qstr = DOMUtil.getAttr(node, "text", "missing query 'text'");
NodeList children = null;
try {
children = (NodeList) xpath.evaluate("doc", node, XPathConstants.NODESET);
} catch (XPathExpressionException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"query requires '<doc .../>' child");
}
ArrayList<String> include = new ArrayList<String>();
ArrayList<String> exclude = new ArrayList<String>();
for (int j = 0; j < children.getLength(); j++) {
Node child = children.item(j);
String id = DOMUtil.getAttr(child, "id", "missing 'id'");
String e = DOMUtil.getAttr(child, EXCLUDE, null);
if (e != null) {
if (Boolean.valueOf(e)) {
exclude.add(id);
continue;
}
}
include.add(id);
}
ElevationObj elev = new ElevationObj(qstr, include, exclude);
if (map.containsKey(elev.analyzed)) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Boosting query defined twice for query: '" + elev.text + "' (" + elev.analyzed + "')");
}
map.put(elev.analyzed, elev);
}
return map;
}
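The XPath expressions and attribute lookups above imply an elevation config of roughly the following shape (a hedged reconstruction, not copied from a Solr distribution; the query text and ids are placeholders, and the exclusion attribute name corresponds to the EXCLUDE constant):
<elevate>
  <query text="ipod">
    <doc id="DOC-1" />
    <doc id="DOC-2" exclude="true" />
  </query>
</elevate>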
// in solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
void setTopQueryResults(IndexReader reader, String query, String[] ids, String[] ex) throws IOException {
if (ids == null) {
ids = new String[0];
}
if (ex == null) {
ex = new String[0];
}
Map<String, ElevationObj> elev = elevationCache.get(reader);
if (elev == null) {
elev = new HashMap<String, ElevationObj>();
elevationCache.put(reader, elev);
}
ElevationObj obj = new ElevationObj(query, Arrays.asList(ids), Arrays.asList(ex));
elev.put(obj.analyzed, obj);
}
// in solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
String getAnalyzedQuery(String query) throws IOException {
if (analyzer == null) {
return query;
}
StringBuilder norm = new StringBuilder();
TokenStream tokens = analyzer.tokenStream("", new StringReader(query));
tokens.reset();
CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class);
while (tokens.incrementToken()) {
norm.append(termAtt.buffer(), 0, termAtt.length());
}
tokens.end();
tokens.close();
return norm.toString();
}
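A hedged stand-in illustrating the effect of getAnalyzedQuery above: assuming the configured analyzer simply lowercases and splits on whitespace (an assumption about the queryFieldType), tokens are appended with nothing between them, and the result is the key used by the elevation map:
// needs java.util.Locale; hypothetical helper, not part of QueryElevationComponent
static String approximateAnalyzedKey(String query) {
  StringBuilder norm = new StringBuilder();
  for (String token : query.toLowerCase(Locale.ROOT).split("\\s+")) {
    norm.append(token);   // no separator, mirroring the loop above
  }
  return norm.toString();
}
// approximateAnalyzedKey("iPod Nano") -> "ipodnano"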
// in solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
@Override
public void prepare(ResponseBuilder rb) throws IOException {
SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
// A runtime param can skip
if (!params.getBool(QueryElevationParams.ENABLE, true)) {
return;
}
boolean exclusive = params.getBool(QueryElevationParams.EXCLUSIVE, false);
// A runtime parameter can alter the config value for forceElevation
boolean force = params.getBool(QueryElevationParams.FORCE_ELEVATION, forceElevation);
boolean markExcludes = params.getBool(QueryElevationParams.MARK_EXCLUDES, false);
Query query = rb.getQuery();
String qstr = rb.getQueryString();
if (query == null || qstr == null) {
return;
}
qstr = getAnalyzedQuery(qstr);
IndexReader reader = req.getSearcher().getIndexReader();
ElevationObj booster = null;
try {
booster = getElevationMap(reader, req.getCore()).get(qstr);
} catch (Exception ex) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Error loading elevation", ex);
}
if (booster != null) {
rb.req.getContext().put(BOOSTED, booster.ids);
// Change the query to insert forced documents
if (exclusive == true) {
//we only want these results
rb.setQuery(booster.include);
} else {
BooleanQuery newq = new BooleanQuery(true);
newq.add(query, BooleanClause.Occur.SHOULD);
newq.add(booster.include, BooleanClause.Occur.SHOULD);
if (booster.exclude != null) {
if (markExcludes == false) {
for (TermQuery tq : booster.exclude) {
newq.add(new BooleanClause(tq, BooleanClause.Occur.MUST_NOT));
}
} else {
//we are only going to mark items as excluded, not actually exclude them. This works
//with the EditorialMarkerFactory
rb.req.getContext().put(EXCLUDED, booster.excludeIds);
for (TermQuery tq : booster.exclude) {
newq.add(new BooleanClause(tq, BooleanClause.Occur.SHOULD));
}
}
}
rb.setQuery(newq);
}
ElevationComparatorSource comparator = new ElevationComparatorSource(booster);
// if the sort is 'score desc' use a custom sorting method to
// insert documents in their proper place
SortSpec sortSpec = rb.getSortSpec();
if (sortSpec.getSort() == null) {
sortSpec.setSort(new Sort(new SortField[]{
new SortField("_elevate_", comparator, true),
new SortField(null, SortField.Type.SCORE, false)
}));
} else {
// Check if the sort is based on score
boolean modify = false;
SortField[] current = sortSpec.getSort().getSort();
ArrayList<SortField> sorts = new ArrayList<SortField>(current.length + 1);
// Perhaps force it to always sort by score
if (force && current[0].getType() != SortField.Type.SCORE) {
sorts.add(new SortField("_elevate_", comparator, true));
modify = true;
}
for (SortField sf : current) {
if (sf.getType() == SortField.Type.SCORE) {
sorts.add(new SortField("_elevate_", comparator, !sf.getReverse()));
modify = true;
}
sorts.add(sf);
}
if (modify) {
sortSpec.setSort(new Sort(sorts.toArray(new SortField[sorts.size()])));
}
}
}
// Add debugging information
if (rb.isDebug()) {
List<String> match = null;
if (booster != null) {
// Extract the elevated terms into a list
match = new ArrayList<String>(booster.priority.size());
for (Object o : booster.include.clauses()) {
TermQuery tq = (TermQuery) ((BooleanClause) o).getQuery();
match.add(tq.getTerm().text());
}
}
SimpleOrderedMap<Object> dbg = new SimpleOrderedMap<Object>();
dbg.add("q", qstr);
dbg.add("match", match);
if (rb.isDebugQuery()) {
rb.addDebugInfo("queryBoosting", dbg);
}
}
}
// in solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
@Override
public void process(ResponseBuilder rb) throws IOException {
// Do nothing -- the real work is modifying the input query
}
// in solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
@Override
public FieldComparator<Integer> newComparator(String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
return new FieldComparator<Integer>() {
private final int[] values = new int[numHits];
private int bottomVal;
private TermsEnum termsEnum;
private DocsEnum docsEnum;
Set<String> seen = new HashSet<String>(elevations.ids.size());
@Override
public int compare(int slot1, int slot2) {
return values[slot1] - values[slot2]; // values will be small enough that there is no overflow concern
}
@Override
public void setBottom(int slot) {
bottomVal = values[slot];
}
private int docVal(int doc) throws IOException {
if (ordSet.size() > 0) {
int slot = ordSet.find(doc);
if (slot >= 0) {
BytesRef id = termValues[slot];
Integer prio = elevations.priority.get(id);
return prio == null ? 0 : prio.intValue();
}
}
return 0;
}
@Override
public int compareBottom(int doc) throws IOException {
return bottomVal - docVal(doc);
}
@Override
public void copy(int slot, int doc) throws IOException {
values[slot] = docVal(doc);
}
@Override
public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
// convert the ids to Lucene doc ids; the ordSet and termValues need to be the same size as the number of elevation docs we have
ordSet.clear();
Fields fields = context.reader().fields();
if (fields == null) return this;
Terms terms = fields.terms(idField);
if (terms == null) return this;
termsEnum = terms.iterator(termsEnum);
BytesRef term = new BytesRef();
Bits liveDocs = context.reader().getLiveDocs();
for (String id : elevations.ids) {
term.copyChars(id);
if (seen.contains(id) == false && termsEnum.seekExact(term, false)) {
docsEnum = termsEnum.docs(liveDocs, docsEnum, false);
if (docsEnum != null) {
int docId = docsEnum.nextDoc();
if (docId == DocIdSetIterator.NO_MORE_DOCS ) continue; // must have been deleted
termValues[ordSet.put(docId)] = BytesRef.deepCopyOf(term);
seen.add(id);
assert docsEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS;
}
}
}
return this;
}
@Override
public Integer value(int slot) {
return values[slot];
}
@Override
public int compareDocToValue(int doc, Integer valueObj) throws IOException {
final int value = valueObj.intValue();
final int docValue = docVal(doc);
return docValue - value; // values will be small enough that there is no overflow concern
}
};
}
// in solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
private int docVal(int doc) throws IOException {
if (ordSet.size() > 0) {
int slot = ordSet.find(doc);
if (slot >= 0) {
BytesRef id = termValues[slot];
Integer prio = elevations.priority.get(id);
return prio == null ? 0 : prio.intValue();
}
}
return 0;
}
// in solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
@Override
public int compareBottom(int doc) throws IOException {
return bottomVal - docVal(doc);
}
// in solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
@Override
public void copy(int slot, int doc) throws IOException {
values[slot] = docVal(doc);
}
// in solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
@Override
public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
// convert the ids to Lucene doc ids; the ordSet and termValues need to be the same size as the number of elevation docs we have
ordSet.clear();
Fields fields = context.reader().fields();
if (fields == null) return this;
Terms terms = fields.terms(idField);
if (terms == null) return this;
termsEnum = terms.iterator(termsEnum);
BytesRef term = new BytesRef();
Bits liveDocs = context.reader().getLiveDocs();
for (String id : elevations.ids) {
term.copyChars(id);
if (seen.contains(id) == false && termsEnum.seekExact(term, false)) {
docsEnum = termsEnum.docs(liveDocs, docsEnum, false);
if (docsEnum != null) {
int docId = docsEnum.nextDoc();
if (docId == DocIdSetIterator.NO_MORE_DOCS ) continue; // must have been deleted
termValues[ordSet.put(docId)] = BytesRef.deepCopyOf(term);
seen.add(id);
assert docsEnum.nextDoc() == DocIdSetIterator.NO_MORE_DOCS;
}
}
}
return this;
}
// in solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
@Override
public int compareDocToValue(int doc, Integer valueObj) throws IOException {
final int value = valueObj.intValue();
final int docValue = docVal(doc);
return docValue - value; // values will be small enough that there is no overflow concern
}
// in solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
Collection<Token> result = new ArrayList<Token>();
assert analyzer != null;
TokenStream ts = analyzer.tokenStream("", new StringReader(q));
ts.reset();
// TODO: support custom attributes
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class);
PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
while (ts.incrementToken()){
Token token = new Token();
token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
token.setType(typeAtt.type());
token.setFlags(flagsAtt.getFlags());
token.setPayload(payloadAtt.getPayload());
token.setPositionIncrement(posIncAtt.getPositionIncrement());
result.add(token);
}
ts.end();
ts.close();
return result;
}
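A hedged usage sketch for the helper above (the analyzer would be the spell-check query analyzer configured for the component; the query string is a placeholder):
for (Token token : getTokens("solr spel check", analyzer)) {
  String text = new String(token.buffer(), 0, token.length());
  System.out.println(text + " [" + token.startOffset() + "," + token.endOffset()
      + ") posInc=" + token.getPositionIncrement());
}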
// in solr/core/src/java/org/apache/solr/handler/component/DebugComponent.java
@Override
public void prepare(ResponseBuilder rb) throws IOException
{
}
// in solr/core/src/java/org/apache/solr/handler/component/PivotFacetHelper.java
public SimpleOrderedMap<List<NamedList<Object>>> process(ResponseBuilder rb, SolrParams params, String[] pivots) throws IOException {
if (!rb.doFacets || pivots == null)
return null;
int minMatch = params.getInt( FacetParams.FACET_PIVOT_MINCOUNT, 1 );
SimpleOrderedMap<List<NamedList<Object>>> pivotResponse = new SimpleOrderedMap<List<NamedList<Object>>>();
for (String pivot : pivots) {
String[] fields = pivot.split(","); // only support two levels for now
if( fields.length < 2 ) {
throw new SolrException( ErrorCode.BAD_REQUEST,
"Pivot Facet needs at least two fields: "+pivot );
}
DocSet docs = rb.getResults().docSet;
String field = fields[0];
String subField = fields[1];
Deque<String> fnames = new LinkedList<String>();
for( int i=fields.length-1; i>1; i-- ) {
fnames.push( fields[i] );
}
SimpleFacets sf = getFacetImplementation(rb.req, rb.getResults().docSet, rb.req.getParams());
NamedList<Integer> superFacets = sf.getTermCounts(field);
pivotResponse.add(pivot, doPivots(superFacets, field, subField, fnames, rb, docs, minMatch));
}
return pivotResponse;
}
// in solr/core/src/java/org/apache/solr/handler/component/PivotFacetHelper.java
protected List<NamedList<Object>> doPivots( NamedList<Integer> superFacets, String field, String subField, Deque<String> fnames, ResponseBuilder rb, DocSet docs, int minMatch ) throws IOException
{
SolrIndexSearcher searcher = rb.req.getSearcher();
// TODO: optimize to avoid converting to an external string and then having to convert back to internal below
SchemaField sfield = searcher.getSchema().getField(field);
FieldType ftype = sfield.getType();
String nextField = fnames.poll();
List<NamedList<Object>> values = new ArrayList<NamedList<Object>>( superFacets.size() );
for (Map.Entry<String, Integer> kv : superFacets) {
// Only sub-facet if parent facet has positive count - still may not be any values for the sub-field though
if (kv.getValue() >= minMatch ) {
// don't reuse the same BytesRef each time since we will be constructing Term
// objects that will most likely be cached.
BytesRef termval = new BytesRef();
ftype.readableToIndexed(kv.getKey(), termval);
SimpleOrderedMap<Object> pivot = new SimpleOrderedMap<Object>();
pivot.add( "field", field );
pivot.add( "value", ftype.toObject(sfield, termval) );
pivot.add( "count", kv.getValue() );
if( subField == null ) {
values.add( pivot );
}
else {
Query query = new TermQuery(new Term(field, termval));
DocSet subset = searcher.getDocSet(query, docs);
SimpleFacets sf = getFacetImplementation(rb.req, subset, rb.req.getParams());
NamedList<Integer> nl = sf.getTermCounts(subField);
if (nl.size() >= minMatch ) {
pivot.add( "pivot", doPivots( nl, subField, nextField, fnames, rb, subset, minMatch ) );
values.add( pivot ); // only add response if there are some counts
}
}
}
}
// put the field back on the list
fnames.push( nextField );
return values;
}
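A hedged sketch of the parameters that drive the pivot faceting above (field names are placeholders; the constants are the ones referenced in this file and in FacetComponent):
// needs org.apache.solr.common.params.{ModifiableSolrParams, FacetParams}
ModifiableSolrParams p = new ModifiableSolrParams();
p.set(FacetParams.FACET, true);                  // FacetComponent.prepare sets rb.doFacets from this
p.add(FacetParams.FACET_PIVOT, "cat,inStock");   // parent field first, then the sub-field(s)
p.set(FacetParams.FACET_PIVOT_MINCOUNT, 1);      // parent buckets below this count are not sub-faceted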
// in solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@Override
public void prepare(ResponseBuilder rb) throws IOException
{
SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
if (!params.getBool(COMPONENT_NAME, true)) {
return;
}
SolrQueryResponse rsp = rb.rsp;
// Set field flags
ReturnFields returnFields = new ReturnFields( req );
rsp.setReturnFields( returnFields );
int flags = 0;
if (returnFields.wantsScore()) {
flags |= SolrIndexSearcher.GET_SCORES;
}
rb.setFieldFlags( flags );
String defType = params.get(QueryParsing.DEFTYPE,QParserPlugin.DEFAULT_QTYPE);
// get it from the response builder to give a different component a chance
// to set it.
String queryString = rb.getQueryString();
if (queryString == null) {
// this is the normal way it's set.
queryString = params.get( CommonParams.Q );
rb.setQueryString(queryString);
}
try {
QParser parser = QParser.getParser(rb.getQueryString(), defType, req);
Query q = parser.getQuery();
if (q == null) {
// normalize a null query to a query that matches nothing
q = new BooleanQuery();
}
rb.setQuery( q );
rb.setSortSpec( parser.getSort(true) );
rb.setQparser(parser);
rb.setScoreDoc(parser.getPaging());
String[] fqs = req.getParams().getParams(CommonParams.FQ);
if (fqs!=null && fqs.length!=0) {
List<Query> filters = rb.getFilters();
if (filters==null) {
filters = new ArrayList<Query>(fqs.length);
}
for (String fq : fqs) {
if (fq != null && fq.trim().length()!=0) {
QParser fqp = QParser.getParser(fq, null, req);
filters.add(fqp.getQuery());
}
}
// only set the filters if they are not empty; otherwise
// fq=&someotherParam= would trigger the all-docs filter for every request
// if the filter cache is disabled
if (!filters.isEmpty()) {
rb.setFilters( filters );
}
}
} catch (ParseException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
boolean grouping = params.getBool(GroupParams.GROUP, false);
if (!grouping) {
return;
}
SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand();
SolrIndexSearcher searcher = rb.req.getSearcher();
GroupingSpecification groupingSpec = new GroupingSpecification();
rb.setGroupingSpec(groupingSpec);
//TODO: move weighting of sort
Sort groupSort = searcher.weightSort(cmd.getSort());
if (groupSort == null) {
groupSort = Sort.RELEVANCE;
}
// groupSort defaults to sort
String groupSortStr = params.get(GroupParams.GROUP_SORT);
//TODO: move weighting of sort
Sort sortWithinGroup = groupSortStr == null ? groupSort : searcher.weightSort(QueryParsing.parseSort(groupSortStr, req));
if (sortWithinGroup == null) {
sortWithinGroup = Sort.RELEVANCE;
}
groupingSpec.setSortWithinGroup(sortWithinGroup);
groupingSpec.setGroupSort(groupSort);
String formatStr = params.get(GroupParams.GROUP_FORMAT, Grouping.Format.grouped.name());
Grouping.Format responseFormat;
try {
responseFormat = Grouping.Format.valueOf(formatStr);
} catch (IllegalArgumentException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, String.format("Illegal %s parameter", GroupParams.GROUP_FORMAT));
}
groupingSpec.setResponseFormat(responseFormat);
groupingSpec.setFields(params.getParams(GroupParams.GROUP_FIELD));
groupingSpec.setQueries(params.getParams(GroupParams.GROUP_QUERY));
groupingSpec.setFunctions(params.getParams(GroupParams.GROUP_FUNC));
groupingSpec.setGroupOffset(params.getInt(GroupParams.GROUP_OFFSET, 0));
groupingSpec.setGroupLimit(params.getInt(GroupParams.GROUP_LIMIT, 1));
groupingSpec.setOffset(rb.getSortSpec().getOffset());
groupingSpec.setLimit(rb.getSortSpec().getCount());
groupingSpec.setIncludeGroupCount(params.getBool(GroupParams.GROUP_TOTAL_COUNT, false));
groupingSpec.setMain(params.getBool(GroupParams.GROUP_MAIN, false));
groupingSpec.setNeedScore((cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0);
groupingSpec.setTruncateGroups(params.getBool(GroupParams.GROUP_TRUNCATE, false));
}
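A hedged sketch of the grouping parameters that prepare() above folds into the GroupingSpecification (field and sort values are placeholders):
// needs org.apache.solr.common.params.{ModifiableSolrParams, GroupParams}
ModifiableSolrParams p = new ModifiableSolrParams();
p.set(GroupParams.GROUP, true);                  // turns grouping on
p.set(GroupParams.GROUP_FIELD, "manu_exact");    // -> groupingSpec.setFields(...)
p.set(GroupParams.GROUP_LIMIT, 3);               // docs returned per group
p.set(GroupParams.GROUP_SORT, "price asc");      // sort within each group; defaults to the main sort
p.set(GroupParams.GROUP_FORMAT, "grouped");      // parsed via Grouping.Format.valueOf above
p.set(GroupParams.GROUP_TOTAL_COUNT, true);      // include the number of groups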
// in solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@Override
public void process(ResponseBuilder rb) throws IOException
{
SolrQueryRequest req = rb.req;
SolrQueryResponse rsp = rb.rsp;
SolrParams params = req.getParams();
if (!params.getBool(COMPONENT_NAME, true)) {
return;
}
SolrIndexSearcher searcher = req.getSearcher();
if (rb.getQueryCommand().getOffset() < 0) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'start' parameter cannot be negative");
}
// -1 as flag if not set.
long timeAllowed = (long)params.getInt( CommonParams.TIME_ALLOWED, -1 );
// Optional: This could also be implemented by the top-level searcher sending
// a filter that lists the ids... that would be transparent to
// the request handler, but would be more expensive (and would preserve score
// too if desired).
String ids = params.get(ShardParams.IDS);
if (ids != null) {
SchemaField idField = req.getSchema().getUniqueKeyField();
List<String> idArr = StrUtils.splitSmart(ids, ",", true);
int[] luceneIds = new int[idArr.size()];
int docs = 0;
for (int i=0; i<idArr.size(); i++) {
int id = req.getSearcher().getFirstMatch(
new Term(idField.getName(), idField.getType().toInternal(idArr.get(i))));
if (id >= 0)
luceneIds[docs++] = id;
}
DocListAndSet res = new DocListAndSet();
res.docList = new DocSlice(0, docs, luceneIds, null, docs, 0);
if (rb.isNeedDocSet()) {
// TODO: create a cache for this!
List<Query> queries = new ArrayList<Query>();
queries.add(rb.getQuery());
List<Query> filters = rb.getFilters();
if (filters != null) queries.addAll(filters);
res.docSet = searcher.getDocSet(queries);
}
rb.setResults(res);
ResultContext ctx = new ResultContext();
ctx.docs = rb.getResults().docList;
ctx.query = null; // anything?
rsp.add("response", ctx);
return;
}
SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand();
cmd.setTimeAllowed(timeAllowed);
SolrIndexSearcher.QueryResult result = new SolrIndexSearcher.QueryResult();
//
// grouping / field collapsing
//
GroupingSpecification groupingSpec = rb.getGroupingSpec();
if (groupingSpec != null) {
try {
boolean needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0;
if (params.getBool(GroupParams.GROUP_DISTRIBUTED_FIRST, false)) {
CommandHandler.Builder topsGroupsActionBuilder = new CommandHandler.Builder()
.setQueryCommand(cmd)
.setNeedDocSet(false) // Order matters here
.setIncludeHitCount(true)
.setSearcher(searcher);
for (String field : groupingSpec.getFields()) {
topsGroupsActionBuilder.addCommandField(new SearchGroupsFieldCommand.Builder()
.setField(searcher.getSchema().getField(field))
.setGroupSort(groupingSpec.getGroupSort())
.setTopNGroups(cmd.getOffset() + cmd.getLen())
.setIncludeGroupCount(groupingSpec.isIncludeGroupCount())
.build()
);
}
CommandHandler commandHandler = topsGroupsActionBuilder.build();
commandHandler.execute();
SearchGroupsResultTransformer serializer = new SearchGroupsResultTransformer(searcher);
rsp.add("firstPhase", commandHandler.processResult(result, serializer));
rsp.add("totalHitCount", commandHandler.getTotalHitCount());
rb.setResult(result);
return;
} else if (params.getBool(GroupParams.GROUP_DISTRIBUTED_SECOND, false)) {
CommandHandler.Builder secondPhaseBuilder = new CommandHandler.Builder()
.setQueryCommand(cmd)
.setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0)
.setSearcher(searcher);
for (String field : groupingSpec.getFields()) {
String[] topGroupsParam = params.getParams(GroupParams.GROUP_DISTRIBUTED_TOPGROUPS_PREFIX + field);
if (topGroupsParam == null) {
topGroupsParam = new String[0];
}
List<SearchGroup<BytesRef>> topGroups = new ArrayList<SearchGroup<BytesRef>>(topGroupsParam.length);
for (String topGroup : topGroupsParam) {
SearchGroup<BytesRef> searchGroup = new SearchGroup<BytesRef>();
if (!topGroup.equals(TopGroupsShardRequestFactory.GROUP_NULL_VALUE)) {
searchGroup.groupValue = new BytesRef(searcher.getSchema().getField(field).getType().readableToIndexed(topGroup));
}
topGroups.add(searchGroup);
}
secondPhaseBuilder.addCommandField(
new TopGroupsFieldCommand.Builder()
.setField(searcher.getSchema().getField(field))
.setGroupSort(groupingSpec.getGroupSort())
.setSortWithinGroup(groupingSpec.getSortWithinGroup())
.setFirstPhaseGroups(topGroups)
.setMaxDocPerGroup(groupingSpec.getGroupOffset() + groupingSpec.getGroupLimit())
.setNeedScores(needScores)
.setNeedMaxScore(needScores)
.build()
);
}
for (String query : groupingSpec.getQueries()) {
secondPhaseBuilder.addCommandField(new QueryCommand.Builder()
.setDocsToCollect(groupingSpec.getOffset() + groupingSpec.getLimit())
.setSort(groupingSpec.getGroupSort())
.setQuery(query, rb.req)
.setDocSet(searcher)
.build()
);
}
CommandHandler commandHandler = secondPhaseBuilder.build();
commandHandler.execute();
TopGroupsResultTransformer serializer = new TopGroupsResultTransformer(rb);
rsp.add("secondPhase", commandHandler.processResult(result, serializer));
rb.setResult(result);
return;
}
int maxDocsPercentageToCache = params.getInt(GroupParams.GROUP_CACHE_PERCENTAGE, 0);
boolean cacheSecondPassSearch = maxDocsPercentageToCache >= 1 && maxDocsPercentageToCache <= 100;
Grouping.TotalCount defaultTotalCount = groupingSpec.isIncludeGroupCount() ?
Grouping.TotalCount.grouped : Grouping.TotalCount.ungrouped;
int limitDefault = cmd.getLen(); // this is normally from "rows"
Grouping grouping =
new Grouping(searcher, result, cmd, cacheSecondPassSearch, maxDocsPercentageToCache, groupingSpec.isMain());
grouping.setSort(groupingSpec.getGroupSort())
.setGroupSort(groupingSpec.getSortWithinGroup())
.setDefaultFormat(groupingSpec.getResponseFormat())
.setLimitDefault(limitDefault)
.setDefaultTotalCount(defaultTotalCount)
.setDocsPerGroupDefault(groupingSpec.getGroupLimit())
.setGroupOffsetDefault(groupingSpec.getGroupOffset())
.setGetGroupedDocSet(groupingSpec.isTruncateGroups());
if (groupingSpec.getFields() != null) {
for (String field : groupingSpec.getFields()) {
grouping.addFieldCommand(field, rb.req);
}
}
if (groupingSpec.getFunctions() != null) {
for (String groupByStr : groupingSpec.getFunctions()) {
grouping.addFunctionCommand(groupByStr, rb.req);
}
}
if (groupingSpec.getQueries() != null) {
for (String groupByStr : groupingSpec.getQueries()) {
grouping.addQueryCommand(groupByStr, rb.req);
}
}
if (rb.doHighlights || rb.isDebug() || params.getBool(MoreLikeThisParams.MLT, false)) {
// we need a single list of the returned docs
cmd.setFlags(SolrIndexSearcher.GET_DOCLIST);
}
grouping.execute();
if (grouping.isSignalCacheWarning()) {
rsp.add(
"cacheWarning",
String.format("Cache limit of %d percent relative to maxdoc has exceeded. Please increase cache size or disable caching.", maxDocsPercentageToCache)
);
}
rb.setResult(result);
if (grouping.mainResult != null) {
ResultContext ctx = new ResultContext();
ctx.docs = grouping.mainResult;
ctx.query = null; // TODO? add the query?
rsp.add("response", ctx);
rsp.getToLog().add("hits", grouping.mainResult.matches());
} else if (!grouping.getCommands().isEmpty()) { // Can never be empty since grouping.execute() checks for this.
rsp.add("grouped", result.groupedResults);
rsp.getToLog().add("hits", grouping.getCommands().get(0).getMatches());
}
return;
} catch (ParseException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
}
// normal search result
searcher.search(result,cmd);
rb.setResult( result );
ResultContext ctx = new ResultContext();
ctx.docs = rb.getResults().docList;
ctx.query = rb.getQuery();
rsp.add("response", ctx);
rsp.getToLog().add("hits", rb.getResults().docList.matches());
doFieldSortValues(rb, searcher);
doPrefetch(rb);
}
// in solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
protected void doFieldSortValues(ResponseBuilder rb, SolrIndexSearcher searcher) throws IOException
{
SolrQueryRequest req = rb.req;
SolrQueryResponse rsp = rb.rsp;
final CharsRef spare = new CharsRef();
// The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
// currently have an option to return sort field values. Because of this, we
// take the documents given and re-derive the sort values.
boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES,false);
if(fsv){
Sort sort = searcher.weightSort(rb.getSortSpec().getSort());
SortField[] sortFields = sort==null ? new SortField[]{SortField.FIELD_SCORE} : sort.getSort();
NamedList<Object[]> sortVals = new NamedList<Object[]>(); // order is important for the sort fields
Field field = new StringField("dummy", ""); // a dummy Field
IndexReaderContext topReaderContext = searcher.getTopReaderContext();
AtomicReaderContext[] leaves = topReaderContext.leaves();
AtomicReaderContext currentLeaf = null;
if (leaves.length==1) {
// if there is a single segment, use that subReader and avoid looking up each time
currentLeaf = leaves[0];
leaves=null;
}
DocList docList = rb.getResults().docList;
// sort ids from lowest to highest so we can access them in order
int nDocs = docList.size();
long[] sortedIds = new long[nDocs];
DocIterator it = rb.getResults().docList.iterator();
for (int i=0; i<nDocs; i++) {
sortedIds[i] = (((long)it.nextDoc()) << 32) | i;
}
Arrays.sort(sortedIds);
for (SortField sortField: sortFields) {
SortField.Type type = sortField.getType();
if (type==SortField.Type.SCORE || type==SortField.Type.DOC) continue;
FieldComparator comparator = null;
String fieldname = sortField.getField();
FieldType ft = fieldname==null ? null : req.getSchema().getFieldTypeNoEx(fieldname);
Object[] vals = new Object[nDocs];
int lastIdx = -1;
int idx = 0;
for (long idAndPos : sortedIds) {
int doc = (int)(idAndPos >>> 32);
int position = (int)idAndPos;
if (leaves != null) {
idx = ReaderUtil.subIndex(doc, leaves);
currentLeaf = leaves[idx];
if (idx != lastIdx) {
// we switched segments. invalidate comparator.
comparator = null;
}
}
if (comparator == null) {
comparator = sortField.getComparator(1,0);
comparator = comparator.setNextReader(currentLeaf);
}
doc -= currentLeaf.docBase; // adjust for what segment this is in
comparator.copy(0, doc);
Object val = comparator.value(0);
// Sortable float, double, int, long types all just use a string
// comparator. For these, we need to put the type into a readable
// format. One reason for this is that XML can't represent all
// string values (or even all unicode code points).
// indexedToReadable() should be a no-op and should
// thus be harmless anyway (for all current ways anyway)
if (val instanceof String) {
field.setStringValue((String)val);
val = ft.toObject(field);
}
// Must do the same conversion when sorting by a
// String field in Lucene, which returns the terms
// data as BytesRef:
if (val instanceof BytesRef) {
UnicodeUtil.UTF8toUTF16((BytesRef)val, spare);
field.setStringValue(spare.toString());
val = ft.toObject(field);
}
vals[position] = val;
}
sortVals.add(fieldname, vals);
}
rsp.add("sort_values", sortVals);
}
}
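The sortedIds trick above packs the Lucene doc id into the high 32 bits and the result position into the low 32 bits, so a single Arrays.sort orders the entries by doc id while remembering where each value belongs. A minimal sketch of the packing and unpacking (values are placeholders; both are non-negative ints):
int docId = 42, position = 7;
long packed = (((long) docId) << 32) | position;   // doc id high, position low
int docAgain = (int) (packed >>> 32);              // recovers the doc id, as in the loop above
int positionAgain = (int) packed;                  // recovers the position in the result list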
// in solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
protected void doPrefetch(ResponseBuilder rb) throws IOException
{
SolrQueryRequest req = rb.req;
SolrQueryResponse rsp = rb.rsp;
//pre-fetch returned documents
if (!req.getParams().getBool(ShardParams.IS_SHARD,false) && rb.getResults().docList != null && rb.getResults().docList.size()<=50) {
SolrPluginUtils.optimizePreFetchDocs(rb, rb.getResults().docList, rb.getQuery(), req, rsp);
}
}
// in solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
if (rb.grouping()) {
return groupedDistributedProcess(rb);
} else {
return regularDistributedProcess(rb);
}
}
// in solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
@Override
public void prepare(ResponseBuilder rb) throws IOException
{
if (rb.req.getParams().getBool(FacetParams.FACET,false)) {
rb.setNeedDocSet( true );
rb.doFacets = true;
}
}
// in solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
@Override
public void process(ResponseBuilder rb) throws IOException
{
if (rb.doFacets) {
SolrParams params = rb.req.getParams();
SimpleFacets f = new SimpleFacets(rb.req,
rb.getResults().docSet,
params,
rb );
NamedList<Object> counts = f.getFacetCounts();
String[] pivots = params.getParams( FacetParams.FACET_PIVOT );
if( pivots != null && pivots.length > 0 ) {
NamedList v = pivotHelper.process(rb, params, pivots);
if( v != null ) {
counts.add( PIVOT_KEY, v );
}
}
// TODO ???? add this directly to the response, or to the builder?
rb.rsp.add( "facet_counts", counts );
}
}
// in solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
if (!rb.doFacets) {
return ResponseBuilder.STAGE_DONE;
}
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
// overlap facet refinement requests (those shards that we need a count for
// particular facet values from), where possible, with
// the requests to get fields (because we know that is the
// only other required phase).
// We do this in distributedProcess so we can look at all of the
// requests in the outgoing queue at once.
for (int shardNum=0; shardNum<rb.shards.length; shardNum++) {
List<String> refinements = null;
for (DistribFieldFacet dff : rb._facetInfo.facets.values()) {
if (!dff.needRefinements) continue;
List<String> refList = dff._toRefine[shardNum];
if (refList == null || refList.size()==0) continue;
String key = dff.getKey(); // reuse the same key that was used for the main facet
String termsKey = key + "__terms";
String termsVal = StrUtils.join(refList, ',');
String facetCommand;
// add terms into the original facet.field command
// do it via parameter reference to avoid another layer of encoding.
String termsKeyEncoded = QueryParsing.encodeLocalParamVal(termsKey);
if (dff.localParams != null) {
facetCommand = commandPrefix+termsKeyEncoded + " " + dff.facetStr.substring(2);
} else {
facetCommand = commandPrefix+termsKeyEncoded+'}'+dff.field;
}
if (refinements == null) {
refinements = new ArrayList<String>();
}
refinements.add(facetCommand);
refinements.add(termsKey);
refinements.add(termsVal);
}
if (refinements == null) continue;
String shard = rb.shards[shardNum];
ShardRequest refine = null;
boolean newRequest = false;
// try to find a request that is already going out to that shard.
// If nshards becomes too great, we may want to move to hashing for better
// scalability.
for (ShardRequest sreq : rb.outgoing) {
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS)!=0
&& sreq.shards != null
&& sreq.shards.length==1
&& sreq.shards[0].equals(shard))
{
refine = sreq;
break;
}
}
if (refine == null) {
// we didn't find any other suitable requests going out to that shard, so
// create one ourselves.
newRequest = true;
refine = new ShardRequest();
refine.shards = new String[]{rb.shards[shardNum]};
refine.params = new ModifiableSolrParams(rb.req.getParams());
// don't request any documents
refine.params.remove(CommonParams.START);
refine.params.set(CommonParams.ROWS,"0");
}
refine.purpose |= ShardRequest.PURPOSE_REFINE_FACETS;
refine.params.set(FacetParams.FACET, "true");
refine.params.remove(FacetParams.FACET_FIELD);
refine.params.remove(FacetParams.FACET_QUERY);
for (int i=0; i<refinements.size();) {
String facetCommand=refinements.get(i++);
String termsKey=refinements.get(i++);
String termsVal=refinements.get(i++);
refine.params.add(FacetParams.FACET_FIELD, facetCommand);
refine.params.set(termsKey, termsVal);
}
if (newRequest) {
rb.addRequest(this, refine);
}
}
}
return ResponseBuilder.STAGE_DONE;
}
// in solr/core/src/java/org/apache/solr/handler/component/HighlightComponent.java
@Override
public void prepare(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
rb.doHighlights = highlighter.isHighlightingEnabled(params);
if(rb.doHighlights){
String hlq = params.get(HighlightParams.Q);
if(hlq != null){
try {
QParser parser = QParser.getParser(hlq, null, rb.req);
rb.setHighlightQuery(parser.getHighlightQuery());
} catch (ParseException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
}
}
}
// in solr/core/src/java/org/apache/solr/handler/component/HighlightComponent.java
@Override
public void process(ResponseBuilder rb) throws IOException {
if (rb.doHighlights) {
SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
String[] defaultHighlightFields; //TODO: get from builder by default?
if (rb.getQparser() != null) {
defaultHighlightFields = rb.getQparser().getDefaultHighlightFields();
} else {
defaultHighlightFields = params.getParams(CommonParams.DF);
}
Query highlightQuery = rb.getHighlightQuery();
if(highlightQuery==null) {
if (rb.getQparser() != null) {
try {
highlightQuery = rb.getQparser().getHighlightQuery();
rb.setHighlightQuery( highlightQuery );
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
} else {
highlightQuery = rb.getQuery();
rb.setHighlightQuery( highlightQuery );
}
}
if(highlightQuery != null) {
boolean rewrite = !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true")) &&
Boolean.valueOf(params.get(HighlightParams.HIGHLIGHT_MULTI_TERM, "true")));
highlightQuery = rewrite ? highlightQuery.rewrite(req.getSearcher().getIndexReader()) : highlightQuery;
}
// No highlighting if there is no query -- consider q.alt="*:*"
if( highlightQuery != null ) {
NamedList sumData = highlighter.doHighlighting(
rb.getResults().docList,
highlightQuery,
req, defaultHighlightFields );
if(sumData != null) {
// TODO ???? add this directly to the response?
rb.rsp.add("highlighting", sumData);
}
}
}
}
// in solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java
public int distributedProcess(ResponseBuilder rb) throws IOException {
return ResponseBuilder.STAGE_DONE;
}
// in solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java
@Override
public void prepare(ResponseBuilder rb) throws IOException {
if (rb.req.getParams().getBool(StatsParams.STATS,false)) {
rb.setNeedDocSet( true );
rb.doStats = true;
}
}
// in solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java
@Override
public void process(ResponseBuilder rb) throws IOException {
if (rb.doStats) {
SolrParams params = rb.req.getParams();
SimpleStats s = new SimpleStats(rb.req,
rb.getResults().docSet,
params );
// TODO ???? add this directly to the response, or to the builder?
rb.rsp.add( "stats", s.getStatsCounts() );
}
}
// in solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
return ResponseBuilder.STAGE_DONE;
}
// in solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java
public NamedList<Object> getStatsCounts() throws IOException {
NamedList<Object> res = new SimpleOrderedMap<Object>();
res.add("stats_fields", getStatsFields());
return res;
}
// in solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java
public NamedList<Object> getStatsFields() throws IOException {
NamedList<Object> res = new SimpleOrderedMap<Object>();
String[] statsFs = params.getParams(StatsParams.STATS_FIELD);
boolean isShard = params.getBool(ShardParams.IS_SHARD, false);
if (null != statsFs) {
for (String f : statsFs) {
String[] facets = params.getFieldParams(f, StatsParams.STATS_FACET);
if (facets == null) {
facets = new String[0]; // make sure it is something...
}
SchemaField sf = searcher.getSchema().getField(f);
FieldType ft = sf.getType();
NamedList<?> stv;
// Currently, only UnInvertedField can deal with multi-part trie fields
String prefix = TrieField.getMainValuePrefix(ft);
if (sf.multiValued() || ft.multiValuedFieldCache() || prefix!=null) {
//use UnInvertedField for multivalued fields
UnInvertedField uif = UnInvertedField.getUnInvertedField(f, searcher);
stv = uif.getStats(searcher, docs, facets).getStatsValues();
} else {
stv = getFieldCacheStats(f, facets);
}
if (isShard == true || (Long) stv.get("count") > 0) {
res.add(f, stv);
} else {
res.add(f, null);
}
}
}
return res;
}
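A hedged sketch of the parameters read by getStatsFields above (field names are placeholders; the per-field facet uses the f.<field>.stats.facet form resolved by getFieldParams):
// needs org.apache.solr.common.params.{ModifiableSolrParams, StatsParams}
ModifiableSolrParams p = new ModifiableSolrParams();
p.set(StatsParams.STATS, true);                       // StatsComponent.prepare sets rb.doStats from this
p.add(StatsParams.STATS_FIELD, "price");              // one stats.field per field to summarize
p.set("f.price." + StatsParams.STATS_FACET, "cat");   // facet the price stats by cat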
// in solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
@Override
public void prepare(ResponseBuilder rb) throws IOException {
// Set field flags
ReturnFields returnFields = new ReturnFields( rb.req );
rb.rsp.setReturnFields( returnFields );
}
// in solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
@Override
public void process(ResponseBuilder rb) throws IOException
{
SolrQueryRequest req = rb.req;
SolrQueryResponse rsp = rb.rsp;
SolrParams params = req.getParams();
if (!params.getBool(COMPONENT_NAME, true)) {
return;
}
String val = params.get("getVersions");
if (val != null) {
processGetVersions(rb);
return;
}
val = params.get("getUpdates");
if (val != null) {
processGetUpdates(rb);
return;
}
String id[] = params.getParams("id");
String ids[] = params.getParams("ids");
if (id == null && ids == null) {
return;
}
String[] allIds = id==null ? new String[0] : id;
if (ids != null) {
List<String> lst = new ArrayList<String>();
for (String s : allIds) {
lst.add(s);
}
for (String idList : ids) {
lst.addAll( StrUtils.splitSmart(idList, ",", true) );
}
allIds = lst.toArray(new String[lst.size()]);
}
SchemaField idField = req.getSchema().getUniqueKeyField();
FieldType fieldType = idField.getType();
SolrDocumentList docList = new SolrDocumentList();
UpdateLog ulog = req.getCore().getUpdateHandler().getUpdateLog();
RefCounted<SolrIndexSearcher> searcherHolder = null;
DocTransformer transformer = rsp.getReturnFields().getTransformer();
if (transformer != null) {
TransformContext context = new TransformContext();
context.req = req;
transformer.setContext(context);
}
try {
SolrIndexSearcher searcher = null;
BytesRef idBytes = new BytesRef();
for (String idStr : allIds) {
fieldType.readableToIndexed(idStr, idBytes);
if (ulog != null) {
Object o = ulog.lookup(idBytes);
if (o != null) {
// should currently be a List<Oper,Ver,Doc/Id>
List entry = (List)o;
assert entry.size() >= 3;
int oper = (Integer)entry.get(0) & UpdateLog.OPERATION_MASK;
switch (oper) {
case UpdateLog.ADD:
SolrDocument doc = toSolrDoc((SolrInputDocument)entry.get(entry.size()-1), req.getSchema());
if(transformer!=null) {
transformer.transform(doc, -1); // unknown docID
}
docList.add(doc);
break;
case UpdateLog.DELETE:
break;
default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown Operation! " + oper);
}
continue;
}
}
// didn't find it in the update log, so it should be in the newest searcher opened
if (searcher == null) {
searcherHolder = req.getCore().getRealtimeSearcher();
searcher = searcherHolder.get();
}
// SolrCore.verbose("RealTimeGet using searcher ", searcher);
int docid = searcher.getFirstMatch(new Term(idField.getName(), idBytes));
if (docid < 0) continue;
Document luceneDocument = searcher.doc(docid);
SolrDocument doc = toSolrDoc(luceneDocument, req.getSchema());
if( transformer != null ) {
transformer.transform(doc, docid);
}
docList.add(doc);
}
} finally {
if (searcherHolder != null) {
searcherHolder.decref();
}
}
// if the client specified a single id=foo, then use "doc":{
// otherwise use a standard doclist
if (ids == null && allIds.length <= 1) {
// if the doc was not found, then use a value of null.
rsp.add("doc", docList.size() > 0 ? docList.get(0) : null);
} else {
docList.setNumFound(docList.size());
rsp.add("response", docList);
}
}
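A hedged sketch of the two ways ids reach the process() method above (values are placeholders): a single id parameter yields a "doc" entry in the response, while an ids list yields a "response" doc list.
// needs org.apache.solr.common.params.ModifiableSolrParams
ModifiableSolrParams single = new ModifiableSolrParams();
single.set("id", "SOLR1000");                   // response carries "doc": the document, or null if not found
ModifiableSolrParams several = new ModifiableSolrParams();
several.set("ids", "SOLR1000,IW-02,MA147LL/A"); // response carries "response": a SolrDocumentList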
// in solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
public static SolrInputDocument getInputDocument(SolrCore core, BytesRef idBytes) throws IOException {
SolrInputDocument sid = null;
RefCounted<SolrIndexSearcher> searcherHolder = null;
try {
SolrIndexSearcher searcher = null;
UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
if (ulog != null) {
Object o = ulog.lookup(idBytes);
if (o != null) {
// should currently be a List<Oper,Ver,Doc/Id>
List entry = (List)o;
assert entry.size() >= 3;
int oper = (Integer)entry.get(0) & UpdateLog.OPERATION_MASK;
switch (oper) {
case UpdateLog.ADD:
sid = (SolrInputDocument)entry.get(entry.size()-1);
break;
case UpdateLog.DELETE:
return null;
default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown Operation! " + oper);
}
}
}
if (sid == null) {
// didn't find it in the update log, so it should be in the newest searcher opened
if (searcher == null) {
searcherHolder = core.getRealtimeSearcher();
searcher = searcherHolder.get();
}
// SolrCore.verbose("RealTimeGet using searcher ", searcher);
SchemaField idField = core.getSchema().getUniqueKeyField();
int docid = searcher.getFirstMatch(new Term(idField.getName(), idBytes));
if (docid < 0) return null;
Document luceneDocument = searcher.doc(docid);
sid = toSolrInputDocument(luceneDocument, core.getSchema());
}
} finally {
if (searcherHolder != null) {
searcherHolder.decref();
}
}
return sid;
}
// in solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
if (rb.stage < ResponseBuilder.STAGE_GET_FIELDS)
return ResponseBuilder.STAGE_GET_FIELDS;
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
return createSubRequests(rb);
}
return ResponseBuilder.STAGE_DONE;
}
// in solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
public int createSubRequests(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
String id1[] = params.getParams("id");
String ids[] = params.getParams("ids");
if (id1 == null && ids == null) {
return ResponseBuilder.STAGE_DONE;
}
List<String> allIds = new ArrayList<String>();
if (id1 != null) {
for (String s : id1) {
allIds.add(s);
}
}
if (ids != null) {
for (String s : ids) {
allIds.addAll( StrUtils.splitSmart(s, ",", true) );
}
}
// TODO: handle collection=...?
ZkController zkController = rb.req.getCore().getCoreDescriptor().getCoreContainer().getZkController();
// if shards=... then use that
if (zkController != null && params.get("shards") == null) {
SchemaField sf = rb.req.getSchema().getUniqueKeyField();
CloudDescriptor cloudDescriptor = rb.req.getCore().getCoreDescriptor().getCloudDescriptor();
String collection = cloudDescriptor.getCollectionName();
CloudState cloudState = zkController.getCloudState();
Map<String, List<String>> shardToId = new HashMap<String, List<String>>();
for (String id : allIds) {
BytesRef br = new BytesRef();
sf.getType().readableToIndexed(id, br);
int hash = Hash.murmurhash3_x86_32(br.bytes, br.offset, br.length, 0);
String shard = cloudState.getShard(hash, collection);
List<String> idsForShard = shardToId.get(shard);
if (idsForShard == null) {
idsForShard = new ArrayList<String>(2);
shardToId.put(shard, idsForShard);
}
idsForShard.add(id);
}
for (Map.Entry<String,List<String>> entry : shardToId.entrySet()) {
String shard = entry.getKey();
String shardIdList = StrUtils.join(entry.getValue(), ',');
ShardRequest sreq = new ShardRequest();
sreq.purpose = 1;
// sreq.shards = new String[]{shard}; // TODO: would be nice if this would work...
sreq.shards = sliceToShards(rb, collection, shard);
sreq.actualShards = sreq.shards;
sreq.params = new ModifiableSolrParams();
sreq.params.set(ShardParams.SHARDS_QT,"/get"); // TODO: how to avoid hardcoding this and hit the same handler?
sreq.params.set("distrib",false);
sreq.params.set("ids", shardIdList);
rb.addRequest(this, sreq);
}
} else {
String shardIdList = StrUtils.join(allIds, ',');
ShardRequest sreq = new ShardRequest();
sreq.purpose = 1;
sreq.shards = null; // ALL
sreq.actualShards = sreq.shards;
sreq.params = new ModifiableSolrParams();
sreq.params.set(ShardParams.SHARDS_QT,"/get"); // TODO: how to avoid hardcoding this and hit the same handler?
sreq.params.set("distrib",false);
sreq.params.set("ids", shardIdList);
rb.addRequest(this, sreq);
}
return ResponseBuilder.STAGE_DONE;
}
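// Illustrative sketch (not from the Solr source): a simplified, hypothetical version of the
// id-to-shard grouping performed by createSubRequests above, using an arbitrary hash in place of
// murmurhash3 over the indexed bytes and plain shard names in place of CloudState lookups.
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class ShardRouterSketch {
  // Groups ids by shard so one sub-request can be issued per shard.
  static Map<String, List<String>> groupByShard(List<String> ids, int numShards) {
    Map<String, List<String>> shardToIds = new HashMap<String, List<String>>();
    for (String id : ids) {
      int hash = id.hashCode() & 0x7fffffff;       // stand-in for Hash.murmurhash3_x86_32(...)
      String shard = "shard" + (hash % numShards);  // stand-in for cloudState.getShard(hash, collection)
      List<String> forShard = shardToIds.get(shard);
      if (forShard == null) {
        forShard = new ArrayList<String>(2);
        shardToIds.put(shard, forShard);
      }
      forShard.add(id);
    }
    return shardToIds;
  }
}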
// in solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
public void processGetVersions(ResponseBuilder rb) throws IOException
{
SolrQueryRequest req = rb.req;
SolrQueryResponse rsp = rb.rsp;
SolrParams params = req.getParams();
if (!params.getBool(COMPONENT_NAME, true)) {
return;
}
int nVersions = params.getInt("getVersions", -1);
if (nVersions == -1) return;
String sync = params.get("sync");
if (sync != null) {
processSync(rb, nVersions, sync);
return;
}
UpdateLog ulog = req.getCore().getUpdateHandler().getUpdateLog();
if (ulog == null) return;
UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates();
try {
rb.rsp.add("versions", recentUpdates.getVersions(nVersions));
} finally {
recentUpdates.close(); // cache this somehow?
}
}
// in solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
public void processGetUpdates(ResponseBuilder rb) throws IOException
{
SolrQueryRequest req = rb.req;
SolrQueryResponse rsp = rb.rsp;
SolrParams params = req.getParams();
if (!params.getBool(COMPONENT_NAME, true)) {
return;
}
String versionsStr = params.get("getUpdates");
if (versionsStr == null) return;
UpdateLog ulog = req.getCore().getUpdateHandler().getUpdateLog();
if (ulog == null) return;
List<String> versions = StrUtils.splitSmart(versionsStr, ",", true);
// TODO: get this from cache instead of rebuilding?
UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates();
List<Object> updates = new ArrayList<Object>(versions.size());
long minVersion = Long.MAX_VALUE;
try {
for (String versionStr : versions) {
long version = Long.parseLong(versionStr);
try {
Object o = recentUpdates.lookup(version);
if (o == null) continue;
if (version > 0) {
minVersion = Math.min(minVersion, version);
}
// TODO: do any kind of validation here?
updates.add(o);
} catch (SolrException e) {
log.warn("Exception reading log for updates", e);
} catch (ClassCastException e) {
log.warn("Exception reading log for updates", e);
}
}
// Must return all delete-by-query commands that occur after the first add requested
// since they may apply.
updates.addAll( recentUpdates.getDeleteByQuery(minVersion));
rb.rsp.add("updates", updates);
} finally {
recentUpdates.close(); // cache this somehow?
}
}
// in solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java
Override
public void prepare(ResponseBuilder rb) throws IOException
{
}
// in solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java
Override
public void process(ResponseBuilder rb) throws IOException
{
SolrParams p = rb.req.getParams();
if( p.getBool( MoreLikeThisParams.MLT, false ) ) {
SolrIndexSearcher searcher = rb.req.getSearcher();
NamedList<DocList> sim = getMoreLikeThese( rb, searcher,
rb.getResults().docList, rb.getFieldFlags() );
// TODO ???? add this directly to the response?
rb.rsp.add( "moreLikeThis", sim );
}
}
// in solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java
NamedList<DocList> getMoreLikeThese( ResponseBuilder rb, SolrIndexSearcher searcher,
DocList docs, int flags ) throws IOException {
SolrParams p = rb.req.getParams();
IndexSchema schema = searcher.getSchema();
MoreLikeThisHandler.MoreLikeThisHelper mltHelper
= new MoreLikeThisHandler.MoreLikeThisHelper( p, searcher );
NamedList<DocList> mlt = new SimpleOrderedMap<DocList>();
DocIterator iterator = docs.iterator();
SimpleOrderedMap<Object> dbg = null;
if( rb.isDebug() ){
dbg = new SimpleOrderedMap<Object>();
}
while( iterator.hasNext() ) {
int id = iterator.nextDoc();
int rows = p.getInt( MoreLikeThisParams.DOC_COUNT, 5 );
DocListAndSet sim = mltHelper.getMoreLikeThis( id, 0, rows, null, null, flags );
String name = schema.printableUniqueKey( searcher.doc( id ) );
mlt.add(name, sim.docList);
if( dbg != null ){
SimpleOrderedMap<Object> docDbg = new SimpleOrderedMap<Object>();
docDbg.add( "rawMLTQuery", mltHelper.getRawMLTQuery().toString() );
docDbg.add( "boostedMLTQuery", mltHelper.getBoostedMLTQuery().toString() );
docDbg.add( "realMLTQuery", mltHelper.getRealMLTQuery().toString() );
SimpleOrderedMap<Object> explains = new SimpleOrderedMap<Object>();
DocIterator mltIte = sim.docList.iterator();
while( mltIte.hasNext() ){
int mltid = mltIte.nextDoc();
String key = schema.printableUniqueKey( searcher.doc( mltid ) );
explains.add( key, searcher.explain( mltHelper.getRealMLTQuery(), mltid ) );
}
docDbg.add( "explain", explains );
dbg.add( name, docDbg );
}
}
// add debug information
if( dbg != null ){
rb.addDebugInfo( "moreLikeThis", dbg );
}
return mlt;
}
// in solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
Override
public void process(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
if (!params.getBool(COMPONENT_NAME, false)) {
return;
}
NamedList<Object> termVectors = new NamedList<Object>();
rb.rsp.add(TERM_VECTORS, termVectors);
FieldOptions allFields = new FieldOptions();
//figure out what options we have, and try to get the appropriate vector
allFields.termFreq = params.getBool(TermVectorParams.TF, false);
allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
allFields.docFreq = params.getBool(TermVectorParams.DF, false);
allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
//boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
//short cut to all values.
if (params.getBool(TermVectorParams.ALL, false)) {
allFields.termFreq = true;
allFields.positions = true;
allFields.offsets = true;
allFields.docFreq = true;
allFields.tfIdf = true;
}
String fldLst = params.get(TermVectorParams.FIELDS);
if (fldLst == null) {
fldLst = params.get(CommonParams.FL);
}
//use this to validate our fields
IndexSchema schema = rb.req.getSchema();
//Build up our per field mapping
Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
NamedList<List<String>> warnings = new NamedList<List<String>>();
List<String> noTV = new ArrayList<String>();
List<String> noPos = new ArrayList<String>();
List<String> noOff = new ArrayList<String>();
//we have specific fields to retrieve
if (fldLst != null) {
String [] fields = SolrPluginUtils.split(fldLst);
for (String field : fields) {
SchemaField sf = schema.getFieldOrNull(field);
if (sf != null) {
if (sf.storeTermVector()) {
FieldOptions option = fieldOptions.get(field);
if (option == null) {
option = new FieldOptions();
option.fieldName = field;
fieldOptions.put(field, option);
}
//get the per field mappings
option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
//Validate these are even an option
option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
if (option.positions && !sf.storeTermPositions()){
noPos.add(field);
}
option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
if (option.offsets && !sf.storeTermOffsets()){
noOff.add(field);
}
} else {//field doesn't have term vectors
noTV.add(field);
}
} else {
//field doesn't exist
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
}
}
} //else, deal with all fields
boolean hasWarnings = false;
if (!noTV.isEmpty()) {
warnings.add("noTermVectors", noTV);
hasWarnings = true;
}
if (!noPos.isEmpty()) {
warnings.add("noPositions", noPos);
hasWarnings = true;
}
if (!noOff.isEmpty()) {
warnings.add("noOffsets", noOff);
hasWarnings = true;
}
if (hasWarnings) {
termVectors.add("warnings", warnings);
}
DocListAndSet listAndSet = rb.getResults();
List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
Iterator<Integer> iter;
if (docIds != null && !docIds.isEmpty()) {
iter = docIds.iterator();
} else {
DocList list = listAndSet.docList;
iter = list.iterator();
}
SolrIndexSearcher searcher = rb.req.getSearcher();
IndexReader reader = searcher.getIndexReader();
//the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors
SchemaField keyField = schema.getUniqueKeyField();
String uniqFieldName = null;
if (keyField != null) {
uniqFieldName = keyField.getName();
}
//Only load the id field to get the uniqueKey of that
//field
final String finalUniqFieldName = uniqFieldName;
final List<String> uniqValues = new ArrayList<String>();
// TODO: is this required to be single-valued? if so, we should STOP
// once we find it...
final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() {
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
uniqValues.add(value);
}
@Override
public void intField(FieldInfo fieldInfo, int value) throws IOException {
uniqValues.add(Integer.toString(value));
}
@Override
public void longField(FieldInfo fieldInfo, long value) throws IOException {
uniqValues.add(Long.toString(value));
}
@Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
}
};
TermsEnum termsEnum = null;
while (iter.hasNext()) {
Integer docId = iter.next();
NamedList<Object> docNL = new NamedList<Object>();
termVectors.add("doc-" + docId, docNL);
if (keyField != null) {
reader.document(docId, getUniqValue);
String uniqVal = null;
if (uniqValues.size() != 0) {
uniqVal = uniqValues.get(0);
uniqValues.clear();
docNL.add("uniqueKey", uniqVal);
termVectors.add("uniqueKeyFieldName", uniqFieldName);
}
}
if (!fieldOptions.isEmpty()) {
for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
final String field = entry.getKey();
final Terms vector = reader.getTermVector(docId, field);
if (vector != null) {
termsEnum = vector.iterator(termsEnum);
mapOneVector(docNL, entry.getValue(), reader, docId, termsEnum, field);
}
}
} else {
// extract all fields
final Fields vectors = reader.getTermVectors(docId);
final FieldsEnum fieldsEnum = vectors.iterator();
String field;
while((field = fieldsEnum.next()) != null) {
Terms terms = fieldsEnum.terms();
if (terms != null) {
termsEnum = terms.iterator(termsEnum);
mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
}
}
}
}
}
// in solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
uniqValues.add(value);
}
// in solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
Override
public void intField(FieldInfo fieldInfo, int value) throws IOException {
uniqValues.add(Integer.toString(value));
}
// in solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
Override
public void longField(FieldInfo fieldInfo, long value) throws IOException {
uniqValues.add(Long.toString(value));
}
// in solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
}
// in solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
private void mapOneVector(NamedList<Object> docNL, FieldOptions fieldOptions, IndexReader reader, int docID, TermsEnum termsEnum, String field) throws IOException {
NamedList<Object> fieldNL = new NamedList<Object>();
docNL.add(field, fieldNL);
BytesRef text;
DocsAndPositionsEnum dpEnum = null;
while((text = termsEnum.next()) != null) {
String term = text.utf8ToString();
NamedList<Object> termInfo = new NamedList<Object>();
fieldNL.add(term, termInfo);
final int freq = (int) termsEnum.totalTermFreq();
if (fieldOptions.termFreq) {
termInfo.add("tf", freq);
}
dpEnum = termsEnum.docsAndPositions(null, dpEnum, fieldOptions.offsets);
boolean useOffsets = fieldOptions.offsets;
if (dpEnum == null) {
useOffsets = false;
dpEnum = termsEnum.docsAndPositions(null, dpEnum, false);
}
boolean usePositions = false;
if (dpEnum != null) {
dpEnum.nextDoc();
usePositions = fieldOptions.positions;
}
NamedList<Number> theOffsets = null;
if (useOffsets) {
theOffsets = new NamedList<Number>();
termInfo.add("offsets", theOffsets);
}
NamedList<Integer> positionsNL = null;
if (usePositions || theOffsets != null) {
for (int i = 0; i < freq; i++) {
final int pos = dpEnum.nextPosition();
if (usePositions && pos >= 0) {
if (positionsNL == null) {
positionsNL = new NamedList<Integer>();
termInfo.add("positions", positionsNL);
}
positionsNL.add("position", pos);
}
if (theOffsets != null) {
theOffsets.add("start", dpEnum.startOffset());
theOffsets.add("end", dpEnum.endOffset());
}
}
}
if (fieldOptions.docFreq) {
termInfo.add("df", getDocFreq(reader, field, text));
}
if (fieldOptions.tfIdf) {
double tfIdfVal = ((double) freq) / getDocFreq(reader, field, text);
termInfo.add("tf-idf", tfIdfVal);
}
}
}
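// Illustrative sketch (not from the Solr source): the "tf-idf" value emitted by mapOneVector is
// simply term frequency divided by document frequency, not the textbook tf * log(N/df) weighting.
class TfIdfSketch {
  static double tfIdf(int termFreqInDoc, int docFreq) {
    // mirrors: ((double) freq) / getDocFreq(reader, field, text)
    return ((double) termFreqInDoc) / docFreq;
  }
  // e.g. tfIdf(3, 12) == 0.25
}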
// in solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
int result = ResponseBuilder.STAGE_DONE;
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
//Go ask each shard for its vectors
// for each shard, collect the documents for that shard.
HashMap<String, Collection<ShardDoc>> shardMap = new HashMap<String, Collection<ShardDoc>>();
for (ShardDoc sdoc : rb.resultIds.values()) {
Collection<ShardDoc> shardDocs = shardMap.get(sdoc.shard);
if (shardDocs == null) {
shardDocs = new ArrayList<ShardDoc>();
shardMap.put(sdoc.shard, shardDocs);
}
shardDocs.add(sdoc);
}
// Now create a request for each shard to retrieve the stored fields
for (Collection<ShardDoc> shardDocs : shardMap.values()) {
ShardRequest sreq = new ShardRequest();
sreq.purpose = ShardRequest.PURPOSE_GET_FIELDS;
sreq.shards = new String[]{shardDocs.iterator().next().shard};
sreq.params = new ModifiableSolrParams();
// add original params
sreq.params.add(rb.req.getParams());
sreq.params.remove(CommonParams.Q);//remove the query
ArrayList<String> ids = new ArrayList<String>(shardDocs.size());
for (ShardDoc shardDoc : shardDocs) {
ids.add(shardDoc.id.toString());
}
sreq.params.add(TermVectorParams.DOC_IDS, StrUtils.join(ids, ','));
rb.addRequest(this, sreq);
}
result = ResponseBuilder.STAGE_DONE;
}
return result;
}
// in solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
Override
public void prepare(ResponseBuilder rb) throws IOException {
}
// in solr/core/src/java/org/apache/solr/handler/admin/ShowFileRequestHandler.java
Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, KeeperException, InterruptedException
{
CoreContainer coreContainer = req.getCore().getCoreDescriptor().getCoreContainer();
if (coreContainer.isZooKeeperAware()) {
showFromZooKeeper(req, rsp, coreContainer);
} else {
showFromFileSystem(req, rsp);
}
}
// in solr/core/src/java/org/apache/solr/handler/admin/ShowFileRequestHandler.java
private void showFromFileSystem(SolrQueryRequest req, SolrQueryResponse rsp)
throws IOException {
File adminFile = null;
final SolrResourceLoader loader = req.getCore().getResourceLoader();
File configdir = new File( loader.getConfigDir() );
if (!configdir.exists()) {
// TODO: maybe we should just open it this way to start with?
try {
configdir = new File( loader.getClassLoader().getResource(loader.getConfigDir()).toURI() );
} catch (URISyntaxException e) {
throw new SolrException( ErrorCode.FORBIDDEN, "Can not access configuration directory!");
}
}
String fname = req.getParams().get("file", null);
if( fname == null ) {
adminFile = configdir;
}
else {
fname = fname.replace( '\\', '/' ); // normalize slashes
if( hiddenFiles.contains( fname.toUpperCase(Locale.ENGLISH) ) ) {
throw new SolrException( ErrorCode.FORBIDDEN, "Can not access: "+fname );
}
if( fname.indexOf( ".." ) >= 0 ) {
throw new SolrException( ErrorCode.FORBIDDEN, "Invalid path: "+fname );
}
adminFile = new File( configdir, fname );
}
// Make sure the file exists, is readable and is not a hidden file
if( !adminFile.exists() ) {
throw new SolrException( ErrorCode.BAD_REQUEST, "Can not find: "+adminFile.getName()
+ " ["+adminFile.getAbsolutePath()+"]" );
}
if( !adminFile.canRead() || adminFile.isHidden() ) {
throw new SolrException( ErrorCode.BAD_REQUEST, "Can not show: "+adminFile.getName()
+ " ["+adminFile.getAbsolutePath()+"]" );
}
// Show a directory listing
if( adminFile.isDirectory() ) {
int basePath = configdir.getAbsolutePath().length() + 1;
NamedList<SimpleOrderedMap<Object>> files = new SimpleOrderedMap<SimpleOrderedMap<Object>>();
for( File f : adminFile.listFiles() ) {
String path = f.getAbsolutePath().substring( basePath );
path = path.replace( '\\', '/' ); // normalize slashes
if( hiddenFiles.contains( path.toUpperCase(Locale.ENGLISH) ) ) {
continue; // don't show 'hidden' files
}
if( f.isHidden() || f.getName().startsWith( "." ) ) {
continue; // skip hidden system files...
}
SimpleOrderedMap<Object> fileInfo = new SimpleOrderedMap<Object>();
files.add( path, fileInfo );
if( f.isDirectory() ) {
fileInfo.add( "directory", true );
}
else {
// TODO? content type
fileInfo.add( "size", f.length() );
}
fileInfo.add( "modified", new Date( f.lastModified() ) );
}
rsp.add( "files", files );
}
else {
// Include the file contents
//The file logic depends on RawResponseWriter, so force its use.
ModifiableSolrParams params = new ModifiableSolrParams( req.getParams() );
params.set( CommonParams.WT, "raw" );
req.setParams(params);
ContentStreamBase content = new ContentStreamBase.FileStream( adminFile );
content.setContentType( req.getParams().get( USE_CONTENT_TYPE ) );
rsp.add(RawResponseWriter.CONTENT, content);
}
rsp.setHttpCaching(false);
}
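// Illustrative sketch (not from the Solr source): the essence of the file-name checks above,
// shown as a hypothetical standalone validator - reject configured hidden files and any path
// containing "..".
import java.util.Locale;
import java.util.Set;

class AdminFileCheckSketch {
  static void validate(String fname, Set<String> hiddenFiles) {
    String normalized = fname.replace('\\', '/'); // normalize slashes, as above
    if (hiddenFiles.contains(normalized.toUpperCase(Locale.ENGLISH))) {
      throw new IllegalArgumentException("Can not access: " + fname);
    }
    if (normalized.indexOf("..") >= 0) {
      throw new IllegalArgumentException("Invalid path: " + fname);
    }
  }
}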
// in solr/core/src/java/org/apache/solr/handler/admin/PropertiesRequestHandler.java
Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException
{
Object props = null;
String name = req.getParams().get( "name" );
if( name != null ) {
NamedList<String> p = new SimpleOrderedMap<String>();
p.add( name, System.getProperty(name) );
props = p;
}
else {
props = System.getProperties();
}
rsp.add( "system.properties", props );
rsp.setHttpCaching(false);
}
// in solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
private static SimpleOrderedMap<Object> getDocumentFieldsInfo( Document doc, int docId, IndexReader reader,
IndexSchema schema ) throws IOException
{
final CharsRef spare = new CharsRef();
SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
for( Object o : doc.getFields() ) {
Field field = (Field)o;
SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();
SchemaField sfield = schema.getFieldOrNull( field.name() );
FieldType ftype = (sfield==null)?null:sfield.getType();
f.add( "type", (ftype==null)?null:ftype.getTypeName() );
f.add( "schema", getFieldFlags( sfield ) );
f.add( "flags", getFieldFlags( field ) );
Term t = new Term(field.name(), ftype!=null ? ftype.storedToIndexed(field) : field.stringValue());
f.add( "value", (ftype==null)?null:ftype.toExternal( field ) );
// TODO: this really should be "stored"
f.add( "internal", field.stringValue() ); // may be a binary number
BytesRef bytes = field.binaryValue();
if (bytes != null) {
f.add( "binary", Base64.byteArrayToBase64(bytes.bytes, bytes.offset, bytes.length));
}
f.add( "boost", field.boost() );
f.add( "docFreq", t.text()==null ? 0 : reader.docFreq( t ) ); // this can be 0 for non-indexed fields
// If we have a term vector, return that
if( field.fieldType().storeTermVectors() ) {
try {
Terms v = reader.getTermVector( docId, field.name() );
if( v != null ) {
SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>();
final TermsEnum termsEnum = v.iterator(null);
BytesRef text;
while((text = termsEnum.next()) != null) {
final int freq = (int) termsEnum.totalTermFreq();
UnicodeUtil.UTF8toUTF16(text, spare);
tfv.add(spare.toString(), freq);
}
f.add( "termVector", tfv );
}
}
catch( Exception ex ) {
log.warn( "error writing term vector", ex );
}
}
finfo.add( field.name(), f );
}
return finfo;
}
// in solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
private static Document getFirstLiveDoc(AtomicReader reader, String fieldName, Terms terms) throws IOException {
DocsEnum docsEnum = null;
TermsEnum termsEnum = terms.iterator(null);
BytesRef text;
// Deal with the chance that the first bunch of terms are in deleted documents. Is there a better way?
for (int idx = 0; idx < 1000 && docsEnum == null; ++idx) {
text = termsEnum.next();
if (text == null) { // Ran off the end of the terms enum without finding any live docs with that field in them.
return null;
}
Term term = new Term(fieldName, text);
docsEnum = reader.termDocsEnum(reader.getLiveDocs(),
term.field(),
new BytesRef(term.text()),
false);
if (docsEnum != null) {
int docId;
if ((docId = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
return reader.document(docId);
}
}
}
return null;
}
// in solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
public static SimpleOrderedMap<Object> getIndexInfo(DirectoryReader reader, boolean detail) throws IOException {
return getIndexInfo(reader);
}
// in solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
public static SimpleOrderedMap<Object> getIndexInfo(DirectoryReader reader) throws IOException {
Directory dir = reader.directory();
SimpleOrderedMap<Object> indexInfo = new SimpleOrderedMap<Object>();
indexInfo.add("numDocs", reader.numDocs());
indexInfo.add("maxDoc", reader.maxDoc());
indexInfo.add("version", reader.getVersion()); // TODO? Is this different then: IndexReader.getCurrentVersion( dir )?
indexInfo.add("segmentCount", reader.getSequentialSubReaders().length);
indexInfo.add("current", reader.isCurrent() );
indexInfo.add("hasDeletions", reader.hasDeletions() );
indexInfo.add("directory", dir );
indexInfo.add("userData", reader.getIndexCommit().getUserData());
String s = reader.getIndexCommit().getUserData().get(SolrIndexWriter.COMMIT_TIME_MSEC_KEY);
if (s != null) {
indexInfo.add("lastModified", new Date(Long.parseLong(s)));
}
return indexInfo;
}
// in solr/core/src/java/org/apache/solr/handler/admin/ThreadDumpHandler.java
Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException
{
SimpleOrderedMap<Object> system = new SimpleOrderedMap<Object>();
rsp.add( "system", system );
ThreadMXBean tmbean = ManagementFactory.getThreadMXBean();
// Thread Count
SimpleOrderedMap<Object> nl = new SimpleOrderedMap<Object>();
nl.add( "current",tmbean.getThreadCount() );
nl.add( "peak", tmbean.getPeakThreadCount() );
nl.add( "daemon", tmbean.getDaemonThreadCount() );
system.add( "threadCount", nl );
// Deadlocks
ThreadInfo[] tinfos;
long[] tids = tmbean.findMonitorDeadlockedThreads();
if (tids != null) {
tinfos = tmbean.getThreadInfo(tids, Integer.MAX_VALUE);
NamedList<SimpleOrderedMap<Object>> lst = new NamedList<SimpleOrderedMap<Object>>();
for (ThreadInfo ti : tinfos) {
if (ti != null) {
lst.add( "thread", getThreadInfo( ti, tmbean ) );
}
}
system.add( "deadlocks", lst );
}
// Now show all the threads....
tids = tmbean.getAllThreadIds();
tinfos = tmbean.getThreadInfo(tids, Integer.MAX_VALUE);
NamedList<SimpleOrderedMap<Object>> lst = new NamedList<SimpleOrderedMap<Object>>();
for (ThreadInfo ti : tinfos) {
if (ti != null) {
lst.add( "thread", getThreadInfo( ti, tmbean ) );
}
}
system.add( "threadDump", lst );
rsp.setHttpCaching(false);
}
// in solr/core/src/java/org/apache/solr/handler/admin/ThreadDumpHandler.java
private static SimpleOrderedMap<Object> getThreadInfo( ThreadInfo ti, ThreadMXBean tmbean ) throws IOException
{
SimpleOrderedMap<Object> info = new SimpleOrderedMap<Object>();
long tid = ti.getThreadId();
info.add( "id", tid );
info.add( "name", ti.getThreadName() );
info.add( "state", ti.getThreadState().toString() );
if (ti.getLockName() != null) {
info.add( "lock", ti.getLockName() );
}
if (ti.isSuspended()) {
info.add( "suspended", true );
}
if (ti.isInNative()) {
info.add( "native", true );
}
if (tmbean.isThreadCpuTimeSupported()) {
info.add( "cpuTime", formatNanos(tmbean.getThreadCpuTime(tid)) );
info.add( "userTime", formatNanos(tmbean.getThreadUserTime(tid)) );
}
if (ti.getLockOwnerName() != null) {
SimpleOrderedMap<Object> owner = new SimpleOrderedMap<Object>();
owner.add( "name", ti.getLockOwnerName() );
owner.add( "id", ti.getLockOwnerId() );
info.add( "owner", owner ); // attach the owner info; previously built but never added
}
// Add the stack trace
int i=0;
String[] trace = new String[ti.getStackTrace().length];
for( StackTraceElement ste : ti.getStackTrace()) {
trace[i++] = ste.toString();
}
info.add( "stackTrace", trace );
return info;
}
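// Illustrative sketch (not from the Solr source): the same standard java.lang.management calls
// used by the handler above, reduced to a minimal stand-alone thread dump.
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;

class ThreadDumpSketch {
  public static void main(String[] args) {
    ThreadMXBean tmbean = ManagementFactory.getThreadMXBean();
    ThreadInfo[] infos = tmbean.getThreadInfo(tmbean.getAllThreadIds(), Integer.MAX_VALUE);
    for (ThreadInfo ti : infos) {
      if (ti == null) continue; // a thread may have exited between the two calls
      System.out.println(ti.getThreadId() + " " + ti.getThreadName() + " " + ti.getThreadState());
    }
  }
}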
// in solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
protected boolean handleMergeAction(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
SolrParams params = req.getParams();
String cname = params.required().get(CoreAdminParams.CORE);
SolrCore core = coreContainer.getCore(cname);
SolrQueryRequest wrappedReq = null;
SolrCore[] sourceCores = null;
RefCounted<SolrIndexSearcher>[] searchers = null;
// stores readers created from indexDir param values
DirectoryReader[] readersToBeClosed = null;
Directory[] dirsToBeReleased = null;
if (core != null) {
try {
String[] dirNames = params.getParams(CoreAdminParams.INDEX_DIR);
if (dirNames == null || dirNames.length == 0) {
String[] sources = params.getParams("srcCore");
if (sources == null || sources.length == 0)
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
"At least one indexDir or srcCore must be specified");
sourceCores = new SolrCore[sources.length];
for (int i = 0; i < sources.length; i++) {
String source = sources[i];
SolrCore srcCore = coreContainer.getCore(source);
if (srcCore == null)
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Core: " + source + " does not exist");
sourceCores[i] = srcCore;
}
} else {
readersToBeClosed = new DirectoryReader[dirNames.length];
dirsToBeReleased = new Directory[dirNames.length];
DirectoryFactory dirFactory = core.getDirectoryFactory();
for (int i = 0; i < dirNames.length; i++) {
Directory dir = dirFactory.get(dirNames[i], core.getSolrConfig().indexConfig.lockType);
dirsToBeReleased[i] = dir;
// TODO: why doesn't this use the IR factory? what is going on here?
readersToBeClosed[i] = DirectoryReader.open(dir);
}
}
DirectoryReader[] readers = null;
if (readersToBeClosed != null) {
readers = readersToBeClosed;
} else {
readers = new DirectoryReader[sourceCores.length];
searchers = new RefCounted[sourceCores.length];
for (int i = 0; i < sourceCores.length; i++) {
SolrCore solrCore = sourceCores[i];
// record the searchers so that we can decref
searchers[i] = solrCore.getSearcher();
readers[i] = searchers[i].get().getIndexReader();
}
}
UpdateRequestProcessorChain processorChain =
core.getUpdateProcessingChain(params.get(UpdateParams.UPDATE_CHAIN));
wrappedReq = new LocalSolrQueryRequest(core, req.getParams());
UpdateRequestProcessor processor =
processorChain.createProcessor(wrappedReq, rsp);
processor.processMergeIndexes(new MergeIndexesCommand(readers, req));
} finally {
if (searchers != null) {
for (RefCounted<SolrIndexSearcher> searcher : searchers) {
if (searcher != null) searcher.decref();
}
}
if (sourceCores != null) {
for (SolrCore solrCore : sourceCores) {
if (solrCore != null) solrCore.close();
}
}
if (readersToBeClosed != null) IOUtils.closeWhileHandlingException(readersToBeClosed);
if (dirsToBeReleased != null) {
for (Directory dir : dirsToBeReleased) {
DirectoryFactory dirFactory = core.getDirectoryFactory();
dirFactory.release(dir);
}
}
if (wrappedReq != null) wrappedReq.close();
core.close();
}
}
return coreContainer.isPersistent();
}
// in solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
protected void handleRequestRecoveryAction(SolrQueryRequest req,
SolrQueryResponse rsp) throws IOException {
final SolrParams params = req.getParams();
String cname = params.get(CoreAdminParams.CORE);
if (cname == null) {
cname = "";
}
SolrCore core = null;
try {
core = coreContainer.getCore(cname);
if (core != null) {
core.getUpdateHandler().getSolrCoreState().doRecovery(coreContainer, cname);
} else {
SolrException.log(log, "Cound not find core to call recovery:" + cname);
}
} finally {
// no recoveryStrat close for now
if (core != null) {
core.close();
}
}
}
// in solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
protected void handleWaitForStateAction(SolrQueryRequest req,
SolrQueryResponse rsp) throws IOException, InterruptedException {
final SolrParams params = req.getParams();
String cname = params.get(CoreAdminParams.CORE);
if (cname == null) {
cname = "";
}
String nodeName = params.get("nodeName");
String coreNodeName = params.get("coreNodeName");
String waitForState = params.get("state");
Boolean checkLive = params.getBool("checkLive");
int pauseFor = params.getInt("pauseFor", 0);
String state = null;
boolean live = false;
int retry = 0;
while (true) {
SolrCore core = null;
try {
core = coreContainer.getCore(cname);
if (core == null && retry == 30) {
throw new SolrException(ErrorCode.BAD_REQUEST, "core not found:"
+ cname);
}
if (core != null) {
// wait until we are sure the recovering node is ready
// to accept updates
CloudDescriptor cloudDescriptor = core.getCoreDescriptor()
.getCloudDescriptor();
CloudState cloudState = coreContainer.getZkController()
.getCloudState();
String collection = cloudDescriptor.getCollectionName();
Slice slice = cloudState.getSlice(collection,
cloudDescriptor.getShardId());
if (slice != null) {
ZkNodeProps nodeProps = slice.getShards().get(coreNodeName);
if (nodeProps != null) {
state = nodeProps.get(ZkStateReader.STATE_PROP);
live = cloudState.liveNodesContain(nodeName);
if (state.equals(waitForState)) { // nodeProps already checked non-null above
if (checkLive == null) {
break;
} else if (checkLive && live) {
break;
} else if (!checkLive && !live) {
break;
}
}
}
}
}
if (retry++ == 30) {
throw new SolrException(ErrorCode.BAD_REQUEST,
"I was asked to wait on state " + waitForState + " for "
+ nodeName
+ " but I still do not see the request state. I see state: "
+ state + " live:" + live);
}
} finally {
if (core != null) {
core.close();
}
}
Thread.sleep(1000);
}
// small safety net for any updates that started with state that
// kept it from sending the update to be buffered -
// pause for a while to let any outstanding updates finish
// System.out.println("I saw state:" + state + " sleep for " + pauseFor +
// " live:" + live);
Thread.sleep(pauseFor);
// solrcloud_debug
// try {;
// LocalSolrQueryRequest r = new LocalSolrQueryRequest(core, new
// ModifiableSolrParams());
// CommitUpdateCommand commitCmd = new CommitUpdateCommand(r, false);
// commitCmd.softCommit = true;
// core.getUpdateHandler().commit(commitCmd);
// RefCounted<SolrIndexSearcher> searchHolder =
// core.getNewestSearcher(false);
// SolrIndexSearcher searcher = searchHolder.get();
// try {
// System.out.println(core.getCoreDescriptor().getCoreContainer().getZkController().getNodeName()
// + " to replicate "
// + searcher.search(new MatchAllDocsQuery(), 1).totalHits + " gen:" +
// core.getDeletionPolicy().getLatestCommit().getGeneration() + " data:" +
// core.getDataDir());
// } finally {
// searchHolder.decref();
// }
// } catch (Exception e) {
//
// }
}
// in solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
protected void handleDistribUrlAction(SolrQueryRequest req,
SolrQueryResponse rsp) throws IOException, InterruptedException, SolrServerException {
// TODO: finish this and tests
SolrParams params = req.getParams();
final ModifiableSolrParams newParams = new ModifiableSolrParams(params);
newParams.remove("action");
SolrParams required = params.required();
final String subAction = required.get("subAction");
String collection = required.get("collection");
newParams.set(CoreAdminParams.ACTION, subAction);
SolrCore core = req.getCore();
ZkController zkController = core.getCoreDescriptor().getCoreContainer()
.getZkController();
CloudState cloudState = zkController.getCloudState();
Map<String,Slice> slices = cloudState.getCollectionStates().get(collection);
for (Map.Entry<String,Slice> entry : slices.entrySet()) {
Slice slice = entry.getValue();
Map<String,ZkNodeProps> shards = slice.getShards();
Set<Map.Entry<String,ZkNodeProps>> shardEntries = shards.entrySet();
for (Map.Entry<String,ZkNodeProps> shardEntry : shardEntries) {
final ZkNodeProps node = shardEntry.getValue();
if (cloudState.liveNodesContain(node.get(ZkStateReader.NODE_NAME_PROP))) {
newParams.set(CoreAdminParams.CORE, node.get(ZkStateReader.CORE_NAME_PROP));
String replica = node.get(ZkStateReader.BASE_URL_PROP);
ShardRequest sreq = new ShardRequest();
newParams.set("qt", "/admin/cores");
sreq.purpose = 1;
// TODO: this sucks
if (replica.startsWith("http://"))
replica = replica.substring(7);
sreq.shards = new String[]{replica};
sreq.actualShards = sreq.shards;
sreq.params = newParams;
shardHandler.submit(sreq, replica, sreq.params);
}
}
}
ShardResponse srsp;
do {
srsp = shardHandler.takeCompletedOrError();
if (srsp != null) {
Throwable e = srsp.getException();
if (e != null) {
log.error("Error talking to shard: " + srsp.getShard(), e);
}
}
} while(srsp != null);
}
// in solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
protected NamedList<Object> getCoreStatus(CoreContainer cores, String cname) throws IOException {
NamedList<Object> info = new SimpleOrderedMap<Object>();
SolrCore core = cores.getCore(cname);
if (core != null) {
try {
info.add("name", core.getName());
info.add("isDefaultCore", core.getName().equals(cores.getDefaultCoreName()));
info.add("instanceDir", normalizePath(core.getResourceLoader().getInstanceDir()));
info.add("dataDir", normalizePath(core.getDataDir()));
info.add("config", core.getConfigResource());
info.add("schema", core.getSchemaResource());
info.add("startTime", new Date(core.getStartTime()));
info.add("uptime", System.currentTimeMillis() - core.getStartTime());
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
try {
SimpleOrderedMap<Object> indexInfo = LukeRequestHandler.getIndexInfo(searcher.get().getIndexReader());
long size = getIndexSize(core);
indexInfo.add("sizeInBytes", size);
indexInfo.add("size", NumberUtils.readableSize(size));
info.add("index", indexInfo);
} finally {
searcher.decref();
}
} finally {
core.close();
}
}
return info;
}
// in solr/core/src/java/org/apache/solr/handler/SnapShooter.java
public void copyFiles(Collection<String> files, File destDir) throws IOException {
for (String indexFile : files) {
File source = new File(solrCore.getIndexDir(), indexFile);
copyFile(source, new File(destDir, source.getName()), true);
}
}
// in solr/core/src/java/org/apache/solr/handler/SnapShooter.java
public void copyFile(File source, File destination, boolean preserveFileDate)
throws IOException {
// check source exists
if (!source.exists()) {
String message = "File " + source + " does not exist";
throw new FileNotFoundException(message);
}
// does destinations directory exist ?
if (destination.getParentFile() != null
&& !destination.getParentFile().exists()) {
destination.getParentFile().mkdirs();
}
// make sure we can write to destination
if (destination.exists() && !destination.canWrite()) {
String message = "Unable to open file " + destination + " for writing.";
throw new IOException(message);
}
FileInputStream input = null;
FileOutputStream output = null;
try {
input = new FileInputStream(source);
output = new FileOutputStream(destination);
int count = 0;
int n = 0;
int rcnt = 0;
while (-1 != (n = input.read(buffer))) {
output.write(buffer, 0, n);
count += n;
rcnt++;
/***
// reserve every 4.6875 MB
if (rcnt == 150) {
rcnt = 0;
delPolicy.setReserveDuration(indexCommit.getVersion(), reserveTime);
}
***/
}
} finally {
try {
IOUtils.closeQuietly(input);
} finally {
IOUtils.closeQuietly(output);
}
}
if (source.length() != destination.length()) {
String message = "Failed to copy full contents from " + source + " to "
+ destination;
throw new IOException(message);
}
if (preserveFileDate) {
// file copy should preserve file date
destination.setLastModified(source.lastModified());
}
}
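// Illustrative sketch (not from the Solr source): the same copy-and-verify idea as copyFile
// above, using try-with-resources instead of the explicit closeQuietly calls.
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;

class CopyFileSketch {
  static void copy(File source, File destination) throws IOException {
    byte[] buffer = new byte[8192];
    try (FileInputStream in = new FileInputStream(source);
         FileOutputStream out = new FileOutputStream(destination)) {
      int n;
      while ((n = in.read(buffer)) != -1) {
        out.write(buffer, 0, n);
      }
    }
    // verify the full contents were copied, as the original does
    if (source.length() != destination.length()) {
      throw new IOException("Failed to copy full contents from " + source + " to " + destination);
    }
  }
}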
// in solr/core/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
private SynonymMap loadSolrSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
final boolean expand = getBoolean("expand", true);
String synonyms = args.get("synonyms");
if (synonyms == null)
throw new InitializationException("Missing required argument 'synonyms'.");
CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
SolrSynonymParser parser = new SolrSynonymParser(dedup, expand, analyzer);
File synonymFile = new File(synonyms);
if (synonymFile.exists()) {
decoder.reset();
parser.add(new InputStreamReader(loader.openResource(synonyms), decoder));
} else {
List<String> files = StrUtils.splitFileNames(synonyms);
for (String file : files) {
decoder.reset();
parser.add(new InputStreamReader(loader.openResource(file), decoder));
}
}
return parser.build();
}
// in solr/core/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
private SynonymMap loadWordnetSynonyms(ResourceLoader loader, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
final boolean expand = getBoolean("expand", true);
String synonyms = args.get("synonyms");
if (synonyms == null)
throw new InitializationException("Missing required argument 'synonyms'.");
CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
WordnetSynonymParser parser = new WordnetSynonymParser(dedup, expand, analyzer);
File synonymFile = new File(synonyms);
if (synonymFile.exists()) {
decoder.reset();
parser.add(new InputStreamReader(loader.openResource(synonyms), decoder));
} else {
List<String> files = StrUtils.splitFileNames(synonyms);
for (String file : files) {
decoder.reset();
parser.add(new InputStreamReader(loader.openResource(file), decoder));
}
}
return parser.build();
}
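// Illustrative sketch (not from the Solr source): building the same strict UTF-8 decoder used by
// both synonym loaders above, so malformed bytes fail loudly instead of being silently replaced.
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;

class StrictUtf8ReaderSketch {
  static Reader open(InputStream in) {
    CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);
    return new InputStreamReader(in, decoder);
  }
}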
// in solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
Override
public void reset(Reader input) throws IOException {
try {
super.reset(input);
input = super.input;
char[] buf = new char[32];
int len = input.read(buf);
this.startOfs = correctOffset(0);
this.endOfs = correctOffset(len);
String v = new String(buf, 0, len);
try {
switch (type) {
case INTEGER:
ts.setIntValue(Integer.parseInt(v));
break;
case FLOAT:
ts.setFloatValue(Float.parseFloat(v));
break;
case LONG:
ts.setLongValue(Long.parseLong(v));
break;
case DOUBLE:
ts.setDoubleValue(Double.parseDouble(v));
break;
case DATE:
ts.setLongValue(dateField.parseMath(null, v).getTime());
break;
default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field");
}
} catch (NumberFormatException nfe) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
"Invalid Number: " + v);
}
} catch (IOException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unable to create TrieIndexTokenizer", e);
}
}
// in solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
Override
public void close() throws IOException {
super.close();
ts.close();
}
// in solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
Override
public void reset() throws IOException {
super.reset();
ts.reset();
}
// in solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
Override
public boolean incrementToken() throws IOException {
if (ts.incrementToken()) {
ofsAtt.setOffset(startOfs, endOfs);
return true;
}
return false;
}
// in solr/core/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
Override
public void end() throws IOException {
ts.end();
ofsAtt.setOffset(endOfs, endOfs);
}
// in solr/core/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java
Override
public boolean incrementToken() throws IOException {
if( save != null ) {
// clearAttributes(); // not currently necessary
restoreState(save);
save = null;
return true;
}
if (!input.incrementToken()) return false;
// pass through zero-length terms
int oldLen = termAtt.length();
if (oldLen ==0) return true;
int origOffset = posAtt.getPositionIncrement();
if (withOriginal) {
posAtt.setPositionIncrement(0);
save = captureState();
}
char [] buffer = termAtt.resizeBuffer(oldLen + 1);
buffer[oldLen] = markerChar;
reverse(buffer, 0, oldLen + 1);
posAtt.setPositionIncrement(origOffset);
termAtt.copyBuffer(buffer, 0, oldLen +1);
return true;
}
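// Illustrative sketch (not from the Solr source): what the filter above does to each term -
// append a marker character and reverse the whole buffer, so leading-wildcard queries can be
// rewritten as prefix queries against the reversed form.
class ReverseTermSketch {
  static String reverseWithMarker(String term, char markerChar) {
    char[] buffer = new char[term.length() + 1];
    term.getChars(0, term.length(), buffer, 0);
    buffer[term.length()] = markerChar;
    // in-place reverse, as done by reverse(buffer, 0, oldLen + 1) above
    for (int i = 0, j = buffer.length - 1; i < j; i++, j--) {
      char tmp = buffer[i]; buffer[i] = buffer[j]; buffer[j] = tmp;
    }
    return new String(buffer);
  }
  // e.g. reverseWithMarker("apple", '\u0001') yields "\u0001elppa"
}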
// in solr/core/src/java/org/apache/solr/analysis/TokenizerChain.java
Override
protected void reset(Reader reader) throws IOException {
// the tokenizers are currently reset by the indexing process, so only
// the tokenizer needs to be reset.
Reader r = initReader(reader);
super.reset(r);
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
public static void main(String[] args) throws IOException {
Reader in = new LegacyHTMLStripCharFilter(
CharReader.get(new InputStreamReader(System.in)));
int ch;
while ( (ch=in.read()) != -1 ) System.out.print((char)ch);
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
private int next() throws IOException {
int len = pushed.length();
if (len>0) {
int ch = pushed.charAt(len-1);
pushed.setLength(len-1);
return ch;
}
numRead++;
return input.read();
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
private int nextSkipWS() throws IOException {
int ch=next();
while(isSpace(ch)) ch=next();
return ch;
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
private int peek() throws IOException {
int len = pushed.length();
if (len>0) {
return pushed.charAt(len-1);
}
numRead++;
int ch = input.read();
push(ch);
return ch;
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
private void saveState() throws IOException {
lastMark = numRead;
input.mark(readAheadLimit);
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
private void restoreState() throws IOException {
input.reset();
pushed.setLength(0);
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
private int readNumericEntity() throws IOException {
// "&#" has already been read at this point
int eaten = 2;
// is this decimal, hex, or nothing at all.
int ch = next();
int base=10;
boolean invalid=false;
sb.setLength(0);
if (isDigit(ch)) {
// decimal character entity
sb.append((char)ch);
for (int i=0; i<10; i++) {
ch = next();
if (isDigit(ch)) {
sb.append((char)ch);
} else {
break;
}
}
} else if (ch=='x') {
eaten++;
// hex character entity
base=16;
sb.setLength(0);
for (int i=0; i<10; i++) {
ch = next();
if (isHex(ch)) {
sb.append((char)ch);
} else {
break;
}
}
} else {
return MISMATCH;
}
// In older HTML, an entity may not have always been terminated
// with a semicolon. We'll also treat EOF or whitespace as terminating
// the entity.
try {
if (ch==';' || ch==-1) {
// do not account for the eaten ";" due to the fact that we do output a char
numWhitespace = sb.length() + eaten;
return Integer.parseInt(sb.toString(), base);
}
// if whitespace terminated the entity, we need to return
// that whitespace on the next call to read().
if (isSpace(ch)) {
push(ch);
numWhitespace = sb.length() + eaten;
return Integer.parseInt(sb.toString(), base);
}
} catch (NumberFormatException e) {
return MISMATCH;
}
// Not an entity...
return MISMATCH;
}
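// Illustrative sketch (not from the Solr source): decoding a numeric character reference the way
// readNumericEntity does - decimal digits for "&#65;", hexadecimal after a lowercase 'x' for
// "&#x41;", both parsed with the appropriate radix.
class NumericEntitySketch {
  // entityBody is the text between "&#" and the terminator, e.g. "65" or "x41"
  static int decode(String entityBody) {
    int base = 10;
    String digits = entityBody;
    if (entityBody.length() > 0 && entityBody.charAt(0) == 'x') {
      base = 16;
      digits = entityBody.substring(1);
    }
    return Integer.parseInt(digits, base); // "65" -> 65 ('A'), "x41" -> 65 ('A')
  }
}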
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
private int readEntity() throws IOException {
int ch = next();
if (ch=='#') return readNumericEntity();
//read an entity reference
// for an entity reference, require the ';' for safety.
// otherwise we may try and convert part of some company
// names to an entity. "Alpha&Beta Corp" for instance.
//
// TODO: perhaps I should special case some of the
// more common ones like & to make the ';' optional...
sb.setLength(0);
sb.append((char)ch);
for (int i=0; i< safeReadAheadLimit; i++) {
ch=next();
if (Character.isLetter(ch)) {
sb.append((char)ch);
} else {
break;
}
}
if (ch==';') {
String entity=sb.toString();
Character entityChar = entityTable.get(entity);
if (entityChar!=null) {
numWhitespace = entity.length() + 1 ;
return entityChar.charValue();
}
}
return MISMATCH;
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
private int readBang(boolean inScript) throws IOException {
// at this point, "<!" has been read
int ret = readComment(inScript);
if (ret==MATCH) return MATCH;
if ((numRead - lastMark) < safeReadAheadLimit || peek() == '>' ) {
int ch = next();
if (ch=='>') return MATCH;
// if it starts with <! and isn't a comment,
// simply read until ">"
//since we did readComment already, it may be the case that we are already deep into the read ahead buffer
//so, we may need to abort sooner
while ((numRead - lastMark) < safeReadAheadLimit) {
ch = next();
if (ch=='>') {
return MATCH;
}
else if (ch<0) {
return MISMATCH;
}
}
}
return MISMATCH;
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
private int readComment(boolean inScript) throws IOException {
// at this point "<!" has been read
int ch = next();
if (ch!='-') {
// not a comment
push(ch);
return MISMATCH;
}
ch = next();
if (ch!='-') {
// not a comment
push(ch);
push('-');
return MISMATCH;
}
/*two extra calls to next() here, so make sure we don't read past our mark*/
while ((numRead - lastMark) < safeReadAheadLimit -3 ) {
ch = next();
if (ch<0) return MISMATCH;
if (ch=='-') {
ch = next();
if (ch<0) return MISMATCH;
if (ch!='-') {
push(ch);
continue;
}
ch = next();
if (ch<0) return MISMATCH;
if (ch!='>') {
push(ch);
push('-');
continue;
}
return MATCH;
} else if ((ch=='\'' || ch=='"') && inScript) {
push(ch);
int ret=readScriptString();
// if this wasn't a string, there's not much we can do
// at this point without having a stack of stream states in
// order to "undo" just the latest.
} else if (ch=='<') {
eatSSI();
}
}
return MISMATCH;
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
private int readTag() throws IOException {
// at this point '<' has already been read
int ch = next();
if (!isAlpha(ch)) {
push(ch);
return MISMATCH;
}
sb.setLength(0);
sb.append((char)ch);
while((numRead - lastMark) < safeReadAheadLimit) {
ch = next();
if (isIdChar(ch)) {
sb.append((char)ch);
} else if (ch=='/') {
// Hmmm, a tag can close with "/>" as well as "/ >"
// read end tag '/>' or '/ >', etc
return nextSkipWS()=='>' ? MATCH : MISMATCH;
} else {
break;
}
}
if (escapedTags!=null && escapedTags.contains(sb.toString())){
//if this is a reservedTag, then keep it
return MISMATCH;
}
// After the tag id, there needs to be either whitespace or
// '>'
if ( !(ch=='>' || isSpace(ch)) ) {
return MISMATCH;
}
if (ch!='>') {
// process attributes
while ((numRead - lastMark) < safeReadAheadLimit) {
ch=next();
if (isSpace(ch)) {
continue;
} else if (isFirstIdChar(ch)) {
push(ch);
int ret = readAttr2();
if (ret==MISMATCH) return ret;
} else if (ch=='/') {
// read end tag '/>' or '/ >', etc
return nextSkipWS()=='>' ? MATCH : MISMATCH;
} else if (ch=='>') {
break;
} else {
return MISMATCH;
}
}
if ((numRead - lastMark) >= safeReadAheadLimit){
return MISMATCH;//exit out if we exceeded the buffer
}
}
// We only get to this point after we have read the
// entire tag. Now let's see if it's a special tag.
String name=sb.toString();
if (name.equalsIgnoreCase("script") || name.equalsIgnoreCase("style")) {
// The content of script and style elements is
// CDATA in HTML 4 but PCDATA in XHTML.
/* From HTML4:
Although the STYLE and SCRIPT elements use CDATA for their data model,
for these elements, CDATA must be handled differently by user agents.
Markup and entities must be treated as raw text and passed to the application
as is. The first occurrence of the character sequence "</" (end-tag open
delimiter) is treated as terminating the end of the element's content. In
valid documents, this would be the end tag for the element.
*/
// discard everything until endtag is hit (except
// if it occurs in a comment.
// reset the stream mark to here, since we know that we successfully matched
// a tag, and if we can't find the end tag, this is where we will want
// to roll back to.
saveState();
pushed.setLength(0);
return findEndTag();
}
return MATCH;
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
int findEndTag() throws IOException {
while ((numRead - lastMark) < safeReadAheadLimit) {
int ch = next();
if (ch=='<') {
ch = next();
// skip looking for end-tag in comments
if (ch=='!') {
int ret = readBang(true);
if (ret==MATCH) continue;
// yikes... what now? It wasn't a comment, but I can't get
// back to the state I was at. Just continue from where I
// am I guess...
continue;
}
// did we match "</"
if (ch!='/') {
push(ch);
continue;
}
int ret = readName(false);
if (ret==MISMATCH) return MISMATCH;
ch=nextSkipWS();
if (ch!='>') return MISMATCH;
return MATCH;
} else if (ch=='\'' || ch=='"') {
// read javascript string to avoid a false match.
push(ch);
int ret = readScriptString();
// what to do about a non-match (non-terminated string?)
// play it safe and index the rest of the data I guess...
if (ret==MISMATCH) return MISMATCH;
} else if (ch<0) {
return MISMATCH;
}
}
return MISMATCH;
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
private int readScriptString() throws IOException {
int quoteChar = next();
if (quoteChar!='\'' && quoteChar!='"') return MISMATCH;
while((numRead - lastMark) < safeReadAheadLimit) {
int ch = next();
if (ch==quoteChar) return MATCH;
else if (ch=='\\') {
ch=next();
} else if (ch<0) {
return MISMATCH;
} else if (ch=='<') {
eatSSI();
}
}
return MISMATCH;
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
private int readName(boolean checkEscaped) throws IOException {
StringBuilder builder = (checkEscaped && escapedTags!=null) ? new StringBuilder() : null;
int ch = next();
if (builder!=null) builder.append((char)ch);
if (!isFirstIdChar(ch)) return MISMATCH;
ch = next();
if (builder!=null) builder.append((char)ch);
while(isIdChar(ch)) {
ch=next();
if (builder!=null) builder.append((char)ch);
}
if (ch!=-1) {
push(ch);
}
//strip off the trailing >
if (builder!=null && escapedTags.contains(builder.substring(0, builder.length() - 1))){
return MISMATCH;
}
return MATCH;
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
private int readAttr2() throws IOException {
if ((numRead - lastMark < safeReadAheadLimit)) {
int ch = next();
if (!isFirstIdChar(ch)) return MISMATCH;
ch = next();
while(isIdChar(ch) && ((numRead - lastMark) < safeReadAheadLimit)){
ch=next();
}
if (isSpace(ch)) ch = nextSkipWS();
// attributes may not have a value at all!
// if (ch != '=') return MISMATCH;
if (ch != '=') {
push(ch);
return MATCH;
}
int quoteChar = nextSkipWS();
if (quoteChar=='"' || quoteChar=='\'') {
while ((numRead - lastMark) < safeReadAheadLimit) {
ch = next();
if (ch<0) return MISMATCH;
else if (ch=='<') {
eatSSI();
}
else if (ch==quoteChar) {
return MATCH;
//} else if (ch=='<') {
// return MISMATCH;
}
}
} else {
// unquoted attribute
while ((numRead - lastMark) < safeReadAheadLimit) {
ch = next();
if (ch<0) return MISMATCH;
else if (isSpace(ch)) {
push(ch);
return MATCH;
} else if (ch=='>') {
push(ch);
return MATCH;
} else if (ch=='<') {
eatSSI();
}
}
}
}
return MISMATCH;
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
private int eatSSI() throws IOException {
// at this point, only a "<" was read.
// on a mismatch, push back the last char so that if it was
// a quote that closes the attribute, it will be re-read and matched.
int ch = next();
if (ch!='!') {
push(ch);
return MISMATCH;
}
ch=next();
if (ch!='-') {
push(ch);
return MISMATCH;
}
ch=next();
if (ch!='-') {
push(ch);
return MISMATCH;
}
ch=next();
if (ch!='#') {
push(ch);
return MISMATCH;
}
push('#'); push('-'); push('-');
return readComment(false);
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
private int readProcessingInstruction() throws IOException {
// "<?" has already been read
while ((numRead - lastMark) < safeReadAheadLimit) {
int ch = next();
if (ch=='?' && peek()=='>') {
next();
return MATCH;
} else if (ch==-1) {
return MISMATCH;
}
}
return MISMATCH;
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
Override
public int read() throws IOException {
// TODO: Do we ever want to preserve CDATA sections?
// where do we have to worry about them?
// <![ CDATA [ unescaped markup ]]>
if (numWhitespace > 0){
numEaten += numWhitespace;
addOffCorrectMap(numReturned, numEaten);
numWhitespace = 0;
}
numReturned++;
//do not limit this one by the READAHEAD
while(true) {
int lastNumRead = numRead;
int ch = next();
switch (ch) {
case '&':
saveState();
ch = readEntity();
if (ch>=0) return ch;
if (ch==MISMATCH) {
restoreState();
return '&';
}
break;
case '<':
saveState();
ch = next();
int ret = MISMATCH;
if (ch=='!') {
ret = readBang(false);
} else if (ch=='/') {
ret = readName(true);
if (ret==MATCH) {
ch=nextSkipWS();
ret= ch=='>' ? MATCH : MISMATCH;
}
} else if (isAlpha(ch)) {
push(ch);
ret = readTag();
} else if (ch=='?') {
ret = readProcessingInstruction();
}
// matched something to be discarded, so break
// from this case and continue in the loop
if (ret==MATCH) {
//break;//was
//return whitespace from
numWhitespace = (numRead - lastNumRead) - 1;//tack on the -1 since we are returning a space right now
return ' ';
}
// didn't match any HTML constructs, so roll back
// the stream state and just return '<'
restoreState();
return '<';
default: return ch;
}
}
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
Override
public int read(char cbuf[], int off, int len) throws IOException {
int i=0;
for (i=0; i<len; i++) {
int ch = read();
if (ch==-1) break;
cbuf[off++] = (char)ch;
}
if (i==0) {
if (len==0) return 0;
return -1;
}
return i;
}
// in solr/core/src/java/org/apache/solr/analysis/LegacyHTMLStripCharFilter.java
Override
public void close() throws IOException {
input.close();
}
// in solr/core/src/java/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java
Override
public NamedList<Object> request(SolrRequest request) throws SolrServerException, IOException
{
String path = request.getPath();
if( path == null || !path.startsWith( "/" ) ) {
path = "/select";
}
// Check for cores action
SolrCore core = coreContainer.getCore( coreName );
if( core == null ) {
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
"No such core: " + coreName );
}
SolrParams params = request.getParams();
if( params == null ) {
params = new ModifiableSolrParams();
}
// Extract the handler from the path or params
SolrRequestHandler handler = core.getRequestHandler( path );
if( handler == null ) {
if( "/select".equals( path ) || "/select/".equalsIgnoreCase( path) ) {
String qt = params.get( CommonParams.QT );
handler = core.getRequestHandler( qt );
if( handler == null ) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "unknown handler: "+qt);
}
}
// Perhaps the path is to manage the cores
if( handler == null &&
coreContainer != null &&
path.equals( coreContainer.getAdminPath() ) ) {
handler = coreContainer.getMultiCoreHandler();
}
}
if( handler == null ) {
core.close();
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "unknown handler: "+path );
}
SolrQueryRequest req = null;
try {
req = _parser.buildRequestFrom( core, params, request.getContentStreams() );
req.getContext().put( "path", path );
SolrQueryResponse rsp = new SolrQueryResponse();
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
core.execute( handler, req, rsp );
if( rsp.getException() != null ) {
if(rsp.getException() instanceof SolrException) {
throw rsp.getException();
}
throw new SolrServerException( rsp.getException() );
}
// Check if this should stream results
if( request.getStreamingResponseCallback() != null ) {
try {
final StreamingResponseCallback callback = request.getStreamingResponseCallback();
BinaryResponseWriter.Resolver resolver =
new BinaryResponseWriter.Resolver( req, rsp.getReturnFields())
{
@Override
public void writeResults(ResultContext ctx, JavaBinCodec codec) throws IOException {
// write an empty list...
SolrDocumentList docs = new SolrDocumentList();
docs.setNumFound( ctx.docs.matches() );
docs.setStart( ctx.docs.offset() );
docs.setMaxScore( ctx.docs.maxScore() );
codec.writeSolrDocumentList( docs );
// This will transform and write the matching documents
writeResultsBody( ctx, codec );
}
};
ByteArrayOutputStream out = new ByteArrayOutputStream();
new JavaBinCodec(resolver) {
@Override
public void writeSolrDocument(SolrDocument doc) throws IOException {
callback.streamSolrDocument( doc );
//super.writeSolrDocument( doc, fields );
}
@Override
public void writeSolrDocumentList(SolrDocumentList docs) throws IOException {
if( docs.size() > 0 ) {
SolrDocumentList tmp = new SolrDocumentList();
tmp.setMaxScore( docs.getMaxScore() );
tmp.setNumFound( docs.getNumFound() );
tmp.setStart( docs.getStart() );
docs = tmp;
}
callback.streamDocListInfo( docs.getNumFound(), docs.getStart(), docs.getMaxScore() );
super.writeSolrDocumentList(docs);
}
}.marshal(rsp.getValues(), out);
InputStream in = new ByteArrayInputStream(out.toByteArray());
return (NamedList<Object>) new JavaBinCodec(resolver).unmarshal(in);
}
catch (Exception ex) {
throw new RuntimeException(ex);
}
}
// Now write it out
NamedList<Object> normalized = getParsedResponse(req, rsp);
return normalized;
}
catch( IOException iox ) {
throw iox;
}
catch( SolrException sx ) {
throw sx;
}
catch( Exception ex ) {
throw new SolrServerException( ex );
}
finally {
if (req != null) req.close();
core.close();
SolrRequestInfo.clearRequestInfo();
}
}
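// Hedged usage sketch (not from this file): issuing a query that flows through the
// request(...) method above. The CoreContainer bootstrap and the core name are
// assumptions for illustration; only the standard SolrJ calls are exercised.
CoreContainer container = new CoreContainer.Initializer().initialize();
EmbeddedSolrServer server = new EmbeddedSolrServer(container, "collection1");
ModifiableSolrParams params = new ModifiableSolrParams();
params.set("q", "*:*");
params.set("qt", "/select");            // resolved via core.getRequestHandler(qt) above
QueryResponse rsp = server.query(params);
System.out.println(rsp.getResults().getNumFound());
container.shutdown();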
// in solr/core/src/java/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java
Override
public void writeResults(ResultContext ctx, JavaBinCodec codec) throws IOException {
// write an empty list...
SolrDocumentList docs = new SolrDocumentList();
docs.setNumFound( ctx.docs.matches() );
docs.setStart( ctx.docs.offset() );
docs.setMaxScore( ctx.docs.maxScore() );
codec.writeSolrDocumentList( docs );
// This will transform and write the matching documents
writeResultsBody( ctx, codec );
}
// in solr/core/src/java/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java
Override
public void writeSolrDocument(SolrDocument doc) throws IOException {
callback.streamSolrDocument( doc );
//super.writeSolrDocument( doc, fields );
}
// in solr/core/src/java/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java
Override
public void writeSolrDocumentList(SolrDocumentList docs) throws IOException {
if( docs.size() > 0 ) {
SolrDocumentList tmp = new SolrDocumentList();
tmp.setMaxScore( docs.getMaxScore() );
tmp.setNumFound( docs.getNumFound() );
tmp.setStart( docs.getStart() );
docs = tmp;
}
callback.streamDocListInfo( docs.getNumFound(), docs.getStart(), docs.getMaxScore() );
super.writeSolrDocumentList(docs);
}
// in solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
Override
public void service(HttpServletRequest req, HttpServletResponse res)
throws IOException {
res.sendError(404, "Can not find: " + req.getRequestURI());
}
// in solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
public void close() throws IOException {
writer.flushBuffer();
}
// in solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
public void indent() throws IOException {
if (doIndent) indent(level);
}
// in solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
public void indent(int lev) throws IOException {
writer.write(indentChars, 0, Math.min((lev<<1)+1, indentChars.length));
}
// in solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
public final void writeVal(String name, Object val) throws IOException {
// if there get to be enough types, perhaps hashing on the type
// to get a handler might be faster (but types must be exact to do that...)
// go in order of most common to least common
if (val==null) {
writeNull(name);
} else if (val instanceof String) {
writeStr(name, val.toString(), true);
// micro-optimization... using toString() avoids a cast first
} else if (val instanceof IndexableField) {
IndexableField f = (IndexableField)val;
SchemaField sf = schema.getFieldOrNull( f.name() );
if( sf != null ) {
sf.getType().write(this, name, f);
}
else {
writeStr(name, f.stringValue(), true);
}
} else if (val instanceof Number) {
if (val instanceof Integer) {
writeInt(name, val.toString());
} else if (val instanceof Long) {
writeLong(name, val.toString());
} else if (val instanceof Float) {
// we pass the float instead of using toString() because
// it may need special formatting. same for double.
writeFloat(name, ((Float)val).floatValue());
} else if (val instanceof Double) {
writeDouble(name, ((Double)val).doubleValue());
} else if (val instanceof Short) {
writeInt(name, val.toString());
} else if (val instanceof Byte) {
writeInt(name, val.toString());
} else {
// default... for debugging only
writeStr(name, val.getClass().getName() + ':' + val.toString(), true);
}
} else if (val instanceof Boolean) {
writeBool(name, val.toString());
} else if (val instanceof Date) {
writeDate(name,(Date)val);
} else if (val instanceof Document) {
SolrDocument doc = toSolrDocument( (Document)val );
DocTransformer transformer = returnFields.getTransformer();
if( transformer != null ) {
TransformContext context = new TransformContext();
context.req = req;
transformer.setContext(context);
transformer.transform(doc, -1);
}
writeSolrDocument(name, doc, returnFields, 0 );
} else if (val instanceof SolrDocument) {
writeSolrDocument(name, (SolrDocument)val, returnFields, 0);
} else if (val instanceof ResultContext) {
// requires access to IndexReader
writeDocuments(name, (ResultContext)val, returnFields);
} else if (val instanceof DocList) {
// Should not happen normally
ResultContext ctx = new ResultContext();
ctx.docs = (DocList)val;
writeDocuments(name, ctx, returnFields);
// }
// else if (val instanceof DocSet) {
// how do we know what fields to read?
// todo: have a DocList/DocSet wrapper that
// restricts the fields to write...?
} else if (val instanceof SolrDocumentList) {
writeSolrDocumentList(name, (SolrDocumentList)val, returnFields);
} else if (val instanceof Map) {
writeMap(name, (Map)val, false, true);
} else if (val instanceof NamedList) {
writeNamedList(name, (NamedList)val);
} else if (val instanceof Iterable) {
writeArray(name,((Iterable)val).iterator());
} else if (val instanceof Object[]) {
writeArray(name,(Object[])val);
} else if (val instanceof Iterator) {
writeArray(name,(Iterator)val);
} else if (val instanceof byte[]) {
byte[] arr = (byte[])val;
writeByteArr(name, arr, 0, arr.length);
} else if (val instanceof BytesRef) {
BytesRef arr = (BytesRef)val;
writeByteArr(name, arr.bytes, arr.offset, arr.length);
} else {
// default... for debugging only
writeStr(name, val.getClass().getName() + ':' + val.toString(), true);
}
}
// in solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
public final void writeSolrDocumentList(String name, SolrDocumentList docs, ReturnFields returnFields) throws IOException
{
writeStartDocumentList(name, docs.getStart(), docs.size(), docs.getNumFound(), docs.getMaxScore() );
for( int i=0; i<docs.size(); i++ ) {
writeSolrDocument( null, docs.get(i), returnFields, i );
}
writeEndDocumentList();
}
// in solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
public final void writeDocuments(String name, ResultContext res, ReturnFields fields ) throws IOException {
DocList ids = res.docs;
TransformContext context = new TransformContext();
context.query = res.query;
context.wantsScores = fields.wantsScore() && ids.hasScores();
context.req = req;
writeStartDocumentList(name, ids.offset(), ids.size(), ids.matches(),
context.wantsScores ? new Float(ids.maxScore()) : null );
DocTransformer transformer = fields.getTransformer();
context.searcher = req.getSearcher();
context.iterator = ids.iterator();
if( transformer != null ) {
transformer.setContext( context );
}
int sz = ids.size();
Set<String> fnames = fields.getLuceneFieldNames();
for (int i=0; i<sz; i++) {
int id = context.iterator.nextDoc();
Document doc = context.searcher.doc(id, fnames);
SolrDocument sdoc = toSolrDocument( doc );
if( transformer != null ) {
transformer.transform( sdoc, id);
}
writeSolrDocument( null, sdoc, returnFields, i );
}
if( transformer != null ) {
transformer.setContext( null );
}
writeEndDocumentList();
}
// in solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
public void writeArray(String name, Object[] val) throws IOException {
writeArray(name, Arrays.asList(val).iterator());
}
// in solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
public void writeInt(String name, int val) throws IOException {
writeInt(name,Integer.toString(val));
}
// in solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
public void writeLong(String name, long val) throws IOException {
writeLong(name,Long.toString(val));
}
// in solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
public void writeBool(String name, boolean val) throws IOException {
writeBool(name,Boolean.toString(val));
}
// in solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
public void writeFloat(String name, float val) throws IOException {
String s = Float.toString(val);
// If it's not a normal number, write the value as a string instead.
// The following test also handles NaN since comparisons are always false.
if (val > Float.NEGATIVE_INFINITY && val < Float.POSITIVE_INFINITY) {
writeFloat(name,s);
} else {
writeStr(name,s,false);
}
}
// in solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
public void writeDouble(String name, double val) throws IOException {
String s = Double.toString(val);
// If it's not a normal number, write the value as a string instead.
// The following test also handles NaN since comparisons are always false.
if (val > Double.NEGATIVE_INFINITY && val < Double.POSITIVE_INFINITY) {
writeDouble(name,s);
} else {
writeStr(name,s,false);
}
}
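// Hedged demonstration (not from this file) of why the range test above also
// filters out NaN: every comparison involving NaN evaluates to false, so NaN
// falls through to the writeStr branch along with the infinities.
double nan = Double.NaN;
System.out.println(nan > Double.NEGATIVE_INFINITY && nan < Double.POSITIVE_INFINITY); // false
System.out.println(Double.POSITIVE_INFINITY < Double.POSITIVE_INFINITY);              // false
System.out.println(1.5 > Double.NEGATIVE_INFINITY && 1.5 < Double.POSITIVE_INFINITY); // true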
// in solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
public void writeDate(String name, Date val) throws IOException {
writeDate(name, DateField.formatExternal(val));
}
// in solr/core/src/java/org/apache/solr/response/TextResponseWriter.java
public void writeByteArr(String name, byte[] buf, int offset, int len) throws IOException {
writeStr(name, Base64.byteArrayToBase64(buf, offset, len), false);
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
JSONWriter w = new JSONWriter(writer, req, rsp);
try {
w.writeResponse();
} finally {
w.close();
}
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
public void writeResponse() throws IOException {
if(wrapperFunction!=null) {
writer.write(wrapperFunction + "(");
}
Boolean omitHeader = req.getParams().getBool(CommonParams.OMIT_HEADER);
if(omitHeader != null && omitHeader) rsp.getValues().remove("responseHeader");
writeNamedList(null, rsp.getValues());
if(wrapperFunction!=null) {
writer.write(')');
}
if (doIndent) writer.write('\n'); // ending with a newline looks much better from the command line
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
protected void writeKey(String fname, boolean needsEscaping) throws IOException {
writeStr(null, fname, needsEscaping);
writer.write(':');
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
protected void writeNamedListAsMapMangled(String name, NamedList val) throws IOException {
int sz = val.size();
writeMapOpener(sz);
incLevel();
// In JSON objects (maps) we can't have null keys or duplicates...
// map null to "" and append a qualifier to duplicates.
//
// a=123,a=456 will be mapped to {a=123, "a 1"=456}
// Disadvantage: this is ambiguous, since a real key could itself be called "a 1"
//
// Another possible mapping could aggregate multiple keys to an array:
// a=123,a=456 maps to a=[123,456]
// Disadvantage: this is ambiguous with a real single value that happens to be an array
//
// Both of these mappings have ambiguities.
HashMap<String,Integer> repeats = new HashMap<String,Integer>(4);
boolean first=true;
for (int i=0; i<sz; i++) {
String key = val.getName(i);
if (key==null) key="";
if (first) {
first=false;
repeats.put(key,0);
} else {
writeMapSeparator();
Integer repeatCount = repeats.get(key);
if (repeatCount==null) {
repeats.put(key,0);
} else {
String newKey = key;
int newCount = repeatCount;
do { // avoid generated key clashing with a real key
newKey = key + ' ' + (++newCount);
repeatCount = repeats.get(newKey);
} while (repeatCount != null);
repeats.put(key,newCount);
key = newKey;
}
}
indent();
writeKey(key, true);
writeVal(key,val.getVal(i));
}
decLevel();
writeMapCloser();
}
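// Hedged worked example (not from this file): the de-duplication loop above
// appends a space and a counter to repeated keys, and the do/while guards
// against clashing with a real key that already looks like "a 1".
NamedList<Object> nl = new NamedList<Object>();
nl.add("a", 123);
nl.add("a", 456);
nl.add("a", 789);
// writeNamedListAsMapMangled(null, nl)  ->  {"a":123,"a 1":456,"a 2":789}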
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
protected void writeNamedListAsMapWithDups(String name, NamedList val) throws IOException {
int sz = val.size();
writeMapOpener(sz);
incLevel();
for (int i=0; i<sz; i++) {
if (i!=0) {
writeMapSeparator();
}
String key = val.getName(i);
if (key==null) key="";
indent();
writeKey(key, true);
writeVal(key,val.getVal(i));
}
decLevel();
writeMapCloser();
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
protected void writeNamedListAsArrMap(String name, NamedList val) throws IOException {
int sz = val.size();
indent();
writeArrayOpener(sz);
incLevel();
boolean first=true;
for (int i=0; i<sz; i++) {
String key = val.getName(i);
if (first) {
first=false;
} else {
writeArraySeparator();
}
indent();
if (key==null) {
writeVal(null,val.getVal(i));
} else {
writeMapOpener(1);
writeKey(key, true);
writeVal(key,val.getVal(i));
writeMapCloser();
}
}
decLevel();
writeArrayCloser();
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
protected void writeNamedListAsArrArr(String name, NamedList val) throws IOException {
int sz = val.size();
indent();
writeArrayOpener(sz);
incLevel();
boolean first=true;
for (int i=0; i<sz; i++) {
String key = val.getName(i);
if (first) {
first=false;
} else {
writeArraySeparator();
}
indent();
/*** if key is null, just write value???
if (key==null) {
writeVal(null,val.getVal(i));
} else {
***/
writeArrayOpener(1);
incLevel();
if (key==null) {
writeNull(null);
} else {
writeStr(null, key, true);
}
writeArraySeparator();
writeVal(key,val.getVal(i));
decLevel();
writeArrayCloser();
}
decLevel();
writeArrayCloser();
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
protected void writeNamedListAsFlat(String name, NamedList val) throws IOException {
int sz = val.size();
writeArrayOpener(sz);
incLevel();
for (int i=0; i<sz; i++) {
if (i!=0) {
writeArraySeparator();
}
String key = val.getName(i);
indent();
if (key==null) {
writeNull(null);
} else {
writeStr(null, key, true);
}
writeArraySeparator();
writeVal(key, val.getVal(i));
}
decLevel();
writeArrayCloser();
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
Override
public void writeNamedList(String name, NamedList val) throws IOException {
if (val instanceof SimpleOrderedMap) {
writeNamedListAsMapWithDups(name,val);
} else if (namedListStyle==JSON_NL_FLAT) {
writeNamedListAsFlat(name,val);
} else if (namedListStyle==JSON_NL_MAP){
writeNamedListAsMapWithDups(name,val);
} else if (namedListStyle==JSON_NL_ARROFARR) {
writeNamedListAsArrArr(name,val);
} else if (namedListStyle==JSON_NL_ARROFMAP) {
writeNamedListAsArrMap(name,val);
}
}
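// Hedged worked example (not from this file): the four namedListStyle branches
// above serialize the same NamedList differently (selecting the style via the
// json.nl request parameter is an assumption here, not read from this listing).
NamedList<Object> nl = new NamedList<Object>();
nl.add("foo", 10);
nl.add("bar", 20);
// JSON_NL_FLAT      ->  ["foo",10,"bar",20]
// JSON_NL_MAP       ->  {"foo":10,"bar":20}
// JSON_NL_ARROFARR  ->  [["foo",10],["bar",20]]
// JSON_NL_ARROFMAP  ->  [{"foo":10},{"bar":20}]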
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
Override
public void writeSolrDocument(String name, SolrDocument doc, ReturnFields returnFields, int idx) throws IOException {
if( idx > 0 ) {
writeArraySeparator();
}
indent();
writeMapOpener(doc.size());
incLevel();
boolean first=true;
for (String fname : doc.getFieldNames()) {
if (!returnFields.wantsField(fname)) {
continue;
}
if (first) {
first=false;
}
else {
writeMapSeparator();
}
indent();
writeKey(fname, true);
Object val = doc.getFieldValue(fname);
if (val instanceof Collection) {
writeVal(fname, val);
} else {
// if multivalued field, write single value as an array
SchemaField sf = schema.getFieldOrNull(fname);
if (sf != null && sf.multiValued()) {
writeArrayOpener(-1); // no trivial way to determine array size
writeVal(fname, val);
writeArrayCloser();
} else {
writeVal(fname, val);
}
}
}
decLevel();
writeMapCloser();
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
Override
public void writeStartDocumentList(String name,
long start, int size, long numFound, Float maxScore) throws IOException
{
writeMapOpener((maxScore==null) ? 3 : 4);
incLevel();
writeKey("numFound",false);
writeLong(null,numFound);
writeMapSeparator();
writeKey("start",false);
writeLong(null,start);
if (maxScore!=null) {
writeMapSeparator();
writeKey("maxScore",false);
writeFloat(null,maxScore);
}
writeMapSeparator();
// indent();
writeKey("docs",false);
writeArrayOpener(size);
incLevel();
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
Override
public void writeEndDocumentList() throws IOException
{
decLevel();
writeArrayCloser();
decLevel();
indent();
writeMapCloser();
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
public void writeMapOpener(int size) throws IOException, IllegalArgumentException {
writer.write('{');
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
public void writeMapSeparator() throws IOException {
writer.write(',');
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
public void writeMapCloser() throws IOException {
writer.write('}');
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
public void writeArrayOpener(int size) throws IOException, IllegalArgumentException {
writer.write('[');
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
public void writeArraySeparator() throws IOException {
writer.write(',');
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
public void writeArrayCloser() throws IOException {
writer.write(']');
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
Override
public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
// it might be more efficient to use a stringbuilder or write substrings
// if writing chars to the stream is slow.
if (needsEscaping) {
/* http://www.ietf.org/internet-drafts/draft-crockford-jsonorg-json-04.txt
All Unicode characters may be placed within
the quotation marks except for the characters which must be
escaped: quotation mark, reverse solidus, and the control
characters (U+0000 through U+001F).
*/
writer.write('"');
for (int i=0; i<val.length(); i++) {
char ch = val.charAt(i);
if ((ch > '#' && ch != '\\' && ch < '\u2028') || ch == ' ') { // fast path
writer.write(ch);
continue;
}
switch(ch) {
case '"':
case '\\':
writer.write('\\');
writer.write(ch);
break;
case '\r': writer.write('\\'); writer.write('r'); break;
case '\n': writer.write('\\'); writer.write('n'); break;
case '\t': writer.write('\\'); writer.write('t'); break;
case '\b': writer.write('\\'); writer.write('b'); break;
case '\f': writer.write('\\'); writer.write('f'); break;
case '\u2028': // fallthrough
case '\u2029':
unicodeEscape(writer,ch);
break;
// case '/':
default: {
if (ch <= 0x1F) {
unicodeEscape(writer,ch);
} else {
writer.write(ch);
}
}
}
}
writer.write('"');
} else {
writer.write('"');
writer.write(val);
writer.write('"');
}
}
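// Hedged worked example (not from this file): what the escaping loop above emits
// for a string containing a newline, quotes, and a control character.
String input = "line1\nsaid \"hi\"\u0001";
String expected = "\"line1\\nsaid \\\"hi\\\"\\u0001\"";   // surrounding quotes added; \n, \" and \u0001 escaped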
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
Override
public void writeMap(String name, Map val, boolean excludeOuter, boolean isFirstVal) throws IOException {
if (!excludeOuter) {
writeMapOpener(val.size());
incLevel();
isFirstVal=true;
}
boolean doIndent = excludeOuter || val.size() > 1;
for (Map.Entry entry : (Set<Map.Entry>)val.entrySet()) {
Object e = entry.getKey();
String k = e==null ? "" : e.toString();
Object v = entry.getValue();
if (isFirstVal) {
isFirstVal=false;
} else {
writeMapSeparator();
}
if (doIndent) indent();
writeKey(k,true);
writeVal(k,v);
}
if (!excludeOuter) {
decLevel();
writeMapCloser();
}
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
Override
public void writeArray(String name, Iterator val) throws IOException {
writeArrayOpener(-1); // no trivial way to determine array size
incLevel();
boolean first=true;
while( val.hasNext() ) {
if( !first ) indent();
writeVal(null, val.next());
if( val.hasNext() ) {
writeArraySeparator();
}
first=false;
}
decLevel();
writeArrayCloser();
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
Override
public void writeNull(String name) throws IOException {
writer.write("null");
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
Override
public void writeInt(String name, String val) throws IOException {
writer.write(val);
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
Override
public void writeLong(String name, String val) throws IOException {
writer.write(val);
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
Override
public void writeBool(String name, String val) throws IOException {
writer.write(val);
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
Override
public void writeFloat(String name, String val) throws IOException {
writer.write(val);
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
Override
public void writeDouble(String name, String val) throws IOException {
writer.write(val);
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
Override
public void writeDate(String name, String val) throws IOException {
writeStr(name, val, false);
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
protected static void unicodeEscape(Appendable out, int ch) throws IOException {
out.append('\\');
out.append('u');
out.append(hexdigits[(ch>>>12) ]);
out.append(hexdigits[(ch>>>8) & 0xf]);
out.append(hexdigits[(ch>>>4) & 0xf]);
out.append(hexdigits[(ch) & 0xf]);
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
Override
public void writeFloat(String name, float val) throws IOException {
if (Float.isNaN(val)) {
writer.write(getNaN());
} else if (Float.isInfinite(val)) {
if (val < 0.0f)
writer.write('-');
writer.write(getInf());
} else {
writeFloat(name, Float.toString(val));
}
}
// in solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java
Override
public void writeDouble(String name, double val) throws IOException {
if (Double.isNaN(val)) {
writer.write(getNaN());
} else if (Double.isInfinite(val)) {
if (val < 0.0)
writer.write('-');
writer.write(getInf());
} else {
writeDouble(name, Double.toString(val));
}
}
// in solr/core/src/java/org/apache/solr/response/PHPResponseWriter.java
public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
PHPWriter w = new PHPWriter(writer, req, rsp);
try {
w.writeResponse();
} finally {
w.close();
}
}
// in solr/core/src/java/org/apache/solr/response/PHPResponseWriter.java
Override
public void writeNamedList(String name, NamedList val) throws IOException {
writeNamedListAsMapMangled(name,val);
}
// in solr/core/src/java/org/apache/solr/response/PHPResponseWriter.java
Override
public void writeMapOpener(int size) throws IOException {
writer.write("array(");
}
// in solr/core/src/java/org/apache/solr/response/PHPResponseWriter.java
Override
public void writeMapCloser() throws IOException {
writer.write(')');
}
// in solr/core/src/java/org/apache/solr/response/PHPResponseWriter.java
Override
public void writeArrayOpener(int size) throws IOException {
writer.write("array(");
}
// in solr/core/src/java/org/apache/solr/response/PHPResponseWriter.java
Override
public void writeArrayCloser() throws IOException {
writer.write(')');
}
// in solr/core/src/java/org/apache/solr/response/PHPResponseWriter.java
Override
public void writeNull(String name) throws IOException {
writer.write("null");
}
// in solr/core/src/java/org/apache/solr/response/PHPResponseWriter.java
Override
protected void writeKey(String fname, boolean needsEscaping) throws IOException {
writeStr(null, fname, needsEscaping);
writer.write('=');
writer.write('>');
}
// in solr/core/src/java/org/apache/solr/response/PHPResponseWriter.java
Override
public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
if (needsEscaping) {
writer.write('\'');
for (int i=0; i<val.length(); i++) {
char ch = val.charAt(i);
switch (ch) {
case '\'':
case '\\': writer.write('\\'); writer.write(ch); break;
default:
writer.write(ch);
}
}
writer.write('\'');
} else {
writer.write('\'');
writer.write(val);
writer.write('\'');
}
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
CSVWriter w = new CSVWriter(writer, req, rsp);
try {
w.writeResponse();
} finally {
w.close();
}
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
public void freeze() throws IOException {
if (cw.size() > 0) {
flush();
result = cw.getInternalBuf();
resultLen = cw.size();
} else {
result = buf;
resultLen = pos;
}
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
public void writeResponse() throws IOException {
SolrParams params = req.getParams();
strategy = new CSVStrategy(',', '"', CSVStrategy.COMMENTS_DISABLED, CSVStrategy.ESCAPE_DISABLED, false, false, false, true);
CSVStrategy strat = strategy;
String sep = params.get(CSV_SEPARATOR);
if (sep!=null) {
if (sep.length()!=1) throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid separator:'"+sep+"'");
strat.setDelimiter(sep.charAt(0));
}
String nl = params.get(CSV_NEWLINE);
if (nl!=null) {
if (nl.length()==0) throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid newline:'"+nl+"'");
strat.setPrinterNewline(nl);
}
String encapsulator = params.get(CSV_ENCAPSULATOR);
String escape = params.get(CSV_ESCAPE);
if (encapsulator!=null) {
if (encapsulator.length()!=1) throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid encapsulator:'"+encapsulator+"'");
strat.setEncapsulator(encapsulator.charAt(0));
}
if (escape!=null) {
if (escape.length()!=1) throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid escape:'"+escape+"'");
strat.setEscape(escape.charAt(0));
if (encapsulator == null) {
strat.setEncapsulator( CSVStrategy.ENCAPSULATOR_DISABLED);
}
}
if (strat.getEscape() == '\\') {
// If the escape is the standard backslash, then also enable
// unicode escapes (it's harmless since 'u' would not otherwise
// be escaped).
strat.setUnicodeEscapeInterpretation(true);
}
printer = new CSVPrinter(writer, strategy);
CSVStrategy mvStrategy = new CSVStrategy(strategy.getDelimiter(), CSVStrategy.ENCAPSULATOR_DISABLED, CSVStrategy.COMMENTS_DISABLED, '\\', false, false, false, false);
strat = mvStrategy;
sep = params.get(MV_SEPARATOR);
if (sep!=null) {
if (sep.length()!=1) throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid mv separator:'"+sep+"'");
strat.setDelimiter(sep.charAt(0));
}
encapsulator = params.get(MV_ENCAPSULATOR);
escape = params.get(MV_ESCAPE);
if (encapsulator!=null) {
if (encapsulator.length()!=1) throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid mv encapsulator:'"+encapsulator+"'");
strat.setEncapsulator(encapsulator.charAt(0));
if (escape == null) {
strat.setEscape(CSVStrategy.ESCAPE_DISABLED);
}
}
escape = params.get(MV_ESCAPE);
if (escape!=null) {
if (escape.length()!=1) throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid mv escape:'"+escape+"'");
strat.setEscape(escape.charAt(0));
// encapsulator will already be disabled if it wasn't specified
}
Collection<String> fields = returnFields.getLuceneFieldNames();
Object responseObj = rsp.getValues().get("response");
boolean returnOnlyStored = false;
if (fields==null) {
if (responseObj instanceof SolrDocumentList) {
// get the list of fields from the SolrDocumentList
fields = new LinkedHashSet<String>();
for (SolrDocument sdoc: (SolrDocumentList)responseObj) {
fields.addAll(sdoc.getFieldNames());
}
} else {
// get the list of fields from the index
fields = req.getSearcher().getFieldNames();
}
if (returnFields.wantsScore()) {
fields.add("score");
} else {
fields.remove("score");
}
returnOnlyStored = true;
}
CSVSharedBufPrinter csvPrinterMV = new CSVSharedBufPrinter(mvWriter, mvStrategy);
for (String field : fields) {
if (!returnFields.wantsField(field)) {
continue;
}
if (field.equals("score")) {
CSVField csvField = new CSVField();
csvField.name = "score";
csvFields.put("score", csvField);
continue;
}
SchemaField sf = schema.getFieldOrNull(field);
if (sf == null) {
FieldType ft = new StrField();
sf = new SchemaField(field, ft);
}
// Return only stored fields, unless an explicit field list is specified
if (returnOnlyStored && sf != null && !sf.stored()) {
continue;
}
// check for per-field overrides
sep = params.get("f." + field + '.' + CSV_SEPARATOR);
encapsulator = params.get("f." + field + '.' + CSV_ENCAPSULATOR);
escape = params.get("f." + field + '.' + CSV_ESCAPE);
CSVSharedBufPrinter csvPrinter = csvPrinterMV;
if (sep != null || encapsulator != null || escape != null) {
// create a new strategy + printer if there were any per-field overrides
strat = (CSVStrategy)mvStrategy.clone();
if (sep!=null) {
if (sep.length()!=1) throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid mv separator:'"+sep+"'");
strat.setDelimiter(sep.charAt(0));
}
if (encapsulator!=null) {
if (encapsulator.length()!=1) throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid mv encapsulator:'"+encapsulator+"'");
strat.setEncapsulator(encapsulator.charAt(0));
if (escape == null) {
strat.setEscape(CSVStrategy.ESCAPE_DISABLED);
}
}
if (escape!=null) {
if (escape.length()!=1) throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Invalid mv escape:'"+escape+"'");
strat.setEscape(escape.charAt(0));
if (encapsulator == null) {
strat.setEncapsulator(CSVStrategy.ENCAPSULATOR_DISABLED);
}
}
csvPrinter = new CSVSharedBufPrinter(mvWriter, strat);
}
CSVField csvField = new CSVField();
csvField.name = field;
csvField.sf = sf;
csvField.mvPrinter = csvPrinter;
csvFields.put(field, csvField);
}
NullValue = params.get(CSV_NULL, "");
if (params.getBool(CSV_HEADER, true)) {
for (CSVField csvField : csvFields.values()) {
printer.print(csvField.name);
}
printer.println();
}
if (responseObj instanceof ResultContext ) {
writeDocuments(null, (ResultContext)responseObj, returnFields );
}
else if (responseObj instanceof DocList) {
ResultContext ctx = new ResultContext();
ctx.docs = (DocList)responseObj;
writeDocuments(null, ctx, returnFields );
} else if (responseObj instanceof SolrDocumentList) {
writeSolrDocumentList(null, (SolrDocumentList)responseObj, returnFields );
}
}
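// Hedged usage sketch (not from this file): the request parameters consulted by
// writeResponse() above, shown on a SolrQuery. The csv.* parameter names follow
// the documented CSV writer conventions and are assumptions here rather than
// constants read from this listing.
SolrQuery q = new SolrQuery("*:*");
q.set("wt", "csv");
q.set("csv.separator", ";");        // must be a single character, else BAD_REQUEST
q.set("csv.mv.separator", "|");     // separator used inside multi-valued fields
q.set("csv.null", "N/A");           // placeholder emitted by writeNull(...)
q.set("csv.header", "false");       // suppress the leading header row
q.set("f.cat.csv.separator", ",");  // per-field override, matching the "f.<field>." lookups above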
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
Override
public void close() throws IOException {
if (printer != null) printer.flush();
super.close();
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
Override
public void writeNamedList(String name, NamedList val) throws IOException {
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
public void writeStartDocumentList(String name,
long start, int size, long numFound, Float maxScore) throws IOException
{
// nothing
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
public void writeEndDocumentList() throws IOException
{
// nothing
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
Override
public void writeSolrDocument(String name, SolrDocument doc, ReturnFields returnFields, int idx ) throws IOException {
if (tmpList == null) {
tmpList = new ArrayList(1);
tmpList.add(null);
}
for (CSVField csvField : csvFields.values()) {
Object val = doc.getFieldValue(csvField.name);
int nVals = val instanceof Collection ? ((Collection)val).size() : (val==null ? 0 : 1);
if (nVals == 0) {
writeNull(csvField.name);
continue;
}
if ((csvField.sf != null && csvField.sf.multiValued()) || nVals > 1) {
Collection values;
// normalize to a collection
if (val instanceof Collection) {
values = (Collection)val;
} else {
tmpList.set(0, val);
values = tmpList;
}
mvWriter.reset();
csvField.mvPrinter.reset();
// switch the printer to use the multi-valued one
CSVPrinter tmp = printer;
printer = csvField.mvPrinter;
for (Object fval : values) {
writeVal(csvField.name, fval);
}
printer = tmp; // restore the original printer
mvWriter.freeze();
printer.print(mvWriter.getFrozenBuf(), 0, mvWriter.getFrozenSize(), true);
} else {
// normalize to first value
if (val instanceof Collection) {
Collection values = (Collection)val;
val = values.iterator().next();
}
writeVal(csvField.name, val);
}
}
printer.println();
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
Override
public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
printer.print(val, needsEscaping);
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
Override
public void writeMap(String name, Map val, boolean excludeOuter, boolean isFirstVal) throws IOException {
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
Override
public void writeArray(String name, Iterator val) throws IOException {
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
Override
public void writeNull(String name) throws IOException {
printer.print(NullValue);
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
Override
public void writeInt(String name, String val) throws IOException {
printer.print(val, false);
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
Override
public void writeLong(String name, String val) throws IOException {
printer.print(val, false);
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
Override
public void writeBool(String name, String val) throws IOException {
printer.print(val, false);
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
Override
public void writeFloat(String name, String val) throws IOException {
printer.print(val, false);
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
Override
public void writeDouble(String name, String val) throws IOException {
printer.print(val, false);
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
Override
public void writeDate(String name, Date val) throws IOException {
StringBuilder sb = new StringBuilder(25);
cal = DateUtil.formatDate(val, cal, sb);
writeDate(name, sb.toString());
}
// in solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java
Override
public void writeDate(String name, String val) throws IOException {
printer.print(val, false);
}
// in solr/core/src/java/org/apache/solr/response/PythonResponseWriter.java
public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
PythonWriter w = new PythonWriter(writer, req, rsp);
try {
w.writeResponse();
} finally {
w.close();
}
}
// in solr/core/src/java/org/apache/solr/response/PythonResponseWriter.java
Override
public void writeNull(String name) throws IOException {
writer.write("None");
}
// in solr/core/src/java/org/apache/solr/response/PythonResponseWriter.java
Override
public void writeBool(String name, boolean val) throws IOException {
writer.write(val ? "True" : "False");
}
// in solr/core/src/java/org/apache/solr/response/PythonResponseWriter.java
Override
public void writeBool(String name, String val) throws IOException {
writeBool(name,val.charAt(0)=='t');
}
// in solr/core/src/java/org/apache/solr/response/PythonResponseWriter.java
Override
public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
if (!needsEscaping) {
writer.write('\'');
writer.write(val);
writer.write('\'');
return;
}
// use python unicode strings...
// python doesn't tolerate newlines in strings in its eval(), so we must escape them.
StringBuilder sb = new StringBuilder(val.length());
boolean needUnicode=false;
for (int i=0; i<val.length(); i++) {
char ch = val.charAt(i);
switch(ch) {
case '\'':
case '\\': sb.append('\\'); sb.append(ch); break;
case '\r': sb.append("\\r"); break;
case '\n': sb.append("\\n"); break;
case '\t': sb.append("\\t"); break;
default:
// we don't strictly have to escape these chars, but it will probably increase
// portability to stick to visible ascii
if (ch<' ' || ch>127) {
unicodeEscape(sb, ch);
needUnicode=true;
} else {
sb.append(ch);
}
}
}
if (needUnicode) {
writer.write('u');
}
writer.write('\'');
writer.append(sb);
writer.write('\'');
}
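// Hedged worked example (not from this file): the escaping above only adds the
// u prefix when a non-ASCII or control character forced a unicode escape.
String asciiOnly = "a'b\\c";       // ->  'a\'b\\c'
String nonAscii  = "café\n";       // ->  u'caf\u00e9\n'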
// in solr/core/src/java/org/apache/solr/response/RubyResponseWriter.java
public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
RubyWriter w = new RubyWriter(writer, req, rsp);
try {
w.writeResponse();
} finally {
w.close();
}
}
// in solr/core/src/java/org/apache/solr/response/RubyResponseWriter.java
Override
public void writeNull(String name) throws IOException {
writer.write("nil");
}
// in solr/core/src/java/org/apache/solr/response/RubyResponseWriter.java
Override
protected void writeKey(String fname, boolean needsEscaping) throws IOException {
writeStr(null, fname, needsEscaping);
writer.write('=');
writer.write('>');
}
// in solr/core/src/java/org/apache/solr/response/RubyResponseWriter.java
Override
public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
// Ruby doesn't do unicode escapes... so let the servlet container write raw UTF-8
// bytes into the string.
//
// Use single quoted strings for safety since no evaluation is done within them.
// Also, there are very few escapes recognized in a single quoted string, so
// only escape the backslash and single quote.
writer.write('\'');
if (needsEscaping) {
for (int i=0; i<val.length(); i++) {
char ch = val.charAt(i);
if (ch=='\'' || ch=='\\') {
writer.write('\\');
}
writer.write(ch);
}
} else {
writer.write(val);
}
writer.write('\'');
}
// in solr/core/src/java/org/apache/solr/response/XSLTResponseWriter.java
public void write(Writer writer, SolrQueryRequest request, SolrQueryResponse response) throws IOException {
final Transformer t = getTransformer(request);
// capture the output of the XMLWriter
final CharArrayWriter w = new CharArrayWriter();
XMLWriter.writeResponse(w,request,response);
// and write transformed result to our writer
final Reader r = new BufferedReader(new CharArrayReader(w.toCharArray()));
final StreamSource source = new StreamSource(r);
final StreamResult result = new StreamResult(writer);
try {
t.transform(source, result);
} catch(TransformerException te) {
final IOException ioe = new IOException("XSLT transformation error");
ioe.initCause(te);
throw ioe;
}
}
// in solr/core/src/java/org/apache/solr/response/XSLTResponseWriter.java
protected Transformer getTransformer(SolrQueryRequest request) throws IOException {
final String xslt = request.getParams().get(CommonParams.TR,null);
if(xslt==null) {
throw new IOException("'" + CommonParams.TR + "' request parameter is required to use the XSLTResponseWriter");
}
// not the cleanest way to achieve this
SolrConfig solrConfig = request.getCore().getSolrConfig();
// no need to synchronize access to context, right?
// Nothing else happens with it at the same time
final Map<Object,Object> ctx = request.getContext();
Transformer result = (Transformer)ctx.get(CONTEXT_TRANSFORMER_KEY);
if(result==null) {
result = TransformerProvider.instance.getTransformer(solrConfig, xslt,xsltCacheLifetimeSeconds.intValue());
result.setErrorListener(xmllog);
ctx.put(CONTEXT_TRANSFORMER_KEY,result);
}
return result;
}
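// Hedged usage sketch (not from this file): selecting this writer from a client.
// The tr parameter is mandatory, as enforced by getTransformer(...) above; the
// stylesheet name and its resolution under the core's conf/xslt directory are
// assumptions for illustration.
SolrQuery q = new SolrQuery("*:*");
q.set("wt", "xslt");
q.set("tr", "example.xsl");   // resulting Transformer is cached in the request context for this request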
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
public static void writeResponse(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
XMLWriter xmlWriter = null;
try {
xmlWriter = new XMLWriter(writer, req, rsp);
xmlWriter.writeResponse();
} finally {
xmlWriter.close();
}
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
public void writeResponse() throws IOException {
writer.write(XML_START1);
String stylesheet = req.getParams().get("stylesheet");
if (stylesheet != null && stylesheet.length() > 0) {
writer.write(XML_STYLESHEET);
XML.escapeAttributeValue(stylesheet, writer);
writer.write(XML_STYLESHEET_END);
}
/***
String noSchema = req.getParams().get("noSchema");
// todo - change when schema becomes available?
if (false && noSchema == null)
writer.write(XML_START2_SCHEMA);
else
writer.write(XML_START2_NOSCHEMA);
***/
writer.write(XML_START2_NOSCHEMA);
// dump response values
NamedList<?> lst = rsp.getValues();
Boolean omitHeader = req.getParams().getBool(CommonParams.OMIT_HEADER);
if(omitHeader != null && omitHeader) lst.remove("responseHeader");
int sz = lst.size();
int start=0;
for (int i=start; i<sz; i++) {
writeVal(lst.getName(i),lst.getVal(i));
}
writer.write("\n</response>\n");
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
private void writeAttr(String name, String val) throws IOException {
writeAttr(name, val, true);
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
public void writeAttr(String name, String val, boolean escape) throws IOException{
if (val != null) {
writer.write(' ');
writer.write(name);
writer.write("=\"");
if(escape){
XML.escapeAttributeValue(val, writer);
} else {
writer.write(val);
}
writer.write('"');
}
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
void startTag(String tag, String name, boolean closeTag) throws IOException {
if (doIndent) indent();
writer.write('<');
writer.write(tag);
if (name!=null) {
writeAttr("name", name);
if (closeTag) {
writer.write("/>");
} else {
writer.write(">");
}
} else {
if (closeTag) {
writer.write("/>");
} else {
writer.write('>');
}
}
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeStartDocumentList(String name,
long start, int size, long numFound, Float maxScore) throws IOException
{
if (doIndent) indent();
writer.write("<result");
writeAttr("name",name);
writeAttr("numFound",Long.toString(numFound));
writeAttr("start",Long.toString(start));
if(maxScore!=null) {
writeAttr("maxScore",Float.toString(maxScore));
}
writer.write(">");
incLevel();
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeSolrDocument(String name, SolrDocument doc, ReturnFields returnFields, int idx ) throws IOException {
startTag("doc", name, false);
incLevel();
for (String fname : doc.getFieldNames()) {
if (!returnFields.wantsField(fname)) {
continue;
}
Object val = doc.getFieldValue(fname);
if( "_explain_".equals( fname ) ) {
System.out.println( val );
}
writeVal(fname, val);
}
decLevel();
writer.write("</doc>");
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeEndDocumentList() throws IOException
{
decLevel();
if (doIndent) indent();
writer.write("</result>");
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeNamedList(String name, NamedList val) throws IOException {
int sz = val.size();
startTag("lst", name, sz<=0);
incLevel();
for (int i=0; i<sz; i++) {
writeVal(val.getName(i),val.getVal(i));
}
decLevel();
if (sz > 0) {
if (doIndent) indent();
writer.write("</lst>");
}
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeMap(String name, Map map, boolean excludeOuter, boolean isFirstVal) throws IOException {
int sz = map.size();
if (!excludeOuter) {
startTag("lst", name, sz<=0);
incLevel();
}
for (Map.Entry entry : (Set<Map.Entry>)map.entrySet()) {
Object k = entry.getKey();
Object v = entry.getValue();
// if (sz<indentThreshold) indent();
writeVal( null == k ? null : k.toString(), v);
}
if (!excludeOuter) {
decLevel();
if (sz > 0) {
if (doIndent) indent();
writer.write("</lst>");
}
}
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeArray(String name, Object[] val) throws IOException {
writeArray(name, Arrays.asList(val).iterator());
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeArray(String name, Iterator iter) throws IOException {
if( iter.hasNext() ) {
startTag("arr", name, false );
incLevel();
while( iter.hasNext() ) {
writeVal(null, iter.next());
}
decLevel();
if (doIndent) indent();
writer.write("</arr>");
}
else {
startTag("arr", name, true );
}
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeNull(String name) throws IOException {
writePrim("null",name,"",false);
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeStr(String name, String val, boolean escape) throws IOException {
writePrim("str",name,val,escape);
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeInt(String name, String val) throws IOException {
writePrim("int",name,val,false);
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeLong(String name, String val) throws IOException {
writePrim("long",name,val,false);
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeBool(String name, String val) throws IOException {
writePrim("bool",name,val,false);
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeFloat(String name, String val) throws IOException {
writePrim("float",name,val,false);
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeFloat(String name, float val) throws IOException {
writeFloat(name,Float.toString(val));
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeDouble(String name, String val) throws IOException {
writePrim("double",name,val,false);
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeDouble(String name, double val) throws IOException {
writeDouble(name,Double.toString(val));
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
Override
public void writeDate(String name, String val) throws IOException {
writePrim("date",name,val,false);
}
// in solr/core/src/java/org/apache/solr/response/XMLWriter.java
private void writePrim(String tag, String name, String val, boolean escape) throws IOException {
int contentLen = val==null ? 0 : val.length();
startTag(tag, name, contentLen==0);
if (contentLen==0) return;
if (escape) {
XML.escapeCharData(val,writer);
} else {
writer.write(val,0,contentLen);
}
writer.write('<');
writer.write('/');
writer.write(tag);
writer.write('>');
}
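// Hedged worked examples (not from this file) of the element shapes produced by
// writePrim above:
//   writeStr("id", "SOLR-1", false)  ->  <str name="id">SOLR-1</str>
//   writeInt("count", "42")          ->  <int name="count">42</int>
//   writeNull("missing")             ->  <null name="missing"/>   (empty content closes the tag inline)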
// in solr/core/src/java/org/apache/solr/response/XMLResponseWriter.java
public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
XMLWriter w = new XMLWriter(writer, req, rsp);
try {
w.writeResponse();
} finally {
w.close();
}
}
// in solr/core/src/java/org/apache/solr/response/transform/DocTransformers.java
Override
public void transform(SolrDocument doc, int docid) throws IOException {
for( DocTransformer a : children ) {
a.transform( doc, docid);
}
}
// in solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java
public void write(OutputStream out, SolrQueryRequest req, SolrQueryResponse response) throws IOException {
Resolver resolver = new Resolver(req, response.getReturnFields());
Boolean omitHeader = req.getParams().getBool(CommonParams.OMIT_HEADER);
if (omitHeader != null && omitHeader) response.getValues().remove("responseHeader");
JavaBinCodec codec = new JavaBinCodec(resolver);
codec.marshal(response.getValues(), out);
}
// in solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java
public void write(Writer writer, SolrQueryRequest request, SolrQueryResponse response) throws IOException {
throw new RuntimeException("This is a binary writer , Cannot write to a characterstream");
}
// in solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java
public Object resolve(Object o, JavaBinCodec codec) throws IOException {
if (o instanceof ResultContext) {
writeResults((ResultContext) o, codec);
return null; // null means we completely handled it
}
if (o instanceof DocList) {
ResultContext ctx = new ResultContext();
ctx.docs = (DocList) o;
writeResults(ctx, codec);
return null; // null means we completely handled it
}
if( o instanceof IndexableField ) {
if(schema == null) schema = solrQueryRequest.getSchema();
IndexableField f = (IndexableField)o;
SchemaField sf = schema.getFieldOrNull(f.name());
try {
o = getValue(sf, f);
}
catch (Exception e) {
LOG.warn("Error reading a field : " + o, e);
}
}
if (o instanceof SolrDocument) {
// Remove any fields that were not requested.
// This typically happens when distributed search adds
// extra fields to an internal request
SolrDocument doc = (SolrDocument)o;
Iterator<Map.Entry<String, Object>> i = doc.iterator();
while ( i.hasNext() ) {
String fname = i.next().getKey();
if ( !returnFields.wantsField( fname ) ) {
i.remove();
}
}
return doc;
}
return o;
}
// in solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java
protected void writeResultsBody( ResultContext res, JavaBinCodec codec ) throws IOException
{
DocList ids = res.docs;
int sz = ids.size();
codec.writeTag(JavaBinCodec.ARR, sz);
if(searcher == null) searcher = solrQueryRequest.getSearcher();
if(schema == null) schema = solrQueryRequest.getSchema();
DocTransformer transformer = returnFields.getTransformer();
TransformContext context = new TransformContext();
context.query = res.query;
context.wantsScores = returnFields.wantsScore() && ids.hasScores();
context.req = solrQueryRequest;
context.searcher = searcher;
if( transformer != null ) {
transformer.setContext( context );
}
Set<String> fnames = returnFields.getLuceneFieldNames();
context.iterator = ids.iterator();
for (int i = 0; i < sz; i++) {
int id = context.iterator.nextDoc();
Document doc = searcher.doc(id, fnames);
SolrDocument sdoc = getDoc(doc);
if( transformer != null ) {
transformer.transform(sdoc, id);
}
codec.writeSolrDocument(sdoc);
}
if( transformer != null ) {
transformer.setContext( null );
}
}
// in solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java
public void writeResults(ResultContext ctx, JavaBinCodec codec) throws IOException {
codec.writeTag(JavaBinCodec.SOLRDOCLST);
boolean wantsScores = returnFields.wantsScore() && ctx.docs.hasScores();
List l = new ArrayList(3);
l.add((long) ctx.docs.matches());
l.add((long) ctx.docs.offset());
Float maxScore = null;
if (wantsScores) {
maxScore = ctx.docs.maxScore();
}
l.add(maxScore);
codec.writeArray(l);
// this is a separate function so that streaming responses can use just that part
writeResultsBody( ctx, codec );
}
// in solr/core/src/java/org/apache/solr/response/RawResponseWriter.java
public void write(Writer writer, SolrQueryRequest request, SolrQueryResponse response) throws IOException
{
Object obj = response.getValues().get( CONTENT );
if( obj != null && (obj instanceof ContentStream ) ) {
// copy the contents to the writer...
ContentStream content = (ContentStream)obj;
Reader reader = content.getReader();
try {
IOUtils.copy( reader, writer );
} finally {
reader.close();
}
}
else {
getBaseWriter( request ).write( writer, request, response );
}
}
// in solr/core/src/java/org/apache/solr/response/RawResponseWriter.java
public void write(OutputStream out, SolrQueryRequest request,
SolrQueryResponse response) throws IOException {
Object obj = response.getValues().get( CONTENT );
if( obj != null && (obj instanceof ContentStream ) ) {
// copy the contents to the writer...
ContentStream content = (ContentStream)obj;
java.io.InputStream in = content.getStream();
try {
IOUtils.copy( in, out );
} finally {
in.close();
}
}
else {
//getBaseWriter( request ).write( writer, request, response );
throw new IOException("did not find a CONTENT object");
}
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
PHPSerializedWriter w = new PHPSerializedWriter(writer, req, rsp);
try {
w.writeResponse();
} finally {
w.close();
}
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
Override
public void writeResponse() throws IOException {
Boolean omitHeader = req.getParams().getBool(CommonParams.OMIT_HEADER);
if(omitHeader != null && omitHeader) rsp.getValues().remove("responseHeader");
writeNamedList(null, rsp.getValues());
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
Override
public void writeNamedList(String name, NamedList val) throws IOException {
writeNamedListAsMapMangled(name,val);
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
public void writeStartDocumentList(String name,
long start, int size, long numFound, Float maxScore) throws IOException
{
writeMapOpener((maxScore==null) ? 3 : 4);
writeKey("numFound",false);
writeLong(null,numFound);
writeKey("start",false);
writeLong(null,start);
if (maxScore!=null) {
writeKey("maxScore",false);
writeFloat(null,maxScore);
}
writeKey("docs",false);
writeArrayOpener(size);
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
public void writeEndDocumentList() throws IOException
{
writeArrayCloser(); // doc list
writeMapCloser();
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
Override
public void writeSolrDocument(String name, SolrDocument doc, ReturnFields returnFields, int idx) throws IOException
{
writeKey(idx, false);
LinkedHashMap <String,Object> single = new LinkedHashMap<String, Object>();
LinkedHashMap <String,Object> multi = new LinkedHashMap<String, Object>();
for (String fname : doc.getFieldNames()) {
if(!returnFields.wantsField(fname)){
continue;
}
Object val = doc.getFieldValue(fname);
if (val instanceof Collection) {
multi.put(fname, val);
}else{
single.put(fname, val);
}
}
writeMapOpener(single.size() + multi.size());
for(String fname: single.keySet()){
Object val = single.get(fname);
writeKey(fname, true);
writeVal(fname, val);
}
for(String fname: multi.keySet()){
writeKey(fname, true);
Object val = multi.get(fname);
if (!(val instanceof Collection)) {
// should never be reached if multivalued fields are stored as a Collection
// so I'm assuming a size of 1 just to wrap the single value
writeArrayOpener(1);
writeVal(fname, val);
writeArrayCloser();
}else{
writeVal(fname, val);
}
}
writeMapCloser();
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
Override
public void writeArray(String name, Object[] val) throws IOException {
writeMapOpener(val.length);
for(int i=0; i < val.length; i++) {
writeKey(i, false);
writeVal(String.valueOf(i), val[i]);
}
writeMapCloser();
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
Override
public void writeArray(String name, Iterator val) throws IOException {
ArrayList vals = new ArrayList();
while( val.hasNext() ) {
vals.add(val.next());
}
writeArray(name, vals.toArray());
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
Override
public void writeMapOpener(int size) throws IOException, IllegalArgumentException {
// negative size value indicates that something has gone wrong
if (size < 0) {
throw new IllegalArgumentException("Map size must not be negative");
}
writer.write("a:"+size+":{");
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
Override
public void writeMapSeparator() throws IOException {
/* NOOP */
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
Override
public void writeMapCloser() throws IOException {
writer.write('}');
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
Override
public void writeArrayOpener(int size) throws IOException, IllegalArgumentException {
// negative size value indicates that something has gone wrong
if (size < 0) {
throw new IllegalArgumentException("Array size must not be negative");
}
writer.write("a:"+size+":{");
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
@Override
public void writeArraySeparator() throws IOException {
/* NOOP */
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
@Override
public void writeArrayCloser() throws IOException {
writer.write('}');
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
@Override
public void writeNull(String name) throws IOException {
writer.write("N;");
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
@Override
protected void writeKey(String fname, boolean needsEscaping) throws IOException {
writeStr(null, fname, needsEscaping);
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
void writeKey(int val, boolean needsEscaping) throws IOException {
writeInt(null, String.valueOf(val));
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
@Override
public void writeBool(String name, boolean val) throws IOException {
writer.write(val ? "b:1;" : "b:0;");
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
@Override
public void writeBool(String name, String val) throws IOException {
writeBool(name, val.charAt(0) == 't');
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
@Override
public void writeInt(String name, String val) throws IOException {
writer.write("i:"+val+";");
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
@Override
public void writeLong(String name, String val) throws IOException {
writeInt(name,val);
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
@Override
public void writeFloat(String name, String val) throws IOException {
writeDouble(name,val);
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
@Override
public void writeDouble(String name, String val) throws IOException {
writer.write("d:"+val+";");
}
// in solr/core/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
@Override
public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
// serialized PHP strings don't need to be escaped at all, however the
// string size reported needs to be the number of bytes rather than chars.
UnicodeUtil.UTF16toUTF8(val, 0, val.length(), utf8);
int nBytes = utf8.length;
writer.write("s:");
writer.write(Integer.toString(nBytes));
writer.write(":\"");
writer.write(val);
writer.write("\";");
}
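// Illustrative sketch, not from the Solr source: writeStr above emits PHP serialize()
// string notation, s:<byteLength>:"<chars>";, and the declared length must count UTF-8
// bytes rather than Java chars. A minimal standalone equivalent of that length handling
// (the helper name is made up):
static String phpSerializedString(String val) {
  int nBytes = val.getBytes(java.nio.charset.StandardCharsets.UTF_8).length; // byte count, not val.length()
  return "s:" + nBytes + ":\"" + val + "\";";
}
// e.g. phpSerializedString("héllo") yields s:6:"héllo"; because 'é' occupies two UTF-8 bytes.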
// in solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java
NamedList<Integer> getFacetCounts(Executor executor) throws IOException {
CompletionService<SegFacet> completionService = new ExecutorCompletionService<SegFacet>(executor);
// reuse the translation logic to go from top level set to per-segment set
baseSet = docs.getTopFilter();
final AtomicReaderContext[] leaves = searcher.getTopReaderContext().leaves();
// The list of pending tasks that aren't immediately submitted
// TODO: Is there a completion service, or a delegating executor that can
// limit the number of concurrent tasks submitted to a bigger executor?
LinkedList<Callable<SegFacet>> pending = new LinkedList<Callable<SegFacet>>();
int threads = nThreads <= 0 ? Integer.MAX_VALUE : nThreads;
for (int i=0; i<leaves.length; i++) {
final SegFacet segFacet = new SegFacet(leaves[i]);
Callable<SegFacet> task = new Callable<SegFacet>() {
public SegFacet call() throws Exception {
segFacet.countTerms();
return segFacet;
}
};
// TODO: if limiting threads, submit by largest segment first?
if (--threads >= 0) {
completionService.submit(task);
} else {
pending.add(task);
}
}
// now merge the per-segment results
PriorityQueue<SegFacet> queue = new PriorityQueue<SegFacet>(leaves.length) {
@Override
protected boolean lessThan(SegFacet a, SegFacet b) {
return a.tempBR.compareTo(b.tempBR) < 0;
}
};
boolean hasMissingCount=false;
int missingCount=0;
for (int i=0; i<leaves.length; i++) {
SegFacet seg = null;
try {
Future<SegFacet> future = completionService.take();
seg = future.get();
if (!pending.isEmpty()) {
completionService.submit(pending.removeFirst());
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
} catch (ExecutionException e) {
Throwable cause = e.getCause();
if (cause instanceof RuntimeException) {
throw (RuntimeException)cause;
} else {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error in per-segment faceting on field: " + fieldName, cause);
}
}
if (seg.startTermIndex < seg.endTermIndex) {
if (seg.startTermIndex==0) {
hasMissingCount=true;
missingCount += seg.counts[0];
seg.pos = 1;
} else {
seg.pos = seg.startTermIndex;
}
if (seg.pos < seg.endTermIndex) {
seg.tenum = seg.si.getTermsEnum();
seg.tenum.seekExact(seg.pos);
seg.tempBR = seg.tenum.term();
queue.add(seg);
}
}
}
FacetCollector collector;
if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
collector = new CountSortedFacetCollector(offset, limit, mincount);
} else {
collector = new IndexSortedFacetCollector(offset, limit, mincount);
}
BytesRef val = new BytesRef();
while (queue.size() > 0) {
SegFacet seg = queue.top();
// make a shallow copy
val.bytes = seg.tempBR.bytes;
val.offset = seg.tempBR.offset;
val.length = seg.tempBR.length;
int count = 0;
do {
count += seg.counts[seg.pos - seg.startTermIndex];
// TODO: OPTIMIZATION...
// if mincount>0 then seg.pos++ can skip ahead to the next non-zero entry.
seg.pos++;
if (seg.pos >= seg.endTermIndex) {
queue.pop();
seg = queue.top();
} else {
seg.tempBR = seg.tenum.next();
seg = queue.updateTop();
}
} while (seg != null && val.compareTo(seg.tempBR) == 0);
boolean stop = collector.collect(val, count);
if (stop) break;
}
NamedList<Integer> res = collector.getFacetCounts();
// convert labels to readable form
FieldType ft = searcher.getSchema().getFieldType(fieldName);
int sz = res.size();
for (int i=0; i<sz; i++) {
res.setName(i, ft.indexedToReadable(res.getName(i)));
}
if (missing) {
if (!hasMissingCount) {
missingCount = SimpleFacets.getFieldMissingCount(searcher,docs,fieldName);
}
res.add(null, missingCount);
}
return res;
}
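// Illustrative sketch, not from the Solr source: getFacetCounts above merges the sorted
// per-segment term streams with a priority queue keyed on each segment's current term,
// summing the counts for equal terms before handing them to the collector. A simplified,
// self-contained version of that k-way merge, using Strings in place of BytesRef and plain
// sorted iterators in place of TermsEnum (every name below is made up for the sketch):
static java.util.Map<String,Integer> mergeSortedSegmentTerms(java.util.List<java.util.Iterator<String>> segments) {
  // one cursor per segment; the heap is ordered by each cursor's current term
  class Cursor {
    final java.util.Iterator<String> it;
    String term;
    Cursor(java.util.Iterator<String> it) { this.it = it; advance(); }
    void advance() { term = it.hasNext() ? it.next() : null; }
  }
  java.util.PriorityQueue<Cursor> queue = new java.util.PriorityQueue<Cursor>(
      segments.size() + 1,
      new java.util.Comparator<Cursor>() {
        public int compare(Cursor a, Cursor b) { return a.term.compareTo(b.term); }
      });
  for (java.util.Iterator<String> it : segments) {
    Cursor c = new Cursor(it);
    if (c.term != null) queue.add(c);
  }
  java.util.Map<String,Integer> merged = new java.util.LinkedHashMap<String,Integer>();
  while (!queue.isEmpty()) {
    String term = queue.peek().term;
    int count = 0;
    // drain every cursor positioned on this term; the real code adds the per-segment count here
    while (!queue.isEmpty() && queue.peek().term.equals(term)) {
      Cursor c = queue.poll();
      count++;
      c.advance();
      if (c.term != null) queue.add(c);
    }
    merged.put(term, count);
  }
  return merged;
}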
// in solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java
void countTerms() throws IOException {
si = FieldCache.DEFAULT.getTermsIndex(context.reader(), fieldName);
// SolrCore.log.info("reader= " + reader + " FC=" + System.identityHashCode(si));
if (prefix!=null) {
BytesRef prefixRef = new BytesRef(prefix);
startTermIndex = si.binarySearchLookup(prefixRef, tempBR);
if (startTermIndex<0) startTermIndex=-startTermIndex-1;
prefixRef.append(UnicodeUtil.BIG_TERM);
// TODO: we could constrain the lower endpoint if we had a binarySearch method that allowed passing start/end
endTermIndex = si.binarySearchLookup(prefixRef, tempBR);
assert endTermIndex < 0;
endTermIndex = -endTermIndex-1;
} else {
startTermIndex=0;
endTermIndex=si.numOrd();
}
final int nTerms=endTermIndex-startTermIndex;
if (nTerms>0) {
// count collection array only needs to be as big as the number of terms we are
// going to collect counts for.
final int[] counts = this.counts = new int[nTerms];
DocIdSet idSet = baseSet.getDocIdSet(context, null); // this set only includes live docs
DocIdSetIterator iter = idSet.iterator();
////
PackedInts.Reader ordReader = si.getDocToOrd();
int doc;
final Object arr;
if (ordReader.hasArray()) {
arr = ordReader.getArray();
} else {
arr = null;
}
if (arr instanceof int[]) {
int[] ords = (int[]) arr;
if (prefix==null) {
while ((doc = iter.nextDoc()) < DocIdSetIterator.NO_MORE_DOCS) {
counts[ords[doc]]++;
}
} else {
while ((doc = iter.nextDoc()) < DocIdSetIterator.NO_MORE_DOCS) {
int term = ords[doc];
int arrIdx = term-startTermIndex;
if (arrIdx>=0 && arrIdx<nTerms) counts[arrIdx]++;
}
}
} else if (arr instanceof short[]) {
short[] ords = (short[]) arr;
if (prefix==null) {
while ((doc = iter.nextDoc()) < DocIdSetIterator.NO_MORE_DOCS) {
counts[ords[doc] & 0xffff]++;
}
} else {
while ((doc = iter.nextDoc()) < DocIdSetIterator.NO_MORE_DOCS) {
int term = ords[doc] & 0xffff;
int arrIdx = term-startTermIndex;
if (arrIdx>=0 && arrIdx<nTerms) counts[arrIdx]++;
}
}
} else if (arr instanceof byte[]) {
byte[] ords = (byte[]) arr;
if (prefix==null) {
while ((doc = iter.nextDoc()) < DocIdSetIterator.NO_MORE_DOCS) {
counts[ords[doc] & 0xff]++;
}
} else {
while ((doc = iter.nextDoc()) < DocIdSetIterator.NO_MORE_DOCS) {
int term = ords[doc] & 0xff;
int arrIdx = term-startTermIndex;
if (arrIdx>=0 && arrIdx<nTerms) counts[arrIdx]++;
}
}
} else {
if (prefix==null) {
// specialized version when collecting counts for all terms
while ((doc = iter.nextDoc()) < DocIdSetIterator.NO_MORE_DOCS) {
counts[si.getOrd(doc)]++;
}
} else {
// version that adjusts term numbers because we aren't collecting the full range
while ((doc = iter.nextDoc()) < DocIdSetIterator.NO_MORE_DOCS) {
int term = si.getOrd(doc);
int arrIdx = term-startTermIndex;
if (arrIdx>=0 && arrIdx<nTerms) counts[arrIdx]++;
}
}
}
}
}
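// Illustrative sketch, not from the Solr source: countTerms above specializes on the raw
// type of the FieldCache ord array. Ordinals held in short[] or byte[] are stored unsigned,
// so they must be widened with a mask before being used as a counts[] index; a compact
// equivalent of that dispatch (the helper name is made up):
static int ordAt(Object ords, int doc) {
  if (ords instanceof int[])   return ((int[]) ords)[doc];
  if (ords instanceof short[]) return ((short[]) ords)[doc] & 0xffff; // unsigned 16-bit ordinal
  if (ords instanceof byte[])  return ((byte[]) ords)[doc] & 0xff;    // unsigned 8-bit ordinal
  throw new IllegalArgumentException("unsupported ord array type");
}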
// in solr/core/src/java/org/apache/solr/request/SimpleFacets.java
void parseParams(String type, String param) throws ParseException, IOException {
localParams = QueryParsing.getLocalParams(param, req.getParams());
base = docs;
facetValue = param;
key = param;
threads = -1;
if (localParams == null) return;
// remove local params unless it's a query
if (type != FacetParams.FACET_QUERY) { // TODO Cut over to an Enum here
facetValue = localParams.get(CommonParams.VALUE);
}
// reset the default key now that localParams have been removed
key = facetValue;
// allow explicit set of the key
key = localParams.get(CommonParams.OUTPUT_KEY, key);
String threadStr = localParams.get(CommonParams.THREADS);
if (threadStr != null) {
threads = Integer.parseInt(threadStr);
}
// figure out if we need a new base DocSet
String excludeStr = localParams.get(CommonParams.EXCLUDE);
if (excludeStr == null) return;
Map<?,?> tagMap = (Map<?,?>)req.getContext().get("tags");
if (tagMap != null && rb != null) {
List<String> excludeTagList = StrUtils.splitSmart(excludeStr,',');
IdentityHashMap<Query,Boolean> excludeSet = new IdentityHashMap<Query,Boolean>();
for (String excludeTag : excludeTagList) {
Object olst = tagMap.get(excludeTag);
// tagMap has entries of List<String,List<QParser>>, but subject to change in the future
if (!(olst instanceof Collection)) continue;
for (Object o : (Collection<?>)olst) {
if (!(o instanceof QParser)) continue;
QParser qp = (QParser)o;
excludeSet.put(qp.getQuery(), Boolean.TRUE);
}
}
if (excludeSet.size() == 0) return;
List<Query> qlist = new ArrayList<Query>();
// add the base query
if (!excludeSet.containsKey(rb.getQuery())) {
qlist.add(rb.getQuery());
}
// add the filters
if (rb.getFilters() != null) {
for (Query q : rb.getFilters()) {
if (!excludeSet.containsKey(q)) {
qlist.add(q);
}
}
}
// get the new base docset for this facet
DocSet base = searcher.getDocSet(qlist);
if (rb.grouping() && rb.getGroupingSpec().isTruncateGroups()) {
Grouping grouping = new Grouping(searcher, null, rb.getQueryCommand(), false, 0, false);
if (rb.getGroupingSpec().getFields().length > 0) {
grouping.addFieldCommand(rb.getGroupingSpec().getFields()[0], req);
} else if (rb.getGroupingSpec().getFunctions().length > 0) {
grouping.addFunctionCommand(rb.getGroupingSpec().getFunctions()[0], req);
} else {
this.base = base;
return;
}
AbstractAllGroupHeadsCollector allGroupHeadsCollector = grouping.getCommands().get(0).createAllGroupCollector();
searcher.search(new MatchAllDocsQuery(), base.getTopFilter(), allGroupHeadsCollector);
int maxDoc = searcher.maxDoc();
FixedBitSet fixedBitSet = allGroupHeadsCollector.retrieveGroupHeads(maxDoc);
long[] bits = fixedBitSet.getBits();
this.base = new BitDocSet(new OpenBitSet(bits, bits.length));
} else {
this.base = base;
}
}
}
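// Illustrative usage, not from the Solr source: parseParams above is what lets a request
// exclude tagged filters from a facet's base DocSet via local params, e.g. (the field names
// are made up; "tag", "ex" and "key" are the standard local-param names read above):
//
//   fq={!tag=dt}doctype:pdf
//   facet.field={!ex=dt key=doctypes}doctype
//
// The {!ex=dt} exclusion recomputes the base DocSet without the tagged filter, and key=
// renames the facet entry in the response.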
// in solr/core/src/java/org/apache/solr/request/SimpleFacets.java
public NamedList<Integer> getFacetQueryCounts() throws IOException,ParseException {
NamedList<Integer> res = new SimpleOrderedMap<Integer>();
/* Ignore CommonParams.DF - could have init param facet.query assuming
* the schema default with query param DF intended to only affect Q.
* If user doesn't want schema default for facet.query, they should be
* explicit.
*/
// SolrQueryParser qp = searcher.getSchema().getSolrQueryParser(null);
String[] facetQs = params.getParams(FacetParams.FACET_QUERY);
if (null != facetQs && 0 != facetQs.length) {
for (String q : facetQs) {
parseParams(FacetParams.FACET_QUERY, q);
// TODO: slight optimization would prevent double-parsing of any localParams
Query qobj = QParser.getParser(q, null, req).getQuery();
res.add(key, searcher.numDocs(qobj, base));
}
}
return res;
}
// in solr/core/src/java/org/apache/solr/request/SimpleFacets.java
public NamedList<Integer> getTermCounts(String field) throws IOException {
int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
if (limit == 0) return new NamedList<Integer>();
Integer mincount = params.getFieldInt(field, FacetParams.FACET_MINCOUNT);
if (mincount==null) {
Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
// mincount = (zeros!=null && zeros) ? 0 : 1;
mincount = (zeros!=null && !zeros) ? 1 : 0;
// current default is to include zeros.
}
boolean missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
// default to sorting if there is a limit.
String sort = params.getFieldParam(field, FacetParams.FACET_SORT, limit>0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);
String prefix = params.getFieldParam(field,FacetParams.FACET_PREFIX);
NamedList<Integer> counts;
SchemaField sf = searcher.getSchema().getField(field);
FieldType ft = sf.getType();
// determine what type of faceting method to use
String method = params.getFieldParam(field, FacetParams.FACET_METHOD);
boolean enumMethod = FacetParams.FACET_METHOD_enum.equals(method);
// TODO: default to per-segment or not?
boolean per_segment = FacetParams.FACET_METHOD_fcs.equals(method);
if (method == null && ft instanceof BoolField) {
// Always use filters for booleans... we know the number of values is very small.
enumMethod = true;
}
boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();
if (TrieField.getMainValuePrefix(ft) != null) {
// A TrieField with multiple parts indexed per value... currently only
// UnInvertedField can handle this case, so force its use.
enumMethod = false;
multiToken = true;
}
if (params.getFieldBool(field, GroupParams.GROUP_FACET, false)) {
counts = getGroupedCounts(searcher, base, field, multiToken, offset,limit, mincount, missing, sort, prefix);
} else {
// unless the enum method is explicitly specified, use a counting method.
if (enumMethod) {
counts = getFacetTermEnumCounts(searcher, base, field, offset, limit, mincount,missing,sort,prefix);
} else {
if (multiToken) {
UnInvertedField uif = UnInvertedField.getUnInvertedField(field, searcher);
counts = uif.getCounts(searcher, base, offset, limit, mincount,missing,sort,prefix);
} else {
// TODO: future logic could use filters instead of the fieldcache if
// the number of terms in the field is small enough.
if (per_segment) {
PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, base, field, offset,limit, mincount, missing, sort, prefix);
Executor executor = threads == 0 ? directExecutor : facetExecutor;
ps.setNumThreads(threads);
counts = ps.getFacetCounts(executor);
} else {
counts = getFieldCacheCounts(searcher, base, field, offset,limit, mincount, missing, sort, prefix);
}
}
}
}
return counts;
}
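// Illustrative usage, not from the Solr source: the selection above is driven by
// facet.method (settable per field as f.<field>.facet.method):
//
//   facet.method=enum  -> term enumeration against the filterCache (getFacetTermEnumCounts)
//   facet.method=fcs   -> per-segment field-cache faceting (PerSegmentSingleValuedFaceting),
//                         optionally parallelised via the threads local param read in parseParams
//   facet.method=fc    -> field-cache / UnInvertedField counting, the default taken here when
//                         neither of the above applies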
// in solr/core/src/java/org/apache/solr/request/SimpleFacets.java
public NamedList<Integer> getGroupedCounts(SolrIndexSearcher searcher,
DocSet base,
String field,
boolean multiToken,
int offset,
int limit,
int mincount,
boolean missing,
String sort,
String prefix) throws IOException {
GroupingSpecification groupingSpecification = rb.getGroupingSpec();
String groupField = groupingSpecification != null ? groupingSpecification.getFields()[0] : null;
if (groupField == null) {
throw new SolrException (
SolrException.ErrorCode.BAD_REQUEST,
"Specify the group.field as parameter or local parameter"
);
}
BytesRef prefixBR = prefix != null ? new BytesRef(prefix) : null;
TermGroupFacetCollector collector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, field, multiToken, prefixBR, 128);
searcher.search(new MatchAllDocsQuery(), base.getTopFilter(), collector);
boolean orderByCount = sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY);
TermGroupFacetCollector.GroupedFacetResult result = collector.mergeSegmentResults(offset + limit, mincount, orderByCount);
CharsRef charsRef = new CharsRef();
FieldType facetFieldType = searcher.getSchema().getFieldType(field);
NamedList<Integer> facetCounts = new NamedList<Integer>();
List<TermGroupFacetCollector.FacetEntry> scopedEntries = result.getFacetEntries(offset, limit);
for (TermGroupFacetCollector.FacetEntry facetEntry : scopedEntries) {
facetFieldType.indexedToReadable(facetEntry.getValue(), charsRef);
facetCounts.add(charsRef.toString(), facetEntry.getCount());
}
if (missing) {
facetCounts.add(null, result.getTotalMissingCount());
}
return facetCounts;
}
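// Illustrative usage, not from the Solr source: getGroupedCounts above backs grouped
// faceting, where each count is the number of matching groups rather than documents, e.g.
// (field names are made up):
//
//   group=true
//   group.field=author
//   group.facet=true
//   facet=true
//   facet.field=genre
//
// group.field supplies the groupField required above; without it the method throws BAD_REQUEST.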
// in solr/core/src/java/org/apache/solr/request/SimpleFacets.java
public NamedList<Object> getFacetFieldCounts()
throws IOException, ParseException {
NamedList<Object> res = new SimpleOrderedMap<Object>();
String[] facetFs = params.getParams(FacetParams.FACET_FIELD);
if (null != facetFs) {
for (String f : facetFs) {
parseParams(FacetParams.FACET_FIELD, f);
String termList = localParams == null ? null : localParams.get(CommonParams.TERMS);
if (termList != null) {
res.add(key, getListedTermCounts(facetValue, termList));
} else {
res.add(key, getTermCounts(facetValue));
}
}
}
return res;
}
// in solr/core/src/java/org/apache/solr/request/SimpleFacets.java
private NamedList<Integer> getListedTermCounts(String field, String termList) throws IOException {
FieldType ft = searcher.getSchema().getFieldType(field);
List<String> terms = StrUtils.splitSmart(termList, ",", true);
NamedList<Integer> res = new NamedList<Integer>();
for (String term : terms) {
String internal = ft.toInternal(term);
int count = searcher.numDocs(new TermQuery(new Term(field, internal)), base);
res.add(term, count);
}
return res;
}
// in solr/core/src/java/org/apache/solr/request/SimpleFacets.java
public static int getFieldMissingCount(SolrIndexSearcher searcher, DocSet docs, String fieldName)
throws IOException {
DocSet hasVal = searcher.getDocSet
(new TermRangeQuery(fieldName, null, null, false, false));
return docs.andNotSize(hasVal);
}
// in solr/core/src/java/org/apache/solr/request/SimpleFacets.java
public static NamedList<Integer> getFieldCacheCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {
// TODO: If the number of terms is high compared to docs.size(), and zeros==false,
// we should use an alternate strategy to avoid
// 1) creating another huge int[] for the counts
// 2) looping over that huge int[] looking for the rare non-zeros.
//
// Yet another variation: if docs.size() is small and termvectors are stored,
// then use them instead of the FieldCache.
//
// TODO: this function is too big and could use some refactoring, but
// we also need a facet cache, and refactoring of SimpleFacets instead of
// trying to pass all the various params around.
FieldType ft = searcher.getSchema().getFieldType(fieldName);
NamedList<Integer> res = new NamedList<Integer>();
FieldCache.DocTermsIndex si = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), fieldName);
final BytesRef prefixRef;
if (prefix == null) {
prefixRef = null;
} else if (prefix.length()==0) {
prefix = null;
prefixRef = null;
} else {
prefixRef = new BytesRef(prefix);
}
final BytesRef br = new BytesRef();
int startTermIndex, endTermIndex;
if (prefix!=null) {
startTermIndex = si.binarySearchLookup(prefixRef, br);
if (startTermIndex<0) startTermIndex=-startTermIndex-1;
prefixRef.append(UnicodeUtil.BIG_TERM);
endTermIndex = si.binarySearchLookup(prefixRef, br);
assert endTermIndex < 0;
endTermIndex = -endTermIndex-1;
} else {
startTermIndex=0;
endTermIndex=si.numOrd();
}
final int nTerms=endTermIndex-startTermIndex;
int missingCount = -1;
final CharsRef charsRef = new CharsRef(10);
if (nTerms>0 && docs.size() >= mincount) {
// count collection array only needs to be as big as the number of terms we are
// going to collect counts for.
final int[] counts = new int[nTerms];
DocIterator iter = docs.iterator();
PackedInts.Reader ordReader = si.getDocToOrd();
final Object arr;
if (ordReader.hasArray()) {
arr = ordReader.getArray();
} else {
arr = null;
}
if (arr instanceof int[]) {
int[] ords = (int[]) arr;
if (prefix==null) {
while (iter.hasNext()) {
counts[ords[iter.nextDoc()]]++;
}
} else {
while (iter.hasNext()) {
int term = ords[iter.nextDoc()];
int arrIdx = term-startTermIndex;
if (arrIdx>=0 && arrIdx<nTerms) counts[arrIdx]++;
}
}
} else if (arr instanceof short[]) {
short[] ords = (short[]) arr;
if (prefix==null) {
while (iter.hasNext()) {
counts[ords[iter.nextDoc()] & 0xffff]++;
}
} else {
while (iter.hasNext()) {
int term = ords[iter.nextDoc()] & 0xffff;
int arrIdx = term-startTermIndex;
if (arrIdx>=0 && arrIdx<nTerms) counts[arrIdx]++;
}
}
} else if (arr instanceof byte[]) {
byte[] ords = (byte[]) arr;
if (prefix==null) {
while (iter.hasNext()) {
counts[ords[iter.nextDoc()] & 0xff]++;
}
} else {
while (iter.hasNext()) {
int term = ords[iter.nextDoc()] & 0xff;
int arrIdx = term-startTermIndex;
if (arrIdx>=0 && arrIdx<nTerms) counts[arrIdx]++;
}
}
} else {
while (iter.hasNext()) {
int term = si.getOrd(iter.nextDoc());
int arrIdx = term-startTermIndex;
if (arrIdx>=0 && arrIdx<nTerms) counts[arrIdx]++;
}
}
if (startTermIndex == 0) {
missingCount = counts[0];
}
// IDEA: we could also maintain a count of "other"... everything that fell outside
// of the top 'N'
int off=offset;
int lim=limit>=0 ? limit : Integer.MAX_VALUE;
if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
int maxsize = limit>0 ? offset+limit : Integer.MAX_VALUE-1;
maxsize = Math.min(maxsize, nTerms);
LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize,1000), maxsize, Long.MIN_VALUE);
int min=mincount-1; // the smallest value in the top 'N' values
for (int i=(startTermIndex==0)?1:0; i<nTerms; i++) {
int c = counts[i];
if (c>min) {
// NOTE: we use c>min rather than c>=min as an optimization because we are going in
// index order, so we already know that the keys are ordered. This can be very
// important if a lot of the counts are repeated (like zero counts would be).
// smaller term numbers sort higher, so subtract the term number instead
long pair = (((long)c)<<32) + (Integer.MAX_VALUE - i);
boolean displaced = queue.insert(pair);
if (displaced) min=(int)(queue.top() >>> 32);
}
}
// if we are deep paging, we don't have to order the highest "offset" counts.
int collectCount = Math.max(0, queue.size() - off);
assert collectCount <= lim;
// the start and end indexes of our list "sorted" (starting with the highest value)
int sortedIdxStart = queue.size() - (collectCount - 1);
int sortedIdxEnd = queue.size() + 1;
final long[] sorted = queue.sort(collectCount);
for (int i=sortedIdxStart; i<sortedIdxEnd; i++) {
long pair = sorted[i];
int c = (int)(pair >>> 32);
int tnum = Integer.MAX_VALUE - (int)pair;
ft.indexedToReadable(si.lookup(startTermIndex+tnum, br), charsRef);
res.add(charsRef.toString(), c);
}
} else {
// add results in index order
int i=(startTermIndex==0)?1:0;
if (mincount<=0) {
// if mincount<=0, then we won't discard any terms and we know exactly
// where to start.
i+=off;
off=0;
}
for (; i<nTerms; i++) {
int c = counts[i];
if (c<mincount || --off>=0) continue;
if (--lim<0) break;
ft.indexedToReadable(si.lookup(startTermIndex+i, br), charsRef);
res.add(charsRef.toString(), c);
}
}
}
if (missing) {
if (missingCount < 0) {
missingCount = getFieldMissingCount(searcher,docs,fieldName);
}
res.add(null, missingCount);
}
return res;
}
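// Illustrative sketch, not from the Solr source: the count-sorted branch above packs each
// (count, term number) pair into a single long so one LongPriorityQueue can track the top-N:
// the count occupies the high 32 bits and (Integer.MAX_VALUE - termNum) the low 32 bits, so
// equal counts tie-break toward smaller term numbers, i.e. index order. The helpers below
// (made-up names) mirror the packing and unpacking used above:
static long packCountTerm(int count, int termNum) {
  return (((long) count) << 32) + (Integer.MAX_VALUE - termNum);
}
static int unpackCount(long pair)   { return (int) (pair >>> 32); }
static int unpackTermNum(long pair) { return Integer.MAX_VALUE - (int) pair; }
// e.g. packCountTerm(5, 2) > packCountTerm(5, 7): same count, smaller term number sorts higher.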
// in solr/core/src/java/org/apache/solr/request/SimpleFacets.java
public NamedList<Integer> getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field, int offset, int limit, int mincount, boolean missing, String sort, String prefix)
throws IOException {
/* :TODO: potential optimization...
* cache the Terms with the highest docFreq and try them first
* don't enum if we get our max from them
*/
// Minimum term docFreq in order to use the filterCache for that term.
int minDfFilterCache = params.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);
// make sure we have a set that is fast for random access, if we will use it for that
DocSet fastForRandomSet = docs;
if (minDfFilterCache>0 && docs instanceof SortedIntDocSet) {
SortedIntDocSet sset = (SortedIntDocSet)docs;
fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
}
IndexSchema schema = searcher.getSchema();
AtomicReader r = searcher.getAtomicReader();
FieldType ft = schema.getFieldType(field);
boolean sortByCount = sort.equals("count") || sort.equals("true");
final int maxsize = limit>=0 ? offset+limit : Integer.MAX_VALUE-1;
final BoundedTreeSet<CountPair<BytesRef,Integer>> queue = sortByCount ? new BoundedTreeSet<CountPair<BytesRef,Integer>>(maxsize) : null;
final NamedList<Integer> res = new NamedList<Integer>();
int min=mincount-1; // the smallest value in the top 'N' values
int off=offset;
int lim=limit>=0 ? limit : Integer.MAX_VALUE;
BytesRef startTermBytes = null;
if (prefix != null) {
String indexedPrefix = ft.toInternal(prefix);
startTermBytes = new BytesRef(indexedPrefix);
}
Fields fields = r.fields();
Terms terms = fields==null ? null : fields.terms(field);
TermsEnum termsEnum = null;
SolrIndexSearcher.DocsEnumState deState = null;
BytesRef term = null;
if (terms != null) {
termsEnum = terms.iterator(null);
// TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for
// facet.offset when sorting by index order.
if (startTermBytes != null) {
if (termsEnum.seekCeil(startTermBytes, true) == TermsEnum.SeekStatus.END) {
termsEnum = null;
} else {
term = termsEnum.term();
}
} else {
// position termsEnum on first term
term = termsEnum.next();
}
}
DocsEnum docsEnum = null;
CharsRef charsRef = new CharsRef(10);
if (docs.size() >= mincount) {
while (term != null) {
if (startTermBytes != null && !StringHelper.startsWith(term, startTermBytes))
break;
int df = termsEnum.docFreq();
// If we are sorting, we can use df>min (rather than >=) since we
// are going in index order. For certain term distributions this can
// make a large difference (for example, many terms with df=1).
if (df>0 && df>min) {
int c;
if (df >= minDfFilterCache) {
// use the filter cache
if (deState==null) {
deState = new SolrIndexSearcher.DocsEnumState();
deState.fieldName = field;
deState.liveDocs = r.getLiveDocs();
deState.termsEnum = termsEnum;
deState.docsEnum = docsEnum;
}
c = searcher.numDocs(docs, deState);
docsEnum = deState.docsEnum;
} else {
// iterate over TermDocs to calculate the intersection
// TODO: specialize when base docset is a bitset or hash set (skipDocs)? or does it matter for this?
// TODO: do this per-segment for better efficiency (MultiDocsEnum just uses base class impl)
// TODO: would passing deleted docs lead to better efficiency over checking the fastForRandomSet?
docsEnum = termsEnum.docs(null, docsEnum, false);
c=0;
if (docsEnum instanceof MultiDocsEnum) {
MultiDocsEnum.EnumWithSlice[] subs = ((MultiDocsEnum)docsEnum).getSubs();
int numSubs = ((MultiDocsEnum)docsEnum).getNumSubs();
for (int subindex = 0; subindex<numSubs; subindex++) {
MultiDocsEnum.EnumWithSlice sub = subs[subindex];
if (sub.docsEnum == null) continue;
int base = sub.slice.start;
int docid;
while ((docid = sub.docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid+base)) c++;
}
}
} else {
int docid;
while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid)) c++;
}
}
}
if (sortByCount) {
if (c>min) {
BytesRef termCopy = BytesRef.deepCopyOf(term);
queue.add(new CountPair<BytesRef,Integer>(termCopy, c));
if (queue.size()>=maxsize) min=queue.last().val;
}
} else {
if (c >= mincount && --off<0) {
if (--lim<0) break;
ft.indexedToReadable(term, charsRef);
res.add(charsRef.toString(), c);
}
}
}
term = termsEnum.next();
}
}
if (sortByCount) {
for (CountPair<BytesRef,Integer> p : queue) {
if (--off>=0) continue;
if (--lim<0) break;
ft.indexedToReadable(p.key, charsRef);
res.add(charsRef.toString(), p.val);
}
}
if (missing) {
res.add(null, getFieldMissingCount(searcher,docs,field));
}
return res;
}
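// Illustrative usage, not from the Solr source: facet.enum.cache.minDf (FACET_ENUM_CACHE_MINDF
// above) is the docFreq threshold at which a term's count is taken from the filterCache rather
// than by iterating its postings, e.g. (the field name is made up):
//
//   facet.method=enum
//   f.category.facet.enum.cache.minDf=20
//
// Terms with docFreq below 20 are counted by walking their DocsEnum against fastForRandomSet;
// more frequent terms go through searcher.numDocs(...) and hit the filter cache.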
// in solr/core/src/java/org/apache/solr/request/SimpleFacets.java
@Deprecated
public NamedList<Object> getFacetDateCounts()
throws IOException, ParseException {
final NamedList<Object> resOuter = new SimpleOrderedMap<Object>();
final String[] fields = params.getParams(FacetParams.FACET_DATE);
if (null == fields || 0 == fields.length) return resOuter;
for (String f : fields) {
getFacetDateCounts(f, resOuter);
}
return resOuter;
}
// in solr/core/src/java/org/apache/solr/request/SimpleFacets.java
@Deprecated
public void getFacetDateCounts(String dateFacet, NamedList<Object> resOuter)
throws IOException, ParseException {
final IndexSchema schema = searcher.getSchema();
parseParams(FacetParams.FACET_DATE, dateFacet);
String f = facetValue;
final NamedList<Object> resInner = new SimpleOrderedMap<Object>();
resOuter.add(key, resInner);
final SchemaField sf = schema.getField(f);
if (! (sf.getType() instanceof DateField)) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"Can not date facet on a field which is not a DateField: " + f);
}
final DateField ft = (DateField) sf.getType();
final String startS
= required.getFieldParam(f,FacetParams.FACET_DATE_START);
final Date start;
try {
start = ft.parseMath(null, startS);
} catch (SolrException e) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"date facet 'start' is not a valid Date string: " + startS, e);
}
final String endS
= required.getFieldParam(f,FacetParams.FACET_DATE_END);
Date end; // not final, the hard end option may change this
try {
end = ft.parseMath(null, endS);
} catch (SolrException e) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"date facet 'end' is not a valid Date string: " + endS, e);
}
if (end.before(start)) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"date facet 'end' comes before 'start': "+endS+" < "+startS);
}
final String gap = required.getFieldParam(f,FacetParams.FACET_DATE_GAP);
final DateMathParser dmp = new DateMathParser();
final int minCount = params.getFieldInt(f,FacetParams.FACET_MINCOUNT, 0);
String[] iStrs = params.getFieldParams(f,FacetParams.FACET_DATE_INCLUDE);
// Legacy support for default of [lower,upper,edge] for date faceting
// this is not handled by FacetRangeInclude.parseParam because
// range faceting has different defaults
final EnumSet<FacetRangeInclude> include =
(null == iStrs || 0 == iStrs.length ) ?
EnumSet.of(FacetRangeInclude.LOWER,
FacetRangeInclude.UPPER,
FacetRangeInclude.EDGE)
: FacetRangeInclude.parseParam(iStrs);
try {
Date low = start;
while (low.before(end)) {
dmp.setNow(low);
String label = ft.toExternal(low);
Date high = dmp.parseMath(gap);
if (end.before(high)) {
if (params.getFieldBool(f,FacetParams.FACET_DATE_HARD_END,false)) {
high = end;
} else {
end = high;
}
}
if (high.before(low)) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"date facet infinite loop (is gap negative?)");
}
final boolean includeLower =
(include.contains(FacetRangeInclude.LOWER) ||
(include.contains(FacetRangeInclude.EDGE) && low.equals(start)));
final boolean includeUpper =
(include.contains(FacetRangeInclude.UPPER) ||
(include.contains(FacetRangeInclude.EDGE) && high.equals(end)));
final int count = rangeCount(sf,low,high,includeLower,includeUpper);
if (count >= minCount) {
resInner.add(label, count);
}
low = high;
}
} catch (java.text.ParseException e) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"date facet 'gap' is not a valid Date Math string: " + gap, e);
}
// explicitly return the gap and end so all the counts
// (including before/after/between) are meaningful - even if mincount
// has removed the neighboring ranges
resInner.add("gap", gap);
resInner.add("start", start);
resInner.add("end", end);
final String[] othersP =
params.getFieldParams(f,FacetParams.FACET_DATE_OTHER);
if (null != othersP && 0 < othersP.length ) {
final Set<FacetRangeOther> others = EnumSet.noneOf(FacetRangeOther.class);
for (final String o : othersP) {
others.add(FacetRangeOther.get(o));
}
// no matter what other values are listed, we don't do
// anything if "none" is specified.
if (! others.contains(FacetRangeOther.NONE) ) {
boolean all = others.contains(FacetRangeOther.ALL);
if (all || others.contains(FacetRangeOther.BEFORE)) {
// include upper bound if "outer" or if first gap doesn't already include it
resInner.add(FacetRangeOther.BEFORE.toString(),
rangeCount(sf,null,start,
false,
(include.contains(FacetRangeInclude.OUTER) ||
(! (include.contains(FacetRangeInclude.LOWER) ||
include.contains(FacetRangeInclude.EDGE))))));
}
if (all || others.contains(FacetRangeOther.AFTER)) {
// include lower bound if "outer" or if last gap doesn't already include it
resInner.add(FacetRangeOther.AFTER.toString(),
rangeCount(sf,end,null,
(include.contains(FacetRangeInclude.OUTER) ||
(! (include.contains(FacetRangeInclude.UPPER) ||
include.contains(FacetRangeInclude.EDGE)))),
false));
}
if (all || others.contains(FacetRangeOther.BETWEEN)) {
resInner.add(FacetRangeOther.BETWEEN.toString(),
rangeCount(sf,start,end,
(include.contains(FacetRangeInclude.LOWER) ||
include.contains(FacetRangeInclude.EDGE)),
(include.contains(FacetRangeInclude.UPPER) ||
include.contains(FacetRangeInclude.EDGE))));
}
}
}
}
// in solr/core/src/java/org/apache/solr/request/SimpleFacets.java
public NamedList<Object> getFacetRangeCounts() throws IOException, ParseException {
final NamedList<Object> resOuter = new SimpleOrderedMap<Object>();
final String[] fields = params.getParams(FacetParams.FACET_RANGE);
if (null == fields || 0 == fields.length) return resOuter;
for (String f : fields) {
getFacetRangeCounts(f, resOuter);
}
return resOuter;
}
// in solr/core/src/java/org/apache/solr/request/SimpleFacets.java
void getFacetRangeCounts(String facetRange, NamedList<Object> resOuter)
throws IOException, ParseException {
final IndexSchema schema = searcher.getSchema();
parseParams(FacetParams.FACET_RANGE, facetRange);
String f = facetValue;
final SchemaField sf = schema.getField(f);
final FieldType ft = sf.getType();
RangeEndpointCalculator<?> calc = null;
if (ft instanceof TrieField) {
final TrieField trie = (TrieField)ft;
switch (trie.getType()) {
case FLOAT:
calc = new FloatRangeEndpointCalculator(sf);
break;
case DOUBLE:
calc = new DoubleRangeEndpointCalculator(sf);
break;
case INTEGER:
calc = new IntegerRangeEndpointCalculator(sf);
break;
case LONG:
calc = new LongRangeEndpointCalculator(sf);
break;
default:
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"Unable to range facet on tried field of unexpected type:" + f);
}
} else if (ft instanceof DateField) {
calc = new DateRangeEndpointCalculator(sf, null);
} else if (ft instanceof SortableIntField) {
calc = new IntegerRangeEndpointCalculator(sf);
} else if (ft instanceof SortableLongField) {
calc = new LongRangeEndpointCalculator(sf);
} else if (ft instanceof SortableFloatField) {
calc = new FloatRangeEndpointCalculator(sf);
} else if (ft instanceof SortableDoubleField) {
calc = new DoubleRangeEndpointCalculator(sf);
} else {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"Unable to range facet on field:" + sf);
}
resOuter.add(key, getFacetRangeCounts(sf, calc));
}
// in solr/core/src/java/org/apache/solr/request/SimpleFacets.java
private <T extends Comparable<T>> NamedList getFacetRangeCounts
(final SchemaField sf,
final RangeEndpointCalculator<T> calc) throws IOException {
final String f = sf.getName();
final NamedList<Object> res = new SimpleOrderedMap<Object>();
final NamedList<Integer> counts = new NamedList<Integer>();
res.add("counts", counts);
final T start = calc.getValue(required.getFieldParam(f,FacetParams.FACET_RANGE_START));
// not final, the hard end option may change this
T end = calc.getValue(required.getFieldParam(f,FacetParams.FACET_RANGE_END));
if (end.compareTo(start) < 0) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"range facet 'end' comes before 'start': "+end+" < "+start);
}
final String gap = required.getFieldParam(f, FacetParams.FACET_RANGE_GAP);
// explicitly return the gap. compute this early so we are more
// likely to catch parse errors before attempting math
res.add("gap", calc.getGap(gap));
final int minCount = params.getFieldInt(f,FacetParams.FACET_MINCOUNT, 0);
final EnumSet<FacetRangeInclude> include = FacetRangeInclude.parseParam
(params.getFieldParams(f,FacetParams.FACET_RANGE_INCLUDE));
T low = start;
while (low.compareTo(end) < 0) {
T high = calc.addGap(low, gap);
if (end.compareTo(high) < 0) {
if (params.getFieldBool(f,FacetParams.FACET_RANGE_HARD_END,false)) {
high = end;
} else {
end = high;
}
}
if (high.compareTo(low) < 0) {
throw new SolrException
(SolrException.ErrorCode.BAD_REQUEST,
"range facet infinite loop (is gap negative? did the math overflow?)");
}
final boolean includeLower =
(include.contains(FacetRangeInclude.LOWER) ||
(include.contains(FacetRangeInclude.EDGE) &&
0 == low.compareTo(start)));
final boolean includeUpper =
(include.contains(FacetRangeInclude.UPPER) ||
(include.contains(FacetRangeInclude.EDGE) &&
0 == high.compareTo(end)));
final String lowS = calc.formatValue(low);
final String highS = calc.formatValue(high);
final int count = rangeCount(sf, lowS, highS,
includeLower,includeUpper);
if (count >= minCount) {
counts.add(lowS, count);
}
low = high;
}
// explicitly return the start and end so all the counts
// (including before/after/between) are meaningful - even if mincount
// has removed the neighboring ranges
res.add("start", start);
res.add("end", end);
final String[] othersP =
params.getFieldParams(f,FacetParams.FACET_RANGE_OTHER);
if (null != othersP && 0 < othersP.length ) {
Set<FacetRangeOther> others = EnumSet.noneOf(FacetRangeOther.class);
for (final String o : othersP) {
others.add(FacetRangeOther.get(o));
}
// no matter what other values are listed, we don't do
// anything if "none" is specified.
if (! others.contains(FacetRangeOther.NONE) ) {
boolean all = others.contains(FacetRangeOther.ALL);
final String startS = calc.formatValue(start);
final String endS = calc.formatValue(end);
if (all || others.contains(FacetRangeOther.BEFORE)) {
// include upper bound if "outer" or if first gap doesn't already include it
res.add(FacetRangeOther.BEFORE.toString(),
rangeCount(sf,null,startS,
false,
(include.contains(FacetRangeInclude.OUTER) ||
(! (include.contains(FacetRangeInclude.LOWER) ||
include.contains(FacetRangeInclude.EDGE))))));
}
if (all || others.contains(FacetRangeOther.AFTER)) {
// include lower bound if "outer" or if last gap doesn't already include it
res.add(FacetRangeOther.AFTER.toString(),
rangeCount(sf,endS,null,
(include.contains(FacetRangeInclude.OUTER) ||
(! (include.contains(FacetRangeInclude.UPPER) ||
include.contains(FacetRangeInclude.EDGE)))),
false));
}
if (all || others.contains(FacetRangeOther.BETWEEN)) {
res.add(FacetRangeOther.BETWEEN.toString(),
rangeCount(sf,startS,endS,
(include.contains(FacetRangeInclude.LOWER) ||
include.contains(FacetRangeInclude.EDGE)),
(include.contains(FacetRangeInclude.UPPER) ||
include.contains(FacetRangeInclude.EDGE))));
}
}
}
return res;
}
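// Illustrative usage, not from the Solr source: the loop above is driven by the facet.range
// parameters, e.g. (the field name "price" is made up):
//
//   facet=true
//   facet.range=price
//   facet.range.start=0
//   facet.range.end=100
//   facet.range.gap=10
//   facet.range.include=lower,edge
//   facet.range.other=before,after
//
// Buckets are labeled with their lower bound; facet.range.hardend and facet.mincount control
// whether the last bucket is clipped to 'end' and which buckets are dropped.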
// in solr/core/src/java/org/apache/solr/request/SimpleFacets.java
protected int rangeCount(SchemaField sf, String low, String high,
boolean iLow, boolean iHigh) throws IOException {
Query rangeQ = sf.getType().getRangeQuery(null, sf,low,high,iLow,iHigh);
return searcher.numDocs(rangeQ ,base);
}
// in solr/core/src/java/org/apache/solr/request/SimpleFacets.java
@Deprecated
protected int rangeCount(SchemaField sf, Date low, Date high,
boolean iLow, boolean iHigh) throws IOException {
Query rangeQ = ((DateField)(sf.getType())).getRangeQuery(null, sf,low,high,iLow,iHigh);
return searcher.numDocs(rangeQ ,base);
}
// in solr/core/src/java/org/apache/solr/request/UnInvertedField.java
@Override
protected void visitTerm(TermsEnum te, int termNum) throws IOException {
if (termNum >= maxTermCounts.length) {
// resize by doubling - for very large number of unique terms, expanding
// by 4K and resultant GC will dominate uninvert times. Resize at end if material
int[] newMaxTermCounts = new int[maxTermCounts.length*2];
System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum);
maxTermCounts = newMaxTermCounts;
}
final BytesRef term = te.term();
if (te.docFreq() > maxTermDocFreq) {
TopTerm topTerm = new TopTerm();
topTerm.term = BytesRef.deepCopyOf(term);
topTerm.termNum = termNum;
bigTerms.put(topTerm.termNum, topTerm);
if (deState == null) {
deState = new SolrIndexSearcher.DocsEnumState();
deState.fieldName = field;
// deState.termsEnum = te.tenum;
deState.termsEnum = te; // TODO: check for MultiTermsEnum in SolrIndexSearcher could now fail?
deState.docsEnum = docsEnum;
deState.minSetSizeCached = maxTermDocFreq;
}
docsEnum = deState.docsEnum;
DocSet set = searcher.getDocSet(deState);
maxTermCounts[termNum] = set.size();
}
}
// in solr/core/src/java/org/apache/solr/request/UnInvertedField.java
public NamedList<Integer> getCounts(SolrIndexSearcher searcher, DocSet baseDocs, int offset, int limit, Integer mincount, boolean missing, String sort, String prefix) throws IOException {
use.incrementAndGet();
FieldType ft = searcher.getSchema().getFieldType(field);
NamedList<Integer> res = new NamedList<Integer>(); // order is important
DocSet docs = baseDocs;
int baseSize = docs.size();
int maxDoc = searcher.maxDoc();
//System.out.println("GET COUNTS field=" + field + " baseSize=" + baseSize + " minCount=" + mincount + " maxDoc=" + maxDoc + " numTermsInField=" + numTermsInField);
if (baseSize >= mincount) {
final int[] index = this.index;
// tricky: we add one more element than we need because we will reuse this array later
// for ordering term ords before converting to term labels.
final int[] counts = new int[numTermsInField + 1];
//
// If there is a prefix, find its start and end term numbers
//
int startTerm = 0;
int endTerm = numTermsInField; // one past the end
TermsEnum te = getOrdTermsEnum(searcher.getAtomicReader());
if (te != null && prefix != null && prefix.length() > 0) {
final BytesRef prefixBr = new BytesRef(prefix);
if (te.seekCeil(prefixBr, true) == TermsEnum.SeekStatus.END) {
startTerm = numTermsInField;
} else {
startTerm = (int) te.ord();
}
prefixBr.append(UnicodeUtil.BIG_TERM);
if (te.seekCeil(prefixBr, true) == TermsEnum.SeekStatus.END) {
endTerm = numTermsInField;
} else {
endTerm = (int) te.ord();
}
}
/***********
// Alternative 2: get the docSet of the prefix (could take a while) and
// then do the intersection with the baseDocSet first.
if (prefix != null && prefix.length() > 0) {
docs = searcher.getDocSet(new ConstantScorePrefixQuery(new Term(field, ft.toInternal(prefix))), docs);
// The issue with this method are problems of returning 0 counts for terms w/o
// the prefix. We can't just filter out those terms later because it may
// mean that we didn't collect enough terms in the queue (in the sorted case).
}
***********/
boolean doNegative = baseSize > maxDoc >> 1 && termInstances > 0
&& startTerm==0 && endTerm==numTermsInField
&& docs instanceof BitDocSet;
if (doNegative) {
OpenBitSet bs = (OpenBitSet)((BitDocSet)docs).getBits().clone();
bs.flip(0, maxDoc);
// TODO: when iterator across negative elements is available, use that
// instead of creating a new bitset and inverting.
docs = new BitDocSet(bs, maxDoc - baseSize);
// simply negating will mean that we have deleted docs in the set.
// that should be OK, as their entries in our table should be empty.
//System.out.println(" NEG");
}
// For the biggest terms, do straight set intersections
for (TopTerm tt : bigTerms.values()) {
//System.out.println(" do big termNum=" + tt.termNum + " term=" + tt.term.utf8ToString());
// TODO: counts could be deferred if sorted==false
if (tt.termNum >= startTerm && tt.termNum < endTerm) {
counts[tt.termNum] = searcher.numDocs(new TermQuery(new Term(field, tt.term)), docs);
//System.out.println(" count=" + counts[tt.termNum]);
} else {
//System.out.println("SKIP term=" + tt.termNum);
}
}
// TODO: we could short-circuit counting altogether for sorted faceting
// where we already have enough terms from the bigTerms
// TODO: we could shrink the size of the collection array, and
// additionally break when the termNumber got above endTerm, but
// it would require two extra conditionals in the inner loop (although
// they would be predictable for the non-prefix case).
// Perhaps a different copy of the code would be warranted.
if (termInstances > 0) {
DocIterator iter = docs.iterator();
while (iter.hasNext()) {
int doc = iter.nextDoc();
//System.out.println("iter doc=" + doc);
int code = index[doc];
if ((code & 0xff)==1) {
//System.out.println(" ptr");
int pos = code>>>8;
int whichArray = (doc >>> 16) & 0xff;
byte[] arr = tnums[whichArray];
int tnum = 0;
for(;;) {
int delta = 0;
for(;;) {
byte b = arr[pos++];
delta = (delta << 7) | (b & 0x7f);
if ((b & 0x80) == 0) break;
}
if (delta == 0) break;
tnum += delta - TNUM_OFFSET;
//System.out.println(" tnum=" + tnum);
counts[tnum]++;
}
} else {
//System.out.println(" inlined");
int tnum = 0;
int delta = 0;
for (;;) {
delta = (delta << 7) | (code & 0x7f);
if ((code & 0x80)==0) {
if (delta==0) break;
tnum += delta - TNUM_OFFSET;
//System.out.println(" tnum=" + tnum);
counts[tnum]++;
delta = 0;
}
code >>>= 8;
}
}
}
}
final CharsRef charsRef = new CharsRef();
int off=offset;
int lim=limit>=0 ? limit : Integer.MAX_VALUE;
if (sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
int maxsize = limit>0 ? offset+limit : Integer.MAX_VALUE-1;
maxsize = Math.min(maxsize, numTermsInField);
LongPriorityQueue queue = new LongPriorityQueue(Math.min(maxsize,1000), maxsize, Long.MIN_VALUE);
int min=mincount-1; // the smallest value in the top 'N' values
//System.out.println("START=" + startTerm + " END=" + endTerm);
for (int i=startTerm; i<endTerm; i++) {
int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
if (c>min) {
// NOTE: we use c>min rather than c>=min as an optimization because we are going in
// index order, so we already know that the keys are ordered. This can be very
// important if a lot of the counts are repeated (like zero counts would be).
// smaller term numbers sort higher, so subtract the term number instead
long pair = (((long)c)<<32) + (Integer.MAX_VALUE - i);
boolean displaced = queue.insert(pair);
if (displaced) min=(int)(queue.top() >>> 32);
}
}
// now select the right page from the results
// if we are deep paging, we don't have to order the highest "offset" counts.
int collectCount = Math.max(0, queue.size() - off);
assert collectCount <= lim;
// the start and end indexes of our list "sorted" (starting with the highest value)
int sortedIdxStart = queue.size() - (collectCount - 1);
int sortedIdxEnd = queue.size() + 1;
final long[] sorted = queue.sort(collectCount);
final int[] indirect = counts; // reuse the counts array for the index into the tnums array
assert indirect.length >= sortedIdxEnd;
for (int i=sortedIdxStart; i<sortedIdxEnd; i++) {
long pair = sorted[i];
int c = (int)(pair >>> 32);
int tnum = Integer.MAX_VALUE - (int)pair;
indirect[i] = i; // store the index for indirect sorting
sorted[i] = tnum; // reuse the "sorted" array to store the term numbers for indirect sorting
// add a null label for now... we'll fill it in later.
res.add(null, c);
}
// now sort the indexes by the term numbers
PrimUtils.sort(sortedIdxStart, sortedIdxEnd, indirect, new PrimUtils.IntComparator() {
@Override
public int compare(int a, int b) {
return (int)sorted[a] - (int)sorted[b];
}
@Override
public boolean lessThan(int a, int b) {
return sorted[a] < sorted[b];
}
@Override
public boolean equals(int a, int b) {
return sorted[a] == sorted[b];
}
});
// convert the term numbers to term values and set
// as the label
//System.out.println("sortStart=" + sortedIdxStart + " end=" + sortedIdxEnd);
for (int i=sortedIdxStart; i<sortedIdxEnd; i++) {
int idx = indirect[i];
int tnum = (int)sorted[idx];
final String label = getReadableValue(getTermValue(te, tnum), ft, charsRef);
//System.out.println(" label=" + label);
res.setName(idx - sortedIdxStart, label);
}
} else {
// add results in index order
int i=startTerm;
if (mincount<=0) {
// if mincount<=0, then we won't discard any terms and we know exactly
// where to start.
i=startTerm+off;
off=0;
}
for (; i<endTerm; i++) {
int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
if (c<mincount || --off>=0) continue;
if (--lim<0) break;
final String label = getReadableValue(getTermValue(te, i), ft, charsRef);
res.add(label, c);
}
}
}
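// Illustrative sketch, not from the Solr source: the inner loops above decode the uninverted
// term-number lists, which store deltas in a vByte-like form: 7 payload bits per byte, the high
// bit marking a continuation byte, each delta offset by TNUM_OFFSET, and a zero delta terminating
// the list. A standalone decoder for one such list (tnumOffset stands in for the class's
// TNUM_OFFSET constant; the method name is made up):
static java.util.List<Integer> decodeTermNums(byte[] arr, int pos, int tnumOffset) {
  java.util.List<Integer> termNums = new java.util.ArrayList<Integer>();
  int tnum = 0;
  for (;;) {
    int delta = 0;
    for (;;) {
      byte b = arr[pos++];
      delta = (delta << 7) | (b & 0x7f);   // accumulate 7 bits per byte
      if ((b & 0x80) == 0) break;          // high bit clear => last byte of this delta
    }
    if (delta == 0) break;                 // zero delta terminates the list
    tnum += delta - tnumOffset;
    termNums.add(tnum);
  }
  return termNums;
}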
// in solr/core/src/java/org/apache/solr/request/UnInvertedField.java
public StatsValues getStats(SolrIndexSearcher searcher, DocSet baseDocs, String[] facet) throws IOException {
//this function is ripped off nearly wholesale from the getCounts function to use
//for multiValued fields within the StatsComponent. may be useful to find common
//functionality between the two and refactor code somewhat
use.incrementAndGet();
SchemaField sf = searcher.getSchema().getField(field);
// FieldType ft = sf.getType();
StatsValues allstats = StatsValuesFactory.createStatsValues(sf);
DocSet docs = baseDocs;
int baseSize = docs.size();
int maxDoc = searcher.maxDoc();
if (baseSize <= 0) return allstats;
DocSet missing = docs.andNot( searcher.getDocSet(new TermRangeQuery(field, null, null, false, false)) );
int i = 0;
final FieldFacetStats[] finfo = new FieldFacetStats[facet.length];
//Initialize facetstats, if facets have been passed in
FieldCache.DocTermsIndex si;
for (String f : facet) {
SchemaField facet_sf = searcher.getSchema().getField(f);
try {
si = FieldCache.DEFAULT.getTermsIndex(searcher.getAtomicReader(), f);
}
catch (IOException e) {
throw new RuntimeException("failed to open field cache for: " + f, e);
}
finfo[i] = new FieldFacetStats(f, si, sf, facet_sf, numTermsInField);
i++;
}
final int[] index = this.index;
final int[] counts = new int[numTermsInField];//keep track of the number of times we see each word in the field for all the documents in the docset
TermsEnum te = getOrdTermsEnum(searcher.getAtomicReader());
boolean doNegative = false;
if (finfo.length == 0) {
//if we're collecting statistics with a facet field, can't do inverted counting
doNegative = baseSize > maxDoc >> 1 && termInstances > 0
&& docs instanceof BitDocSet;
}
if (doNegative) {
OpenBitSet bs = (OpenBitSet) ((BitDocSet) docs).getBits().clone();
bs.flip(0, maxDoc);
// TODO: when iterator across negative elements is available, use that
// instead of creating a new bitset and inverting.
docs = new BitDocSet(bs, maxDoc - baseSize);
// simply negating will mean that we have deleted docs in the set.
// that should be OK, as their entries in our table should be empty.
}
// For the biggest terms, do straight set intersections
for (TopTerm tt : bigTerms.values()) {
// TODO: counts could be deferred if sorted==false
if (tt.termNum >= 0 && tt.termNum < numTermsInField) {
final Term t = new Term(field, tt.term);
if (finfo.length == 0) {
counts[tt.termNum] = searcher.numDocs(new TermQuery(t), docs);
} else {
//COULD BE VERY SLOW
//if we're collecting stats for facet fields, we need to iterate on all matching documents
DocSet bigTermDocSet = searcher.getDocSet(new TermQuery(t)).intersection(docs);
DocIterator iter = bigTermDocSet.iterator();
while (iter.hasNext()) {
int doc = iter.nextDoc();
counts[tt.termNum]++;
for (FieldFacetStats f : finfo) {
f.facetTermNum(doc, tt.termNum);
}
}
}
}
}
if (termInstances > 0) {
DocIterator iter = docs.iterator();
while (iter.hasNext()) {
int doc = iter.nextDoc();
int code = index[doc];
if ((code & 0xff) == 1) {
int pos = code >>> 8;
int whichArray = (doc >>> 16) & 0xff;
byte[] arr = tnums[whichArray];
int tnum = 0;
for (; ;) {
int delta = 0;
for (; ;) {
byte b = arr[pos++];
delta = (delta << 7) | (b & 0x7f);
if ((b & 0x80) == 0) break;
}
if (delta == 0) break;
tnum += delta - TNUM_OFFSET;
counts[tnum]++;
for (FieldFacetStats f : finfo) {
f.facetTermNum(doc, tnum);
}
}
} else {
int tnum = 0;
int delta = 0;
for (; ;) {
delta = (delta << 7) | (code & 0x7f);
if ((code & 0x80) == 0) {
if (delta == 0) break;
tnum += delta - TNUM_OFFSET;
counts[tnum]++;
for (FieldFacetStats f : finfo) {
f.facetTermNum(doc, tnum);
}
delta = 0;
}
code >>>= 8;
}
}
}
}
// add results in index order
for (i = 0; i < numTermsInField; i++) {
int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
if (c == 0) continue;
BytesRef value = getTermValue(te, i);
allstats.accumulate(value, c);
//as we've parsed the termnum into a value, let's also accumulate field facet statistics
for (FieldFacetStats f : finfo) {
f.accumulateTermNum(i, value);
}
}
int c = missing.size();
allstats.addMissing(c);
if (finfo.length > 0) {
for (FieldFacetStats f : finfo) {
Map<String, StatsValues> facetStatsValues = f.facetStatsValues;
FieldType facetType = searcher.getSchema().getFieldType(f.name);
for (Map.Entry<String,StatsValues> entry : facetStatsValues.entrySet()) {
String termLabel = entry.getKey();
int missingCount = searcher.numDocs(new TermQuery(new Term(f.name, facetType.toInternal(termLabel))), missing);
entry.getValue().addMissing(missingCount);
}
allstats.addFacet(f.name, facetStatsValues);
}
}
return allstats;
}
// in solr/core/src/java/org/apache/solr/request/UnInvertedField.java
BytesRef getTermValue(TermsEnum te, int termNum) throws IOException {
//System.out.println("getTermValue termNum=" + termNum + " this=" + this + " numTerms=" + numTermsInField);
if (bigTerms.size() > 0) {
// see if the term is one of our big terms.
TopTerm tt = bigTerms.get(termNum);
if (tt != null) {
//System.out.println(" return big " + tt.term);
return tt.term;
}
}
return lookupTerm(te, termNum);
}
// in solr/core/src/java/org/apache/solr/request/UnInvertedField.java
public static UnInvertedField getUnInvertedField(String field, SolrIndexSearcher searcher) throws IOException {
SolrCache<String,UnInvertedField> cache = searcher.getFieldValueCache();
if (cache == null) {
return new UnInvertedField(field, searcher);
}
UnInvertedField uif = cache.get(field);
if (uif == null) {
synchronized (cache) {
uif = cache.get(field);
if (uif == null) {
uif = new UnInvertedField(field, searcher);
cache.put(field, uif);
}
}
}
return uif;
}
// in solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java
public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) throws IOException, ServletException {
if( abortErrorMessage != null ) {
((HttpServletResponse)response).sendError( 500, abortErrorMessage );
return;
}
if (this.cores == null) {
((HttpServletResponse)response).sendError( 403, "Server is shutting down" );
return;
}
CoreContainer cores = this.cores;
SolrCore core = null;
SolrQueryRequest solrReq = null;
if( request instanceof HttpServletRequest) {
HttpServletRequest req = (HttpServletRequest)request;
HttpServletResponse resp = (HttpServletResponse)response;
SolrRequestHandler handler = null;
String corename = "";
try {
// put the core container in request attribute
req.setAttribute("org.apache.solr.CoreContainer", cores);
String path = req.getServletPath();
if( req.getPathInfo() != null ) {
// this lets you handle /update/commit when /update is a servlet
path += req.getPathInfo();
}
if( pathPrefix != null && path.startsWith( pathPrefix ) ) {
path = path.substring( pathPrefix.length() );
}
// check for management path
String alternate = cores.getManagementPath();
if (alternate != null && path.startsWith(alternate)) {
path = path.substring(0, alternate.length());
}
// unused feature ?
int idx = path.indexOf( ':' );
if( idx > 0 ) {
// save the portion after the ':' for a 'handler' path parameter
path = path.substring( 0, idx );
}
// Check for the core admin page
if( path.equals( cores.getAdminPath() ) ) {
handler = cores.getMultiCoreHandler();
solrReq = adminRequestParser.parse(null,path, req);
handleAdminRequest(req, response, handler, solrReq);
return;
}
else {
//otherwise, we should find a core from the path
idx = path.indexOf( "/", 1 );
if( idx > 1 ) {
// try to get the corename as a request parameter first
corename = path.substring( 1, idx );
core = cores.getCore(corename);
if (core != null) {
path = path.substring( idx );
}
}
if (core == null) {
if (!cores.isZooKeeperAware() ) {
core = cores.getCore("");
}
}
}
if (core == null && cores.isZooKeeperAware()) {
// we couldn't find the core - let's make sure a collection was not specified instead
core = getCoreByCollection(cores, corename, path);
if (core != null) {
// we found a core, update the path
path = path.substring( idx );
} else {
// try the default core
core = cores.getCore("");
}
// TODO: if we couldn't find it locally, look on other nodes
}
// With a valid core...
if( core != null ) {
final SolrConfig config = core.getSolrConfig();
// get or create/cache the parser for the core
SolrRequestParsers parser = null;
parser = parsers.get(config);
if( parser == null ) {
parser = new SolrRequestParsers(config);
parsers.put(config, parser );
}
// Determine the handler from the url path if not set
// (we might already have selected the cores handler)
if( handler == null && path.length() > 1 ) { // don't match "" or "/" as valid path
handler = core.getRequestHandler( path );
// no handler yet but allowed to handle select; let's check
if( handler == null && parser.isHandleSelect() ) {
if( "/select".equals( path ) || "/select/".equals( path ) ) {
solrReq = parser.parse( core, path, req );
String qt = solrReq.getParams().get( CommonParams.QT );
handler = core.getRequestHandler( qt );
if( handler == null ) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "unknown handler: "+qt);
}
if( qt != null && qt.startsWith("/") && (handler instanceof ContentStreamHandlerBase)) {
//For security reasons it's a bad idea to allow a leading '/', ex: /select?qt=/update see SOLR-3161
//There was no restriction from Solr 1.4 thru 3.5 and it's not supported for update handlers.
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Invalid query type. Do not use /select to access: "+qt);
}
}
}
}
// With a valid handler and a valid core...
if( handler != null ) {
// if not a /select, create the request
if( solrReq == null ) {
solrReq = parser.parse( core, path, req );
}
final Method reqMethod = Method.getMethod(req.getMethod());
HttpCacheHeaderUtil.setCacheControlHeader(config, resp, reqMethod);
// unless we have been explicitly told not to, do cache validation
// if we fail cache validation, execute the query
if (config.getHttpCachingConfig().isNever304() ||
!HttpCacheHeaderUtil.doCacheHeaderValidation(solrReq, req, reqMethod, resp)) {
SolrQueryResponse solrRsp = new SolrQueryResponse();
/* even for HEAD requests, we need to execute the handler to
* ensure we don't get an error (and to make sure the correct
* QueryResponseWriter is selected and we get the correct
* Content-Type)
*/
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(solrReq, solrRsp));
this.execute( req, handler, solrReq, solrRsp );
HttpCacheHeaderUtil.checkHttpCachingVeto(solrRsp, resp, reqMethod);
// add info to http headers
//TODO: See SOLR-232 and SOLR-267.
/*try {
NamedList solrRspHeader = solrRsp.getResponseHeader();
for (int i=0; i<solrRspHeader.size(); i++) {
((javax.servlet.http.HttpServletResponse) response).addHeader(("Solr-" + solrRspHeader.getName(i)), String.valueOf(solrRspHeader.getVal(i)));
}
} catch (ClassCastException cce) {
log.log(Level.WARNING, "exception adding response header log information", cce);
}*/
QueryResponseWriter responseWriter = core.getQueryResponseWriter(solrReq);
writeResponse(solrRsp, response, responseWriter, solrReq, reqMethod);
}
return; // we are done with a valid handler
}
}
log.debug("no handler or core retrieved for " + path + ", follow through...");
}
catch (Throwable ex) {
sendError( core, solrReq, request, (HttpServletResponse)response, ex );
return;
}
finally {
if( solrReq != null ) {
solrReq.close();
}
if (core != null) {
core.close();
}
SolrRequestInfo.clearRequestInfo();
}
}
// Otherwise let the webapp handle the request
chain.doFilter(request, response);
}
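// --- Illustrative sketch (not part of the Solr source): the dispatcher above peels an optional
// --- core name off the front of the servlet path, e.g. "/core1/select" -> core "core1" and
// --- handler path "/select", while a bare "/select" falls through to the default core. A
// --- stripped-down version of just that string handling (class and names are hypothetical):
public class CorePathSplitExample {
  public static void main(String[] args) {
    split("/core1/select");  // prints: core=core1 path=/select
    split("/select");        // prints: core=<default> path=/select
  }
  static void split(String path) {
    String corename = "";
    int idx = path.indexOf("/", 1);
    if (idx > 1) {
      corename = path.substring(1, idx);  // candidate core name between the first two slashes
      path = path.substring(idx);         // remainder is the handler path
    }
    System.out.println("core=" + (corename.length() == 0 ? "<default>" : corename) + " path=" + path);
  }
}
// --- (in the real filter the prefix is only stripped when that core actually exists or maps to a collection)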
// in solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java
private void handleAdminRequest(HttpServletRequest req, ServletResponse response, SolrRequestHandler handler,
SolrQueryRequest solrReq) throws IOException {
SolrQueryResponse solrResp = new SolrQueryResponse();
final NamedList<Object> responseHeader = new SimpleOrderedMap<Object>();
solrResp.add("responseHeader", responseHeader);
NamedList toLog = solrResp.getToLog();
toLog.add("webapp", req.getContextPath());
toLog.add("path", solrReq.getContext().get("path"));
toLog.add("params", "{" + solrReq.getParamString() + "}");
handler.handleRequest(solrReq, solrResp);
SolrCore.setResponseHeaderValues(handler, solrReq, solrResp);
StringBuilder sb = new StringBuilder();
for (int i = 0; i < toLog.size(); i++) {
String name = toLog.getName(i);
Object val = toLog.getVal(i);
sb.append(name).append("=").append(val).append(" ");
}
QueryResponseWriter respWriter = SolrCore.DEFAULT_RESPONSE_WRITERS.get(solrReq.getParams().get(CommonParams.WT));
if (respWriter == null) respWriter = SolrCore.DEFAULT_RESPONSE_WRITERS.get("standard");
writeResponse(solrResp, response, respWriter, solrReq, Method.getMethod(req.getMethod()));
}
// in solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java
private void writeResponse(SolrQueryResponse solrRsp, ServletResponse response,
QueryResponseWriter responseWriter, SolrQueryRequest solrReq, Method reqMethod)
throws IOException {
// Now write it out
final String ct = responseWriter.getContentType(solrReq, solrRsp);
// don't call setContentType on null
if (null != ct) response.setContentType(ct);
if (solrRsp.getException() != null) {
NamedList info = new SimpleOrderedMap();
int code = getErrorInfo(solrRsp.getException(),info);
solrRsp.add("error", info);
((HttpServletResponse) response).setStatus(code);
}
if (Method.HEAD != reqMethod) {
if (responseWriter instanceof BinaryQueryResponseWriter) {
BinaryQueryResponseWriter binWriter = (BinaryQueryResponseWriter) responseWriter;
binWriter.write(response.getOutputStream(), solrReq, solrRsp);
} else {
String charset = ContentStreamBase.getCharsetFromContentType(ct);
Writer out = (charset == null || charset.equalsIgnoreCase("UTF-8"))
? new OutputStreamWriter(response.getOutputStream(), UTF8)
: new OutputStreamWriter(response.getOutputStream(), charset);
out = new FastWriter(out);
responseWriter.write(out, solrReq, solrRsp);
out.flush();
}
}
//else http HEAD request, nothing to write out, waited this long just to get ContentType
}
// in solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java
protected void sendError(SolrCore core,
SolrQueryRequest req,
ServletRequest request,
HttpServletResponse response,
Throwable ex) throws IOException {
try {
SolrQueryResponse solrResp = new SolrQueryResponse();
if(ex instanceof Exception) {
solrResp.setException((Exception)ex);
}
else {
solrResp.setException(new RuntimeException(ex));
}
if(core==null) {
core = cores.getCore(""); // default core
}
if(req==null) {
req = new SolrQueryRequestBase(core,new ServletSolrParams(request)) {};
}
QueryResponseWriter writer = core.getQueryResponseWriter(req);
writeResponse(solrResp, response, writer, req, Method.GET);
}
catch( Throwable t ) { // This error really does not matter
SimpleOrderedMap info = new SimpleOrderedMap();
int code=getErrorInfo(ex, info);
response.sendError( code, info.toString() );
}
}
// in solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java
Override
public void doGet(HttpServletRequest request,
HttpServletResponse response)
throws IOException, ServletException {
response.setCharacterEncoding("UTF-8");
response.setContentType("application/json");
// This attribute is set by the SolrDispatchFilter
CoreContainer cores = (CoreContainer) request.getAttribute("org.apache.solr.CoreContainer");
String path = request.getParameter("path");
String addr = request.getParameter("addr");
if (addr != null && addr.length() == 0) {
addr = null;
}
String detailS = request.getParameter("detail");
boolean detail = detailS != null && detailS.equals("true");
String dumpS = request.getParameter("dump");
boolean dump = dumpS != null && dumpS.equals("true");
PrintWriter out = response.getWriter();
ZKPrinter printer = new ZKPrinter(response, out, cores.getZkController(), addr);
printer.detail = detail;
printer.dump = dump;
try {
printer.print(path);
} finally {
printer.close();
}
}
// in solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java
Override
public void doPost(HttpServletRequest request,
HttpServletResponse response)
throws IOException, ServletException {
doGet(request, response);
}
// in solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java
void print(String path) throws IOException {
if (zkClient == null) {
return;
}
// normalize path
if (path == null) {
path = "/";
} else {
path = path.trim(); // trim() returns a new string, so assign the result back
if (path.length() == 0) {
path = "/";
}
}
if (path.endsWith("/") && path.length() > 1) {
path = path.substring(0, path.length() - 1);
}
int idx = path.lastIndexOf('/');
String parent = idx >= 0 ? path.substring(0, idx) : path;
if (parent.length() == 0) {
parent = "/";
}
CharArr chars = new CharArr();
JSONWriter json = new JSONWriter(chars, 2);
json.startObject();
if (detail) {
if (!printZnode(json, path)) {
return;
}
json.writeValueSeparator();
}
json.writeString("tree");
json.writeNameSeparator();
json.startArray();
if (!printTree(json, path)) {
return; // there was an error
}
json.endArray();
json.endObject();
out.println(chars.toString());
}
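// --- Illustrative note (not from the Solr source): based on print()/printZnode()/printTree()
// --- above, the JSON written for a detail=true request has roughly this shape (values made up):
//
//   {
//     "znode": {
//       "path": "/configs/conf1/schema.xml",
//       "prop": { "version": 0, "aversion": 0, "children_count": 0, ... },
//       "data": "..."
//     },
//     "tree": [
//       { "data": { "title": "schema.xml",
//                   "attr": { "href": "zookeeper?detail=true&path=%2Fconfigs%2Fconf1%2Fschema.xml" } } }
//     ]
//   }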
// in solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java
boolean printTree(JSONWriter json, String path) throws IOException {
String label = path;
if (!fullpath) {
int idx = path.lastIndexOf('/');
label = idx > 0 ? path.substring(idx + 1) : path;
}
json.startObject();
//writeKeyValue(json, "data", label, true );
json.writeString("data");
json.writeNameSeparator();
json.startObject();
writeKeyValue(json, "title", label, true);
json.writeValueSeparator();
json.writeString("attr");
json.writeNameSeparator();
json.startObject();
writeKeyValue(json, "href", "zookeeper?detail=true&path=" + URLEncoder.encode(path, "UTF-8"), true);
json.endObject();
json.endObject();
Stat stat = new Stat();
try {
// Trickily, the call to zkClient.getData fills in the stat variable
byte[] data = zkClient.getData(path, null, stat, true);
if (stat.getEphemeralOwner() != 0) {
writeKeyValue(json, "ephemeral", true, false);
writeKeyValue(json, "version", stat.getVersion(), false);
}
if (dump) {
json.writeValueSeparator();
printZnode(json, path);
}
/*
if (stat.getNumChildren() != 0)
{
writeKeyValue(json, "children_count", stat.getNumChildren(), false );
out.println(", \"children_count\" : \"" + stat.getNumChildren() + "\"");
}
*/
//if (stat.getDataLength() != 0)
if (data != null) {
String str = new BytesRef(data).utf8ToString();
//?? writeKeyValue(json, "content", str, false );
// Does nothing now, but we leave it in on the assumption it will be used later.
// If it is removed, the catch blocks below need to be restructured.
}
} catch (IllegalArgumentException e) {
// path doesn't exist (must have been removed)
writeKeyValue(json, "warning", "(path gone)", false);
} catch (KeeperException e) {
writeKeyValue(json, "warning", e.toString(), false);
log.warn("Keeper Exception", e);
} catch (InterruptedException e) {
writeKeyValue(json, "warning", e.toString(), false);
log.warn("InterruptedException", e);
}
if (stat.getNumChildren() > 0) {
json.writeValueSeparator();
if (indent) {
json.indent();
}
json.writeString("children");
json.writeNameSeparator();
json.startArray();
try {
List<String> children = zkClient.getChildren(path, null, true);
java.util.Collections.sort(children);
boolean first = true;
for (String child : children) {
if (!first) {
json.writeValueSeparator();
}
String childPath = path + (path.endsWith("/") ? "" : "/") + child;
if (!printTree(json, childPath)) {
return false;
}
first = false;
}
} catch (KeeperException e) {
writeError(500, e.toString());
return false;
} catch (InterruptedException e) {
writeError(500, e.toString());
return false;
} catch (IllegalArgumentException e) {
// path doesn't exist (must have been removed)
json.writeString("(children gone)");
}
json.endArray();
}
json.endObject();
return true;
}
// in solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java
boolean printZnode(JSONWriter json, String path) throws IOException {
try {
Stat stat = new Stat();
// Trickily, the call to zkClient.getData fills in the stat variable
byte[] data = zkClient.getData(path, null, stat, true);
json.writeString("znode");
json.writeNameSeparator();
json.startObject();
writeKeyValue(json, "path", path, true);
json.writeValueSeparator();
json.writeString("prop");
json.writeNameSeparator();
json.startObject();
writeKeyValue(json, "version", stat.getVersion(), true);
writeKeyValue(json, "aversion", stat.getAversion(), false);
writeKeyValue(json, "children_count", stat.getNumChildren(), false);
writeKeyValue(json, "ctime", time(stat.getCtime()), false);
writeKeyValue(json, "cversion", stat.getCversion(), false);
writeKeyValue(json, "czxid", stat.getCzxid(), false);
writeKeyValue(json, "dataLength", stat.getDataLength(), false);
writeKeyValue(json, "ephemeralOwner", stat.getEphemeralOwner(), false);
writeKeyValue(json, "mtime", time(stat.getMtime()), false);
writeKeyValue(json, "mzxid", stat.getMzxid(), false);
writeKeyValue(json, "pzxid", stat.getPzxid(), false);
json.endObject();
if (data != null) {
writeKeyValue(json, "data", new BytesRef(data).utf8ToString(), false);
}
json.endObject();
} catch (KeeperException e) {
writeError(500, e.toString());
return false;
} catch (InterruptedException e) {
writeError(500, e.toString());
return false;
}
return true;
}
// in solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java
public InputStream getStream() throws IOException {
return req.getInputStream();
}
// in solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java
public InputStream getStream() throws IOException {
return item.getInputStream();
}
// in solr/core/src/java/org/apache/solr/servlet/LoadAdminUiServlet.java
Override
public void doGet(HttpServletRequest request,
HttpServletResponse response)
throws IOException, ServletException {
response.setCharacterEncoding("UTF-8");
response.setContentType("text/html");
PrintWriter out = response.getWriter();
InputStream in = getServletContext().getResourceAsStream("/admin.html");
if(in != null) {
try {
// This attribute is set by the SolrDispatchFilter
CoreContainer cores = (CoreContainer) request.getAttribute("org.apache.solr.CoreContainer");
String html = IOUtils.toString(in, "UTF-8");
String[] search = new String[] {
"${contextPath}",
"${adminPath}"
};
String[] replace = new String[] {
StringEscapeUtils.escapeJavaScript(request.getContextPath()),
StringEscapeUtils.escapeJavaScript(cores.getAdminPath())
};
out.println( StringUtils.replaceEach(html, search, replace) );
} finally {
IOUtils.closeQuietly(in);
}
} else {
out.println("solr");
}
}
// in solr/core/src/java/org/apache/solr/servlet/LoadAdminUiServlet.java
Override
public void doPost(HttpServletRequest request,
HttpServletResponse response)
throws IOException, ServletException {
doGet(request, response);
}
// in solr/core/src/java/org/apache/solr/servlet/RedirectServlet.java
public void doGet(HttpServletRequest req, HttpServletResponse res)
throws ServletException,IOException {
res.setStatus(code);
res.setHeader("Location", destination);
}
// in solr/core/src/java/org/apache/solr/servlet/RedirectServlet.java
public void doPost(HttpServletRequest req, HttpServletResponse res)
throws ServletException,IOException {
doGet(req,res);
}
// in solr/core/src/java/org/apache/solr/servlet/cache/HttpCacheHeaderUtil.java
public static void sendNotModified(HttpServletResponse res)
throws IOException {
res.setStatus(HttpServletResponse.SC_NOT_MODIFIED);
}
// in solr/core/src/java/org/apache/solr/servlet/cache/HttpCacheHeaderUtil.java
public static void sendPreconditionFailed(HttpServletResponse res)
throws IOException {
res.setStatus(HttpServletResponse.SC_PRECONDITION_FAILED);
}
// in solr/core/src/java/org/apache/solr/servlet/cache/HttpCacheHeaderUtil.java
public static boolean doCacheHeaderValidation(final SolrQueryRequest solrReq,
final HttpServletRequest req,
final Method reqMethod,
final HttpServletResponse resp)
throws IOException {
if (Method.POST==reqMethod || Method.OTHER==reqMethod) {
return false;
}
final long lastMod = HttpCacheHeaderUtil.calcLastModified(solrReq);
final String etag = HttpCacheHeaderUtil.calcEtag(solrReq);
resp.setDateHeader("Last-Modified", lastMod);
resp.setHeader("ETag", etag);
if (checkETagValidators(req, resp, reqMethod, etag)) {
return true;
}
if (checkLastModValidators(req, resp, lastMod)) {
return true;
}
return false;
}
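// --- Illustrative sketch (not part of the Solr source): the validation above answers conditional
// --- requests, so a client that resends the validators from an earlier response can get a cheap
// --- 304 instead of a full body. A minimal client using only the JDK (URL is a placeholder):
import java.net.HttpURLConnection;
import java.net.URL;
public class ConditionalGetExample {
  public static void main(String[] args) throws Exception {
    URL url = new URL("http://localhost:8983/solr/select?q=*:*");  // hypothetical endpoint
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    conn.setRequestProperty("If-None-Match", "\"etag-from-previous-response\"");  // ETag validator
    conn.setIfModifiedSince(System.currentTimeMillis() - 60000L);                 // Last-Modified validator
    int status = conn.getResponseCode();
    if (status == HttpURLConnection.HTTP_NOT_MODIFIED) {
      System.out.println("304 Not Modified - the cached response is still valid");
    } else {
      System.out.println("fresh response, status " + status);
    }
  }
}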
// in solr/core/src/java/org/apache/solr/servlet/cache/HttpCacheHeaderUtil.java
public static boolean checkLastModValidators(final HttpServletRequest req,
final HttpServletResponse resp,
final long lastMod)
throws IOException {
try {
// First check If-Modified-Since, because it is the most commonly
// used header sent by HTTP clients
final long modifiedSince = req.getDateHeader("If-Modified-Since");
if (modifiedSince != -1L && lastMod <= modifiedSince) {
// Send a "not-modified"
sendNotModified(resp);
return true;
}
final long unmodifiedSince = req.getDateHeader("If-Unmodified-Since");
if (unmodifiedSince != -1L && lastMod > unmodifiedSince) {
// Send a "precondition failed"
sendPreconditionFailed(resp);
return true;
}
} catch (IllegalArgumentException iae) {
// one of our date headers was not formatted properly, ignore it
/* NOOP */
}
return false;
}
// in solr/core/src/java/org/apache/solr/schema/BCDIntField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
writer.writeInt(name,toExternal(f));
}
// in solr/core/src/java/org/apache/solr/schema/SortableIntField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
String sval = f.stringValue();
writer.writeInt(name, NumberUtils.SortableStr2int(sval,0,sval.length()));
}
// in solr/core/src/java/org/apache/solr/schema/SortableIntField.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final int def = defVal;
return new DocTermsIndexDocValues(this, readerContext, field) {
private final BytesRef spare = new BytesRef();
@Override
protected String toTerm(String readableValue) {
return NumberUtils.int2sortableStr(readableValue);
}
@Override
public float floatVal(int doc) {
return (float)intVal(doc);
}
@Override
public int intVal(int doc) {
int ord=termsIndex.getOrd(doc);
return ord==0 ? def : NumberUtils.SortableStr2int(termsIndex.lookup(ord, spare),0,3);
}
@Override
public long longVal(int doc) {
return (long)intVal(doc);
}
@Override
public double doubleVal(int doc) {
return (double)intVal(doc);
}
@Override
public String strVal(int doc) {
return Integer.toString(intVal(doc));
}
@Override
public String toString(int doc) {
return description() + '=' + intVal(doc);
}
@Override
public Object objectVal(int doc) {
int ord=termsIndex.getOrd(doc);
return ord==0 ? null : NumberUtils.SortableStr2int(termsIndex.lookup(ord, spare));
}
@Override
public ValueFiller getValueFiller() {
return new ValueFiller() {
private final MutableValueInt mval = new MutableValueInt();
@Override
public MutableValue getValue() {
return mval;
}
@Override
public void fillValue(int doc) {
int ord=termsIndex.getOrd(doc);
if (ord == 0) {
mval.value = def;
mval.exists = false;
} else {
mval.value = NumberUtils.SortableStr2int(termsIndex.lookup(ord, spare),0,3);
mval.exists = true;
}
}
};
}
};
}
// in solr/core/src/java/org/apache/solr/schema/StrField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
writer.writeStr(name, f.stringValue(), true);
}
// in solr/core/src/java/org/apache/solr/schema/DoubleField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
String s = f.stringValue();
// these values may be from a legacy lucene index, which may
// not be properly formatted in some output formats, or may
// incorrectly have a zero length.
if (s.length()==0) {
// zero length value means someone mistakenly indexed the value
// instead of simply leaving it out. Write a null value instead of a numeric.
writer.writeNull(name);
return;
}
try {
double val = Double.parseDouble(s);
writer.writeDouble(name, val);
} catch (NumberFormatException e){
// can't parse - write out the contents as a string so nothing is lost and
// clients don't get a parse error.
writer.writeStr(name, s, true);
}
}
// in solr/core/src/java/org/apache/solr/schema/RandomSortField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { }
// in solr/core/src/java/org/apache/solr/schema/RandomSortField.java
Override
public FieldComparator<Integer> newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
return new FieldComparator<Integer>() {
int seed;
private final int[] values = new int[numHits];
int bottomVal;
@Override
public int compare(int slot1, int slot2) {
return values[slot1] - values[slot2]; // values will be positive... no overflow possible.
}
@Override
public void setBottom(int slot) {
bottomVal = values[slot];
}
@Override
public int compareBottom(int doc) throws IOException {
return bottomVal - hash(doc+seed);
}
@Override
public void copy(int slot, int doc) throws IOException {
values[slot] = hash(doc+seed);
}
@Override
public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
seed = getSeed(fieldname, context);
return this;
}
@Override
public Integer value(int slot) {
return values[slot];
}
@Override
public int compareDocToValue(int doc, Integer valueObj) {
// values will be positive... no overflow possible.
return hash(doc+seed) - valueObj.intValue();
}
};
}
// in solr/core/src/java/org/apache/solr/schema/RandomSortField.java
Override
public int compareBottom(int doc) throws IOException {
return bottomVal - hash(doc+seed);
}
// in solr/core/src/java/org/apache/solr/schema/RandomSortField.java
Override
public void copy(int slot, int doc) throws IOException {
values[slot] = hash(doc+seed);
}
// in solr/core/src/java/org/apache/solr/schema/RandomSortField.java
Override
public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
seed = getSeed(fieldname, context);
return this;
}
// in solr/core/src/java/org/apache/solr/schema/RandomSortField.java
Override
public FunctionValues getValues(Map context, final AtomicReaderContext readerContext) throws IOException {
return new IntDocValues(this) {
private final int seed = getSeed(field, readerContext);
@Override
public int intVal(int doc) {
return hash(doc+seed);
}
};
}
// in solr/core/src/java/org/apache/solr/schema/DateField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
writer.writeDate(name, toExternal(f));
}
// in solr/core/src/java/org/apache/solr/schema/DateField.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
return new DocTermsIndexDocValues(this, readerContext, field) {
@Override
protected String toTerm(String readableValue) {
// needed for frange queries to work properly
return ft.toInternal(readableValue);
}
@Override
public float floatVal(int doc) {
return (float)intVal(doc);
}
@Override
public int intVal(int doc) {
int ord=termsIndex.getOrd(doc);
return ord;
}
@Override
public long longVal(int doc) {
return (long)intVal(doc);
}
@Override
public double doubleVal(int doc) {
return (double)intVal(doc);
}
@Override
public String strVal(int doc) {
int ord=termsIndex.getOrd(doc);
if (ord == 0) {
return null;
} else {
final BytesRef br = termsIndex.lookup(ord, spare);
return ft.indexedToReadable(br, spareChars).toString();
}
}
@Override
public Object objectVal(int doc) {
int ord=termsIndex.getOrd(doc);
if (ord == 0) {
return null;
} else {
final BytesRef br = termsIndex.lookup(ord, new BytesRef());
return ft.toObject(null, br);
}
}
@Override
public String toString(int doc) {
return description() + '=' + intVal(doc);
}
};
}
// in solr/core/src/java/org/apache/solr/schema/CurrencyField.java
public void write(XMLWriter xmlWriter, String name, IndexableField field) throws IOException {
xmlWriter.writeStr(name, field.stringValue(), false);
}
// in solr/core/src/java/org/apache/solr/schema/CurrencyField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField field) throws IOException {
writer.writeStr(name, field.stringValue(), false);
}
// in solr/core/src/java/org/apache/solr/schema/CurrencyField.java
public FunctionValues getValues(Map context, AtomicReaderContext reader) throws IOException {
final FunctionValues amounts = amountValues.getValues(context, reader);
final FunctionValues currencies = currencyValues.getValues(context, reader);
return new FunctionValues() {
private final int MAX_CURRENCIES_TO_CACHE = 256;
private final int[] fractionDigitCache = new int[MAX_CURRENCIES_TO_CACHE];
private final String[] currencyOrdToCurrencyCache = new String[MAX_CURRENCIES_TO_CACHE];
private final double[] exchangeRateCache = new double[MAX_CURRENCIES_TO_CACHE];
private int targetFractionDigits = -1;
private int targetCurrencyOrd = -1;
private boolean initializedCache;
private String getDocCurrencyCode(int doc, int currencyOrd) {
if (currencyOrd < MAX_CURRENCIES_TO_CACHE) {
String currency = currencyOrdToCurrencyCache[currencyOrd];
if (currency == null) {
currencyOrdToCurrencyCache[currencyOrd] = currency = currencies.strVal(doc);
}
if (currency == null) {
currency = defaultCurrency;
}
if (targetCurrencyOrd == -1 && currency.equals(targetCurrencyCode)) {
targetCurrencyOrd = currencyOrd;
}
return currency;
} else {
return currencies.strVal(doc);
}
}
public long longVal(int doc) {
if (!initializedCache) {
for (int i = 0; i < fractionDigitCache.length; i++) {
fractionDigitCache[i] = -1;
}
initializedCache = true;
}
long amount = amounts.longVal(doc);
int currencyOrd = currencies.ordVal(doc);
if (currencyOrd == targetCurrencyOrd) {
return amount;
}
double exchangeRate;
int sourceFractionDigits;
if (targetFractionDigits == -1) {
targetFractionDigits = Currency.getInstance(targetCurrencyCode).getDefaultFractionDigits();
}
if (currencyOrd < MAX_CURRENCIES_TO_CACHE) {
exchangeRate = exchangeRateCache[currencyOrd];
if (exchangeRate <= 0.0) {
String sourceCurrencyCode = getDocCurrencyCode(doc, currencyOrd);
exchangeRate = exchangeRateCache[currencyOrd] = provider.getExchangeRate(sourceCurrencyCode, targetCurrencyCode);
}
sourceFractionDigits = fractionDigitCache[currencyOrd];
if (sourceFractionDigits == -1) {
String sourceCurrencyCode = getDocCurrencyCode(doc, currencyOrd);
sourceFractionDigits = fractionDigitCache[currencyOrd] = Currency.getInstance(sourceCurrencyCode).getDefaultFractionDigits();
}
} else {
String sourceCurrencyCode = getDocCurrencyCode(doc, currencyOrd);
exchangeRate = provider.getExchangeRate(sourceCurrencyCode, targetCurrencyCode);
sourceFractionDigits = Currency.getInstance(sourceCurrencyCode).getDefaultFractionDigits();
}
return CurrencyValue.convertAmount(exchangeRate, sourceFractionDigits, amount, targetFractionDigits);
}
public int intVal(int doc) {
return (int) longVal(doc);
}
public double doubleVal(int doc) {
return (double) longVal(doc);
}
public float floatVal(int doc) {
return (float) longVal(doc);
}
public String strVal(int doc) {
return Long.toString(longVal(doc));
}
public String toString(int doc) {
return name() + '(' + amounts.toString(doc) + ',' + currencies.toString(doc) + ')';
}
};
}
// in solr/core/src/java/org/apache/solr/schema/SortableFloatField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
String sval = f.stringValue();
writer.writeFloat(name, NumberUtils.SortableStr2float(sval));
}
// in solr/core/src/java/org/apache/solr/schema/SortableFloatField.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final float def = defVal;
return new DocTermsIndexDocValues(this, readerContext, field) {
private final BytesRef spare = new BytesRef();
@Override
protected String toTerm(String readableValue) {
return NumberUtils.float2sortableStr(readableValue);
}
@Override
public float floatVal(int doc) {
int ord=termsIndex.getOrd(doc);
return ord==0 ? def : NumberUtils.SortableStr2float(termsIndex.lookup(ord, spare));
}
@Override
public int intVal(int doc) {
return (int)floatVal(doc);
}
@Override
public long longVal(int doc) {
return (long)floatVal(doc);
}
@Override
public double doubleVal(int doc) {
return (double)floatVal(doc);
}
@Override
public String strVal(int doc) {
return Float.toString(floatVal(doc));
}
@Override
public String toString(int doc) {
return description() + '=' + floatVal(doc);
}
@Override
public Object objectVal(int doc) {
int ord=termsIndex.getOrd(doc);
return ord==0 ? null : NumberUtils.SortableStr2float(termsIndex.lookup(ord, spare));
}
@Override
public ValueFiller getValueFiller() {
return new ValueFiller() {
private final MutableValueFloat mval = new MutableValueFloat();
@Override
public MutableValue getValue() {
return mval;
}
@Override
public void fillValue(int doc) {
int ord=termsIndex.getOrd(doc);
if (ord == 0) {
mval.value = def;
mval.exists = false;
} else {
mval.value = NumberUtils.SortableStr2float(termsIndex.lookup(ord, spare));
mval.exists = true;
}
}
};
}
};
}
// in solr/core/src/java/org/apache/solr/schema/IntField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
String s = f.stringValue();
// these values may be from a legacy lucene index, which may
// not be properly formatted in some output formats, or may
// incorrectly have a zero length.
if (s.length()==0) {
// zero length value means someone mistakenly indexed the value
// instead of simply leaving it out. Write a null value instead of a numeric.
writer.writeNull(name);
return;
}
try {
int val = Integer.parseInt(s);
writer.writeInt(name, val);
} catch (NumberFormatException e){
// can't parse - write out the contents as a string so nothing is lost and
// clients don't get a parse error.
writer.writeStr(name, s, true);
}
}
// in solr/core/src/java/org/apache/solr/schema/FloatField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
String s = f.stringValue();
// these values may be from a legacy lucene index, which may
// not be properly formatted in some output formats, or may
// incorrectly have a zero length.
if (s.length()==0) {
// zero length value means someone mistakenly indexed the value
// instead of simply leaving it out. Write a null value instead of a numeric.
writer.writeNull(name);
return;
}
try {
float fval = Float.parseFloat(s);
writer.writeFloat(name, fval);
} catch (NumberFormatException e){
// can't parse - write out the contents as a string so nothing is lost and
// clients don't get a parse error.
writer.writeStr(name, s, true);
}
}
// in solr/core/src/java/org/apache/solr/schema/TrieField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
writer.writeVal(name, toObject(f));
}
// in solr/core/src/java/org/apache/solr/schema/BoolField.java
Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new Tokenizer(reader) {
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
boolean done = false;
@Override
public void reset(Reader input) throws IOException {
done = false;
super.reset(input);
}
@Override
public boolean incrementToken() throws IOException {
clearAttributes();
if (done) return false;
done = true;
int ch = input.read();
if (ch==-1) return false;
termAtt.copyBuffer(
((ch=='t' || ch=='T' || ch=='1') ? TRUE_TOKEN : FALSE_TOKEN)
,0,1);
return true;
}
};
return new TokenStreamComponents(tokenizer);
}
// in solr/core/src/java/org/apache/solr/schema/BoolField.java
Override
public void reset(Reader input) throws IOException {
done = false;
super.reset(input);
}
// in solr/core/src/java/org/apache/solr/schema/BoolField.java
Override
public boolean incrementToken() throws IOException {
clearAttributes();
if (done) return false;
done = true;
int ch = input.read();
if (ch==-1) return false;
termAtt.copyBuffer(
((ch=='t' || ch=='T' || ch=='1') ? TRUE_TOKEN : FALSE_TOKEN)
,0,1);
return true;
}
// in solr/core/src/java/org/apache/solr/schema/BoolField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
writer.writeBool(name, f.stringValue().charAt(0) == 'T');
}
// in solr/core/src/java/org/apache/solr/schema/BoolField.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FieldCache.DocTermsIndex sindex = FieldCache.DEFAULT.getTermsIndex(readerContext.reader(), field);
// figure out what ord maps to true
int nord = sindex.numOrd();
BytesRef br = new BytesRef();
int tord = -1;
for (int i=1; i<nord; i++) {
sindex.lookup(i, br);
if (br.length==1 && br.bytes[br.offset]=='T') {
tord = i;
break;
}
}
final int trueOrd = tord;
return new BoolDocValues(this) {
@Override
public boolean boolVal(int doc) {
return sindex.getOrd(doc) == trueOrd;
}
@Override
public boolean exists(int doc) {
return sindex.getOrd(doc) != 0;
}
@Override
public ValueFiller getValueFiller() {
return new ValueFiller() {
private final MutableValueBool mval = new MutableValueBool();
@Override
public MutableValue getValue() {
return mval;
}
@Override
public void fillValue(int doc) {
int ord = sindex.getOrd(doc);
mval.value = (ord == trueOrd);
mval.exists = (ord != 0);
}
};
}
};
}
// in solr/core/src/java/org/apache/solr/schema/BinaryField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
writer.writeStr(name, toBase64String(toObject(f)), false);
}
// in solr/core/src/java/org/apache/solr/schema/LatLonType.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
writer.writeStr(name, f.stringValue(), false);
}
// in solr/core/src/java/org/apache/solr/schema/LatLonType.java
Override
public Query rewrite(IndexReader reader) throws IOException {
return bboxQuery != null ? bboxQuery.rewrite(reader) : this;
}
// in solr/core/src/java/org/apache/solr/schema/LatLonType.java
Override
public float getValueForNormalization() throws IOException {
queryWeight = getBoost();
return queryWeight * queryWeight;
}
// in solr/core/src/java/org/apache/solr/schema/LatLonType.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
return new SpatialScorer(context, acceptDocs, this, queryWeight);
}
// in solr/core/src/java/org/apache/solr/schema/LatLonType.java
Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
return ((SpatialScorer)scorer(context, true, true, context.reader().getLiveDocs())).explain(doc);
}
// in solr/core/src/java/org/apache/solr/schema/LatLonType.java
Override
public int nextDoc() throws IOException {
for(;;) {
++doc;
if (doc>=maxDoc) {
return doc=NO_MORE_DOCS;
}
if (acceptDocs != null && !acceptDocs.get(doc)) continue;
if (!match()) continue;
return doc;
}
}
// in solr/core/src/java/org/apache/solr/schema/LatLonType.java
Override
public int advance(int target) throws IOException {
// this will work even if target==NO_MORE_DOCS
doc=target-1;
return nextDoc();
}
// in solr/core/src/java/org/apache/solr/schema/LatLonType.java
Override
public float score() throws IOException {
double dist = (doc == lastDistDoc) ? lastDist : dist(latVals.doubleVal(doc), lonVals.doubleVal(doc));
return (float)(dist * qWeight);
}
// in solr/core/src/java/org/apache/solr/schema/LatLonType.java
public Explanation explain(int doc) throws IOException {
advance(doc);
boolean matched = this.doc == doc;
this.doc = doc;
float sc = matched ? score() : 0;
double dist = dist(latVals.doubleVal(doc), lonVals.doubleVal(doc));
String description = SpatialDistanceQuery.this.toString();
Explanation result = new ComplexExplanation(this.doc == doc, sc, description + " product of:");
// result.addDetail(new Explanation((float)dist, "hsin("+latVals.explain(doc)+","+lonVals.explain(doc)+")"));
result.addDetail(new Explanation((float)dist, "hsin("+latVals.doubleVal(doc)+","+lonVals.doubleVal(doc)+")"));
result.addDetail(new Explanation(getBoost(), "boost"));
result.addDetail(new Explanation(weight.queryNorm,"queryNorm"));
return result;
}
// in solr/core/src/java/org/apache/solr/schema/LatLonType.java
Override
public void collect(int doc) throws IOException {
spatialScorer.doc = doc;
if (spatialScorer.match()) delegate.collect(doc);
}
// in solr/core/src/java/org/apache/solr/schema/LatLonType.java
Override
public void setNextReader(AtomicReaderContext context) throws IOException {
maxdoc = context.reader().maxDoc();
spatialScorer = new SpatialScorer(context, null, weight, 1.0f);
super.setNextReader(context);
}
// in solr/core/src/java/org/apache/solr/schema/LatLonType.java
Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
// if we were supposed to use bboxQuery, then we should have been rewritten using that query
assert bboxQuery == null;
return new SpatialWeight(searcher);
}
// in solr/core/src/java/org/apache/solr/schema/ByteField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
String s = f.stringValue();
// these values may be from a legacy lucene index, which may
// not be properly formatted in some output formats, or may
// incorrectly have a zero length.
if (s.length()==0) {
// zero length value means someone mistakenly indexed the value
// instead of simply leaving it out. Write a null value instead of a numeric.
writer.writeNull(name);
return;
}
try {
byte val = Byte.parseByte(s);
writer.writeInt(name, val);
} catch (NumberFormatException e){
// can't parse - write out the contents as a string so nothing is lost and
// clients don't get a parse error.
writer.writeStr(name, s, true);
}
}
// in solr/core/src/java/org/apache/solr/schema/PointType.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
writer.writeStr(name, f.stringValue(), false);
}
// in solr/core/src/java/org/apache/solr/schema/UUIDField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f)
throws IOException {
writer.writeStr(name, f.stringValue(), false);
}
// in solr/core/src/java/org/apache/solr/schema/SimplePreAnalyzedParser.java
Override
public ParseResult parse(Reader reader, AttributeSource parent) throws IOException {
ParseResult res = new ParseResult();
StringBuilder sb = new StringBuilder();
char[] buf = new char[128];
int cnt;
while ((cnt = reader.read(buf)) > 0) {
sb.append(buf, 0, cnt);
}
String val = sb.toString();
// empty string - accept even without version number
if (val.length() == 0) {
return res;
}
// first consume the version
int idx = val.indexOf(' ');
if (idx == -1) {
throw new IOException("Missing VERSION token");
}
String version = val.substring(0, idx);
if (!VERSION.equals(version)) {
throw new IOException("Unknown VERSION " + version);
}
val = val.substring(idx + 1);
// then consume the optional stored part
int tsStart = 0;
boolean hasStored = false;
StringBuilder storedBuf = new StringBuilder();
if (val.charAt(0) == '=') {
hasStored = true;
if (val.length() > 1) {
for (int i = 1; i < val.length(); i++) {
char c = val.charAt(i);
if (c == '\\') {
if (i < val.length() - 1) {
c = val.charAt(++i);
if (c == '=') { // we recognize only \= escape in the stored part
storedBuf.append('=');
} else {
storedBuf.append('\\');
storedBuf.append(c);
continue;
}
} else {
storedBuf.append(c);
continue;
}
} else if (c == '=') {
// end of stored text
tsStart = i + 1;
break;
} else {
storedBuf.append(c);
}
}
if (tsStart == 0) { // missing end-of-stored marker
throw new IOException("Missing end marker of stored part");
}
} else {
throw new IOException("Unexpected end of stored field");
}
}
if (hasStored) {
res.str = storedBuf.toString();
}
Tok tok = new Tok();
StringBuilder attName = new StringBuilder();
StringBuilder attVal = new StringBuilder();
// parser state
S s = S.UNDEF;
int lastPos = 0;
for (int i = tsStart; i < val.length(); i++) {
char c = val.charAt(i);
if (c == ' ') {
// collect leftovers
switch (s) {
case VALUE :
if (attVal.length() == 0) {
throw new IOException("Unexpected character '" + c + "' at position " + i + " - empty value of attribute.");
}
if (attName.length() > 0) {
tok.attr.put(attName.toString(), attVal.toString());
}
break;
case NAME: // attr name without a value ?
if (attName.length() > 0) {
throw new IOException("Unexpected character '" + c + "' at position " + i + " - missing attribute value.");
} else {
// accept missing att name and value
}
break;
case TOKEN:
case UNDEF:
// do nothing, advance to next token
}
attName.setLength(0);
attVal.setLength(0);
if (!tok.isEmpty() || s == S.NAME) {
AttributeSource.State state = createState(parent, tok, lastPos);
if (state != null) res.states.add(state.clone());
}
// reset tok
s = S.UNDEF;
tok.reset();
// skip
lastPos++;
continue;
}
StringBuilder tgt = null;
switch (s) {
case TOKEN:
tgt = tok.token;
break;
case NAME:
tgt = attName;
break;
case VALUE:
tgt = attVal;
break;
case UNDEF:
tgt = tok.token;
s = S.TOKEN;
}
if (c == '\\') {
if (s == S.TOKEN) lastPos++;
if (i >= val.length() - 1) { // end
tgt.append(c);
continue;
} else {
c = val.charAt(++i);
switch (c) {
case '\\' :
case '=' :
case ',' :
case ' ' :
tgt.append(c);
break;
case 'n':
tgt.append('\n');
break;
case 'r':
tgt.append('\r');
break;
case 't':
tgt.append('\t');
break;
default:
tgt.append('\\');
tgt.append(c);
lastPos++;
}
}
} else {
// state switch
if (c == ',') {
if (s == S.TOKEN) {
s = S.NAME;
} else if (s == S.VALUE) { // end of value, start of next attr
if (attVal.length() == 0) {
throw new IOException("Unexpected character '" + c + "' at position " + i + " - empty value of attribute.");
}
if (attName.length() > 0 && attVal.length() > 0) {
tok.attr.put(attName.toString(), attVal.toString());
}
// reset
attName.setLength(0);
attVal.setLength(0);
s = S.NAME;
} else {
throw new IOException("Unexpected character '" + c + "' at position " + i + " - missing attribute value.");
}
} else if (c == '=') {
if (s == S.NAME) {
s = S.VALUE;
} else {
throw new IOException("Unexpected character '" + c + "' at position " + i + " - empty value of attribute.");
}
} else {
tgt.append(c);
if (s == S.TOKEN) lastPos++;
}
}
}
// collect leftovers
if (!tok.isEmpty() || s == S.NAME || s == S.VALUE) {
// remaining attrib?
if (s == S.VALUE) {
if (attName.length() > 0 && attVal.length() > 0) {
tok.attr.put(attName.toString(), attVal.toString());
}
}
AttributeSource.State state = createState(parent, tok, lastPos);
if (state != null) res.states.add(state.clone());
}
return res;
}
// in solr/core/src/java/org/apache/solr/schema/SimplePreAnalyzedParser.java
public String toFormattedString(Field f) throws IOException {
StringBuilder sb = new StringBuilder();
sb.append(VERSION + " ");
if (f.fieldType().stored()) {
String s = f.stringValue();
if (s != null) {
// escape the equals sign as \= ; the replacement needs four backslashes in the
// Java literal so that a single literal backslash survives the regex replacement
s = s.replaceAll("=", "\\\\=");
sb.append('=');
sb.append(s);
sb.append('=');
}
}
TokenStream ts = f.tokenStreamValue();
if (ts != null) {
StringBuilder tok = new StringBuilder();
boolean next = false;
while (ts.incrementToken()) {
if (next) {
sb.append(' ');
} else {
next = true;
}
tok.setLength(0);
Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
String cTerm = null;
String tTerm = null;
while (it.hasNext()) {
Class<? extends Attribute> cl = it.next();
if (!ts.hasAttribute(cl)) {
continue;
}
Attribute att = ts.getAttribute(cl);
if (cl.isAssignableFrom(CharTermAttribute.class)) {
CharTermAttribute catt = (CharTermAttribute)att;
cTerm = escape(catt.buffer(), catt.length());
} else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
TermToBytesRefAttribute tatt = (TermToBytesRefAttribute)att;
char[] tTermChars = tatt.getBytesRef().utf8ToString().toCharArray();
tTerm = escape(tTermChars, tTermChars.length);
} else {
if (tok.length() > 0) tok.append(',');
if (cl.isAssignableFrom(FlagsAttribute.class)) {
tok.append("f=" + Integer.toHexString(((FlagsAttribute)att).getFlags()));
} else if (cl.isAssignableFrom(OffsetAttribute.class)) {
tok.append("s=" + ((OffsetAttribute)att).startOffset() + ",e=" + ((OffsetAttribute)att).endOffset());
} else if (cl.isAssignableFrom(PayloadAttribute.class)) {
Payload p = ((PayloadAttribute)att).getPayload();
if (p != null && p.length() > 0) {
tok.append("p=" + bytesToHex(p.getData(), p.getOffset(), p.length()));
} else if (tok.length() > 0) {
tok.setLength(tok.length() - 1); // remove the last comma
}
} else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
tok.append("i=" + ((PositionIncrementAttribute)att).getPositionIncrement());
} else if (cl.isAssignableFrom(TypeAttribute.class)) {
tok.append("y=" + escape(((TypeAttribute)att).type()));
} else {
tok.append(cl.getName() + "=" + escape(att.toString()));
}
}
}
String term = null;
if (cTerm != null) {
term = cTerm;
} else {
term = tTerm;
}
if (term != null && term.length() > 0) {
if (tok.length() > 0) {
tok.insert(0, term + ",");
} else {
tok.insert(0, term);
}
}
sb.append(tok);
}
}
return sb.toString();
}
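// --- Illustrative note (not from the Solr source): judging from parse() and toFormattedString()
// --- above, a serialized "simple" pre-analyzed value looks roughly like this (text and offsets
// --- are made up):
//
//   1 =Quick brown fox= quick,s=0,e=5,i=1 brown,s=6,e=11,i=1 fox,s=12,e=15,i=1
//
// --- i.e. the VERSION ("1"), an optional stored value wrapped in '=' (with '=' escaped as \=),
// --- then whitespace-separated tokens, each a term followed by comma-separated attributes
// --- (s/e = start/end offsets, i = position increment, y = type, f = flags hex, p = payload hex).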
// in solr/core/src/java/org/apache/solr/schema/LongField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
String s = f.stringValue();
// these values may be from a legacy lucene index, which may
// not be properly formatted in some output formats, or may
// incorrectly have a zero length.
if (s.length()==0) {
// zero length value means someone mistakenly indexed the value
// instead of simply leaving it out. Write a null value instead of a numeric.
writer.writeNull(name);
return;
}
try {
long val = Long.parseLong(s);
writer.writeLong(name, val);
} catch (NumberFormatException e){
// can't parse - write out the contents as a string so nothing is lost and
// clients don't get a parse error.
writer.writeStr(name, s, true);
}
}
// in solr/core/src/java/org/apache/solr/schema/GeoHashField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f)
throws IOException {
writer.writeStr(name, toExternal(f), false);
}
// in solr/core/src/java/org/apache/solr/schema/ExternalFileField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
throw new UnsupportedOperationException();
}
// in solr/core/src/java/org/apache/solr/schema/ShortField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
String s = f.stringValue();
// these values may be from a legacy lucene index, which may
// not be properly formatted in some output formats, or may
// incorrectly have a zero length.
if (s.length()==0) {
// zero length value means someone mistakenly indexed the value
// instead of simply leaving it out. Write a null value instead of a numeric.
writer.writeNull(name);
return;
}
try {
short val = Short.parseShort(s);
writer.writeInt(name, val);
} catch (NumberFormatException e){
// can't parse - write out the contents as a string so nothing is lost and
// clients don't get a parse error.
writer.writeStr(name, s, true);
}
}
// in solr/core/src/java/org/apache/solr/schema/SchemaField.java
public void write(TextResponseWriter writer, String name, IndexableField val) throws IOException {
// name is passed in because it may be null if name should not be used.
type.write(writer,name,val);
}
// in solr/core/src/java/org/apache/solr/schema/StrFieldSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
return new DocTermsIndexDocValues(this, readerContext, field) {
@Override
protected String toTerm(String readableValue) {
return readableValue;
}
@Override
public int ordVal(int doc) {
return termsIndex.getOrd(doc);
}
@Override
public int numOrd() {
return termsIndex.numOrd();
}
@Override
public Object objectVal(int doc) {
return strVal(doc);
}
@Override
public String toString(int doc) {
return description() + '=' + strVal(doc);
}
};
}
// in solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f)
throws IOException {
writer.writeStr(name, f.stringValue(), true);
}
// in solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
public String toFormattedString(Field f) throws IOException {
return parser.toFormattedString(f);
}
// in solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
public final boolean incrementToken() throws IOException {
// lazy init the iterator
if (it == null) {
it = cachedStates.iterator();
}
if (!it.hasNext()) {
return false;
}
AttributeSource.State state = (State) it.next();
restoreState(state.clone());
return true;
}
// in solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
Override
public void reset(Reader input) throws IOException {
super.reset(input);
cachedStates.clear();
stringValue = null;
binaryValue = null;
ParseResult res = parser.parse(input, this);
if (res != null) {
stringValue = res.str;
binaryValue = res.bin;
if (res.states != null) {
cachedStates.addAll(res.states);
}
}
}
// in solr/core/src/java/org/apache/solr/schema/TextField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
writer.writeStr(name, f.stringValue(), true);
}
// in solr/core/src/java/org/apache/solr/schema/TrieDateField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
wrappedField.write(writer, name, f);
}
// in solr/core/src/java/org/apache/solr/schema/SortableLongField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
String sval = f.stringValue();
writer.writeLong(name, NumberUtils.SortableStr2long(sval,0,sval.length()));
}
// in solr/core/src/java/org/apache/solr/schema/SortableLongField.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final long def = defVal;
return new DocTermsIndexDocValues(this, readerContext, field) {
private final BytesRef spare = new BytesRef();
@Override
protected String toTerm(String readableValue) {
return NumberUtils.long2sortableStr(readableValue);
}
@Override
public float floatVal(int doc) {
return (float)longVal(doc);
}
@Override
public int intVal(int doc) {
return (int)longVal(doc);
}
@Override
public long longVal(int doc) {
int ord=termsIndex.getOrd(doc);
return ord==0 ? def : NumberUtils.SortableStr2long(termsIndex.lookup(ord, spare),0,5);
}
@Override
public double doubleVal(int doc) {
return (double)longVal(doc);
}
@Override
public String strVal(int doc) {
return Long.toString(longVal(doc));
}
@Override
public Object objectVal(int doc) {
int ord=termsIndex.getOrd(doc);
return ord==0 ? null : NumberUtils.SortableStr2long(termsIndex.lookup(ord, spare));
}
@Override
public String toString(int doc) {
return description() + '=' + longVal(doc);
}
@Override
public ValueFiller getValueFiller() {
return new ValueFiller() {
private final MutableValueLong mval = new MutableValueLong();
@Override
public MutableValue getValue() {
return mval;
}
@Override
public void fillValue(int doc) {
int ord=termsIndex.getOrd(doc);
if (ord == 0) {
mval.value = def;
mval.exists = false;
} else {
mval.value = NumberUtils.SortableStr2long(termsIndex.lookup(ord, spare),0,5);
mval.exists = true;
}
}
};
}
};
}
// in solr/core/src/java/org/apache/solr/schema/SortableDoubleField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
String sval = f.stringValue();
writer.writeDouble(name, NumberUtils.SortableStr2double(sval));
}
// in solr/core/src/java/org/apache/solr/schema/SortableDoubleField.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final double def = defVal;
return new DocTermsIndexDocValues(this, readerContext, field) {
private final BytesRef spare = new BytesRef();
@Override
protected String toTerm(String readableValue) {
return NumberUtils.double2sortableStr(readableValue);
}
@Override
public float floatVal(int doc) {
return (float)doubleVal(doc);
}
@Override
public int intVal(int doc) {
return (int)doubleVal(doc);
}
@Override
public long longVal(int doc) {
return (long)doubleVal(doc);
}
@Override
public double doubleVal(int doc) {
int ord=termsIndex.getOrd(doc);
return ord==0 ? def : NumberUtils.SortableStr2double(termsIndex.lookup(ord, spare));
}
@Override
public String strVal(int doc) {
return Double.toString(doubleVal(doc));
}
@Override
public Object objectVal(int doc) {
int ord=termsIndex.getOrd(doc);
return ord==0 ? null : NumberUtils.SortableStr2double(termsIndex.lookup(ord, spare));
}
@Override
public String toString(int doc) {
return description() + '=' + doubleVal(doc);
}
@Override
public ValueFiller getValueFiller() {
return new ValueFiller() {
private final MutableValueDouble mval = new MutableValueDouble();
@Override
public MutableValue getValue() {
return mval;
}
@Override
public void fillValue(int doc) {
int ord=termsIndex.getOrd(doc);
if (ord == 0) {
mval.value = def;
mval.exists = false;
} else {
mval.value = NumberUtils.SortableStr2double(termsIndex.lookup(ord, spare));
mval.exists = true;
}
}
};
}
};
}
// in solr/core/src/java/org/apache/solr/schema/CollationField.java
Override
public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException {
writer.writeStr(name, f.stringValue(), true);
}
// in solr/core/src/java/org/apache/solr/schema/JsonPreAnalyzedParser.java
Override
public String toFormattedString(Field f) throws IOException {
Map<String,Object> map = new HashMap<String,Object>();
map.put(VERSION_KEY, VERSION);
if (f.fieldType().stored()) {
String stringValue = f.stringValue();
if (stringValue != null) {
map.put(STRING_KEY, stringValue);
}
BytesRef binaryValue = f.binaryValue();
if (binaryValue != null) {
map.put(BINARY_KEY, Base64.byteArrayToBase64(binaryValue.bytes, binaryValue.offset, binaryValue.length));
}
}
TokenStream ts = f.tokenStreamValue();
if (ts != null) {
List<Map<String,Object>> tokens = new LinkedList<Map<String,Object>>();
while (ts.incrementToken()) {
Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
String cTerm = null;
String tTerm = null;
Map<String,Object> tok = new TreeMap<String,Object>();
while (it.hasNext()) {
Class<? extends Attribute> cl = it.next();
if (!ts.hasAttribute(cl)) {
continue;
}
Attribute att = ts.getAttribute(cl);
if (cl.isAssignableFrom(CharTermAttribute.class)) {
CharTermAttribute catt = (CharTermAttribute)att;
cTerm = new String(catt.buffer(), 0, catt.length());
} else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
TermToBytesRefAttribute tatt = (TermToBytesRefAttribute)att;
tTerm = tatt.getBytesRef().utf8ToString();
} else {
if (cl.isAssignableFrom(FlagsAttribute.class)) {
tok.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute)att).getFlags()));
} else if (cl.isAssignableFrom(OffsetAttribute.class)) {
tok.put(OFFSET_START_KEY, ((OffsetAttribute)att).startOffset());
tok.put(OFFSET_END_KEY, ((OffsetAttribute)att).endOffset());
} else if (cl.isAssignableFrom(PayloadAttribute.class)) {
Payload p = ((PayloadAttribute)att).getPayload();
if (p != null && p.length() > 0) {
tok.put(PAYLOAD_KEY, Base64.byteArrayToBase64(p.getData(), p.getOffset(), p.length()));
}
} else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
tok.put(POSINCR_KEY, ((PositionIncrementAttribute)att).getPositionIncrement());
} else if (cl.isAssignableFrom(TypeAttribute.class)) {
tok.put(TYPE_KEY, ((TypeAttribute)att).type());
} else {
tok.put(cl.getName(), att.toString());
}
}
}
String term = null;
if (cTerm != null) {
term = cTerm;
} else {
term = tTerm;
}
if (term != null && term.length() > 0) {
tok.put(TOKEN_KEY, term);
}
tokens.add(tok);
}
map.put(TOKENS_KEY, tokens);
}
return JSONUtil.toJSON(map, -1);
}
// in solr/core/src/java/org/apache/solr/schema/FieldType.java
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer ts = new Tokenizer(reader) {
final char[] cbuf = new char[maxChars];
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@Override
public boolean incrementToken() throws IOException {
clearAttributes();
int n = input.read(cbuf,0,maxChars);
if (n<=0) return false;
String s = toInternal(new String(cbuf,0,n));
termAtt.setEmpty().append(s);
offsetAtt.setOffset(correctOffset(0),correctOffset(n));
return true;
}
};
return new TokenStreamComponents(ts);
}
// in solr/core/src/java/org/apache/solr/schema/FieldType.java
@Override
public boolean incrementToken() throws IOException {
clearAttributes();
int n = input.read(cbuf,0,maxChars);
if (n<=0) return false;
String s = toInternal(new String(cbuf,0,n));
termAtt.setEmpty().append(s);
offsetAtt.setOffset(correctOffset(0),correctOffset(n));
return true;
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java
public String[][] getAllValues() throws IOException {
ArrayList records = new ArrayList();
String[] values;
String[][] ret = null;
while ((values = getLine()) != null) {
records.add(values);
}
if (records.size() > 0) {
ret = new String[records.size()][];
records.toArray(ret);
}
return ret;
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java
public String nextValue() throws IOException {
Token tkn = nextToken();
String ret = null;
switch (tkn.type) {
case TT_TOKEN:
case TT_EORECORD:
ret = tkn.content.toString();
break;
case TT_EOF:
ret = null;
break;
case TT_INVALID:
default:
// no token available (or an error occurred)
throw new IOException(
"(line " + getLineNumber()
+ ") invalid parse sequence");
// unreachable: break;
}
return ret;
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java
public String[] getLine() throws IOException {
String[] ret = EMPTY_STRING_ARRAY;
record.clear();
while (true) {
reusableToken.reset();
nextToken(reusableToken);
switch (reusableToken.type) {
case TT_TOKEN:
record.add(reusableToken.content.toString());
break;
case TT_EORECORD:
record.add(reusableToken.content.toString());
break;
case TT_EOF:
if (reusableToken.isReady) {
record.add(reusableToken.content.toString());
} else {
ret = null;
}
break;
case TT_INVALID:
default:
// error: throw IOException
throw new IOException("(line " + getLineNumber() + ") invalid parse sequence");
// unreachable: break;
}
if (reusableToken.type != TT_TOKEN) {
break;
}
}
if (!record.isEmpty()) {
ret = (String[]) record.toArray(new String[record.size()]);
}
return ret;
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java
protected Token nextToken() throws IOException {
return nextToken(new Token());
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java
protected Token nextToken(Token tkn) throws IOException {
wsBuf.clear(); // reuse
// get the last read char (required for empty line detection)
int lastChar = in.readAgain();
// read the next char and set eol
/* note: unfortunately isEndOfLine may consume a character silently.
* This has no effect outside of the method, so a simple workaround
* is to call 'readAgain' on the stream...
* (One could use objects instead of primitive types here, via JDK 1.5 autoboxing.)
*/
int c = in.read();
boolean eol = isEndOfLine(c);
c = in.readAgain();
// empty line detection: eol AND (last char was EOL or beginning)
while (strategy.getIgnoreEmptyLines() && eol
&& (lastChar == '\n'
|| lastChar == ExtendedBufferedReader.UNDEFINED)
&& !isEndOfFile(lastChar)) {
// go one char ahead ...
lastChar = c;
c = in.read();
eol = isEndOfLine(c);
c = in.readAgain();
// reached end of file without any content (empty line at the end)
if (isEndOfFile(c)) {
tkn.type = TT_EOF;
return tkn;
}
}
// did we already reach EOF during the last iteration? TT_EOF
if (isEndOfFile(lastChar) || (lastChar != strategy.getDelimiter() && isEndOfFile(c))) {
tkn.type = TT_EOF;
return tkn;
}
// important: make sure a new char gets consumed in each iteration
while (!tkn.isReady && tkn.type != TT_EOF) {
// ignore whitespaces at beginning of a token
while (strategy.getIgnoreLeadingWhitespaces() && isWhitespace(c) && !eol) {
wsBuf.append((char) c);
c = in.read();
eol = isEndOfLine(c);
}
// ok, start of token reached: comment, encapsulated, or token
if (c == strategy.getCommentStart()) {
// ignore everything till end of line and continue (incr linecount)
in.readLine();
tkn = nextToken(tkn.reset());
} else if (c == strategy.getDelimiter()) {
// empty token return TT_TOKEN("")
tkn.type = TT_TOKEN;
tkn.isReady = true;
} else if (eol) {
// empty token return TT_EORECORD("")
//noop: tkn.content.append("");
tkn.type = TT_EORECORD;
tkn.isReady = true;
} else if (c == strategy.getEncapsulator()) {
// consume encapsulated token
encapsulatedTokenLexer(tkn, c);
} else if (isEndOfFile(c)) {
// end of file return TT_EOF()
//noop: tkn.content.append("");
tkn.type = TT_EOF;
tkn.isReady = true;
} else {
// next token must be a simple token
// add removed blanks when not ignoring whitespace chars...
if (!strategy.getIgnoreLeadingWhitespaces()) {
tkn.content.append(wsBuf);
}
simpleTokenLexer(tkn, c);
}
}
return tkn;
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java
private Token simpleTokenLexer(Token tkn, int c) throws IOException {
for (;;) {
if (isEndOfLine(c)) {
// end of record
tkn.type = TT_EORECORD;
tkn.isReady = true;
break;
} else if (isEndOfFile(c)) {
// end of file
tkn.type = TT_EOF;
tkn.isReady = true;
break;
} else if (c == strategy.getDelimiter()) {
// end of token
tkn.type = TT_TOKEN;
tkn.isReady = true;
break;
} else if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead() == 'u') {
// interpret unicode escaped chars (like \u0070 -> p)
tkn.content.append((char) unicodeEscapeLexer(c));
} else if (c == strategy.getEscape()) {
tkn.content.append((char)readEscape(c));
} else {
tkn.content.append((char) c);
}
c = in.read();
}
if (strategy.getIgnoreTrailingWhitespaces()) {
tkn.content.trimTrailingWhitespace();
}
return tkn;
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java
private Token encapsulatedTokenLexer(Token tkn, int c) throws IOException {
// save current line
int startLineNumber = getLineNumber();
// ignore the given delimiter
// assert c == delimiter;
for (;;) {
c = in.read();
if (c == '\\' && strategy.getUnicodeEscapeInterpretation() && in.lookAhead()=='u') {
tkn.content.append((char) unicodeEscapeLexer(c));
} else if (c == strategy.getEscape()) {
tkn.content.append((char)readEscape(c));
} else if (c == strategy.getEncapsulator()) {
if (in.lookAhead() == strategy.getEncapsulator()) {
// double or escaped encapsulator -> add single encapsulator to token
c = in.read();
tkn.content.append((char) c);
} else {
// token finish mark (encapsulator) reached: ignore whitespace till delimiter
for (;;) {
c = in.read();
if (c == strategy.getDelimiter()) {
tkn.type = TT_TOKEN;
tkn.isReady = true;
return tkn;
} else if (isEndOfFile(c)) {
tkn.type = TT_EOF;
tkn.isReady = true;
return tkn;
} else if (isEndOfLine(c)) {
// ok, end of token reached
tkn.type = TT_EORECORD;
tkn.isReady = true;
return tkn;
} else if (!isWhitespace(c)) {
// error invalid char between token and next delimiter
throw new IOException(
"(line " + getLineNumber()
+ ") invalid char between encapsulated token end delimiter"
);
}
}
}
} else if (isEndOfFile(c)) {
// error condition (end of file before end of token)
throw new IOException(
"(startline " + startLineNumber + ")"
+ "eof reached before encapsulated token finished"
);
} else {
// consume character
tkn.content.append((char) c);
}
}
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java
protected int unicodeEscapeLexer(int c) throws IOException {
int ret = 0;
// ignore 'u' (assume c==\ now) and read 4 hex digits
c = in.read();
code.clear();
try {
for (int i = 0; i < 4; i++) {
c = in.read();
if (isEndOfFile(c) || isEndOfLine(c)) {
throw new NumberFormatException("number too short");
}
code.append((char) c);
}
ret = Integer.parseInt(code.toString(), 16);
} catch (NumberFormatException e) {
throw new IOException(
"(line " + getLineNumber() + ") Wrong unicode escape sequence found '"
+ code.toString() + "'" + e.toString());
}
return ret;
}
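// Illustration (not part of the Solr sources): the conversion unicodeEscapeLexer performs for a
// backslash-u escape - the four hex digits after the "u" are parsed base-16 into a single char.
public class UnicodeEscapeDemo {
    public static void main(String[] args) {
        String hexDigits = "0070";                       // the digits following the backslash-u prefix
        char decoded = (char) Integer.parseInt(hexDigits, 16);
        System.out.println(decoded);                     // prints 'p'
    }
}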
// in solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java
private int readEscape(int c) throws IOException {
// assume c is the escape char (normally a backslash)
c = in.read();
int out;
switch (c) {
case 'r': out='\r'; break;
case 'n': out='\n'; break;
case 't': out='\t'; break;
case 'b': out='\b'; break;
case 'f': out='\f'; break;
default : out=c;
}
return out;
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVParser.java
private boolean isEndOfLine(int c) throws IOException {
// check if we have \r\n...
if (c == '\r') {
if (in.lookAhead() == '\n') {
// note: does not change c outside of this method !!
c = in.read();
}
}
return (c == '\n');
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVUtils.java
public static String[][] parse(String s) throws IOException {
if (s == null) {
throw new IllegalArgumentException("Null argument not allowed.");
}
String[][] result = (new CSVParser(new StringReader(s))).getAllValues();
if (result == null) {
// since CSVStrategy ignores empty lines an empty array is returned
// (i.e. not "result = new String[][] {{""}};")
result = EMPTY_DOUBLE_STRING_ARRAY;
}
return result;
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVUtils.java
public static String[] parseLine(String s) throws IOException {
if (s == null) {
throw new IllegalArgumentException("Null argument not allowed.");
}
// uh,jh: make sure that parseLine("").length == 0
if (s.length() == 0) {
return EMPTY_STRING_ARRAY;
}
return (new CSVParser(new StringReader(s))).getLine();
}
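// Minimal usage sketch (not from the Solr sources) for the two helpers above; the sample input
// and the expected field split assume the parser's default strategy (comma delimiter,
// double-quote encapsulator).
import java.io.IOException;
import org.apache.solr.internal.csv.CSVUtils;

public class CSVUtilsDemo {
    public static void main(String[] args) throws IOException {
        String[] fields = CSVUtils.parseLine("a,b,\"c,d\""); // one record -> ["a", "b", "c,d"]
        String[][] rows = CSVUtils.parse("1,2\n3,4");        // whole input -> one String[] per line
        System.out.println(fields.length + " fields, " + rows.length + " rows"); // 3 fields, 2 rows
    }
}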
// in solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java
public void println() throws IOException {
out.write(strategy.getPrinterNewline());
newLine = true;
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java
public void flush() throws IOException {
out.flush();
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java
public void println(String[] values) throws IOException {
for (int i = 0; i < values.length; i++) {
print(values[i]);
}
println();
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java
public void printlnComment(String comment) throws IOException {
if(this.strategy.isCommentingDisabled()) {
return;
}
if (!newLine) {
println();
}
out.write(this.strategy.getCommentStart());
out.write(' ');
for (int i = 0; i < comment.length(); i++) {
char c = comment.charAt(i);
switch (c) {
case '\r' :
if (i + 1 < comment.length() && comment.charAt(i + 1) == '\n') {
i++;
}
// break intentionally excluded.
case '\n' :
println();
out.write(this.strategy.getCommentStart());
out.write(' ');
break;
default :
out.write(c);
break;
}
}
println();
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java
public void print(char[] value, int offset, int len, boolean checkForEscape) throws IOException {
if (!checkForEscape) {
printSep();
out.write(value, offset, len);
return;
}
if (strategy.getEncapsulator() != CSVStrategy.ENCAPSULATOR_DISABLED) {
printAndEncapsulate(value, offset, len);
} else if (strategy.getEscape() != CSVStrategy.ESCAPE_DISABLED) {
printAndEscape(value, offset, len);
} else {
printSep();
out.write(value, offset, len);
}
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java
void printSep() throws IOException {
if (newLine) {
newLine = false;
} else {
out.write(this.strategy.getDelimiter());
}
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java
void printAndEscape(char[] value, int offset, int len) throws IOException {
int start = offset;
int pos = offset;
int end = offset + len;
printSep();
char delim = this.strategy.getDelimiter();
char escape = this.strategy.getEscape();
while (pos < end) {
char c = value[pos];
if (c == '\r' || c=='\n' || c==delim || c==escape) {
// write out segment up until this char
int l = pos-start;
if (l>0) {
out.write(value, start, l);
}
if (c=='\n') c='n';
else if (c=='\r') c='r';
out.write(escape);
out.write(c);
start = pos+1; // start on the current char after this one
}
pos++;
}
// write last segment
int l = pos-start;
if (l>0) {
out.write(value, start, l);
}
}
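// Worked example for printAndEscape (assuming delimiter=',' and escape='\\', the defaults are
// not shown here): the value "a,b" followed by a raw '\n' is written as a\,b\n - the delimiter
// is prefixed with the escape char, and the raw newline becomes the two characters '\' and 'n',
// matching the c=='\n' -> 'n' rewrite above.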
// in solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java
void printAndEncapsulate(char[] value, int offset, int len) throws IOException {
boolean first = newLine; // is this the first value on this line?
boolean quote = false;
int start = offset;
int pos = offset;
int end = offset + len;
printSep();
char delim = this.strategy.getDelimiter();
char encapsulator = this.strategy.getEncapsulator();
if (len <= 0) {
// always quote an empty token that is the first
// on the line, as it may be the only thing on the
// line. If it were not quoted in that case,
// an empty line has no tokens.
if (first) {
quote = true;
}
} else {
char c = value[pos];
// Hmmm, where did this rule come from?
if (first
&& (c < '0'
|| (c > '9' && c < 'A')
|| (c > 'Z' && c < 'a')
|| (c > 'z'))) {
quote = true;
// } else if (c == ' ' || c == '\f' || c == '\t') {
} else if (c <= '#') {
// Some other chars at the start of a value caused the parser to fail, so for now
// encapsulate if we start in anything less than '#'. We are being conservative
// by including the default comment char too.
quote = true;
} else {
while (pos < end) {
c = value[pos];
if (c=='\n' || c=='\r' || c==encapsulator || c==delim) {
quote = true;
break;
}
pos++;
}
if (!quote) {
pos = end-1;
c = value[pos];
// if (c == ' ' || c == '\f' || c == '\t') {
// Some other chars at the end caused the parser to fail, so for now
// encapsulate if we end in anything less than ' '
if (c <= ' ') {
quote = true;
}
}
}
}
if (!quote) {
// no encapsulation needed - write out the original value
out.write(value, offset, len);
return;
}
// we hit something that needed encapsulation
out.write(encapsulator);
// Pick up where we left off: pos should be positioned on the first character that caused
// the need for encapsulation.
while (pos<end) {
char c = value[pos];
if (c==encapsulator) {
// write out the chunk up until this point
// add 1 to the length to write out the encapsulator also
out.write(value, start, pos-start+1);
// put the next starting position on the encapsulator so we will
// write it out again with the next string (effectively doubling it)
start = pos;
}
pos++;
}
// write the last segment
out.write(value, start, pos-start);
out.write(encapsulator);
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java
public void print(String value, boolean checkForEscape) throws IOException {
if (!checkForEscape) {
// write directly from string
printSep();
out.write(value);
return;
}
if (buf.length < value.length()) {
buf = new char[value.length()];
}
value.getChars(0, value.length(), buf, 0);
print(buf, 0, value.length(), checkForEscape);
}
// in solr/core/src/java/org/apache/solr/internal/csv/CSVPrinter.java
public void print(String value) throws IOException {
print(value, true);
}
// in solr/core/src/java/org/apache/solr/internal/csv/ExtendedBufferedReader.java
public int read() throws IOException {
// initialize the lookahead
if (lookaheadChar == UNDEFINED) {
lookaheadChar = super.read();
}
lastChar = lookaheadChar;
if (super.ready()) {
lookaheadChar = super.read();
} else {
lookaheadChar = UNDEFINED;
}
if (lastChar == '\n') {
lineCounter++;
}
return lastChar;
}
// in solr/core/src/java/org/apache/solr/internal/csv/ExtendedBufferedReader.java
public int read(char[] buf, int off, int len) throws IOException {
// do not claim if len == 0
if (len == 0) {
return 0;
}
// init lookahead, but do not block !!
if (lookaheadChar == UNDEFINED) {
if (ready()) {
lookaheadChar = super.read();
} else {
return -1;
}
}
// 'first read of underlying stream'
if (lookaheadChar == -1) {
return -1;
}
// continue until the lookaheadChar would block
int cOff = off;
while (len > 0 && ready()) {
if (lookaheadChar == -1) {
// eof stream reached, do not continue
return cOff - off;
} else {
buf[cOff++] = (char) lookaheadChar;
if (lookaheadChar == '\n') {
lineCounter++;
}
lastChar = lookaheadChar;
lookaheadChar = super.read();
len--;
}
}
return cOff - off;
}
// in solr/core/src/java/org/apache/solr/internal/csv/ExtendedBufferedReader.java
public String readUntil(char c) throws IOException {
if (lookaheadChar == UNDEFINED) {
lookaheadChar = super.read();
}
line.clear(); // reuse
while (lookaheadChar != c && lookaheadChar != END_OF_STREAM) {
line.append((char) lookaheadChar);
if (lookaheadChar == '\n') {
lineCounter++;
}
lastChar = lookaheadChar;
lookaheadChar = super.read();
}
return line.toString();
}
// in solr/core/src/java/org/apache/solr/internal/csv/ExtendedBufferedReader.java
public String readLine() throws IOException {
if (lookaheadChar == UNDEFINED) {
lookaheadChar = super.read();
}
line.clear(); //reuse
// return null if end of stream has been reached
if (lookaheadChar == END_OF_STREAM) {
return null;
}
// do we have a line termination already
char laChar = (char) lookaheadChar;
if (laChar == '\n' || laChar == '\r') {
lastChar = lookaheadChar;
lookaheadChar = super.read();
// ignore '\r\n' as well
if ((char) lookaheadChar == '\n') {
lastChar = lookaheadChar;
lookaheadChar = super.read();
}
lineCounter++;
return line.toString();
}
// create the rest-of-line return and update the lookahead
line.append(laChar);
String restOfLine = super.readLine(); // TODO involves copying
lastChar = lookaheadChar;
lookaheadChar = super.read();
if (restOfLine != null) {
line.append(restOfLine);
}
lineCounter++;
return line.toString();
}
// in solr/core/src/java/org/apache/solr/internal/csv/ExtendedBufferedReader.java
public long skip(long n) throws IllegalArgumentException, IOException {
if (lookaheadChar == UNDEFINED) {
lookaheadChar = super.read();
}
// illegal argument
if (n < 0) {
throw new IllegalArgumentException("negative argument not supported");
}
// no skipping
if (n == 0 || lookaheadChar == END_OF_STREAM) {
return 0;
}
// skip and reread the lookahead-char
long skipped = 0;
if (n > 1) {
skipped = super.skip(n - 1);
}
lookaheadChar = super.read();
// FIXME: we should check the skipped sequence for line terminations...
lineCounter = Integer.MIN_VALUE;
return skipped + 1;
}
// in solr/core/src/java/org/apache/solr/internal/csv/ExtendedBufferedReader.java
public long skipUntil(char c) throws IllegalArgumentException, IOException {
if (lookaheadChar == UNDEFINED) {
lookaheadChar = super.read();
}
long counter = 0;
while (lookaheadChar != c && lookaheadChar != END_OF_STREAM) {
if (lookaheadChar == '\n') {
lineCounter++;
}
lookaheadChar = super.read();
counter++;
}
return counter;
}
// in solr/core/src/java/org/apache/solr/internal/csv/ExtendedBufferedReader.java
public int lookAhead() throws IOException {
if (lookaheadChar == UNDEFINED) {
lookaheadChar = super.read();
}
return lookaheadChar;
}
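// Illustration (not the Solr class): the one-character lookahead pattern used by
// ExtendedBufferedReader, shown here with the JDK's PushbackReader as an analogy.
import java.io.IOException;
import java.io.PushbackReader;
import java.io.StringReader;

public class LookaheadDemo {
    // Peek at the next char without consuming it, mirroring lookAhead().
    static int peek(PushbackReader in) throws IOException {
        int c = in.read();
        if (c != -1) {
            in.unread(c); // push it back so the next read() sees it again
        }
        return c;
    }

    public static void main(String[] args) throws IOException {
        PushbackReader in = new PushbackReader(new StringReader("\r\nx"));
        System.out.println((char) peek(in));  // '\r' - peeked, not consumed
        System.out.println((char) in.read()); // '\r' - now consumed
    }
}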
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public final int docFreq(Term term) throws IOException {
return reader.docFreq(term);
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public void close() throws IOException {
if (debug) {
if (cachingEnabled) {
StringBuilder sb = new StringBuilder();
sb.append("Closing ").append(name);
for (SolrCache cache : cacheList) {
sb.append("\n\t");
sb.append(cache);
}
log.debug(sb.toString());
} else {
if (debug) log.debug("Closing " + name);
}
}
core.getInfoRegistry().remove(name);
// super.close();
// can't use super.close() since it just calls reader.close() and that may only be called once
// per reader (even if incRef() was previously called).
if (closeReader) reader.decRef();
for (SolrCache cache : cacheList) {
cache.close();
}
directoryFactory.release(getIndexReader().directory());
// do this at the end so it only gets done if there are no exceptions
numCloses.incrementAndGet();
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public static void initRegenerators(SolrConfig solrConfig) {
if (solrConfig.fieldValueCacheConfig != null && solrConfig.fieldValueCacheConfig.getRegenerator() == null) {
solrConfig.fieldValueCacheConfig.setRegenerator(
new CacheRegenerator() {
public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, SolrCache oldCache, Object oldKey, Object oldVal) throws IOException {
if (oldVal instanceof UnInvertedField) {
UnInvertedField.getUnInvertedField((String)oldKey, newSearcher);
}
return true;
}
}
);
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, SolrCache oldCache, Object oldKey, Object oldVal) throws IOException {
if (oldVal instanceof UnInvertedField) {
UnInvertedField.getUnInvertedField((String)oldKey, newSearcher);
}
return true;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, SolrCache oldCache, Object oldKey, Object oldVal) throws IOException {
newSearcher.cacheDocSet((Query)oldKey, null, false);
return true;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, SolrCache oldCache, Object oldKey, Object oldVal) throws IOException {
QueryResultKey key = (QueryResultKey)oldKey;
int nDocs=1;
// request 1 doc and let caching round up to the next window size...
// unless the window size is <=1, in which case we will pick
// the minimum of the number of documents requested last time and
// a reasonable number such as 40.
// TODO: make more configurable later...
if (queryResultWindowSize<=1) {
DocList oldList = (DocList)oldVal;
int oldnDocs = oldList.offset() + oldList.size();
// 40 has factors of 2,4,5,10,20
nDocs = Math.min(oldnDocs,40);
}
int flags=NO_CHECK_QCACHE | key.nc_flags;
QueryCommand qc = new QueryCommand();
qc.setQuery(key.query)
.setFilterList(key.filters)
.setSort(key.sort)
.setLen(nDocs)
.setSupersetMaxDoc(nDocs)
.setFlags(flags);
QueryResult qr = new QueryResult();
newSearcher.getDocListC(qr,qc);
return true;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public QueryResult search(QueryResult qr, QueryCommand cmd) throws IOException {
getDocListC(qr,cmd);
return qr;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@Override
public void binaryField(FieldInfo fieldInfo, byte[] value, int offset, int length) throws IOException {
doc.add(new StoredField(fieldInfo.name, value));
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
final FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.hasVectors());
ft.setIndexed(fieldInfo.isIndexed());
ft.setOmitNorms(fieldInfo.omitsNorms());
ft.setIndexOptions(fieldInfo.getIndexOptions());
doc.add(new Field(fieldInfo.name, value, ft));
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@Override
public Document doc(int i) throws IOException {
return doc(i, (Set<String>)null);
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@Override
public void doc(int n, StoredFieldVisitor visitor) throws IOException {
getIndexReader().document(n, visitor);
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public Document doc(int i, Set<String> fields) throws IOException {
Document d;
if (documentCache != null) {
d = documentCache.get(i);
if (d!=null) return d;
}
if(!enableLazyFieldLoading || fields == null) {
d = getIndexReader().document(i);
} else {
final SetNonLazyFieldSelector visitor = new SetNonLazyFieldSelector(fields, getIndexReader(), i);
getIndexReader().document(i, visitor);
d = visitor.doc;
}
if (documentCache != null) {
documentCache.put(i, d);
}
return d;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public void readDocs(Document[] docs, DocList ids) throws IOException {
readDocs(docs, ids, null);
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public void readDocs(Document[] docs, DocList ids, Set<String> fields) throws IOException {
DocIterator iter = ids.iterator();
for (int i=0; i<docs.length; i++) {
docs[i] = doc(iter.nextDoc(), fields);
}
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public Sort weightSort(Sort sort) throws IOException {
return (sort != null) ? sort.rewrite(this) : null;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public int getFirstMatch(Term t) throws IOException {
Fields fields = atomicReader.fields();
if (fields == null) return -1;
Terms terms = fields.terms(t.field());
if (terms == null) return -1;
BytesRef termBytes = t.bytes();
final TermsEnum termsEnum = terms.iterator(null);
if (!termsEnum.seekExact(termBytes, false)) {
return -1;
}
DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, false);
if (docs == null) return -1;
int id = docs.nextDoc();
return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public long lookupId(BytesRef idBytes) throws IOException {
String field = schema.getUniqueKeyField().getName();
final AtomicReaderContext[] leaves = leafContexts;
for (int i=0; i<leaves.length; i++) {
final AtomicReaderContext leaf = leaves[i];
final AtomicReader reader = leaf.reader();
final Fields fields = reader.fields();
if (fields == null) continue;
final Bits liveDocs = reader.getLiveDocs();
final DocsEnum docs = reader.termDocsEnum(liveDocs, field, idBytes, false);
if (docs == null) continue;
int id = docs.nextDoc();
if (id == DocIdSetIterator.NO_MORE_DOCS) continue;
assert docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS;
return (((long)i) << 32) | id;
}
return -1;
}
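// The long returned above packs the leaf (segment) index into the high 32 bits and the
// segment-local doc id into the low 32 bits; -1 means "not found". A caller-side sketch
// (not from the Solr sources) of packing and unpacking that value:
public class PackedIdDemo {
    public static void main(String[] args) {
        int segmentIndex = 3;                                       // hypothetical leaf index
        int docInSegment = 42;                                      // hypothetical doc id within that leaf
        long packed = (((long) segmentIndex) << 32) | docInSegment; // same packing as lookupId()

        int seg = (int) (packed >>> 32); // high 32 bits -> leaf index
        int doc = (int) packed;          // low 32 bits  -> doc id within the leaf
        System.out.println(seg + " " + doc); // prints "3 42"
    }
}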
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public void cacheDocSet(Query query, DocSet optionalAnswer, boolean mustCache) throws IOException {
// Even if the cache is null, still compute the DocSet as it may serve to warm the Lucene
// or OS disk cache.
if (optionalAnswer != null) {
if (filterCache!=null) {
filterCache.put(query,optionalAnswer);
}
return;
}
// Throw away the result, relying on the fact that getDocSet
// will currently always cache what it found. If getDocSet() starts
// using heuristics about what to cache, and mustCache==true, (or if we
// want this method to start using heuristics too) then
// this needs to change.
getDocSet(query);
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public DocSet getDocSet(Query query) throws IOException {
if (query instanceof ExtendedQuery) {
ExtendedQuery eq = (ExtendedQuery)query;
if (!eq.getCache()) {
if (query instanceof WrappedQuery) {
query = ((WrappedQuery)query).getWrappedQuery();
}
query = QueryUtils.makeQueryable(query);
return getDocSetNC(query, null);
}
}
// Get the absolute value (positive version) of this query. If we
// get back the same reference, we know it's positive.
Query absQ = QueryUtils.getAbs(query);
boolean positive = query==absQ;
if (filterCache != null) {
DocSet absAnswer = filterCache.get(absQ);
if (absAnswer!=null) {
if (positive) return absAnswer;
else return getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer);
}
}
DocSet absAnswer = getDocSetNC(absQ, null);
DocSet answer = positive ? absAnswer : getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer);
if (filterCache != null) {
// cache negative queries as positive
filterCache.put(absQ, absAnswer);
}
return answer;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
DocSet getPositiveDocSet(Query q) throws IOException {
DocSet answer;
if (filterCache != null) {
answer = filterCache.get(q);
if (answer!=null) return answer;
}
answer = getDocSetNC(q,null);
if (filterCache != null) filterCache.put(
q,answer);
return answer;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public DocSet getDocSet(List<Query> queries) throws IOException {
ProcessedFilter pf = getProcessedFilter(null, queries);
if (pf.answer != null) return pf.answer;
DocSetCollector setCollector = new DocSetCollector(maxDoc()>>6, maxDoc());
Collector collector = setCollector;
if (pf.postFilter != null) {
pf.postFilter.setLastDelegate(collector);
collector = pf.postFilter;
}
final AtomicReaderContext[] leaves = leafContexts;
for (int i=0; i<leaves.length; i++) {
final AtomicReaderContext leaf = leaves[i];
final AtomicReader reader = leaf.reader();
final Bits liveDocs = reader.getLiveDocs(); // TODO: the filter may already only have liveDocs...
DocIdSet idSet = null;
if (pf.filter != null) {
idSet = pf.filter.getDocIdSet(leaf, liveDocs);
if (idSet == null) continue;
}
DocIdSetIterator idIter = null;
if (idSet != null) {
idIter = idSet.iterator();
if (idIter == null) continue;
}
collector.setNextReader(leaf);
int max = reader.maxDoc();
if (idIter == null) {
for (int docid = 0; docid<max; docid++) {
if (liveDocs != null && !liveDocs.get(docid)) continue;
collector.collect(docid);
}
} else {
for (int docid = -1; (docid = idIter.advance(docid+1)) < max; ) {
collector.collect(docid);
}
}
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries) throws IOException {
ProcessedFilter pf = new ProcessedFilter();
if (queries==null || queries.size()==0) {
if (setFilter != null)
pf.filter = setFilter.getTopFilter();
return pf;
}
DocSet answer=null;
boolean[] neg = new boolean[queries.size()+1];
DocSet[] sets = new DocSet[queries.size()+1];
List<Query> notCached = null;
List<Query> postFilters = null;
int end = 0;
int smallestIndex = -1;
if (setFilter != null) {
answer = sets[end++] = setFilter;
smallestIndex = end;
}
int smallestCount = Integer.MAX_VALUE;
for (Query q : queries) {
if (q instanceof ExtendedQuery) {
ExtendedQuery eq = (ExtendedQuery)q;
if (!eq.getCache()) {
if (eq.getCost() >= 100 && eq instanceof PostFilter) {
if (postFilters == null) postFilters = new ArrayList<Query>(sets.length-end);
postFilters.add(q);
} else {
if (notCached == null) notCached = new ArrayList<Query>(sets.length-end);
notCached.add(q);
}
continue;
}
}
Query posQuery = QueryUtils.getAbs(q);
sets[end] = getPositiveDocSet(posQuery);
// Negative query if absolute value different from original
if (q==posQuery) {
neg[end] = false;
// keep track of the smallest positive set.
// This optimization is only worth it if size() is cached, which it would
// be if we don't do any set operations.
int sz = sets[end].size();
if (sz<smallestCount) {
smallestCount=sz;
smallestIndex=end;
answer = sets[end];
}
} else {
neg[end] = true;
}
end++;
}
// Are all of our normal cached filters negative?
if (end > 0 && answer==null) {
answer = getPositiveDocSet(matchAllDocsQuery);
}
// do negative queries first to shrink set size
for (int i=0; i<end; i++) {
if (neg[i]) answer = answer.andNot(sets[i]);
}
for (int i=0; i<end; i++) {
if (!neg[i] && i!=smallestIndex) answer = answer.intersection(sets[i]);
}
if (notCached != null) {
Collections.sort(notCached, sortByCost);
List<Weight> weights = new ArrayList<Weight>(notCached.size());
for (Query q : notCached) {
Query qq = QueryUtils.makeQueryable(q);
weights.add(createNormalizedWeight(qq));
}
pf.filter = new FilterImpl(answer, weights);
} else {
if (postFilters == null) {
if (answer == null) {
answer = getPositiveDocSet(matchAllDocsQuery);
}
// "answer" is the only part of the filter, so set it.
pf.answer = answer;
}
if (answer != null) {
pf.filter = answer.getTopFilter();
}
}
if (postFilters != null) {
Collections.sort(postFilters, sortByCost);
for (int i=postFilters.size()-1; i>=0; i--) {
DelegatingCollector prev = pf.postFilter;
pf.postFilter = ((PostFilter)postFilters.get(i)).getFilterCollector(this);
if (prev != null) pf.postFilter.setDelegate(prev);
}
}
return pf;
}
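// The ordering above is a size heuristic: negative sets are subtracted first so the working set
// shrinks before any intersections, and the smallest positive set already seeded the answer, so
// it is skipped. A toy re-enactment with java.util.Set (not DocSet), assuming two positive
// filters and one negative one:
import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;

public class FilterOrderDemo {
    public static void main(String[] args) {
        Set<Integer> smallest = new TreeSet<>(Arrays.asList(2, 3));          // smallest positive set -> initial answer
        Set<Integer> other    = new TreeSet<>(Arrays.asList(1, 2, 3, 4, 5)); // another positive set
        Set<Integer> negative = new TreeSet<>(Arrays.asList(3));             // negative set

        Set<Integer> answer = new TreeSet<>(smallest); // seeded from the smallest positive set
        answer.removeAll(negative);                    // negatives first: shrink before intersecting
        answer.retainAll(other);                       // intersect remaining positives (smallest is skipped)
        System.out.println(answer);                    // prints [2]
    }
}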
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public DocSet getDocSet(DocsEnumState deState) throws IOException {
int largestPossible = deState.termsEnum.docFreq();
boolean useCache = filterCache != null && largestPossible >= deState.minSetSizeCached;
TermQuery key = null;
if (useCache) {
key = new TermQuery(new Term(deState.fieldName, BytesRef.deepCopyOf(deState.termsEnum.term())));
DocSet result = filterCache.get(key);
if (result != null) return result;
}
int smallSetSize = maxDoc()>>6;
int scratchSize = Math.min(smallSetSize, largestPossible);
if (deState.scratch == null || deState.scratch.length < scratchSize)
deState.scratch = new int[scratchSize];
final int[] docs = deState.scratch;
int upto = 0;
int bitsSet = 0;
OpenBitSet obs = null;
DocsEnum docsEnum = deState.termsEnum.docs(deState.liveDocs, deState.docsEnum, false);
if (deState.docsEnum == null) {
deState.docsEnum = docsEnum;
}
if (docsEnum instanceof MultiDocsEnum) {
MultiDocsEnum.EnumWithSlice[] subs = ((MultiDocsEnum)docsEnum).getSubs();
int numSubs = ((MultiDocsEnum)docsEnum).getNumSubs();
for (int subindex = 0; subindex<numSubs; subindex++) {
MultiDocsEnum.EnumWithSlice sub = subs[subindex];
if (sub.docsEnum == null) continue;
int base = sub.slice.start;
int docid;
if (largestPossible > docs.length) {
if (obs == null) obs = new OpenBitSet(maxDoc());
while ((docid = sub.docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
obs.fastSet(docid + base);
bitsSet++;
}
} else {
while ((docid = sub.docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
docs[upto++] = docid + base;
}
}
}
} else {
int docid;
if (largestPossible > docs.length) {
if (obs == null) obs = new OpenBitSet(maxDoc());
while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
obs.fastSet(docid);
bitsSet++;
}
} else {
while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
docs[upto++] = docid;
}
}
}
DocSet result;
if (obs != null) {
for (int i=0; i<upto; i++) {
obs.fastSet(docs[i]);
}
bitsSet += upto;
result = new BitDocSet(obs, bitsSet);
} else {
result = upto==0 ? DocSet.EMPTY : new SortedIntDocSet(Arrays.copyOf(docs, upto));
}
if (useCache) {
filterCache.put(key, result);
}
return result;
}
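// The maxDoc()>>6 threshold above chooses between the two DocSet representations: an OpenBitSet
// always costs about maxDoc/8 bytes, while a sorted int[] of n docs costs 4n bytes, so maxDoc/32
// is the break-even point and maxDoc/64 is a conservative cut-off. A small arithmetic sketch
// (the maxDoc value is made up):
public class DocSetSizingDemo {
    public static void main(String[] args) {
        int maxDoc = 1_000_000;                  // hypothetical index size
        int smallSetSize = maxDoc >> 6;          // 15_625: the threshold used above

        long bitSetBytes   = maxDoc / 8;         // bit set: ~125_000 bytes regardless of hit count
        long intArrayBytes = 4L * smallSetSize;  // int[] at the threshold: ~62_500 bytes
        System.out.println(smallSetSize + " " + bitSetBytes + " " + intArrayBytes);
    }
}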
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
protected DocSet getDocSetNC(Query query, DocSet filter) throws IOException {
DocSetCollector collector = new DocSetCollector(maxDoc()>>6, maxDoc());
if (filter==null) {
if (query instanceof TermQuery) {
Term t = ((TermQuery)query).getTerm();
final AtomicReaderContext[] leaves = leafContexts;
for (int i=0; i<leaves.length; i++) {
final AtomicReaderContext leaf = leaves[i];
final AtomicReader reader = leaf.reader();
collector.setNextReader(leaf);
Fields fields = reader.fields();
Terms terms = fields.terms(t.field());
BytesRef termBytes = t.bytes();
Bits liveDocs = reader.getLiveDocs();
DocsEnum docsEnum = null;
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(termBytes, false)) {
docsEnum = termsEnum.docs(liveDocs, null, false);
}
}
if (docsEnum != null) {
int docid;
while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
collector.collect(docid);
}
}
}
} else {
super.search(query,null,collector);
}
return collector.getDocSet();
} else {
Filter luceneFilter = filter.getTopFilter();
super.search(query, luceneFilter, collector);
return collector.getDocSet();
}
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public DocSet getDocSet(Query query, DocSet filter) throws IOException {
if (filter==null) return getDocSet(query);
if (query instanceof ExtendedQuery) {
ExtendedQuery eq = (ExtendedQuery)query;
if (!eq.getCache()) {
if (query instanceof WrappedQuery) {
query = ((WrappedQuery)query).getWrappedQuery();
}
query = QueryUtils.makeQueryable(query);
return getDocSetNC(query, filter);
}
}
// Negative query if absolute value different from original
Query absQ = QueryUtils.getAbs(query);
boolean positive = absQ==query;
DocSet first;
if (filterCache != null) {
first = filterCache.get(absQ);
if (first==null) {
first = getDocSetNC(absQ,null);
filterCache.put(absQ,first);
}
return positive ? first.intersection(filter) : filter.andNot(first);
}
// If there isn't a cache, then do a single filtered query if positive.
return positive ? getDocSetNC(absQ,filter) : filter.andNot(getPositiveDocSet(absQ));
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public DocList getDocList(Query query, Query filter, Sort lsort, int offset, int len) throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilterList(filter)
.setSort(lsort)
.setOffset(offset)
.setLen(len);
QueryResult qr = new QueryResult();
search(qr,qc);
return qr.getDocList();
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public DocList getDocList(Query query, List<Query> filterList, Sort lsort, int offset, int len, int flags) throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilterList(filterList)
.setSort(lsort)
.setOffset(offset)
.setLen(len)
.setFlags(flags);
QueryResult qr = new QueryResult();
search(qr,qc);
return qr.getDocList();
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
private void getDocListC(QueryResult qr, QueryCommand cmd) throws IOException {
DocListAndSet out = new DocListAndSet();
qr.setDocListAndSet(out);
QueryResultKey key=null;
int maxDocRequested = cmd.getOffset() + cmd.getLen();
// check for overflow, and check for # docs in index
if (maxDocRequested < 0 || maxDocRequested > maxDoc()) maxDocRequested = maxDoc();
int supersetMaxDoc= maxDocRequested;
DocList superset = null;
int flags = cmd.getFlags();
Query q = cmd.getQuery();
if (q instanceof ExtendedQuery) {
ExtendedQuery eq = (ExtendedQuery)q;
if (!eq.getCache()) {
flags |= (NO_CHECK_QCACHE | NO_SET_QCACHE | NO_CHECK_FILTERCACHE);
}
}
// we can try and look up the complete query in the cache.
// we can't do that if filter!=null though (we don't want to
// do hashCode() and equals() for a big DocSet).
if (queryResultCache != null && cmd.getFilter()==null
&& (flags & (NO_CHECK_QCACHE|NO_SET_QCACHE)) != ((NO_CHECK_QCACHE|NO_SET_QCACHE)))
{
// all of the current flags can be reused during warming,
// so set all of them on the cache key.
key = new QueryResultKey(q, cmd.getFilterList(), cmd.getSort(), flags);
if ((flags & NO_CHECK_QCACHE)==0) {
superset = queryResultCache.get(key);
if (superset != null) {
// check that the cache entry has scores recorded if we need them
if ((flags & GET_SCORES)==0 || superset.hasScores()) {
// NOTE: subset() returns null if the DocList has fewer docs than
// requested
out.docList = superset.subset(cmd.getOffset(),cmd.getLen());
}
}
if (out.docList != null) {
// found the docList in the cache... now check if we need the docset too.
// OPT: possible future optimization - if the doclist contains all the matches,
// use it to make the docset instead of rerunning the query.
if (out.docSet==null && ((flags & GET_DOCSET)!=0) ) {
if (cmd.getFilterList()==null) {
out.docSet = getDocSet(cmd.getQuery());
} else {
List<Query> newList = new ArrayList<Query>(cmd.getFilterList().size()+1);
newList.add(cmd.getQuery());
newList.addAll(cmd.getFilterList());
out.docSet = getDocSet(newList);
}
}
return;
}
}
// If we are going to generate the result, bump up to the
// next resultWindowSize for better caching.
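// Worked example (assuming queryResultWindowSize = 50, a made-up value):
//   maxDocRequested = 10 -> supersetMaxDoc = 50 (bumped straight to one window)
//   maxDocRequested = 51 -> ((51-1)/50 + 1)*50 = 100 (rounded up to the next window boundary)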
if ((flags & NO_SET_QCACHE) == 0) {
// handle 0 special case as well as avoid idiv in the common case.
if (maxDocRequested < queryResultWindowSize) {
supersetMaxDoc=queryResultWindowSize;
} else {
supersetMaxDoc = ((maxDocRequested -1)/queryResultWindowSize + 1)*queryResultWindowSize;
if (supersetMaxDoc < 0) supersetMaxDoc=maxDocRequested;
}
} else {
key = null; // we won't be caching the result
}
}
// OK, so now we need to generate an answer.
// One way to do that would be to check if we have an unordered list
// of results for the base query. If so, we can apply the filters and then
// sort by the resulting set. This can only be used if:
// - the sort doesn't contain score
// - we don't want score returned.
// check if we should try and use the filter cache
boolean useFilterCache=false;
if ((flags & (GET_SCORES|NO_CHECK_FILTERCACHE))==0 && useFilterForSortedQuery && cmd.getSort() != null && filterCache != null) {
useFilterCache=true;
SortField[] sfields = cmd.getSort().getSort();
for (SortField sf : sfields) {
if (sf.getType() == SortField.Type.SCORE) {
useFilterCache=false;
break;
}
}
}
// disable useFilterCache optimization temporarily
if (useFilterCache) {
// now actually use the filter cache.
// for large filters that match few documents, this may be
// slower than simply re-executing the query.
if (out.docSet == null) {
out.docSet = getDocSet(cmd.getQuery(),cmd.getFilter());
DocSet bigFilt = getDocSet(cmd.getFilterList());
if (bigFilt != null) out.docSet = out.docSet.intersection(bigFilt);
}
// todo: there could be a sortDocSet that could take a list of
// the filters instead of anding them first...
// perhaps there should be a multi-docset-iterator
superset = sortDocSet(out.docSet,cmd.getSort(),supersetMaxDoc);
out.docList = superset.subset(cmd.getOffset(),cmd.getLen());
} else {
// do it the normal way...
cmd.setSupersetMaxDoc(supersetMaxDoc);
if ((flags & GET_DOCSET)!=0) {
// this currently conflates returning the docset for the base query vs
// the base query and all filters.
DocSet qDocSet = getDocListAndSetNC(qr,cmd);
// cache the docSet matching the query w/o filtering
if (qDocSet!=null && filterCache!=null && !qr.isPartialResults()) filterCache.put(cmd.getQuery(),qDocSet);
} else {
getDocListNC(qr,cmd);
//Parameters: cmd.getQuery(),theFilt,cmd.getSort(),0,supersetMaxDoc,cmd.getFlags(),cmd.getTimeAllowed(),responseHeader);
}
superset = out.docList;
out.docList = superset.subset(cmd.getOffset(),cmd.getLen());
}
// lastly, put the superset in the cache if the size is less than or equal
// to queryResultMaxDocsCached
if (key != null && superset.size() <= queryResultMaxDocsCached && !qr.isPartialResults()) {
queryResultCache.put(key, superset);
}
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
private void getDocListNC(QueryResult qr,QueryCommand cmd) throws IOException {
final long timeAllowed = cmd.getTimeAllowed();
int len = cmd.getSupersetMaxDoc();
int last = len;
if (last < 0 || last > maxDoc()) last=maxDoc();
final int lastDocRequested = last;
int nDocsReturned;
int totalHits;
float maxScore;
int[] ids;
float[] scores;
boolean needScores = (cmd.getFlags() & GET_SCORES) != 0;
Query query = QueryUtils.makeQueryable(cmd.getQuery());
ProcessedFilter pf = getProcessedFilter(cmd.getFilter(), cmd.getFilterList());
final Filter luceneFilter = pf.filter;
// handle zero case...
if (lastDocRequested<=0) {
final float[] topscore = new float[] { Float.NEGATIVE_INFINITY };
final int[] numHits = new int[1];
Collector collector;
if (!needScores) {
collector = new Collector () {
@Override
public void setScorer(Scorer scorer) throws IOException {
}
@Override
public void collect(int doc) throws IOException {
numHits[0]++;
}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
}
@Override
public boolean acceptsDocsOutOfOrder() {
return true;
}
};
} else {
collector = new Collector() {
Scorer scorer;
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
@Override
public void collect(int doc) throws IOException {
numHits[0]++;
float score = scorer.score();
if (score > topscore[0]) topscore[0]=score;
}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
}
@Override
public boolean acceptsDocsOutOfOrder() {
return true;
}
};
}
if( timeAllowed > 0 ) {
collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed);
}
if (pf.postFilter != null) {
pf.postFilter.setLastDelegate(collector);
collector = pf.postFilter;
}
try {
super.search(query, luceneFilter, collector);
}
catch( TimeLimitingCollector.TimeExceededException x ) {
log.warn( "Query: " + query + "; " + x.getMessage() );
qr.setPartialResults(true);
}
nDocsReturned=0;
ids = new int[nDocsReturned];
scores = new float[nDocsReturned];
totalHits = numHits[0];
maxScore = totalHits>0 ? topscore[0] : 0.0f;
} else {
TopDocsCollector topCollector;
if (cmd.getSort() == null) {
if(cmd.getScoreDoc() != null) {
topCollector = TopScoreDocCollector.create(len, cmd.getScoreDoc(), true); //create the Collector with InOrderPagingCollector
} else {
topCollector = TopScoreDocCollector.create(len, true);
}
} else {
topCollector = TopFieldCollector.create(weightSort(cmd.getSort()), len, false, needScores, needScores, true);
}
Collector collector = topCollector;
if( timeAllowed > 0 ) {
collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed);
}
if (pf.postFilter != null) {
pf.postFilter.setLastDelegate(collector);
collector = pf.postFilter;
}
try {
super.search(query, luceneFilter, collector);
}
catch( TimeLimitingCollector.TimeExceededException x ) {
log.warn( "Query: " + query + "; " + x.getMessage() );
qr.setPartialResults(true);
}
totalHits = topCollector.getTotalHits();
TopDocs topDocs = topCollector.topDocs(0, len);
maxScore = totalHits>0 ? topDocs.getMaxScore() : 0.0f;
nDocsReturned = topDocs.scoreDocs.length;
ids = new int[nDocsReturned];
scores = (cmd.getFlags()&GET_SCORES)!=0 ? new float[nDocsReturned] : null;
for (int i=0; i<nDocsReturned; i++) {
ScoreDoc scoreDoc = topDocs.scoreDocs[i];
ids[i] = scoreDoc.doc;
if (scores != null) scores[i] = scoreDoc.score;
}
}
int sliceLen = Math.min(lastDocRequested,nDocsReturned);
if (sliceLen < 0) sliceLen=0;
qr.setDocList(new DocSlice(0,sliceLen,ids,scores,totalHits,maxScore));
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@Override
public void setScorer(Scorer scorer) throws IOException {
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@Override
public void collect(int doc) throws IOException {
numHits[0]++;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@Override
public void collect(int doc) throws IOException {
numHits[0]++;
float score = scorer.score();
if (score > topscore[0]) topscore[0]=score;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
private DocSet getDocListAndSetNC(QueryResult qr,QueryCommand cmd) throws IOException {
int len = cmd.getSupersetMaxDoc();
int last = len;
if (last < 0 || last > maxDoc()) last=maxDoc();
final int lastDocRequested = last;
int nDocsReturned;
int totalHits;
float maxScore;
int[] ids;
float[] scores;
DocSet set;
boolean needScores = (cmd.getFlags() & GET_SCORES) != 0;
int maxDoc = maxDoc();
int smallSetSize = maxDoc>>6;
ProcessedFilter pf = getProcessedFilter(cmd.getFilter(), cmd.getFilterList());
final Filter luceneFilter = pf.filter;
Query query = QueryUtils.makeQueryable(cmd.getQuery());
final long timeAllowed = cmd.getTimeAllowed();
// handle zero case...
if (lastDocRequested<=0) {
final float[] topscore = new float[] { Float.NEGATIVE_INFINITY };
Collector collector;
DocSetCollector setCollector;
if (!needScores) {
collector = setCollector = new DocSetCollector(smallSetSize, maxDoc);
} else {
collector = setCollector = new DocSetDelegateCollector(smallSetSize, maxDoc, new Collector() {
Scorer scorer;
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
@Override
public void collect(int doc) throws IOException {
float score = scorer.score();
if (score > topscore[0]) topscore[0]=score;
}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
}
@Override
public boolean acceptsDocsOutOfOrder() {
return false;
}
});
}
if( timeAllowed > 0 ) {
collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed);
}
if (pf.postFilter != null) {
pf.postFilter.setLastDelegate(collector);
collector = pf.postFilter;
}
try {
super.search(query, luceneFilter, collector);
}
catch( TimeLimitingCollector.TimeExceededException x ) {
log.warn( "Query: " + query + "; " + x.getMessage() );
qr.setPartialResults(true);
}
set = setCollector.getDocSet();
nDocsReturned = 0;
ids = new int[nDocsReturned];
scores = new float[nDocsReturned];
totalHits = set.size();
maxScore = totalHits>0 ? topscore[0] : 0.0f;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@Override
public void collect(int doc) throws IOException {
float score = scorer.score();
if (score > topscore[0]) topscore[0]=score;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public DocList getDocList(Query query, DocSet filter, Sort lsort, int offset, int len) throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilter(filter)
.setSort(lsort)
.setOffset(offset)
.setLen(len);
QueryResult qr = new QueryResult();
search(qr,qc);
return qr.getDocList();
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public DocListAndSet getDocListAndSet(Query query, Query filter, Sort lsort, int offset, int len) throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilterList(filter)
.setSort(lsort)
.setOffset(offset)
.setLen(len)
.setNeedDocSet(true);
QueryResult qr = new QueryResult();
search(qr,qc);
return qr.getDocListAndSet();
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public DocListAndSet getDocListAndSet(Query query, Query filter, Sort lsort, int offset, int len, int flags) throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilterList(filter)
.setSort(lsort)
.setOffset(offset)
.setLen(len)
.setFlags(flags)
.setNeedDocSet(true);
QueryResult qr = new QueryResult();
search(qr,qc);
return qr.getDocListAndSet();
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public DocListAndSet getDocListAndSet(Query query, List<Query> filterList, Sort lsort, int offset, int len) throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilterList(filterList)
.setSort(lsort)
.setOffset(offset)
.setLen(len)
.setNeedDocSet(true);
QueryResult qr = new QueryResult();
search(qr,qc);
return qr.getDocListAndSet();
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public DocListAndSet getDocListAndSet(Query query, List<Query> filterList, Sort lsort, int offset, int len, int flags) throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilterList(filterList)
.setSort(lsort)
.setOffset(offset)
.setLen(len)
.setFlags(flags)
.setNeedDocSet(true);
QueryResult qr = new QueryResult();
search(qr,qc);
return qr.getDocListAndSet();
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public DocListAndSet getDocListAndSet(Query query, DocSet filter, Sort lsort, int offset, int len) throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilter(filter)
.setSort(lsort)
.setOffset(offset)
.setLen(len)
.setNeedDocSet(true);
QueryResult qr = new QueryResult();
search(qr,qc);
return qr.getDocListAndSet();
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public DocListAndSet getDocListAndSet(Query query, DocSet filter, Sort lsort, int offset, int len, int flags) throws IOException {
QueryCommand qc = new QueryCommand();
qc.setQuery(query)
.setFilter(filter)
.setSort(lsort)
.setOffset(offset)
.setLen(len)
.setFlags(flags)
.setNeedDocSet(true);
QueryResult qr = new QueryResult();
search(qr,qc);
return qr.getDocListAndSet();
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
protected DocList sortDocSet(DocSet set, Sort sort, int nDocs) throws IOException {
if (nDocs == 0) {
// SOLR-2923
return new DocSlice(0, 0, new int[0], null, 0, 0f);
}
// bit of a hack to tell if a set is sorted - do it better in the future.
boolean inOrder = set instanceof BitDocSet || set instanceof SortedIntDocSet;
TopDocsCollector topCollector = TopFieldCollector.create(weightSort(sort), nDocs, false, false, false, inOrder);
DocIterator iter = set.iterator();
int base=0;
int end=0;
int readerIndex = 0;
while (iter.hasNext()) {
int doc = iter.nextDoc();
while (doc>=end) {
AtomicReaderContext leaf = leafContexts[readerIndex++];
base = leaf.docBase;
end = base + leaf.reader().maxDoc();
topCollector.setNextReader(leaf);
// we should never need to set the scorer given the settings for the collector
}
topCollector.collect(doc-base);
}
TopDocs topDocs = topCollector.topDocs(0, nDocs);
int nDocsReturned = topDocs.scoreDocs.length;
int[] ids = new int[nDocsReturned];
for (int i=0; i<nDocsReturned; i++) {
ScoreDoc scoreDoc = topDocs.scoreDocs[i];
ids[i] = scoreDoc.doc;
}
return new DocSlice(0,nDocsReturned,ids,null,topDocs.totalHits,0.0f);
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public int numDocs(Query a, DocSet b) throws IOException {
// Negative query if absolute value different from original
Query absQ = QueryUtils.getAbs(a);
DocSet positiveA = getPositiveDocSet(absQ);
return a==absQ ? b.intersectionSize(positiveA) : b.andNotSize(positiveA);
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public int numDocs(DocSet a, DocsEnumState deState) throws IOException {
// Negative query if absolute value different from original
return a.intersectionSize(getDocSet(deState));
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public int numDocs(Query a, Query b) throws IOException {
Query absA = QueryUtils.getAbs(a);
Query absB = QueryUtils.getAbs(b);
DocSet positiveA = getPositiveDocSet(absA);
DocSet positiveB = getPositiveDocSet(absB);
// Negative query if absolute value different from original
if (a==absA) {
if (b==absB) return positiveA.intersectionSize(positiveB);
return positiveA.andNotSize(positiveB);
}
if (b==absB) return positiveB.andNotSize(positiveA);
// if both negative, we need to create a temp DocSet since we
// don't have a counting method that takes three.
DocSet all = getPositiveDocSet(matchAllDocsQuery);
// -a -b == *:*.andNot(a).andNotSize(b) == *:*.andNotSize(a.union(b))
// we use the last form since the intermediate DocSet should normally be smaller.
return all.andNotSize(positiveA.union(positiveB));
}
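// The identity in the comment above is De Morgan on finite sets: counting docs that match
// neither a nor b is |U| - |A union B|. A toy check with java.util.Set (not DocSet):
import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;

public class NegNegCountDemo {
    public static void main(String[] args) {
        Set<Integer> all = new TreeSet<>(Arrays.asList(1, 2, 3, 4, 5, 6)); // stands in for *:*
        Set<Integer> a   = new TreeSet<>(Arrays.asList(1, 2));
        Set<Integer> b   = new TreeSet<>(Arrays.asList(2, 3));

        Set<Integer> form1 = new TreeSet<>(all); // *:* andNot(a) andNotSize(b)
        form1.removeAll(a);
        form1.removeAll(b);

        Set<Integer> union = new TreeSet<>(a);   // *:* andNotSize(a union b) - the form used above
        union.addAll(b);
        Set<Integer> form2 = new TreeSet<>(all);
        form2.removeAll(union);

        System.out.println(form1.size() + " == " + form2.size()); // 3 == 3
    }
}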
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public Document[] readDocs(DocList ids) throws IOException {
Document[] docs = new Document[ids.size()];
readDocs(docs,ids);
return docs;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
public void warm(SolrIndexSearcher old) throws IOException {
// Make sure this is first! filters can help queryResults execute!
long warmingStartTime = System.currentTimeMillis();
// warm the caches in order...
ModifiableSolrParams params = new ModifiableSolrParams();
params.add("warming","true");
for (int i=0; i<cacheList.length; i++) {
if (debug) log.debug("autowarming " + this + " from " + old + "\n\t" + old.cacheList[i]);
SolrQueryRequest req = new LocalSolrQueryRequest(core,params) {
@Override public SolrIndexSearcher getSearcher() { return SolrIndexSearcher.this; }
@Override public void close() { }
};
SolrQueryResponse rsp = new SolrQueryResponse();
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
try {
this.cacheList[i].warm(this, old.cacheList[i]);
} finally {
try {
req.close();
} finally {
SolrRequestInfo.clearRequestInfo();
}
}
if (debug) log.debug("autowarming result for " + this + "\n\t" + this.cacheList[i]);
}
warmupTime = System.currentTimeMillis() - warmingStartTime;
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@Override
public Explanation explain(Query query, int doc) throws IOException {
return super.explain(QueryUtils.makeQueryable(query), doc);
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
DocIdSet sub = topFilter == null ? null : topFilter.getDocIdSet(context, acceptDocs);
if (weights.size() == 0) return sub;
return new FilterSet(sub, context);
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
Override
public DocIdSetIterator iterator() throws IOException {
List<DocIdSetIterator> iterators = new ArrayList<DocIdSetIterator>(weights.size()+1);
if (docIdSet != null) {
DocIdSetIterator iter = docIdSet.iterator();
if (iter == null) return null;
iterators.add(iter);
}
for (Weight w : weights) {
Scorer scorer = w.scorer(context, true, false, context.reader().getLiveDocs());
if (scorer == null) return null;
iterators.add(scorer);
}
if (iterators.size()==0) return null;
if (iterators.size()==1) return iterators.get(0);
if (iterators.size()==2) return new DualFilterIterator(iterators.get(0), iterators.get(1));
return new FilterIterator(iterators.toArray(new DocIdSetIterator[iterators.size()]));
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
Override
public Bits bits() throws IOException {
return null; // don't use random access
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
private int doNext(int doc) throws IOException {
int which=0; // index of the iterator with the highest id
int i=1;
outer: for(;;) {
for (; i<iterators.length; i++) {
if (i == which) continue;
DocIdSetIterator iter = iterators[i];
int next = iter.advance(doc);
if (next != doc) {
doc = next;
which = i;
i = 0;
continue outer;
}
}
return doc;
}
}
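// doNext() above is a leapfrog conjunction: every iterator is advanced to the current
// candidate doc, and any overshoot becomes the new candidate, restarting the scan.
// A self-contained sketch of the same idea over sorted int arrays; plain Java with
// illustrative names, not Lucene's DocIdSetIterator API.
import java.util.ArrayList;
import java.util.List;

public class LeapfrogIntersectSketch {
  /** Docs present in every sorted list, advanced leapfrog-style like doNext() above. */
  static List<Integer> intersect(int[][] lists) {
    List<Integer> out = new ArrayList<>();
    int[] pos = new int[lists.length];
    int candidate = 0;
    outer:
    while (true) {
      for (int i = 0; i < lists.length; i++) {
        // advance list i to the first doc >= candidate
        while (pos[i] < lists[i].length && lists[i][pos[i]] < candidate) pos[i]++;
        if (pos[i] == lists[i].length) break outer;  // one list exhausted: done
        if (lists[i][pos[i]] > candidate) {          // overshoot: it becomes the new candidate
          candidate = lists[i][pos[i]];
          continue outer;
        }
      }
      out.add(candidate); // every list agreed on this doc
      candidate++;        // look for the next match
    }
    return out;
  }

  public static void main(String[] args) {
    int[][] lists = { {1, 3, 5, 9}, {3, 4, 5, 9, 11}, {0, 3, 5, 10} };
    System.out.println(intersect(lists)); // [3, 5]
  }
}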
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
Override
public int nextDoc() throws IOException {
return doNext(first.nextDoc());
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
Override
public int advance(int target) throws IOException {
return doNext(first.advance(target));
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
Override
public int nextDoc() throws IOException {
int doc = a.nextDoc();
for(;;) {
int other = b.advance(doc);
if (other == doc) return doc;
doc = a.advance(other);
if (other == doc) return doc;
}
}
// in solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
Override
public int advance(int target) throws IOException {
int doc = a.advance(target);
for(;;) {
int other = b.advance(doc);
if (other == doc) return doc;
doc = a.advance(other);
if (other == doc) return doc;
}
}
// in solr/core/src/java/org/apache/solr/search/LFUCache.java
public void warm(SolrIndexSearcher searcher, SolrCache old) throws IOException {
if (regenerator == null) return;
long warmingStartTime = System.currentTimeMillis();
LFUCache other = (LFUCache) old;
// warm entries
if (autowarmCount != 0) {
int sz = other.size();
if (autowarmCount != -1) sz = Math.min(sz, autowarmCount);
Map items = other.cache.getMostUsedItems(sz);
Map.Entry[] itemsArr = new Map.Entry[items.size()];
int counter = 0;
for (Object mapEntry : items.entrySet()) {
itemsArr[counter++] = (Map.Entry) mapEntry;
}
for (int i = itemsArr.length - 1; i >= 0; i--) {
try {
boolean continueRegen = regenerator.regenerateItem(searcher,
this, old, itemsArr[i].getKey(), itemsArr[i].getValue());
if (!continueRegen) break;
} catch (Throwable e) {
SolrException.log(log, "Error during auto-warming of key:" + itemsArr[i].getKey(), e);
}
}
}
warmupTime = System.currentTimeMillis() - warmingStartTime;
}
// in solr/core/src/java/org/apache/solr/search/DelegatingCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
delegate.setScorer(scorer);
}
// in solr/core/src/java/org/apache/solr/search/DelegatingCollector.java
Override
public void collect(int doc) throws IOException {
delegate.collect(doc);
}
// in solr/core/src/java/org/apache/solr/search/DelegatingCollector.java
Override
public void setNextReader(AtomicReaderContext context) throws IOException {
this.context = context;
this.docBase = context.docBase;
delegate.setNextReader(context);
}
// in solr/core/src/java/org/apache/solr/search/Grouping.java
public void execute() throws IOException {
if (commands.isEmpty()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specify at least one field, function or query to group by.");
}
DocListAndSet out = new DocListAndSet();
qr.setDocListAndSet(out);
SolrIndexSearcher.ProcessedFilter pf = searcher.getProcessedFilter(cmd.getFilter(), cmd.getFilterList());
final Filter luceneFilter = pf.filter;
maxDoc = searcher.maxDoc();
needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0;
boolean cacheScores = false;
// NOTE: Change this when groupSort can be specified per group
if (!needScores && !commands.isEmpty()) {
if (commands.get(0).groupSort == null) {
cacheScores = true;
} else {
for (SortField field : commands.get(0).groupSort.getSort()) {
if (field.getType() == SortField.Type.SCORE) {
cacheScores = true;
break;
}
}
}
} else if (needScores) {
cacheScores = needScores;
}
getDocSet = (cmd.getFlags() & SolrIndexSearcher.GET_DOCSET) != 0;
getDocList = (cmd.getFlags() & SolrIndexSearcher.GET_DOCLIST) != 0;
query = QueryUtils.makeQueryable(cmd.getQuery());
for (Command cmd : commands) {
cmd.prepare();
}
AbstractAllGroupHeadsCollector<?> allGroupHeadsCollector = null;
List<Collector> collectors = new ArrayList<Collector>(commands.size());
for (Command cmd : commands) {
Collector collector = cmd.createFirstPassCollector();
if (collector != null) {
collectors.add(collector);
}
if (getGroupedDocSet && allGroupHeadsCollector == null) {
collectors.add(allGroupHeadsCollector = cmd.createAllGroupCollector());
}
}
Collector allCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
DocSetCollector setCollector = null;
if (getDocSet && allGroupHeadsCollector == null) {
setCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, allCollectors);
allCollectors = setCollector;
}
CachingCollector cachedCollector = null;
if (cacheSecondPassSearch && allCollectors != null) {
int maxDocsToCache = (int) Math.round(maxDoc * (maxDocsPercentageToCache / 100.0d));
// Caching only makes sense if we can cache more than zero documents.
// Maybe we should have a minimum and a maximum that define the window we would like caching for.
if (maxDocsToCache > 0) {
allCollectors = cachedCollector = CachingCollector.create(allCollectors, cacheScores, maxDocsToCache);
}
}
if (pf.postFilter != null) {
pf.postFilter.setLastDelegate(allCollectors);
allCollectors = pf.postFilter;
}
if (allCollectors != null) {
searchWithTimeLimiter(luceneFilter, allCollectors);
}
if (getGroupedDocSet && allGroupHeadsCollector != null) {
FixedBitSet fixedBitSet = allGroupHeadsCollector.retrieveGroupHeads(maxDoc);
long[] bits = fixedBitSet.getBits();
OpenBitSet openBitSet = new OpenBitSet(bits, bits.length);
qr.setDocSet(new BitDocSet(openBitSet));
} else if (getDocSet) {
qr.setDocSet(setCollector.getDocSet());
}
collectors.clear();
for (Command cmd : commands) {
Collector collector = cmd.createSecondPassCollector();
if (collector != null)
collectors.add(collector);
}
if (!collectors.isEmpty()) {
Collector secondPhaseCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
if (collectors.size() > 0) {
if (cachedCollector != null) {
if (cachedCollector.isCached()) {
cachedCollector.replay(secondPhaseCollectors);
} else {
signalCacheWarning = true;
logger.warn(String.format("The grouping cache is active, but not used because it exceeded the max cache limit of %d percent", maxDocsPercentageToCache));
logger.warn("Please increase cache size or disable group caching.");
searchWithTimeLimiter(luceneFilter, secondPhaseCollectors);
}
} else {
if (pf.postFilter != null) {
pf.postFilter.setLastDelegate(secondPhaseCollectors);
secondPhaseCollectors = pf.postFilter;
}
searchWithTimeLimiter(luceneFilter, secondPhaseCollectors);
}
}
}
for (Command cmd : commands) {
cmd.finish();
}
qr.groupedResults = grouped;
if (getDocList) {
int sz = idSet.size();
int[] ids = new int[sz];
int idx = 0;
for (int val : idSet) {
ids[idx++] = val;
}
qr.setDocList(new DocSlice(0, sz, ids, null, maxMatches, maxScore));
}
}
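// execute() above is a two-pass grouped search: first-pass collectors find the top group
// keys, second-pass collectors then gather documents only for those groups (replayed from
// the CachingCollector when it fit in memory, re-searched otherwise). A simplified,
// self-contained sketch of that two-pass shape over an in-memory hit list; names are
// illustrative and no Lucene grouping APIs are used.
import java.util.*;

public class TwoPassGroupingSketch {
  record Hit(int doc, String group, float score) {}

  /** First pass: pick the top-N group keys by their best-scoring hit. */
  static List<String> topGroups(List<Hit> hits, int topN) {
    Map<String, Float> best = new HashMap<>();
    for (Hit h : hits) best.merge(h.group(), h.score(), Math::max);
    return best.entrySet().stream()
        .sorted(Map.Entry.<String, Float>comparingByValue().reversed())
        .limit(topN).map(Map.Entry::getKey).toList();
  }

  /** Second pass: collect the top docs per selected group only. */
  static Map<String, List<Hit>> docsPerGroup(List<Hit> hits, List<String> groups, int perGroup) {
    Map<String, List<Hit>> out = new LinkedHashMap<>();
    for (String g : groups) {
      out.put(g, hits.stream().filter(h -> h.group().equals(g))
          .sorted(Comparator.comparingDouble(Hit::score).reversed())
          .limit(perGroup).toList());
    }
    return out;
  }

  public static void main(String[] args) {
    List<Hit> hits = List.of(new Hit(1, "a", 2f), new Hit(2, "b", 5f),
                             new Hit(3, "a", 4f), new Hit(4, "c", 1f));
    List<String> groups = topGroups(hits, 2);          // [b, a]
    System.out.println(docsPerGroup(hits, groups, 1)); // best hit for each selected group
  }
}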
// in solr/core/src/java/org/apache/solr/search/Grouping.java
private void searchWithTimeLimiter(final Filter luceneFilter, Collector collector) throws IOException {
if (cmd.getTimeAllowed() > 0) {
if (timeLimitingCollector == null) {
timeLimitingCollector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), cmd.getTimeAllowed());
} else {
/*
* This is so the same timer can be used for grouping's multiple phases.
* We don't want to create a new TimeLimitingCollector for each phase because that would
* reset the timer for each phase. If time runs out during the first phase, the
* second phase should timeout quickly.
*/
timeLimitingCollector.setCollector(collector);
}
collector = timeLimitingCollector;
}
try {
searcher.search(query, luceneFilter, collector);
} catch (TimeLimitingCollector.TimeExceededException x) {
logger.warn( "Query: " + query + "; " + x.getMessage() );
qr.setPartialResults(true);
}
}
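// The comment above is the key point: one TimeLimitingCollector instance is reused so the
// grouping phases share a single time budget. A self-contained sketch of that shared-deadline
// idea in plain Java; class and method names are illustrative, not Lucene's API.
public class SharedDeadlineSketch {
  private final long deadlineNanos;

  SharedDeadlineSketch(long allowedMillis) {
    // the deadline is fixed once, so later phases inherit whatever time is left
    this.deadlineNanos = System.nanoTime() + allowedMillis * 1_000_000L;
  }

  void runPhase(String name, Runnable work) {
    if (System.nanoTime() > deadlineNanos) {
      System.out.println(name + " skipped: time budget already exhausted");
      return;
    }
    work.run();
  }

  public static void main(String[] args) throws InterruptedException {
    SharedDeadlineSketch budget = new SharedDeadlineSketch(50);
    budget.runPhase("first pass", () -> { /* collect top groups */ });
    Thread.sleep(60); // pretend the first pass used up the budget
    budget.runPhase("second pass", () -> { /* collect docs per group */ });
  }
}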
// in solr/core/src/java/org/apache/solr/search/Grouping.java
protected Collector createSecondPassCollector() throws IOException {
return null;
}
// in solr/core/src/java/org/apache/solr/search/Grouping.java
public AbstractAllGroupHeadsCollector<?> createAllGroupCollector() throws IOException {
return null;
}
// in solr/core/src/java/org/apache/solr/search/Grouping.java
protected void prepare() throws IOException {
actualGroupsToFind = getMax(offset, numGroups, maxDoc);
}
// in solr/core/src/java/org/apache/solr/search/Grouping.java
protected Collector createFirstPassCollector() throws IOException {
// Ok we don't want groups, but do want a total count
if (actualGroupsToFind <= 0) {
fallBackCollector = new TotalHitCountCollector();
return fallBackCollector;
}
sort = sort == null ? Sort.RELEVANCE : sort;
firstPass = new TermFirstPassGroupingCollector(groupBy, sort, actualGroupsToFind);
return firstPass;
}
// in solr/core/src/java/org/apache/solr/search/Grouping.java
protected Collector createSecondPassCollector() throws IOException {
if (actualGroupsToFind <= 0) {
allGroupsCollector = new TermAllGroupsCollector(groupBy);
return totalCount == TotalCount.grouped ? allGroupsCollector : null;
}
topGroups = format == Format.grouped ? firstPass.getTopGroups(offset, false) : firstPass.getTopGroups(0, false);
if (topGroups == null) {
if (totalCount == TotalCount.grouped) {
allGroupsCollector = new TermAllGroupsCollector(groupBy);
fallBackCollector = new TotalHitCountCollector();
return MultiCollector.wrap(allGroupsCollector, fallBackCollector);
} else {
fallBackCollector = new TotalHitCountCollector();
return fallBackCollector;
}
}
int groupedDocsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
groupedDocsToCollect = Math.max(groupedDocsToCollect, 1);
secondPass = new TermSecondPassGroupingCollector(
groupBy, topGroups, sort, groupSort, groupedDocsToCollect, needScores, needScores, false
);
if (totalCount == TotalCount.grouped) {
allGroupsCollector = new TermAllGroupsCollector(groupBy);
return MultiCollector.wrap(secondPass, allGroupsCollector);
} else {
return secondPass;
}
}
// in solr/core/src/java/org/apache/solr/search/Grouping.java
Override
public AbstractAllGroupHeadsCollector<?> createAllGroupCollector() throws IOException {
Sort sortWithinGroup = groupSort != null ? groupSort : new Sort();
return TermAllGroupHeadsCollector.create(groupBy, sortWithinGroup);
}
// in solr/core/src/java/org/apache/solr/search/Grouping.java
protected void finish() throws IOException {
result = secondPass != null ? secondPass.getTopGroups(0) : null;
if (main) {
mainResult = createSimpleResponse();
return;
}
NamedList groupResult = commonResponse();
if (format == Format.simple) {
groupResult.add("doclist", createSimpleResponse());
return;
}
List groupList = new ArrayList();
groupResult.add("groups", groupList); // grouped={ key={ groups=[
if (result == null) {
return;
}
// handle case of rows=0
if (numGroups == 0) return;
for (GroupDocs<BytesRef> group : result.groups) {
NamedList nl = new SimpleOrderedMap();
groupList.add(nl); // grouped={ key={ groups=[ {
// To keep the response format compatible with trunk.
// In trunk, MutableValue can convert an indexed value to its native type, e.g. string to int.
// The only option I currently see is to use the FieldType for this.
if (group.groupValue != null) {
SchemaField schemaField = searcher.getSchema().getField(groupBy);
FieldType fieldType = schemaField.getType();
String readableValue = fieldType.indexedToReadable(group.groupValue.utf8ToString());
IndexableField field = schemaField.createField(readableValue, 0.0f);
nl.add("groupValue", fieldType.toObject(field));
} else {
nl.add("groupValue", null);
}
addDocList(nl, group);
}
}
// in solr/core/src/java/org/apache/solr/search/Grouping.java
protected void prepare() throws IOException {
actualGroupsToFind = getMax(offset, numGroups, maxDoc);
}
// in solr/core/src/java/org/apache/solr/search/Grouping.java
protected Collector createFirstPassCollector() throws IOException {
DocSet groupFilt = searcher.getDocSet(query);
topCollector = newCollector(groupSort, needScores);
collector = new FilterCollector(groupFilt, topCollector);
return collector;
}
// in solr/core/src/java/org/apache/solr/search/Grouping.java
TopDocsCollector newCollector(Sort sort, boolean needScores) throws IOException {
int groupDocsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
if (sort == null || sort == Sort.RELEVANCE) {
return TopScoreDocCollector.create(groupDocsToCollect, true);
} else {
return TopFieldCollector.create(searcher.weightSort(sort), groupDocsToCollect, false, needScores, needScores, true);
}
}
// in solr/core/src/java/org/apache/solr/search/Grouping.java
protected void finish() throws IOException {
TopDocsCollector topDocsCollector = (TopDocsCollector) collector.getDelegate();
TopDocs topDocs = topDocsCollector.topDocs();
GroupDocs<String> groupDocs = new GroupDocs<String>(topDocs.getMaxScore(), topDocs.totalHits, topDocs.scoreDocs, query.toString(), null);
if (main) {
mainResult = getDocList(groupDocs);
} else {
NamedList rsp = commonResponse();
addDocList(rsp, groupDocs);
}
}
// in solr/core/src/java/org/apache/solr/search/Grouping.java
protected void prepare() throws IOException {
Map context = ValueSource.newContext(searcher);
groupBy.createWeight(context, searcher);
actualGroupsToFind = getMax(offset, numGroups, maxDoc);
}
// in solr/core/src/java/org/apache/solr/search/Grouping.java
protected Collector createFirstPassCollector() throws IOException {
// Ok we don't want groups, but do want a total count
if (actualGroupsToFind <= 0) {
fallBackCollector = new TotalHitCountCollector();
return fallBackCollector;
}
sort = sort == null ? Sort.RELEVANCE : sort;
firstPass = new FunctionFirstPassGroupingCollector(groupBy, context, searcher.weightSort(sort), actualGroupsToFind);
return firstPass;
}
// in solr/core/src/java/org/apache/solr/search/Grouping.java
protected Collector createSecondPassCollector() throws IOException {
if (actualGroupsToFind <= 0) {
allGroupsCollector = new FunctionAllGroupsCollector(groupBy, context);
return totalCount == TotalCount.grouped ? allGroupsCollector : null;
}
topGroups = format == Format.grouped ? firstPass.getTopGroups(offset, false) : firstPass.getTopGroups(0, false);
if (topGroups == null) {
if (totalCount == TotalCount.grouped) {
allGroupsCollector = new FunctionAllGroupsCollector(groupBy, context);
fallBackCollector = new TotalHitCountCollector();
return MultiCollector.wrap(allGroupsCollector, fallBackCollector);
} else {
fallBackCollector = new TotalHitCountCollector();
return fallBackCollector;
}
}
int groupedDocsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
groupedDocsToCollect = Math.max(groupedDocsToCollect, 1);
secondPass = new FunctionSecondPassGroupingCollector(
topGroups, sort, groupSort, groupedDocsToCollect, needScores, needScores, false, groupBy, context
);
if (totalCount == TotalCount.grouped) {
allGroupsCollector = new FunctionAllGroupsCollector(groupBy, context);
return MultiCollector.wrap(secondPass, allGroupsCollector);
} else {
return secondPass;
}
}
// in solr/core/src/java/org/apache/solr/search/Grouping.java
Override
public AbstractAllGroupHeadsCollector<?> createAllGroupCollector() throws IOException {
Sort sortWithinGroup = groupSort != null ? groupSort : new Sort();
return new FunctionAllGroupHeadsCollector(groupBy, context, sortWithinGroup);
}
// in solr/core/src/java/org/apache/solr/search/Grouping.java
protected void finish() throws IOException {
result = secondPass != null ? secondPass.getTopGroups(0) : null;
if (main) {
mainResult = createSimpleResponse();
return;
}
NamedList groupResult = commonResponse();
if (format == Format.simple) {
groupResult.add("doclist", createSimpleResponse());
return;
}
List groupList = new ArrayList();
groupResult.add("groups", groupList); // grouped={ key={ groups=[
if (result == null) {
return;
}
// handle case of rows=0
if (numGroups == 0) return;
for (GroupDocs<MutableValue> group : result.groups) {
NamedList nl = new SimpleOrderedMap();
groupList.add(nl); // grouped={ key={ groups=[ {
nl.add("groupValue", group.groupValue.toObject());
addDocList(nl, group);
}
}
// in solr/core/src/java/org/apache/solr/search/FunctionRangeQuery.java
Override
public void collect(int doc) throws IOException {
if (doc<maxdoc && scorer.matches(doc)) {
delegate.collect(doc);
}
}
// in solr/core/src/java/org/apache/solr/search/FunctionRangeQuery.java
Override
public void setNextReader(AtomicReaderContext context) throws IOException {
maxdoc = context.reader().maxDoc();
FunctionValues dv = rangeFilt.getValueSource().getValues(fcontext, context);
scorer = dv.getRangeScorer(context.reader(), rangeFilt.getLowerVal(), rangeFilt.getUpperVal(), rangeFilt.isIncludeLower(), rangeFilt.isIncludeUpper());
super.setNextReader(context);
}
// in solr/core/src/java/org/apache/solr/search/BitDocSet.java
Override
public Filter getTopFilter() {
final OpenBitSet bs = bits;
// TODO: if cardinality isn't cached, do a quick measure of sparseness
// and return null from bits() if too sparse.
return new Filter() {
@Override
public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
AtomicReader reader = context.reader();
// all Solr DocSets that are used as filters only include live docs
final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs);
if (context.isTopLevel) {
return BitsFilteredDocIdSet.wrap(bs, acceptDocs);
}
final int base = context.docBase;
final int maxDoc = reader.maxDoc();
final int max = base + maxDoc; // one past the max doc in this segment.
return BitsFilteredDocIdSet.wrap(new DocIdSet() {
@Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
int pos=base-1;
int adjustedDoc=-1;
@Override
public int docID() {
return adjustedDoc;
}
@Override
public int nextDoc() throws IOException {
pos = bs.nextSetBit(pos+1);
return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
}
@Override
public int advance(int target) throws IOException {
if (target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
pos = bs.nextSetBit(target+base);
return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
}
};
}
@Override
public boolean isCacheable() {
return true;
}
@Override
public Bits bits() throws IOException {
return new Bits() {
@Override
public boolean get(int index) {
return bs.fastGet(index + base);
}
@Override
public int length() {
return maxDoc;
}
};
}
}, acceptDocs2);
}
};
}
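// getTopFilter() above exposes one whole-index bitset to each segment by subtracting the
// segment's docBase: nextDoc/advance walk the global bits but report segment-local ids.
// A self-contained sketch of that offset mapping with java.util.BitSet standing in for
// OpenBitSet; names are illustrative.
import java.util.BitSet;

public class SegmentViewSketch {
  /** Print the global bits that fall inside one segment, as segment-local doc ids. */
  static void printSegmentDocs(BitSet globalBits, int docBase, int segmentMaxDoc) {
    int max = docBase + segmentMaxDoc; // one past the last global doc of this segment
    for (int pos = globalBits.nextSetBit(docBase); pos >= 0 && pos < max;
         pos = globalBits.nextSetBit(pos + 1)) {
      System.out.println("segment-local doc " + (pos - docBase));
    }
  }

  public static void main(String[] args) {
    BitSet global = new BitSet();
    global.set(3); global.set(12); global.set(17);
    // a segment covering global docs [10, 20) with docBase=10 sees local docs 2 and 7
    printSegmentDocs(global, 10, 10);
  }
}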
// in solr/core/src/java/org/apache/solr/search/BitDocSet.java
Override
public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
AtomicReader reader = context.reader();
// all Solr DocSets that are used as filters only include live docs
final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs);
if (context.isTopLevel) {
return BitsFilteredDocIdSet.wrap(bs, acceptDocs);
}
final int base = context.docBase;
final int maxDoc = reader.maxDoc();
final int max = base + maxDoc; // one past the max doc in this segment.
return BitsFilteredDocIdSet.wrap(new DocIdSet() {
@Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
int pos=base-1;
int adjustedDoc=-1;
@Override
public int docID() {
return adjustedDoc;
}
@Override
public int nextDoc() throws IOException {
pos = bs.nextSetBit(pos+1);
return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
}
@Override
public int advance(int target) throws IOException {
if (target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
pos = bs.nextSetBit(target+base);
return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
}
};
}
@Override
public boolean isCacheable() {
return true;
}
@Override
public Bits bits() throws IOException {
return new Bits() {
@Override
public boolean get(int index) {
return bs.fastGet(index + base);
}
@Override
public int length() {
return maxDoc;
}
};
}
}, acceptDocs2);
}
// in solr/core/src/java/org/apache/solr/search/BitDocSet.java
Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
int pos=base-1;
int adjustedDoc=-1;
@Override
public int docID() {
return adjustedDoc;
}
@Override
public int nextDoc() throws IOException {
pos = bs.nextSetBit(pos+1);
return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
}
@Override
public int advance(int target) throws IOException {
if (target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
pos = bs.nextSetBit(target+base);
return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
}
};
}
// in solr/core/src/java/org/apache/solr/search/BitDocSet.java
Override
public int nextDoc() throws IOException {
pos = bs.nextSetBit(pos+1);
return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
}
// in solr/core/src/java/org/apache/solr/search/BitDocSet.java
Override
public int advance(int target) throws IOException {
if (target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
pos = bs.nextSetBit(target+base);
return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
}
// in solr/core/src/java/org/apache/solr/search/BitDocSet.java
Override
public Bits bits() throws IOException {
return new Bits() {
@Override
public boolean get(int index) {
return bs.fastGet(index + base);
}
@Override
public int length() {
return maxDoc;
}
};
}
// in solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
return this;
}
// in solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java
Override
public float getValueForNormalization() throws IOException {
queryWeight = getBoost();
return queryWeight * queryWeight;
}
// in solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
return new ConstantScorer(context, this, queryWeight, acceptDocs);
}
// in solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java
Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
ConstantScorer cs = new ConstantScorer(context, this, queryWeight, context.reader().getLiveDocs());
boolean exists = cs.docIdSetIterator.advance(doc) == doc;
ComplexExplanation result = new ComplexExplanation();
if (exists) {
result.setDescription("ConstantScoreQuery(" + filter
+ "), product of:");
result.setValue(queryWeight);
result.setMatch(Boolean.TRUE);
result.addDetail(new Explanation(getBoost(), "boost"));
result.addDetail(new Explanation(queryNorm,"queryNorm"));
} else {
result.setDescription("ConstantScoreQuery(" + filter
+ ") doesn't match id " + doc);
result.setValue(0);
result.setMatch(Boolean.FALSE);
}
return result;
}
// in solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java
Override
public int nextDoc() throws IOException {
return docIdSetIterator.nextDoc();
}
// in solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java
Override
public float score() throws IOException {
return theScore;
}
// in solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java
Override
public int advance(int target) throws IOException {
return docIdSetIterator.advance(target);
}
// in solr/core/src/java/org/apache/solr/search/LRUCache.java
public void warm(SolrIndexSearcher searcher, SolrCache<K,V> old) throws IOException {
if (regenerator==null) return;
long warmingStartTime = System.currentTimeMillis();
LRUCache<K,V> other = (LRUCache<K,V>)old;
// warm entries
if (isAutowarmingOn()) {
Object[] keys,vals = null;
// Don't do the autowarming in the synchronized block, just pull out the keys and values.
synchronized (other.map) {
int sz = autowarm.getWarmCount(other.map.size());
keys = new Object[sz];
vals = new Object[sz];
Iterator<Map.Entry<K, V>> iter = other.map.entrySet().iterator();
// iteration goes from oldest (least recently used) to most recently used,
// so we need to skip over the oldest entries.
int skip = other.map.size() - sz;
for (int i=0; i<skip; i++) iter.next();
for (int i=0; i<sz; i++) {
Map.Entry<K,V> entry = iter.next();
keys[i]=entry.getKey();
vals[i]=entry.getValue();
}
}
// autowarm from the oldest to the newest entries so that the ordering will be
// correct in the new cache.
for (int i=0; i<keys.length; i++) {
try {
boolean continueRegen = regenerator.regenerateItem(searcher, this, old, keys[i], vals[i]);
if (!continueRegen) break;
}
catch (Throwable e) {
SolrException.log(log,"Error during auto-warming of key:" + keys[i], e);
}
}
}
warmupTime = System.currentTimeMillis() - warmingStartTime;
}
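// warm() above snapshots the most recently used tail of the old cache inside the lock,
// then replays those entries oldest-first outside the lock so the new cache ends up with
// the same recency ordering. A self-contained sketch of that snapshot-and-replay pattern
// using an access-ordered LinkedHashMap; names are illustrative and the "regeneration"
// here is just a put.
import java.util.LinkedHashMap;
import java.util.Map;

public class AutowarmSketch {
  /** Copy the newest warmCount entries of old into fresh, oldest of those first. */
  static <K, V> void warm(LinkedHashMap<K, V> old, LinkedHashMap<K, V> fresh, int warmCount) {
    Object[] keys, vals;
    synchronized (old) { // snapshot under the lock, regenerate outside it
      int sz = Math.min(warmCount, old.size());
      keys = new Object[sz];
      vals = new Object[sz];
      int skip = old.size() - sz, i = 0, j = 0;
      for (Map.Entry<K, V> e : old.entrySet()) {
        if (i++ < skip) continue; // iteration order is oldest -> newest, skip the oldest
        keys[j] = e.getKey();
        vals[j++] = e.getValue();
      }
    }
    for (int i = 0; i < keys.length; i++) { // oldest first keeps recency order in the new cache
      @SuppressWarnings("unchecked") K k = (K) keys[i];
      @SuppressWarnings("unchecked") V v = (V) vals[i];
      fresh.put(k, v); // a real cache would call its regenerator here
    }
  }

  public static void main(String[] args) {
    LinkedHashMap<String, Integer> old = new LinkedHashMap<>(16, 0.75f, true); // access order
    old.put("a", 1); old.put("b", 2); old.put("c", 3);
    old.get("a"); // touch "a" so it becomes the most recently used entry
    LinkedHashMap<String, Integer> fresh = new LinkedHashMap<>(16, 0.75f, true);
    warm(old, fresh, 2);
    System.out.println(fresh.keySet()); // [c, a] -> the two most recently used entries
  }
}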
// in solr/core/src/java/org/apache/solr/search/QueryParsing.java
static FieldType writeFieldName(String name, IndexSchema schema, Appendable out, int flags) throws IOException {
FieldType ft = null;
ft = schema.getFieldTypeNoEx(name);
out.append(name);
if (ft == null) {
out.append("(UNKNOWN FIELD " + name + ')');
}
out.append(':');
return ft;
}
// in solr/core/src/java/org/apache/solr/search/QueryParsing.java
static void writeFieldVal(String val, FieldType ft, Appendable out, int flags) throws IOException {
if (ft != null) {
try {
out.append(ft.indexedToReadable(val));
} catch (Exception e) {
out.append("EXCEPTION(val=");
out.append(val);
out.append(")");
}
} else {
out.append(val);
}
}
// in solr/core/src/java/org/apache/solr/search/QueryParsing.java
static void writeFieldVal(BytesRef val, FieldType ft, Appendable out, int flags) throws IOException {
if (ft != null) {
try {
CharsRef readable = new CharsRef();
ft.indexedToReadable(val, readable);
out.append(readable);
} catch (Exception e) {
out.append("EXCEPTION(val=");
out.append(val.utf8ToString());
out.append(")");
}
} else {
out.append(val.utf8ToString());
}
}
// in solr/core/src/java/org/apache/solr/search/QueryParsing.java
public static void toString(Query query, IndexSchema schema, Appendable out, int flags) throws IOException {
boolean writeBoost = true;
if (query instanceof TermQuery) {
TermQuery q = (TermQuery) query;
Term t = q.getTerm();
FieldType ft = writeFieldName(t.field(), schema, out, flags);
writeFieldVal(t.bytes(), ft, out, flags);
} else if (query instanceof TermRangeQuery) {
TermRangeQuery q = (TermRangeQuery) query;
String fname = q.getField();
FieldType ft = writeFieldName(fname, schema, out, flags);
out.append(q.includesLower() ? '[' : '{');
BytesRef lt = q.getLowerTerm();
BytesRef ut = q.getUpperTerm();
if (lt == null) {
out.append('*');
} else {
writeFieldVal(lt, ft, out, flags);
}
out.append(" TO ");
if (ut == null) {
out.append('*');
} else {
writeFieldVal(ut, ft, out, flags);
}
out.append(q.includesUpper() ? ']' : '}');
} else if (query instanceof NumericRangeQuery) {
NumericRangeQuery q = (NumericRangeQuery) query;
String fname = q.getField();
FieldType ft = writeFieldName(fname, schema, out, flags);
out.append(q.includesMin() ? '[' : '{');
Number lt = q.getMin();
Number ut = q.getMax();
if (lt == null) {
out.append('*');
} else {
out.append(lt.toString());
}
out.append(" TO ");
if (ut == null) {
out.append('*');
} else {
out.append(ut.toString());
}
out.append(q.includesMax() ? ']' : '}');
} else if (query instanceof BooleanQuery) {
BooleanQuery q = (BooleanQuery) query;
boolean needParens = false;
if (q.getBoost() != 1.0 || q.getMinimumNumberShouldMatch() != 0 || q.isCoordDisabled()) {
needParens = true;
}
if (needParens) {
out.append('(');
}
boolean first = true;
for (BooleanClause c : q.clauses()) {
if (!first) {
out.append(' ');
} else {
first = false;
}
if (c.isProhibited()) {
out.append('-');
} else if (c.isRequired()) {
out.append('+');
}
Query subQuery = c.getQuery();
boolean wrapQuery = false;
// TODO: may need to put parens around other types
// of queries too, depending on future syntax.
if (subQuery instanceof BooleanQuery) {
wrapQuery = true;
}
if (wrapQuery) {
out.append('(');
}
toString(subQuery, schema, out, flags);
if (wrapQuery) {
out.append(')');
}
}
if (needParens) {
out.append(')');
}
if (q.getMinimumNumberShouldMatch() > 0) {
out.append('~');
out.append(Integer.toString(q.getMinimumNumberShouldMatch()));
}
if (q.isCoordDisabled()) {
out.append("/no_coord");
}
} else if (query instanceof PrefixQuery) {
PrefixQuery q = (PrefixQuery) query;
Term prefix = q.getPrefix();
FieldType ft = writeFieldName(prefix.field(), schema, out, flags);
out.append(prefix.text());
out.append('*');
} else if (query instanceof WildcardQuery) {
out.append(query.toString());
writeBoost = false;
} else if (query instanceof FuzzyQuery) {
out.append(query.toString());
writeBoost = false;
} else if (query instanceof ConstantScoreQuery) {
out.append(query.toString());
writeBoost = false;
} else {
out.append(query.getClass().getSimpleName()
+ '(' + query.toString() + ')');
writeBoost = false;
}
if (writeBoost && query.getBoost() != 1.0f) {
out.append("^");
out.append(Float.toString(query.getBoost()));
}
}
// in solr/core/src/java/org/apache/solr/search/grouping/distributed/command/TopGroupsFieldCommand.java
public List<Collector> create() throws IOException {
if (firstPhaseGroups.isEmpty()) {
return Collections.emptyList();
}
List<Collector> collectors = new ArrayList<Collector>();
secondPassCollector = new TermSecondPassGroupingCollector(
field.getName(), firstPhaseGroups, groupSort, sortWithinGroup, maxDocPerGroup, needScores, needMaxScore, true
);
collectors.add(secondPassCollector);
return collectors;
}
// in solr/core/src/java/org/apache/solr/search/grouping/distributed/command/SearchGroupsFieldCommand.java
public List<Collector> create() throws IOException {
List<Collector> collectors = new ArrayList<Collector>();
if (topNGroups > 0) {
firstPassGroupingCollector = new TermFirstPassGroupingCollector(field.getName(), groupSort, topNGroups);
collectors.add(firstPassGroupingCollector);
}
if (includeGroupCount) {
allGroupsCollector = new TermAllGroupsCollector(field.getName());
collectors.add(allGroupsCollector);
}
return collectors;
}
// in solr/core/src/java/org/apache/solr/search/grouping/distributed/command/QueryCommand.java
public Builder setDocSet(SolrIndexSearcher searcher) throws IOException {
return setDocSet(searcher.getDocSet(query));
}
// in solr/core/src/java/org/apache/solr/search/grouping/distributed/command/QueryCommand.java
public List<Collector> create() throws IOException {
if (sort == null || sort == Sort.RELEVANCE) {
collector = TopScoreDocCollector.create(docsToCollect, true);
} else {
collector = TopFieldCollector.create(sort, docsToCollect, true, needScores, needScores, true);
}
filterCollector = new FilterCollector(docSet, collector);
return Arrays.asList((Collector) filterCollector);
}
// in solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SearchGroupsResultTransformer.java
public NamedList transform(List<Command> data) throws IOException {
NamedList<NamedList> result = new NamedList<NamedList>();
for (Command command : data) {
final NamedList<Object> commandResult = new NamedList<Object>();
if (SearchGroupsFieldCommand.class.isInstance(command)) {
SearchGroupsFieldCommand fieldCommand = (SearchGroupsFieldCommand) command;
Pair<Integer, Collection<SearchGroup<BytesRef>>> pair = fieldCommand.result();
Integer groupedCount = pair.getA();
Collection<SearchGroup<BytesRef>> searchGroups = pair.getB();
if (searchGroups != null) {
commandResult.add("topGroups", serializeSearchGroup(searchGroups, fieldCommand.getGroupSort()));
}
if (groupedCount != null) {
commandResult.add("groupCount", groupedCount);
}
} else {
continue;
}
result.add(command.getKey(), commandResult);
}
return result;
}
// in solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/SearchGroupsResultTransformer.java
public Map<String, Pair<Integer, Collection<SearchGroup<BytesRef>>>> transformToNative(NamedList<NamedList> shardResponse, Sort groupSort, Sort sortWithinGroup, String shard) throws IOException {
Map<String, Pair<Integer, Collection<SearchGroup<BytesRef>>>> result = new HashMap<String, Pair<Integer, Collection<SearchGroup<BytesRef>>>>();
for (Map.Entry<String, NamedList> command : shardResponse) {
List<SearchGroup<BytesRef>> searchGroups = new ArrayList<SearchGroup<BytesRef>>();
NamedList topGroupsAndGroupCount = command.getValue();
@SuppressWarnings("unchecked")
NamedList<List<Comparable>> rawSearchGroups = (NamedList<List<Comparable>>) topGroupsAndGroupCount.get("topGroups");
if (rawSearchGroups != null) {
for (Map.Entry<String, List<Comparable>> rawSearchGroup : rawSearchGroups){
SearchGroup<BytesRef> searchGroup = new SearchGroup<BytesRef>();
searchGroup.groupValue = rawSearchGroup.getKey() != null ? new BytesRef(rawSearchGroup.getKey()) : null;
searchGroup.sortValues = rawSearchGroup.getValue().toArray(new Comparable[rawSearchGroup.getValue().size()]);
searchGroups.add(searchGroup);
}
}
Integer groupCount = (Integer) topGroupsAndGroupCount.get("groupCount");
result.put(command.getKey(), new Pair<Integer, Collection<SearchGroup<BytesRef>>>(groupCount, searchGroups));
}
return result;
}
// in solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/TopGroupsResultTransformer.java
public NamedList transform(List<Command> data) throws IOException {
NamedList<NamedList> result = new NamedList<NamedList>();
for (Command command : data) {
NamedList commandResult;
if (TopGroupsFieldCommand.class.isInstance(command)) {
TopGroupsFieldCommand fieldCommand = (TopGroupsFieldCommand) command;
SchemaField groupField = rb.req.getSearcher().getSchema().getField(fieldCommand.getKey());
commandResult = serializeTopGroups(fieldCommand.result(), groupField);
} else if (QueryCommand.class.isInstance(command)) {
QueryCommand queryCommand = (QueryCommand) command;
commandResult = serializeTopDocs(queryCommand.result());
} else {
commandResult = null;
}
result.add(command.getKey(), commandResult);
}
return result;
}
// in solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/TopGroupsResultTransformer.java
protected NamedList serializeTopGroups(TopGroups<BytesRef> data, SchemaField groupField) throws IOException {
NamedList<Object> result = new NamedList<Object>();
result.add("totalGroupedHitCount", data.totalGroupedHitCount);
result.add("totalHitCount", data.totalHitCount);
if (data.totalGroupCount != null) {
result.add("totalGroupCount", data.totalGroupCount);
}
CharsRef spare = new CharsRef();
SchemaField uniqueField = rb.req.getSearcher().getSchema().getUniqueKeyField();
for (GroupDocs<BytesRef> searchGroup : data.groups) {
NamedList<Object> groupResult = new NamedList<Object>();
groupResult.add("totalHits", searchGroup.totalHits);
if (!Float.isNaN(searchGroup.maxScore)) {
groupResult.add("maxScore", searchGroup.maxScore);
}
List<NamedList<Object>> documents = new ArrayList<NamedList<Object>>();
for (int i = 0; i < searchGroup.scoreDocs.length; i++) {
NamedList<Object> document = new NamedList<Object>();
documents.add(document);
Document doc = retrieveDocument(uniqueField, searchGroup.scoreDocs[i].doc);
document.add("id", uniqueField.getType().toExternal(doc.getField(uniqueField.getName())));
if (!Float.isNaN(searchGroup.scoreDocs[i].score)) {
document.add("score", searchGroup.scoreDocs[i].score);
}
if (!(searchGroup.scoreDocs[i] instanceof FieldDoc)) {
continue;
}
FieldDoc fieldDoc = (FieldDoc) searchGroup.scoreDocs[i];
Object[] convertedSortValues = new Object[fieldDoc.fields.length];
for (int j = 0; j < fieldDoc.fields.length; j++) {
Object sortValue = fieldDoc.fields[j];
Sort sortWithinGroup = rb.getGroupingSpec().getSortWithinGroup();
SchemaField field = sortWithinGroup.getSort()[j].getField() != null ? rb.req.getSearcher().getSchema().getFieldOrNull(sortWithinGroup.getSort()[j].getField()) : null;
if (field != null) {
FieldType fieldType = field.getType();
if (sortValue instanceof BytesRef) {
UnicodeUtil.UTF8toUTF16((BytesRef)sortValue, spare);
String indexedValue = spare.toString();
sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable(indexedValue), 0.0f));
} else if (sortValue instanceof String) {
sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable((String) sortValue), 0.0f));
}
}
convertedSortValues[j] = sortValue;
}
document.add("sortValues", convertedSortValues);
}
groupResult.add("documents", documents);
String groupValue = searchGroup.groupValue != null ? groupField.getType().indexedToReadable(searchGroup.groupValue.utf8ToString()): null;
result.add(groupValue, groupResult);
}
return result;
}
// in solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/TopGroupsResultTransformer.java
protected NamedList serializeTopDocs(QueryCommandResult result) throws IOException {
NamedList<Object> queryResult = new NamedList<Object>();
queryResult.add("matches", result.getMatches());
queryResult.add("totalHits", result.getTopDocs().totalHits);
if (rb.getGroupingSpec().isNeedScore()) {
queryResult.add("maxScore", result.getTopDocs().getMaxScore());
}
List<NamedList> documents = new ArrayList<NamedList>();
queryResult.add("documents", documents);
SchemaField uniqueField = rb.req.getSearcher().getSchema().getUniqueKeyField();
CharsRef spare = new CharsRef();
for (ScoreDoc scoreDoc : result.getTopDocs().scoreDocs) {
NamedList<Object> document = new NamedList<Object>();
documents.add(document);
Document doc = retrieveDocument(uniqueField, scoreDoc.doc);
document.add("id", uniqueField.getType().toExternal(doc.getField(uniqueField.getName())));
if (rb.getGroupingSpec().isNeedScore()) {
document.add("score", scoreDoc.score);
}
if (!FieldDoc.class.isInstance(scoreDoc)) {
continue;
}
FieldDoc fieldDoc = (FieldDoc) scoreDoc;
Object[] convertedSortValues = new Object[fieldDoc.fields.length];
for (int j = 0; j < fieldDoc.fields.length; j++) {
Object sortValue = fieldDoc.fields[j];
Sort groupSort = rb.getGroupingSpec().getGroupSort();
SchemaField field = groupSort.getSort()[j].getField() != null ? rb.req.getSearcher().getSchema().getFieldOrNull(groupSort.getSort()[j].getField()) : null;
if (field != null) {
FieldType fieldType = field.getType();
if (sortValue instanceof BytesRef) {
UnicodeUtil.UTF8toUTF16((BytesRef)sortValue, spare);
String indexedValue = spare.toString();
sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable(indexedValue), 0.0f));
} else if (sortValue instanceof String) {
sortValue = fieldType.toObject(field.createField(fieldType.indexedToReadable((String) sortValue), 0.0f));
}
}
convertedSortValues[j] = sortValue;
}
document.add("sortValues", convertedSortValues);
}
return queryResult;
}
// in solr/core/src/java/org/apache/solr/search/grouping/distributed/shardresultserializer/TopGroupsResultTransformer.java
private Document retrieveDocument(final SchemaField uniqueField, int doc) throws IOException {
DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(uniqueField.getName());
rb.req.getSearcher().doc(doc, visitor);
return visitor.getDocument();
}
// in solr/core/src/java/org/apache/solr/search/grouping/CommandHandler.java
private DocSet computeGroupedDocSet(Query query, Filter luceneFilter, List<Collector> collectors) throws IOException {
Command firstCommand = commands.get(0);
AbstractAllGroupHeadsCollector termAllGroupHeadsCollector =
TermAllGroupHeadsCollector.create(firstCommand.getKey(), firstCommand.getSortWithinGroup());
if (collectors.isEmpty()) {
searchWithTimeLimiter(query, luceneFilter, termAllGroupHeadsCollector);
} else {
collectors.add(termAllGroupHeadsCollector);
searchWithTimeLimiter(query, luceneFilter, MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()])));
}
int maxDoc = searcher.maxDoc();
long[] bits = termAllGroupHeadsCollector.retrieveGroupHeads(maxDoc).getBits();
return new BitDocSet(new OpenBitSet(bits, bits.length));
}
// in solr/core/src/java/org/apache/solr/search/grouping/CommandHandler.java
private DocSet computeDocSet(Query query, Filter luceneFilter, List<Collector> collectors) throws IOException {
int maxDoc = searcher.maxDoc();
DocSetCollector docSetCollector;
if (collectors.isEmpty()) {
docSetCollector = new DocSetCollector(maxDoc >> 6, maxDoc);
} else {
Collector wrappedCollectors = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
docSetCollector = new DocSetDelegateCollector(maxDoc >> 6, maxDoc, wrappedCollectors);
}
searchWithTimeLimiter(query, luceneFilter, docSetCollector);
return docSetCollector.getDocSet();
}
// in solr/core/src/java/org/apache/solr/search/grouping/CommandHandler.java
private void searchWithTimeLimiter(final Query query, final Filter luceneFilter, Collector collector) throws IOException {
if (queryCommand.getTimeAllowed() > 0 ) {
collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), queryCommand.getTimeAllowed());
}
TotalHitCountCollector hitCountCollector = new TotalHitCountCollector();
if (includeHitCount) {
collector = MultiCollector.wrap(collector, hitCountCollector);
}
try {
searcher.search(query, luceneFilter, collector);
} catch (TimeLimitingCollector.TimeExceededException x) {
partialResults = true;
logger.warn( "Query: " + query + "; " + x.getMessage() );
}
if (includeHitCount) {
totalHitCount = hitCountCollector.getTotalHits();
}
}
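// searchWithTimeLimiter() above optionally wraps the real collector together with a
// TotalHitCountCollector so the hit count is gathered in the same pass. A self-contained
// sketch of that count-while-delegating idea; IntConsumer stands in for Lucene's Collector
// and all names are illustrative.
import java.util.ArrayList;
import java.util.List;
import java.util.function.IntConsumer;

public class CountingDelegateSketch {
  /** Wraps a doc consumer, counting every doc before delegating. */
  static class CountingCollector implements IntConsumer {
    final IntConsumer delegate;
    int totalHits;
    CountingCollector(IntConsumer delegate) { this.delegate = delegate; }
    @Override public void accept(int doc) {
      totalHits++;          // count in the same pass...
      delegate.accept(doc); // ...then hand the doc to the real collector
    }
  }

  public static void main(String[] args) {
    List<Integer> collected = new ArrayList<>();
    CountingCollector c = new CountingCollector(collected::add);
    for (int doc : new int[] {2, 5, 9}) c.accept(doc);
    System.out.println(c.totalHits + " hits: " + collected); // 3 hits: [2, 5, 9]
  }
}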
// in solr/core/src/java/org/apache/solr/search/grouping/collector/FilterCollector.java
public void setScorer(Scorer scorer) throws IOException {
delegate.setScorer(scorer);
}
// in solr/core/src/java/org/apache/solr/search/grouping/collector/FilterCollector.java
public void collect(int doc) throws IOException {
matches++;
if (filter.exists(doc + docBase)) {
delegate.collect(doc);
}
}
// in solr/core/src/java/org/apache/solr/search/grouping/collector/FilterCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
this.docBase = context.docBase;
delegate.setNextReader(context);
}
// in solr/core/src/java/org/apache/solr/search/DocSetDelegateCollector.java
Override
public void collect(int doc) throws IOException {
collector.collect(doc);
doc += base;
// optimistically collect the first docs in an array
// in case the total number will be small enough to represent
// as a small set like SortedIntDocSet instead...
// Storing in this array will be quicker to convert
// than scanning through a potentially huge bit vector.
// FUTURE: when search methods all start returning docs in order, maybe
// we could have a ListDocSet() and use the collected array directly.
if (pos < scratch.length) {
scratch[pos]=doc;
} else {
// this conditional could be removed if BitSet was preallocated, but that
// would take up more memory, and add more GC time...
if (bits==null) bits = new OpenBitSet(maxDoc);
bits.fastSet(doc);
}
pos++;
}
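// collect() above optimistically stores the first matches in a small int array and only
// allocates the full bit vector once that array overflows, keeping tiny result sets cheap.
// A self-contained sketch of the same upgrade-on-overflow strategy; java.util.BitSet stands
// in for OpenBitSet and the scratch size is deliberately tiny.
import java.util.BitSet;

public class UpgradingDocSetSketch {
  private final int[] scratch = new int[4]; // small array for the optimistic case
  private final int maxDoc;
  private BitSet bits;                      // allocated lazily on overflow
  private int pos;

  UpgradingDocSetSketch(int maxDoc) { this.maxDoc = maxDoc; }

  void collect(int doc) {
    if (pos < scratch.length) {
      scratch[pos] = doc;                          // cheap path: just remember the doc id
    } else {
      if (bits == null) bits = new BitSet(maxDoc); // overflow: fall back to a bit vector
      bits.set(doc);
    }
    pos++;
  }

  int size() { return pos; }

  public static void main(String[] args) {
    UpgradingDocSetSketch c = new UpgradingDocSetSketch(100);
    for (int doc : new int[] {1, 4, 9, 16, 25, 36}) c.collect(doc);
    System.out.println(c.size() + " docs, bitset allocated: " + (c.bits != null)); // 6 docs, true
  }
}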
// in solr/core/src/java/org/apache/solr/search/DocSetDelegateCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
collector.setScorer(scorer);
}
// in solr/core/src/java/org/apache/solr/search/DocSetDelegateCollector.java
Override
public void setNextReader(AtomicReaderContext context) throws IOException {
collector.setNextReader(context);
this.base = context.docBase;
}
// in solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
Override
public Query rewrite(IndexReader reader) throws IOException {
// don't rewrite the subQuery
return this;
}
// in solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new JoinQueryWeight((SolrIndexSearcher)searcher);
}
// in solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
Override
public void close() throws IOException {
ref.decref();
}
// in solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
Override
public void close() throws IOException {
fromCore.close();
}
// in solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
Override
public float getValueForNormalization() throws IOException {
queryWeight = getBoost();
return queryWeight * queryWeight;
}
// in solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
if (filter == null) {
boolean debug = rb != null && rb.isDebug();
long start = debug ? System.currentTimeMillis() : 0;
resultSet = getDocSet();
long end = debug ? System.currentTimeMillis() : 0;
if (debug) {
SimpleOrderedMap<Object> dbg = new SimpleOrderedMap<Object>();
dbg.add("time", (end-start));
dbg.add("fromSetSize", fromSetSize); // the input
dbg.add("toSetSize", resultSet.size()); // the output
dbg.add("fromTermCount", fromTermCount);
dbg.add("fromTermTotalDf", fromTermTotalDf);
dbg.add("fromTermDirectCount", fromTermDirectCount);
dbg.add("fromTermHits", fromTermHits);
dbg.add("fromTermHitsTotalDf", fromTermHitsTotalDf);
dbg.add("toTermHits", toTermHits);
dbg.add("toTermHitsTotalDf", toTermHitsTotalDf);
dbg.add("toTermDirectCount", toTermDirectCount);
dbg.add("smallSetsDeferred", smallSetsDeferred);
dbg.add("toSetDocsAdded", resultListDocs);
// TODO: perhaps synchronize addDebug in the future...
rb.addDebug(dbg, "join", JoinQuery.this.toString());
}
filter = resultSet.getTopFilter();
}
// Although this set only includes live docs, other filters can be pushed down to queries.
DocIdSet readerSet = filter.getDocIdSet(context, acceptDocs);
if (readerSet == null) readerSet=DocIdSet.EMPTY_DOCIDSET;
return new JoinScorer(this, readerSet.iterator(), getBoost());
}
// in solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
public DocSet getDocSet() throws IOException {
OpenBitSet resultBits = null;
// minimum docFreq to use the cache
int minDocFreqFrom = Math.max(5, fromSearcher.maxDoc() >> 13);
int minDocFreqTo = Math.max(5, toSearcher.maxDoc() >> 13);
// use a smaller size than normal since we will need to sort and dedup the results
int maxSortedIntSize = Math.max(10, toSearcher.maxDoc() >> 10);
DocSet fromSet = fromSearcher.getDocSet(q);
fromSetSize = fromSet.size();
List<DocSet> resultList = new ArrayList<DocSet>(10);
// make sure we have a set that is fast for random access, if we will use it for that
DocSet fastForRandomSet = fromSet;
if (minDocFreqFrom>0 && fromSet instanceof SortedIntDocSet) {
SortedIntDocSet sset = (SortedIntDocSet)fromSet;
fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
}
Fields fromFields = fromSearcher.getAtomicReader().fields();
Fields toFields = fromSearcher==toSearcher ? fromFields : toSearcher.getAtomicReader().fields();
if (fromFields == null) return DocSet.EMPTY;
Terms terms = fromFields.terms(fromField);
Terms toTerms = toFields.terms(toField);
if (terms == null || toTerms==null) return DocSet.EMPTY;
String prefixStr = TrieField.getMainValuePrefix(fromSearcher.getSchema().getFieldType(fromField));
BytesRef prefix = prefixStr == null ? null : new BytesRef(prefixStr);
BytesRef term = null;
TermsEnum termsEnum = terms.iterator(null);
TermsEnum toTermsEnum = toTerms.iterator(null);
SolrIndexSearcher.DocsEnumState fromDeState = null;
SolrIndexSearcher.DocsEnumState toDeState = null;
if (prefix == null) {
term = termsEnum.next();
} else {
if (termsEnum.seekCeil(prefix, true) != TermsEnum.SeekStatus.END) {
term = termsEnum.term();
}
}
Bits fromLiveDocs = fromSearcher.getAtomicReader().getLiveDocs();
Bits toLiveDocs = fromSearcher == toSearcher ? fromLiveDocs : toSearcher.getAtomicReader().getLiveDocs();
fromDeState = new SolrIndexSearcher.DocsEnumState();
fromDeState.fieldName = fromField;
fromDeState.liveDocs = fromLiveDocs;
fromDeState.termsEnum = termsEnum;
fromDeState.docsEnum = null;
fromDeState.minSetSizeCached = minDocFreqFrom;
toDeState = new SolrIndexSearcher.DocsEnumState();
toDeState.fieldName = toField;
toDeState.liveDocs = toLiveDocs;
toDeState.termsEnum = toTermsEnum;
toDeState.docsEnum = null;
toDeState.minSetSizeCached = minDocFreqTo;
while (term != null) {
if (prefix != null && !StringHelper.startsWith(term, prefix))
break;
fromTermCount++;
boolean intersects = false;
int freq = termsEnum.docFreq();
fromTermTotalDf++;
if (freq < minDocFreqFrom) {
fromTermDirectCount++;
// OK to skip liveDocs, since we check for intersection with docs matching query
fromDeState.docsEnum = fromDeState.termsEnum.docs(null, fromDeState.docsEnum, false);
DocsEnum docsEnum = fromDeState.docsEnum;
if (docsEnum instanceof MultiDocsEnum) {
MultiDocsEnum.EnumWithSlice[] subs = ((MultiDocsEnum)docsEnum).getSubs();
int numSubs = ((MultiDocsEnum)docsEnum).getNumSubs();
outer: for (int subindex = 0; subindex<numSubs; subindex++) {
MultiDocsEnum.EnumWithSlice sub = subs[subindex];
if (sub.docsEnum == null) continue;
int base = sub.slice.start;
int docid;
while ((docid = sub.docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid+base)) {
intersects = true;
break outer;
}
}
}
} else {
int docid;
while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid)) {
intersects = true;
break;
}
}
}
} else {
// use the filter cache
DocSet fromTermSet = fromSearcher.getDocSet(fromDeState);
intersects = fromSet.intersects(fromTermSet);
}
if (intersects) {
fromTermHits++;
fromTermHitsTotalDf++;
TermsEnum.SeekStatus status = toTermsEnum.seekCeil(term);
if (status == TermsEnum.SeekStatus.END) break;
if (status == TermsEnum.SeekStatus.FOUND) {
toTermHits++;
int df = toTermsEnum.docFreq();
toTermHitsTotalDf += df;
if (resultBits==null && df + resultListDocs > maxSortedIntSize && resultList.size() > 0) {
resultBits = new OpenBitSet(toSearcher.maxDoc());
}
// if we don't have a bitset yet, or if the resulting set will be too large
// use the filterCache to get a DocSet
if (toTermsEnum.docFreq() >= minDocFreqTo || resultBits == null) {
// use filter cache
DocSet toTermSet = toSearcher.getDocSet(toDeState);
resultListDocs += toTermSet.size();
if (resultBits != null) {
toTermSet.setBitsOn(resultBits);
} else {
if (toTermSet instanceof BitDocSet) {
resultBits = (OpenBitSet)((BitDocSet)toTermSet).bits.clone();
} else {
resultList.add(toTermSet);
}
}
} else {
toTermDirectCount++;
// need to use liveDocs here so we don't map to any deleted ones
toDeState.docsEnum = toDeState.termsEnum.docs(toDeState.liveDocs, toDeState.docsEnum, false);
DocsEnum docsEnum = toDeState.docsEnum;
if (docsEnum instanceof MultiDocsEnum) {
MultiDocsEnum.EnumWithSlice[] subs = ((MultiDocsEnum)docsEnum).getSubs();
int numSubs = ((MultiDocsEnum)docsEnum).getNumSubs();
for (int subindex = 0; subindex<numSubs; subindex++) {
MultiDocsEnum.EnumWithSlice sub = subs[subindex];
if (sub.docsEnum == null) continue;
int base = sub.slice.start;
int docid;
while ((docid = sub.docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
resultListDocs++;
resultBits.fastSet(docid + base);
}
}
} else {
int docid;
while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
resultListDocs++;
resultBits.fastSet(docid);
}
}
}
}
}
term = termsEnum.next();
}
smallSetsDeferred = resultList.size();
if (resultBits != null) {
for (DocSet set : resultList) {
set.setBitsOn(resultBits);
}
return new BitDocSet(resultBits);
}
if (resultList.size()==0) {
return DocSet.EMPTY;
}
if (resultList.size() == 1) {
return resultList.get(0);
}
int sz = 0;
for (DocSet set : resultList)
sz += set.size();
int[] docs = new int[sz];
int pos = 0;
for (DocSet set : resultList) {
System.arraycopy(((SortedIntDocSet)set).getDocs(), 0, docs, pos, set.size());
pos += set.size();
}
Arrays.sort(docs);
int[] dedup = new int[sz];
pos = 0;
int last = -1;
for (int doc : docs) {
if (doc != last)
dedup[pos++] = doc;
last = doc;
}
if (pos != dedup.length) {
dedup = Arrays.copyOf(dedup, pos);
}
return new SortedIntDocSet(dedup, dedup.length);
}
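// The tail of getDocSet() above concatenates several small sorted doc-id sets, sorts the
// result, and strips duplicates before wrapping it as a SortedIntDocSet. A self-contained
// sketch of that merge-and-dedup step; names are illustrative.
import java.util.Arrays;

public class MergeDedupSketch {
  /** Concatenate several sorted doc-id arrays, sort, and drop duplicates. */
  static int[] mergeDedup(int[][] sets) {
    int sz = 0;
    for (int[] set : sets) sz += set.length;
    int[] docs = new int[sz];
    int pos = 0;
    for (int[] set : sets) {
      System.arraycopy(set, 0, docs, pos, set.length);
      pos += set.length;
    }
    Arrays.sort(docs);
    int out = 0, last = -1;
    for (int doc : docs) {
      if (doc != last) docs[out++] = doc; // keep the first occurrence only
      last = doc;
    }
    return Arrays.copyOf(docs, out);
  }

  public static void main(String[] args) {
    int[][] sets = { {1, 5, 9}, {2, 5, 11}, {9} };
    System.out.println(Arrays.toString(mergeDedup(sets))); // [1, 2, 5, 9, 11]
  }
}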
// in solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context, true, false, context.reader().getLiveDocs());
boolean exists = scorer.advance(doc) == doc;
ComplexExplanation result = new ComplexExplanation();
if (exists) {
result.setDescription(this.toString()
+ " , product of:");
result.setValue(queryWeight);
result.setMatch(Boolean.TRUE);
result.addDetail(new Explanation(getBoost(), "boost"));
result.addDetail(new Explanation(queryNorm,"queryNorm"));
} else {
result.setDescription(this.toString()
+ " doesn't match id " + doc);
result.setValue(0);
result.setMatch(Boolean.FALSE);
}
return result;
}
// in solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
Override
public int nextDoc() throws IOException {
return iter.nextDoc();
}
// in solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
Override
public float score() throws IOException {
return score;
}
// in solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
Override
public int advance(int target) throws IOException {
return iter.advance(target);
}
// in solr/core/src/java/org/apache/solr/search/function/distance/StringDistanceFunction.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues str1DV = str1.getValues(context, readerContext);
final FunctionValues str2DV = str2.getValues(context, readerContext);
return new FloatDocValues(this) {
@Override
public float floatVal(int doc) {
return dist.getDistance(str1DV.strVal(doc), str2DV.strVal(doc));
}
@Override
public String toString(int doc) {
StringBuilder sb = new StringBuilder();
sb.append("strdist").append('(');
sb.append(str1DV.toString(doc)).append(',').append(str2DV.toString(doc))
.append(", dist=").append(dist.getClass().getName());
sb.append(')');
return sb.toString();
}
};
}
// in solr/core/src/java/org/apache/solr/search/function/distance/GeohashHaversineFunction.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues gh1DV = geoHash1.getValues(context, readerContext);
final FunctionValues gh2DV = geoHash2.getValues(context, readerContext);
return new DoubleDocValues(this) {
@Override
public double doubleVal(int doc) {
return distance(doc, gh1DV, gh2DV);
}
@Override
public String toString(int doc) {
StringBuilder sb = new StringBuilder();
sb.append(name()).append('(');
sb.append(gh1DV.toString(doc)).append(',').append(gh2DV.toString(doc));
sb.append(')');
return sb.toString();
}
};
}
// in solr/core/src/java/org/apache/solr/search/function/distance/GeohashHaversineFunction.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
geoHash1.createWeight(context, searcher);
geoHash2.createWeight(context, searcher);
}
// in solr/core/src/java/org/apache/solr/search/function/distance/GeohashFunction.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues latDV = lat.getValues(context, readerContext);
final FunctionValues lonDV = lon.getValues(context, readerContext);
return new FunctionValues() {
@Override
public String strVal(int doc) {
return GeohashUtils.encodeLatLon(latDV.doubleVal(doc), lonDV.doubleVal(doc));
}
@Override
public String toString(int doc) {
StringBuilder sb = new StringBuilder();
sb.append(name()).append('(');
sb.append(latDV.toString(doc)).append(',').append(lonDV.toString(doc));
sb.append(')');
return sb.toString();
}
};
}
// in solr/core/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues vals1 = source1.getValues(context, readerContext);
final FunctionValues vals2 = source2.getValues(context, readerContext);
return new DoubleDocValues(this) {
@Override
public double doubleVal(int doc) {
return distance(doc, vals1, vals2);
}
@Override
public String toString(int doc) {
StringBuilder sb = new StringBuilder();
sb.append(name()).append('(').append(power).append(',');
boolean firstTime = true;
sb.append(vals1.toString(doc)).append(',');
sb.append(vals2.toString(doc));
sb.append(')');
return sb.toString();
}
};
}
// in solr/core/src/java/org/apache/solr/search/function/distance/VectorDistanceFunction.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
source1.createWeight(context, searcher);
source2.createWeight(context, searcher);
}
// in solr/core/src/java/org/apache/solr/search/function/distance/HaversineFunction.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues vals1 = p1.getValues(context, readerContext);
final FunctionValues vals2 = p2.getValues(context, readerContext);
return new DoubleDocValues(this) {
@Override
public double doubleVal(int doc) {
return distance(doc, vals1, vals2);
}
@Override
public String toString(int doc) {
StringBuilder sb = new StringBuilder();
sb.append(name()).append('(');
sb.append(vals1.toString(doc)).append(',').append(vals2.toString(doc));
sb.append(')');
return sb.toString();
}
};
}
// in solr/core/src/java/org/apache/solr/search/function/distance/HaversineFunction.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
p1.createWeight(context, searcher);
p2.createWeight(context, searcher);
}
// in solr/core/src/java/org/apache/solr/search/function/distance/HaversineConstFunction.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues latVals = latSource.getValues(context, readerContext);
final FunctionValues lonVals = lonSource.getValues(context, readerContext);
final double latCenterRad = this.latCenter * DEGREES_TO_RADIANS;
final double lonCenterRad = this.lonCenter * DEGREES_TO_RADIANS;
final double latCenterRad_cos = this.latCenterRad_cos;
return new DoubleDocValues(this) {
@Override
public double doubleVal(int doc) {
double latRad = latVals.doubleVal(doc) * DEGREES_TO_RADIANS;
double lonRad = lonVals.doubleVal(doc) * DEGREES_TO_RADIANS;
double diffX = latCenterRad - latRad;
double diffY = lonCenterRad - lonRad;
double hsinX = Math.sin(diffX * 0.5);
double hsinY = Math.sin(diffY * 0.5);
double h = hsinX * hsinX +
(latCenterRad_cos * Math.cos(latRad) * hsinY * hsinY);
return (EARTH_MEAN_DIAMETER * Math.atan2(Math.sqrt(h), Math.sqrt(1 - h)));
}
@Override
public String toString(int doc) {
return name() + '(' + latVals.toString(doc) + ',' + lonVals.toString(doc) + ',' + latCenter + ',' + lonCenter + ')';
}
};
}
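// --- illustrative sketch, not from the Solr source: the haversine computation that
// doubleVal() above performs per document, written out as a standalone method. The
// DEGREES_TO_RADIANS and EARTH_MEAN_DIAMETER constants are assumptions here
// (roughly 12742 km for the mean diameter), chosen only so the sketch runs on its own.
public final class HaversineSketch {
  private static final double DEGREES_TO_RADIANS = Math.PI / 180.0;
  private static final double EARTH_MEAN_DIAMETER = 12742.0; // km, assumed value

  /** Great-circle distance between (lat1,lon1) and (lat2,lon2), inputs in degrees. */
  static double haversine(double lat1, double lon1, double lat2, double lon2) {
    double lat1Rad = lat1 * DEGREES_TO_RADIANS;
    double lat2Rad = lat2 * DEGREES_TO_RADIANS;
    double dLat = lat2Rad - lat1Rad;
    double dLon = (lon2 - lon1) * DEGREES_TO_RADIANS;
    double hsinLat = Math.sin(dLat * 0.5);
    double hsinLon = Math.sin(dLon * 0.5);
    double h = hsinLat * hsinLat
        + Math.cos(lat1Rad) * Math.cos(lat2Rad) * hsinLon * hsinLon;
    return EARTH_MEAN_DIAMETER * Math.atan2(Math.sqrt(h), Math.sqrt(1 - h));
  }

  public static void main(String[] args) {
    // roughly the Berlin - Paris distance; expect a value near 880 km
    System.out.println(haversine(52.52, 13.405, 48.857, 2.352));
  }
}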
// in solr/core/src/java/org/apache/solr/search/function/distance/HaversineConstFunction.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
latSource.createWeight(context, searcher);
lonSource.createWeight(context, searcher);
}
// in solr/core/src/java/org/apache/solr/search/function/FileFloatSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final int off = readerContext.docBase;
IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(readerContext);
final float[] arr = getCachedFloats(topLevelContext.reader());
return new FloatDocValues(this) {
@Override
public float floatVal(int doc) {
return arr[doc + off];
}
@Override
public Object objectVal(int doc) {
return floatVal(doc); // TODO: keep track of missing values
}
};
}
// in solr/core/src/java/org/apache/solr/search/function/ValueSourceRangeFilter.java
Override
public DocIdSet getDocIdSet(final Map context, final AtomicReaderContext readerContext, Bits acceptDocs) throws IOException {
return BitsFilteredDocIdSet.wrap(new DocIdSet() {
@Override
public DocIdSetIterator iterator() throws IOException {
return valueSource.getValues(context, readerContext).getRangeScorer(readerContext.reader(), lowerVal, upperVal, includeLower, includeUpper);
}
@Override
public Bits bits() throws IOException {
return null; // don't use random access
}
}, acceptDocs);
}
// in solr/core/src/java/org/apache/solr/search/function/ValueSourceRangeFilter.java
Override
public DocIdSetIterator iterator() throws IOException {
return valueSource.getValues(context, readerContext).getRangeScorer(readerContext.reader(), lowerVal, upperVal, includeLower, includeUpper);
}
// in solr/core/src/java/org/apache/solr/search/function/ValueSourceRangeFilter.java
Override
public Bits bits() throws IOException {
return null; // don't use random access
}
// in solr/core/src/java/org/apache/solr/search/function/ValueSourceRangeFilter.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
valueSource.createWeight(context, searcher);
}
// in solr/core/src/java/org/apache/solr/search/SortedIntDocSet.java
Override
public Filter getTopFilter() {
return new Filter() {
int lastEndIdx = 0;
@Override
public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
AtomicReader reader = context.reader();
// all Solr DocSets that are used as filters only include live docs
final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs);
final int base = context.docBase;
final int maxDoc = reader.maxDoc();
final int max = base + maxDoc; // one past the max doc in this segment.
int sidx = Math.max(0,lastEndIdx);
if (sidx > 0 && docs[sidx-1] >= base) {
// oops, the lastEndIdx isn't correct... we must have been used
// in a multi-threaded context, or the indexreaders are being
// used out-of-order. start at 0.
sidx = 0;
}
if (sidx < docs.length && docs[sidx] < base) {
// if docs[sidx] is < base, we need to seek to find the real start.
sidx = findIndex(docs, base, sidx, docs.length-1);
}
final int startIdx = sidx;
// Largest possible end index is limited to the start index
// plus the number of docs contained in the segment. Subtract 1 since
// the end index is inclusive.
int eidx = Math.min(docs.length, startIdx + maxDoc) - 1;
// find the real end
eidx = findIndex(docs, max, startIdx, eidx) - 1;
final int endIdx = eidx;
lastEndIdx = endIdx;
return BitsFilteredDocIdSet.wrap(new DocIdSet() {
@Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
int idx = startIdx;
int adjustedDoc = -1;
@Override
public int docID() {
return adjustedDoc;
}
@Override
public int nextDoc() throws IOException {
return adjustedDoc = (idx > endIdx) ? NO_MORE_DOCS : (docs[idx++] - base);
}
@Override
public int advance(int target) throws IOException {
if (idx > endIdx || target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
target += base;
// probe next
int rawDoc = docs[idx++];
if (rawDoc >= target) return adjustedDoc=rawDoc-base;
int high = endIdx;
// TODO: probe more before resorting to binary search?
// binary search
while (idx <= high) {
int mid = (idx+high) >>> 1;
rawDoc = docs[mid];
if (rawDoc < target) {
idx = mid+1;
}
else if (rawDoc > target) {
high = mid-1;
}
else {
idx=mid+1;
return adjustedDoc=rawDoc - base;
}
}
// low is on the insertion point...
if (idx <= endIdx) {
return adjustedDoc = docs[idx++] - base;
} else {
return adjustedDoc=NO_MORE_DOCS;
}
}
};
}
@Override
public boolean isCacheable() {
return true;
}
@Override
public Bits bits() throws IOException {
// random access is expensive for this set
return null;
}
}, acceptDocs2);
}
};
}
// in solr/core/src/java/org/apache/solr/search/SortedIntDocSet.java
Override
public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
AtomicReader reader = context.reader();
// all Solr DocSets that are used as filters only include live docs
final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs);
final int base = context.docBase;
final int maxDoc = reader.maxDoc();
final int max = base + maxDoc; // one past the max doc in this segment.
int sidx = Math.max(0,lastEndIdx);
if (sidx > 0 && docs[sidx-1] >= base) {
// oops, the lastEndIdx isn't correct... we must have been used
// in a multi-threaded context, or the indexreaders are being
// used out-of-order. start at 0.
sidx = 0;
}
if (sidx < docs.length && docs[sidx] < base) {
// if docs[sidx] is < base, we need to seek to find the real start.
sidx = findIndex(docs, base, sidx, docs.length-1);
}
final int startIdx = sidx;
// Largest possible end index is limited to the start index
// plus the number of docs contained in the segment. Subtract 1 since
// the end index is inclusive.
int eidx = Math.min(docs.length, startIdx + maxDoc) - 1;
// find the real end
eidx = findIndex(docs, max, startIdx, eidx) - 1;
final int endIdx = eidx;
lastEndIdx = endIdx;
return BitsFilteredDocIdSet.wrap(new DocIdSet() {
@Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
int idx = startIdx;
int adjustedDoc = -1;
@Override
public int docID() {
return adjustedDoc;
}
@Override
public int nextDoc() throws IOException {
return adjustedDoc = (idx > endIdx) ? NO_MORE_DOCS : (docs[idx++] - base);
}
@Override
public int advance(int target) throws IOException {
if (idx > endIdx || target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
target += base;
// probe next
int rawDoc = docs[idx++];
if (rawDoc >= target) return adjustedDoc=rawDoc-base;
int high = endIdx;
// TODO: probe more before resorting to binary search?
// binary search
while (idx <= high) {
int mid = (idx+high) >>> 1;
rawDoc = docs[mid];
if (rawDoc < target) {
idx = mid+1;
}
else if (rawDoc > target) {
high = mid-1;
}
else {
idx=mid+1;
return adjustedDoc=rawDoc - base;
}
}
// low is on the insertion point...
if (idx <= endIdx) {
return adjustedDoc = docs[idx++] - base;
} else {
return adjustedDoc=NO_MORE_DOCS;
}
}
};
}
@Override
public boolean isCacheable() {
return true;
}
@Override
public Bits bits() throws IOException {
// random access is expensive for this set
return null;
}
}, acceptDocs2);
}
// in solr/core/src/java/org/apache/solr/search/SortedIntDocSet.java
Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
int idx = startIdx;
int adjustedDoc = -1;
@Override
public int docID() {
return adjustedDoc;
}
@Override
public int nextDoc() throws IOException {
return adjustedDoc = (idx > endIdx) ? NO_MORE_DOCS : (docs[idx++] - base);
}
@Override
public int advance(int target) throws IOException {
if (idx > endIdx || target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
target += base;
// probe next
int rawDoc = docs[idx++];
if (rawDoc >= target) return adjustedDoc=rawDoc-base;
int high = endIdx;
// TODO: probe more before resorting to binary search?
// binary search
while (idx <= high) {
int mid = (idx+high) >>> 1;
rawDoc = docs[mid];
if (rawDoc < target) {
idx = mid+1;
}
else if (rawDoc > target) {
high = mid-1;
}
else {
idx=mid+1;
return adjustedDoc=rawDoc - base;
}
}
// low is on the insertion point...
if (idx <= endIdx) {
return adjustedDoc = docs[idx++] - base;
} else {
return adjustedDoc=NO_MORE_DOCS;
}
}
};
}
// in solr/core/src/java/org/apache/solr/search/SortedIntDocSet.java
Override
public int nextDoc() throws IOException {
return adjustedDoc = (idx > endIdx) ? NO_MORE_DOCS : (docs[idx++] - base);
}
// in solr/core/src/java/org/apache/solr/search/SortedIntDocSet.java
Override
public int advance(int target) throws IOException {
if (idx > endIdx || target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
target += base;
// probe next
int rawDoc = docs[idx++];
if (rawDoc >= target) return adjustedDoc=rawDoc-base;
int high = endIdx;
// TODO: probe more before resorting to binary search?
// binary search
while (idx <= high) {
int mid = (idx+high) >>> 1;
rawDoc = docs[mid];
if (rawDoc < target) {
idx = mid+1;
}
else if (rawDoc > target) {
high = mid-1;
}
else {
idx=mid+1;
return adjustedDoc=rawDoc - base;
}
}
// low is on the insertion point...
if (idx <= endIdx) {
return adjustedDoc = docs[idx++] - base;
} else {
return adjustedDoc=NO_MORE_DOCS;
}
}
// in solr/core/src/java/org/apache/solr/search/SortedIntDocSet.java
Override
public Bits bits() throws IOException {
// random access is expensive for this set
return null;
}
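// --- illustrative sketch, not from the Solr source: the advance() strategy used by
// SortedIntDocSet above, reduced to a plain sorted int[] with no Lucene types. It
// probes the next element first and only then falls back to a binary search over the
// remaining ids; exhaustion is signalled with -1 here instead of NO_MORE_DOCS to keep
// the sketch dependency-free.
public final class SortedAdvanceSketch {
  private final int[] docs;
  private int idx = 0;

  SortedAdvanceSketch(int[] sortedDocs) { this.docs = sortedDocs; }

  /** Returns the first doc >= target, or -1 when the set is exhausted. */
  int advance(int target) {
    if (idx >= docs.length) return -1;
    // probe the next element before searching
    int raw = docs[idx++];
    if (raw >= target) return raw;
    int low = idx, high = docs.length - 1;
    while (low <= high) {
      int mid = (low + high) >>> 1;
      if (docs[mid] < target) low = mid + 1;
      else if (docs[mid] > target) high = mid - 1;
      else { idx = mid + 1; return docs[mid]; }
    }
    idx = low;                                  // low is the insertion point
    return idx < docs.length ? docs[idx++] : -1;
  }

  public static void main(String[] args) {
    SortedAdvanceSketch it = new SortedAdvanceSketch(new int[] {2, 5, 9, 14, 30});
    System.out.println(it.advance(6));   // 9
    System.out.println(it.advance(14));  // 14
    System.out.println(it.advance(31));  // -1
  }
}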
// in solr/core/src/java/org/apache/solr/search/LuceneQueryOptimizer.java
public TopDocs optimize(BooleanQuery original,
SolrIndexSearcher searcher,
int numHits,
Query[] queryOut,
Filter[] filterOut
)
throws IOException {
BooleanQuery query = new BooleanQuery();
BooleanQuery filterQuery = null;
for (BooleanClause c : original.clauses()) {
/***
System.out.println("required="+c.required);
System.out.println("boost="+c.query.getBoost());
System.out.println("isTermQuery="+(c.query instanceof TermQuery));
if (c.query instanceof TermQuery) {
System.out.println("term="+((TermQuery)c.query).getTerm());
System.out.println("docFreq="+searcher.docFreq(((TermQuery)c.query).getTerm()));
}
***/
Query q = c.getQuery();
if (c.isRequired() // required
&& q.getBoost() == 0.0f // boost is zero
&& q instanceof TermQuery // TermQuery
&& (searcher.docFreq(((TermQuery)q).getTerm())
/ (float)searcher.maxDoc()) >= threshold) { // check threshold
if (filterQuery == null)
filterQuery = new BooleanQuery();
filterQuery.add(q, BooleanClause.Occur.MUST); // filter it
//System.out.println("WooHoo... qualified to be hoisted to a filter!");
} else {
query.add(c); // query it
}
}
Filter filter = null;
if (filterQuery != null) {
synchronized (cache) { // check cache
filter = (Filter)cache.get(filterQuery);
}
if (filter == null) { // miss
filter = new CachingWrapperFilter(new QueryWrapperFilter(filterQuery)); // construct new entry
synchronized (cache) {
cache.put(filterQuery, filter); // cache it
}
}
}
// YCS: added code to pass out optimized query and filter
// so they can be used with Hits
if (queryOut != null && filterOut != null) {
queryOut[0] = query; filterOut[0] = filter;
return null;
} else {
return searcher.search(query, filter, numHits);
}
}
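// --- illustrative sketch, not from the Solr source: the clause-partitioning rule that
// optimize() above applies. A required TermQuery whose document frequency is a large
// enough fraction of the index is hoisted into a cached filter; everything else stays
// in the scored query. The 0.05 threshold and the example numbers are assumptions for
// the demo, and the boost check from the original condition is omitted for brevity.
public final class FilterHoistSketch {
  static boolean hoistToFilter(boolean required, boolean isTermQuery,
                               int docFreq, int maxDoc, float threshold) {
    return required && isTermQuery && maxDoc > 0
        && (docFreq / (float) maxDoc) >= threshold;
  }

  public static void main(String[] args) {
    // a clause matching 40% of a 1M-doc index qualifies, a clause matching 0.1% does not
    System.out.println(hoistToFilter(true, true, 400_000, 1_000_000, 0.05f)); // true
    System.out.println(hoistToFilter(true, true, 1_000, 1_000_000, 0.05f));   // false
  }
}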
// in solr/core/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java
Override
public FieldComparator newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
return new TermOrdValComparator_SML(numHits, fieldname, sortPos, reversed, missingValueProxy);
}
// in solr/core/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java
Override
public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
return TermOrdValComparator_SML.createComparator(context.reader(), this);
}
// in solr/core/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java
Override
public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
return TermOrdValComparator_SML.createComparator(context.reader(), parent);
}
// in solr/core/src/java/org/apache/solr/search/MissingStringLastComparatorSource.java
public static FieldComparator createComparator(AtomicReader reader, TermOrdValComparator_SML parent) throws IOException {
parent.termsIndex = FieldCache.DEFAULT.getTermsIndex(reader, parent.field);
final PackedInts.Reader docToOrd = parent.termsIndex.getDocToOrd();
PerSegmentComparator perSegComp = null;
if (docToOrd.hasArray()) {
final Object arr = docToOrd.getArray();
if (arr instanceof byte[]) {
perSegComp = new ByteOrdComparator((byte[]) arr, parent);
} else if (arr instanceof short[]) {
perSegComp = new ShortOrdComparator((short[]) arr, parent);
} else if (arr instanceof int[]) {
perSegComp = new IntOrdComparator((int[]) arr, parent);
}
}
if (perSegComp == null) {
perSegComp = new AnyOrdComparator(docToOrd, parent);
}
if (perSegComp.bottomSlot != -1) {
perSegComp.setBottom(perSegComp.bottomSlot);
}
parent.current = perSegComp;
return perSegComp;
}
// in solr/core/src/java/org/apache/solr/search/WrappedQuery.java
Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return q.createWeight(searcher);
}
// in solr/core/src/java/org/apache/solr/search/WrappedQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
// currently no need to continue wrapping at this point.
return q.rewrite(reader);
}
// in solr/core/src/java/org/apache/solr/search/DocSetBase.java
public Filter getTopFilter() {
final OpenBitSet bs = getBits();
return new Filter() {
@Override
public DocIdSet getDocIdSet(final AtomicReaderContext context, Bits acceptDocs) throws IOException {
AtomicReader reader = context.reader();
// all Solr DocSets that are used as filters only include live docs
final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs);
if (context.isTopLevel) {
return BitsFilteredDocIdSet.wrap(bs, acceptDocs);
}
final int base = context.docBase;
final int maxDoc = reader.maxDoc();
final int max = base + maxDoc; // one past the max doc in this segment.
return BitsFilteredDocIdSet.wrap(new DocIdSet() {
@Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
int pos=base-1;
int adjustedDoc=-1;
@Override
public int docID() {
return adjustedDoc;
}
@Override
public int nextDoc() throws IOException {
pos = bs.nextSetBit(pos+1);
return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
}
@Override
public int advance(int target) throws IOException {
if (target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
pos = bs.nextSetBit(target+base);
return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
}
};
}
@Override
public boolean isCacheable() {
return true;
}
@Override
public Bits bits() throws IOException {
// sparse filters should not use random access
return null;
}
}, acceptDocs2);
}
};
}
// in solr/core/src/java/org/apache/solr/search/DocSetBase.java
Override
public DocIdSet getDocIdSet(final AtomicReaderContext context, Bits acceptDocs) throws IOException {
AtomicReader reader = context.reader();
// all Solr DocSets that are used as filters only include live docs
final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs);
if (context.isTopLevel) {
return BitsFilteredDocIdSet.wrap(bs, acceptDocs);
}
final int base = context.docBase;
final int maxDoc = reader.maxDoc();
final int max = base + maxDoc; // one past the max doc in this segment.
return BitsFilteredDocIdSet.wrap(new DocIdSet() {
@Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
int pos=base-1;
int adjustedDoc=-1;
@Override
public int docID() {
return adjustedDoc;
}
@Override
public int nextDoc() throws IOException {
pos = bs.nextSetBit(pos+1);
return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
}
@Override
public int advance(int target) throws IOException {
if (target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
pos = bs.nextSetBit(target+base);
return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
}
};
}
@Override
public boolean isCacheable() {
return true;
}
@Override
public Bits bits() throws IOException {
// sparse filters should not use random access
return null;
}
}, acceptDocs2);
}
// in solr/core/src/java/org/apache/solr/search/DocSetBase.java
Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
int pos=base-1;
int adjustedDoc=-1;
@Override
public int docID() {
return adjustedDoc;
}
@Override
public int nextDoc() throws IOException {
pos = bs.nextSetBit(pos+1);
return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
}
@Override
public int advance(int target) throws IOException {
if (target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
pos = bs.nextSetBit(target+base);
return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
}
};
}
// in solr/core/src/java/org/apache/solr/search/DocSetBase.java
Override
public int nextDoc() throws IOException {
pos = bs.nextSetBit(pos+1);
return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
}
// in solr/core/src/java/org/apache/solr/search/DocSetBase.java
Override
public int advance(int target) throws IOException {
if (target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
pos = bs.nextSetBit(target+base);
return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
}
// in solr/core/src/java/org/apache/solr/search/DocSetBase.java
Override
public Bits bits() throws IOException {
// sparse filters should not use random access
return null;
}
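// --- illustrative sketch, not from the Solr source: the per-segment view that
// getTopFilter() above builds over a top-level bit set. Global bit positions are
// offset by the segment's docBase and clipped at the segment's maxDoc; java.util.BitSet
// stands in for OpenBitSet so the sketch has no Lucene dependency.
import java.util.BitSet;

public final class SegmentViewSketch {
  /** Prints the segment-local ids of set bits falling inside [base, base+maxDoc). */
  static void printSegmentDocs(BitSet topLevel, int base, int maxDoc) {
    int max = base + maxDoc;                    // one past the last doc in this segment
    for (int pos = topLevel.nextSetBit(base); pos >= 0 && pos < max;
         pos = topLevel.nextSetBit(pos + 1)) {
      System.out.println(pos - base);           // rebase to a segment-local id
    }
  }

  public static void main(String[] args) {
    BitSet docs = new BitSet();
    docs.set(3); docs.set(15); docs.set(27);
    // segment covering global ids [10, 30): expect local ids 5 and 17
    printSegmentDocs(docs, 10, 20);
  }
}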
// in solr/core/src/java/org/apache/solr/search/DocSetCollector.java
Override
public void collect(int doc) throws IOException {
doc += base;
// optimistically collect the first docs in an array
// in case the total number will be small enough to represent
// as a small set like SortedIntDocSet instead...
// Storing in this array will be quicker to convert
// than scanning through a potentially huge bit vector.
// FUTURE: when search methods all start returning docs in order, maybe
// we could have a ListDocSet() and use the collected array directly.
if (pos < scratch.length) {
scratch[pos]=doc;
} else {
// this conditional could be removed if BitSet was preallocated, but that
// would take up more memory, and add more GC time...
if (bits==null) bits = new OpenBitSet(maxDoc);
bits.fastSet(doc);
}
pos++;
}
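// --- illustrative sketch, not from the Solr source: the collection strategy used by
// collect() above, with made-up sizes. The first ids go into a small scratch array,
// and a bit vector (java.util.BitSet here, OpenBitSet in Solr) is only allocated once
// the scratch space overflows, so small result sets never pay for the full bit vector.
import java.util.BitSet;

public final class ScratchThenBitsSketch {
  private final int[] scratch;
  private BitSet bits;          // lazily allocated spill structure
  private final int maxDoc;
  private int pos = 0;

  ScratchThenBitsSketch(int scratchSize, int maxDoc) {
    this.scratch = new int[scratchSize];
    this.maxDoc = maxDoc;
  }

  void collect(int doc) {
    if (pos < scratch.length) {
      scratch[pos] = doc;       // cheap path for small result sets
    } else {
      if (bits == null) bits = new BitSet(maxDoc);
      bits.set(doc);            // spill path for large result sets
    }
    pos++;
  }

  int size() { return pos; }

  public static void main(String[] args) {
    ScratchThenBitsSketch c = new ScratchThenBitsSketch(2, 100);
    for (int d : new int[] {3, 7, 42}) c.collect(d);
    System.out.println(c.size()); // 3, with doc 42 recorded in the BitSet
  }
}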
// in solr/core/src/java/org/apache/solr/search/DocSetCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
}
// in solr/core/src/java/org/apache/solr/search/DocSetCollector.java
Override
public void setNextReader(AtomicReaderContext context) throws IOException {
this.base = context.docBase;
}
// in solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
return new LongDocValues(this) {
@Override
public float floatVal(int doc) {
return fv;
}
@Override
public int intVal(int doc) {
return (int) constant;
}
@Override
public long longVal(int doc) {
return constant;
}
@Override
public double doubleVal(int doc) {
return dv;
}
@Override
public String toString(int doc) {
return description();
}
};
}
// in solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues vals = source.getValues(context, readerContext);
return new DoubleDocValues(this) {
@Override
public double doubleVal(int doc) {
return func(doc, vals);
}
@Override
public String toString(int doc) {
return name() + '(' + vals.toString(doc) + ')';
}
};
}
// in solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues aVals = a.getValues(context, readerContext);
final FunctionValues bVals = b.getValues(context, readerContext);
return new DoubleDocValues(this) {
@Override
public double doubleVal(int doc) {
return func(doc, aVals, bVals);
}
@Override
public String toString(int doc) {
return name() + '(' + aVals.toString(doc) + ',' + bVals.toString(doc) + ')';
}
};
}
// in solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
}
// in solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
return new BoolDocValues(this) {
@Override
public boolean boolVal(int doc) {
return constant;
}
};
}
// in solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
if (context.get(this) == null) {
SolrRequestInfo requestInfo = SolrRequestInfo.getRequestInfo();
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "testfunc: unweighted value source detected. delegate="+source + " request=" + (requestInfo==null ? "null" : requestInfo.getReq()));
}
return source.getValues(context, readerContext);
}
// in solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
context.put(this, this);
}
// in solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
Override
public SortField getSortField(boolean reverse) throws IOException {
return super.getSortField(reverse);
}
// in solr/core/src/java/org/apache/solr/search/SolrFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
return getDocIdSet(null, context, acceptDocs);
}
// in solr/core/src/java/org/apache/solr/search/FastLRUCache.java
public void warm(SolrIndexSearcher searcher, SolrCache old) throws IOException {
if (regenerator == null) return;
long warmingStartTime = System.currentTimeMillis();
FastLRUCache other = (FastLRUCache) old;
// warm entries
if (isAutowarmingOn()) {
int sz = autowarm.getWarmCount(other.size());
Map items = other.cache.getLatestAccessedItems(sz);
Map.Entry[] itemsArr = new Map.Entry[items.size()];
int counter = 0;
for (Object mapEntry : items.entrySet()) {
itemsArr[counter++] = (Map.Entry) mapEntry;
}
for (int i = itemsArr.length - 1; i >= 0; i--) {
try {
boolean continueRegen = regenerator.regenerateItem(searcher,
this, old, itemsArr[i].getKey(), itemsArr[i].getValue());
if (!continueRegen) break;
}
catch (Throwable e) {
SolrException.log(log, "Error during auto-warming of key:" + itemsArr[i].getKey(), e);
}
}
}
warmupTime = System.currentTimeMillis() - warmingStartTime;
}
// in solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
protected Highlighter getPhraseHighlighter(Query query, String fieldName, SolrQueryRequest request, CachingTokenFilter tokenStream) throws IOException {
SolrParams params = request.getParams();
Highlighter highlighter = null;
highlighter = new Highlighter(
getFormatter(fieldName, params),
getEncoder(fieldName, params),
getSpanQueryScorer(query, fieldName, tokenStream, request));
highlighter.setTextFragmenter(getFragmenter(fieldName, params));
return highlighter;
}
// in solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
private QueryScorer getSpanQueryScorer(Query query, String fieldName, TokenStream tokenStream, SolrQueryRequest request) throws IOException {
boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false);
Boolean highlightMultiTerm = request.getParams().getBool(HighlightParams.HIGHLIGHT_MULTI_TERM, true);
if(highlightMultiTerm == null) {
highlightMultiTerm = false;
}
QueryScorer scorer;
if (reqFieldMatch) {
scorer = new QueryScorer(query, fieldName);
}
else {
scorer = new QueryScorer(query, null);
}
scorer.setExpandMultiTermQuery(highlightMultiTerm);
return scorer;
}
// in solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
private void doHighlightingByHighlighter( Query query, SolrQueryRequest req, NamedList docSummaries,
int docId, Document doc, String fieldName ) throws IOException {
final SolrIndexSearcher searcher = req.getSearcher();
final IndexSchema schema = searcher.getSchema();
// TODO: Currently in trunk highlighting numeric fields is broken (Lucene) -
// so we disable them until fixed (see LUCENE-3080)!
// BEGIN: Hack
final SchemaField schemaField = schema.getFieldOrNull(fieldName);
if (schemaField != null && (
(schemaField.getType() instanceof org.apache.solr.schema.TrieField) ||
(schemaField.getType() instanceof org.apache.solr.schema.TrieDateField)
)) return;
// END: Hack
SolrParams params = req.getParams();
IndexableField[] docFields = doc.getFields(fieldName);
List<String> listFields = new ArrayList<String>();
for (IndexableField field : docFields) {
listFields.add(field.stringValue());
}
String[] docTexts = (String[]) listFields.toArray(new String[listFields.size()]);
// according to Document javadoc, doc.getValues() never returns null. check empty instead of null
if (docTexts.length == 0) return;
TokenStream tstream = null;
int numFragments = getMaxSnippets(fieldName, params);
boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);
String[] summaries = null;
List<TextFragment> frags = new ArrayList<TextFragment>();
TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization
try {
TokenStream tvStream = TokenSources.getTokenStream(searcher.getIndexReader(), docId, fieldName);
if (tvStream != null) {
tots = new TermOffsetsTokenStream(tvStream);
}
}
catch (IllegalArgumentException e) {
// No problem. But we can't use TermOffsets optimization.
}
for (int j = 0; j < docTexts.length; j++) {
if( tots != null ) {
// if we're using TermOffsets optimization, then get the next
// field value's TokenStream (i.e. get field j's TokenStream) from tots:
tstream = tots.getMultiValuedTokenStream( docTexts[j].length() );
} else {
// fall back to analyzer
tstream = createAnalyzerTStream(schema, fieldName, docTexts[j]);
}
int maxCharsToAnalyze = params.getFieldInt(fieldName,
HighlightParams.MAX_CHARS,
Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
Highlighter highlighter;
if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
if (maxCharsToAnalyze < 0) {
tstream = new CachingTokenFilter(tstream);
} else {
tstream = new CachingTokenFilter(new OffsetLimitTokenFilter(tstream, maxCharsToAnalyze));
}
// get highlighter
highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tstream);
// after highlighter initialization, reset tstream since construction of highlighter already used it
tstream.reset();
}
else {
// use "the old way"
highlighter = getHighlighter(query, fieldName, req);
}
if (maxCharsToAnalyze < 0) {
highlighter.setMaxDocCharsToAnalyze(docTexts[j].length());
} else {
highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
}
try {
TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, docTexts[j], mergeContiguousFragments, numFragments);
for (int k = 0; k < bestTextFragments.length; k++) {
if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) {
frags.add(bestTextFragments[k]);
}
}
} catch (InvalidTokenOffsetsException e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
}
}
// sort such that the fragments with the highest score come first
Collections.sort(frags, new Comparator<TextFragment>() {
public int compare(TextFragment arg0, TextFragment arg1) {
return Math.round(arg1.getScore() - arg0.getScore());
}
});
// convert fragments back into text
// TODO: we can include score and position information in output as snippet attributes
if (frags.size() > 0) {
ArrayList<String> fragTexts = new ArrayList<String>();
for (TextFragment fragment: frags) {
if ((fragment != null) && (fragment.getScore() > 0)) {
fragTexts.add(fragment.toString());
}
if (fragTexts.size() >= numFragments) break;
}
summaries = fragTexts.toArray(new String[0]);
if (summaries.length > 0)
docSummaries.add(fieldName, summaries);
}
// no summaries made, copy text from alternate field
if (summaries == null || summaries.length == 0) {
alternateField( docSummaries, params, doc, fieldName );
}
}
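// --- illustrative sketch, not from the Solr source: the fragment selection performed
// at the end of the method above. Candidate snippets are sorted by score, highest
// first, and at most numFragments positive-scoring ones are kept. Float-safe
// Comparator.comparingDouble is used here, whereas the original comparator rounds the
// score difference; the Frag class and the example texts are assumptions for the demo.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

public final class TopFragmentsSketch {
  static final class Frag {
    final String text; final float score;
    Frag(String text, float score) { this.text = text; this.score = score; }
  }

  static List<String> topFragments(List<Frag> frags, int numFragments) {
    frags.sort(Comparator.comparingDouble((Frag f) -> f.score).reversed());
    List<String> out = new ArrayList<>();
    for (Frag f : frags) {
      if (f.score > 0) out.add(f.text);          // drop zero-score fragments
      if (out.size() >= numFragments) break;     // stop once enough snippets are kept
    }
    return out;
  }

  public static void main(String[] args) {
    List<Frag> frags = new ArrayList<>(Arrays.asList(
        new Frag("a <em>match</em> here", 1.5f),
        new Frag("weak context", 0.0f),
        new Frag("best <em>match</em> of all", 2.75f)));
    System.out.println(topFragments(frags, 2)); // highest-scoring snippet first
  }
}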
// in solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
private void doHighlightingByFastVectorHighlighter( FastVectorHighlighter highlighter, FieldQuery fieldQuery,
SolrQueryRequest req, NamedList docSummaries, int docId, Document doc,
String fieldName ) throws IOException {
SolrParams params = req.getParams();
SolrFragmentsBuilder solrFb = getSolrFragmentsBuilder( fieldName, params );
String[] snippets = highlighter.getBestFragments( fieldQuery, req.getSearcher().getIndexReader(), docId, fieldName,
params.getFieldInt( fieldName, HighlightParams.FRAGSIZE, 100 ),
params.getFieldInt( fieldName, HighlightParams.SNIPPETS, 1 ),
getFragListBuilder( fieldName, params ),
getFragmentsBuilder( fieldName, params ),
solrFb.getPreTags( params, fieldName ),
solrFb.getPostTags( params, fieldName ),
getEncoder( fieldName, params ) );
if( snippets != null && snippets.length > 0 )
docSummaries.add( fieldName, snippets );
else
alternateField( docSummaries, params, doc, fieldName );
}
// in solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
private TokenStream createAnalyzerTStream(IndexSchema schema, String fieldName, String docText) throws IOException {
TokenStream tstream;
TokenStream ts = schema.getAnalyzer().tokenStream(fieldName, new StringReader(docText));
ts.reset();
tstream = new TokenOrderingFilter(ts, 10);
return tstream;
}
// in solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
Override
public boolean incrementToken() throws IOException {
while (!done && queue.size() < windowSize) {
if (!input.incrementToken()) {
done = true;
break;
}
// reverse iterating for better efficiency since we know the
// list is already sorted, and most token start offsets will be too.
ListIterator<OrderedToken> iter = queue.listIterator(queue.size());
while(iter.hasPrevious()) {
if (offsetAtt.startOffset() >= iter.previous().startOffset) {
// insertion will be before what next() would return (what
// we just compared against), so move back one so the insertion
// will be after.
iter.next();
break;
}
}
OrderedToken ot = new OrderedToken();
ot.state = captureState();
ot.startOffset = offsetAtt.startOffset();
iter.add(ot);
}
if (queue.isEmpty()) {
return false;
} else {
restoreState(queue.removeFirst().state);
return true;
}
}
// in solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
Override
public boolean incrementToken() throws IOException {
while( true ){
if( bufferedToken == null ) {
if (!bufferedTokenStream.incrementToken())
return false;
bufferedToken = bufferedTokenStream.captureState();
bufferedStartOffset = bufferedOffsetAtt.startOffset();
bufferedEndOffset = bufferedOffsetAtt.endOffset();
}
if( startOffset <= bufferedStartOffset &&
bufferedEndOffset <= endOffset ){
restoreState(bufferedToken);
bufferedToken = null;
offsetAtt.setOffset( offsetAtt.startOffset() - startOffset, offsetAtt.endOffset() - startOffset );
return true;
}
else if( bufferedEndOffset > endOffset ){
startOffset += length + 1;
return false;
}
bufferedToken = null;
}
}
// in solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
protected void analyze(Collection<Token> result, Reader text, int offset) throws IOException {
TokenStream stream = analyzer.tokenStream("", text);
// TODO: support custom attributes
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
FlagsAttribute flagsAtt = stream.addAttribute(FlagsAttribute.class);
TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
stream.reset();
while (stream.incrementToken()) {
Token token = new Token();
token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
token.setStartOffset(offset + offsetAtt.startOffset());
token.setEndOffset(offset + offsetAtt.endOffset());
token.setFlags(flagsAtt.getFlags());
token.setType(typeAtt.type());
token.setPayload(payloadAtt.getPayload());
token.setPositionIncrement(posIncAtt.getPositionIncrement());
result.add(token);
}
stream.end();
stream.close();
}
// in solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
SpellingResult result = new SpellingResult(options.tokens);
IndexReader reader = determineReader(options.reader);
Term term = field != null ? new Term(field, "") : null;
float theAccuracy = (options.accuracy == Float.MIN_VALUE) ? spellChecker.getAccuracy() : options.accuracy;
int count = Math.max(options.count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
for (Token token : options.tokens) {
String tokenText = new String(token.buffer(), 0, token.length());
term = new Term(field, tokenText);
int docFreq = 0;
if (reader != null) {
docFreq = reader.docFreq(term);
}
String[] suggestions = spellChecker.suggestSimilar(tokenText,
((options.alternativeTermCount == null || docFreq == 0) ? count
: options.alternativeTermCount), field != null ? reader : null, // workaround LUCENE-1295
field, options.suggestMode, theAccuracy);
if (suggestions.length == 1 && suggestions[0].equals(tokenText)
&& options.alternativeTermCount == null) {
// These are spelled the same, continue on
continue;
}
// If considering alternatives to "correctly-spelled" terms, then add the
// original as a viable suggestion.
if (options.alternativeTermCount != null && docFreq > 0) {
boolean foundOriginal = false;
String[] suggestionsWithOrig = new String[suggestions.length + 1];
for (int i = 0; i < suggestions.length; i++) {
if (suggestions[i].equals(tokenText)) {
foundOriginal = true;
break;
}
suggestionsWithOrig[i + 1] = suggestions[i];
}
if (!foundOriginal) {
suggestionsWithOrig[0] = tokenText;
suggestions = suggestionsWithOrig;
}
}
if (options.extendedResults == true && reader != null && field != null) {
result.addFrequency(token, docFreq);
int countLimit = Math.min(options.count, suggestions.length);
if(countLimit>0)
{
for (int i = 0; i < countLimit; i++) {
term = new Term(field, suggestions[i]);
result.add(token, suggestions[i], reader.docFreq(term));
}
} else {
List<String> suggList = Collections.emptyList();
result.add(token, suggList);
}
} else {
if (suggestions.length > 0) {
List<String> suggList = Arrays.asList(suggestions);
if (suggestions.length > options.count) {
suggList = suggList.subList(0, options.count);
}
result.add(token, suggList);
} else {
List<String> suggList = Collections.emptyList();
result.add(token, suggList);
}
}
}
return result;
}
// in solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
Override
public void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException {
spellChecker.setSpellIndex(index);
}
// in solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
protected void initIndex() throws IOException {
if (indexDir != null) {
index = FSDirectory.open(new File(indexDir));
} else {
index = new RAMDirectory();
}
}
// in solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java
Override
public void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException {
LOG.info("reload()");
if (dictionary == null && storeDir != null) {
// this may be a firstSearcher event, try loading it
if (lookup.load(new FileInputStream(new File(storeDir, factory.storeFileName())))) {
return; // loaded ok
}
LOG.debug("load failed, need to build Lookup again");
}
// loading was unsuccessful - build it again
build(core, searcher);
}
// in solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java
Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
LOG.debug("getSuggestions: " + options.tokens);
if (lookup == null) {
LOG.info("Lookup is null - invoke spellchecker.build first");
return EMPTY_RESULT;
}
SpellingResult res = new SpellingResult();
CharsRef scratch = new CharsRef();
for (Token t : options.tokens) {
scratch.chars = t.buffer();
scratch.offset = 0;
scratch.length = t.length();
List<LookupResult> suggestions = lookup.lookup(scratch,
(options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR), options.count);
if (suggestions == null) {
continue;
}
if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
Collections.sort(suggestions);
}
for (LookupResult lr : suggestions) {
res.add(t, lr.key.toString(), (int)lr.value);
}
}
return res;
}
// in solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
Override
public void reload(SolrCore core, SolrIndexSearcher searcher)
throws IOException {}
// in solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
Override
public SpellingResult getSuggestions(SpellingOptions options)
throws IOException {
LOG.debug("getSuggestions: " + options.tokens);
SpellingResult result = new SpellingResult();
float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy;
for (Token token : options.tokens) {
String tokenText = token.toString();
Term term = new Term(field, tokenText);
int freq = options.reader.docFreq(term);
int count = (options.alternativeTermCount != null && freq > 0) ? options.alternativeTermCount: options.count;
SuggestWord[] suggestions = checker.suggestSimilar(term, count,options.reader, options.suggestMode, accuracy);
result.addFrequency(token, freq);
// If considering alternatives to "correctly-spelled" terms, then add the
// original as a viable suggestion.
if (options.alternativeTermCount != null && freq > 0) {
boolean foundOriginal = false;
SuggestWord[] suggestionsWithOrig = new SuggestWord[suggestions.length + 1];
for (int i = 0; i < suggestions.length; i++) {
if (suggestions[i].string.equals(tokenText)) {
foundOriginal = true;
break;
}
suggestionsWithOrig[i + 1] = suggestions[i];
}
if (!foundOriginal) {
SuggestWord orig = new SuggestWord();
orig.freq = freq;
orig.string = tokenText;
suggestionsWithOrig[0] = orig;
suggestions = suggestionsWithOrig;
}
}
if(suggestions.length==0 && freq==0) {
List<String> empty = Collections.emptyList();
result.add(token, empty);
} else {
for (SuggestWord suggestion : suggestions) {
result.add(token, suggestion.string, suggestion.freq);
}
}
}
return result;
}
// in solr/core/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java
Override
public void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException {
super.reload(core, searcher);
//reload the source
initSourceReader();
}
// in solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java
private boolean syncWithReplicas(ZkController zkController, SolrCore core,
ZkNodeProps props, String collection, String shardId)
throws MalformedURLException, SolrServerException, IOException {
List<ZkCoreNodeProps> nodes = zkController.getZkStateReader()
.getReplicaProps(collection, shardId,
props.get(ZkStateReader.NODE_NAME_PROP),
props.get(ZkStateReader.CORE_NAME_PROP), ZkStateReader.ACTIVE); // TODO: should there be a state filter?
if (nodes == null) {
// I have no replicas
return true;
}
List<String> syncWith = new ArrayList<String>();
for (ZkCoreNodeProps node : nodes) {
// if we see a leader, must be stale state, and this is the guy that went down
if (!node.getNodeProps().keySet().contains(ZkStateReader.LEADER_PROP)) {
syncWith.add(node.getCoreUrl());
}
}
PeerSync peerSync = new PeerSync(core, syncWith, core.getUpdateHandler().getUpdateLog().numRecordsToKeep);
return peerSync.sync();
}
// in solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java
private void syncToMe(ZkController zkController, String collection,
String shardId, ZkNodeProps leaderProps) throws MalformedURLException,
SolrServerException, IOException {
// sync everyone else
// TODO: we should do this in parallel at least
List<ZkCoreNodeProps> nodes = zkController
.getZkStateReader()
.getReplicaProps(collection, shardId,
leaderProps.get(ZkStateReader.NODE_NAME_PROP),
leaderProps.get(ZkStateReader.CORE_NAME_PROP), ZkStateReader.ACTIVE);
if (nodes == null) {
// System.out.println("I have no replicas");
// I have no replicas
return;
}
//System.out.println("tell my replicas to sync");
ZkCoreNodeProps zkLeader = new ZkCoreNodeProps(leaderProps);
for (ZkCoreNodeProps node : nodes) {
try {
// System.out
// .println("try and ask " + node.getCoreUrl() + " to sync");
log.info("try and ask " + node.getCoreUrl() + " to sync");
requestSync(zkLeader.getCoreUrl(), node.getCoreName());
} catch (Exception e) {
SolrException.log(log, "Error syncing replica to leader", e);
}
}
for(;;) {
ShardResponse srsp = shardHandler.takeCompletedOrError();
if (srsp == null) break;
boolean success = handleResponse(srsp);
//System.out.println("got response:" + success);
if (!success) {
try {
log.info("Sync failed - asking replica to recover.");
//System.out.println("Sync failed - asking replica to recover.");
RequestRecovery recoverRequestCmd = new RequestRecovery();
recoverRequestCmd.setAction(CoreAdminAction.REQUESTRECOVERY);
recoverRequestCmd.setCoreName(((SyncShardRequest)srsp.getShardRequest()).coreName);
HttpSolrServer server = new HttpSolrServer(zkLeader.getBaseUrl());
server.request(recoverRequestCmd);
} catch (Exception e) {
log.info("Could not tell a replica to recover", e);
}
shardHandler.cancelAll();
break;
}
}
}
// in solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
Override
void runLeaderProcess(boolean weAreReplacement)
throws KeeperException, InterruptedException, IOException {
try {
zkClient.makePath(leaderPath,
leaderProps == null ? null : ZkStateReader.toJSON(leaderProps),
CreateMode.EPHEMERAL, true);
} catch (NodeExistsException e) {
// if a previous leader ephemeral still exists for some reason, try and
// remove it
zkClient.delete(leaderPath, -1, true);
zkClient.makePath(leaderPath,
leaderProps == null ? null : ZkStateReader.toJSON(leaderProps),
CreateMode.EPHEMERAL, true);
}
}
// in solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
Override
void runLeaderProcess(boolean weAreReplacement)
throws KeeperException, InterruptedException, IOException {
if (cc != null) {
String coreName = leaderProps.get(ZkStateReader.CORE_NAME_PROP);
SolrCore core = null;
try {
// the first time we are run, we will get a startupCore - after
// we will get null and must use cc.getCore
core = cc.getCore(coreName);
if (core == null) {
cancelElection();
throw new SolrException(ErrorCode.SERVER_ERROR, "Fatal Error, SolrCore not found:" + coreName + " in " + cc.getCoreNames());
}
// should I be leader?
if (weAreReplacement && !shouldIBeLeader(leaderProps)) {
// System.out.println("there is a better leader candidate it appears");
rejoinLeaderElection(leaderSeqPath, core);
return;
}
if (weAreReplacement) {
if (zkClient.exists(leaderPath, true)) {
zkClient.delete(leaderPath, -1, true);
}
// System.out.println("I may be the new Leader:" + leaderPath
// + " - I need to try and sync");
boolean success = syncStrategy.sync(zkController, core, leaderProps);
if (!success && anyoneElseActive()) {
rejoinLeaderElection(leaderSeqPath, core);
return;
}
}
// If I am going to be the leader I have to be active
// System.out.println("I am leader go active");
core.getUpdateHandler().getSolrCoreState().cancelRecovery();
zkController.publish(core.getCoreDescriptor(), ZkStateReader.ACTIVE);
} finally {
if (core != null ) {
core.close();
}
}
}
super.runLeaderProcess(weAreReplacement);
}
// in solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
private void rejoinLeaderElection(String leaderSeqPath, SolrCore core)
throws InterruptedException, KeeperException, IOException {
// remove our ephemeral and re join the election
// System.out.println("sync failed, delete our election node:"
// + leaderSeqPath);
zkController.publish(core.getCoreDescriptor(), ZkStateReader.DOWN);
cancelElection();
core.getUpdateHandler().getSolrCoreState().doRecovery(cc, core.getName());
leaderElector.joinElection(this);
}
// in solr/core/src/java/org/apache/solr/cloud/SolrZkServer.java
Override
public void parseProperties(Properties zkProp)
throws IOException, ConfigException {
for (Entry<Object, Object> entry : zkProp.entrySet()) {
String key = entry.getKey().toString().trim();
String value = entry.getValue().toString().trim();
if (key.equals("dataDir")) {
dataDir = value;
} else if (key.equals("dataLogDir")) {
dataLogDir = value;
} else if (key.equals("clientPort")) {
setClientPort(Integer.parseInt(value));
} else if (key.equals("tickTime")) {
tickTime = Integer.parseInt(value);
} else if (key.equals("initLimit")) {
initLimit = Integer.parseInt(value);
} else if (key.equals("syncLimit")) {
syncLimit = Integer.parseInt(value);
} else if (key.equals("electionAlg")) {
electionAlg = Integer.parseInt(value);
} else if (key.equals("maxClientCnxns")) {
maxClientCnxns = Integer.parseInt(value);
} else if (key.startsWith("server.")) {
int dot = key.indexOf('.');
long sid = Long.parseLong(key.substring(dot + 1));
String parts[] = value.split(":");
if ((parts.length != 2) && (parts.length != 3)) {
LOG.error(value
+ " does not have the form host:port or host:port:port");
}
InetSocketAddress addr = new InetSocketAddress(parts[0],
Integer.parseInt(parts[1]));
if (parts.length == 2) {
servers.put(Long.valueOf(sid), new QuorumPeer.QuorumServer(sid, addr));
} else if (parts.length == 3) {
InetSocketAddress electionAddr = new InetSocketAddress(
parts[0], Integer.parseInt(parts[2]));
servers.put(Long.valueOf(sid), new QuorumPeer.QuorumServer(sid, addr,
electionAddr));
}
} else if (key.startsWith("group")) {
int dot = key.indexOf('.');
long gid = Long.parseLong(key.substring(dot + 1));
numGroups++;
String parts[] = value.split(":");
for(String s : parts){
long sid = Long.parseLong(s);
if(serverGroup.containsKey(sid))
throw new ConfigException("Server " + sid + "is in multiple groups");
else
serverGroup.put(sid, gid);
}
} else if(key.startsWith("weight")) {
int dot = key.indexOf('.');
long sid = Long.parseLong(key.substring(dot + 1));
serverWeight.put(sid, Long.parseLong(value));
} else {
System.setProperty("zookeeper." + key, value);
}
}
if (dataDir == null) {
throw new IllegalArgumentException("dataDir is not set");
}
if (dataLogDir == null) {
dataLogDir = dataDir;
} else {
if (!new File(dataLogDir).isDirectory()) {
throw new IllegalArgumentException("dataLogDir " + dataLogDir
+ " is missing.");
}
}
if (tickTime == 0) {
throw new IllegalArgumentException("tickTime is not set");
}
if (servers.size() > 1) {
if (initLimit == 0) {
throw new IllegalArgumentException("initLimit is not set");
}
if (syncLimit == 0) {
throw new IllegalArgumentException("syncLimit is not set");
}
/*
* If using FLE, then every server requires a separate election
* port.
*/
if (electionAlg != 0) {
for (QuorumPeer.QuorumServer s : servers.values()) {
if (s.electionAddr == null)
throw new IllegalArgumentException(
"Missing election port for server: " + s.id);
}
}
/*
* Default of quorum config is majority
*/
if(serverGroup.size() > 0){
if(servers.size() != serverGroup.size())
throw new ConfigException("Every server must be in exactly one group");
/*
* The default weight of a server is 1
*/
for(QuorumPeer.QuorumServer s : servers.values()){
if(!serverWeight.containsKey(s.id))
serverWeight.put(s.id, (long) 1);
}
/*
* Set the quorumVerifier to be QuorumHierarchical
*/
quorumVerifier = new QuorumHierarchical(numGroups,
serverWeight, serverGroup);
} else {
/*
* The default QuorumVerifier is QuorumMaj
*/
LOG.info("Defaulting to majority quorums");
quorumVerifier = new QuorumMaj(servers.size());
}
File myIdFile = new File(dataDir, "myid");
if (!myIdFile.exists()) {
///////////////// ADDED FOR SOLR //////
Long myid = getMySeverId();
if (myid != null) {
serverId = myid;
return;
}
if (zkRun == null) return;
//////////////// END ADDED FOR SOLR //////
throw new IllegalArgumentException(myIdFile.toString()
+ " file is missing");
}
BufferedReader br = new BufferedReader(new FileReader(myIdFile));
String myIdString;
try {
myIdString = br.readLine();
} finally {
br.close();
}
try {
serverId = Long.parseLong(myIdString);
} catch (NumberFormatException e) {
throw new IllegalArgumentException("serverid " + myIdString
+ " is not a number");
}
}
}
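// --- illustrative sketch, not from the Solr source: a Properties payload of the kind
// parseProperties() above accepts for a three-node embedded-ZooKeeper ensemble. The
// paths, hostnames, ports and timing values are made-up examples, not recommendations.
import java.util.Properties;

public final class ZkPropsExample {
  public static void main(String[] args) {
    Properties zkProp = new Properties();
    zkProp.setProperty("dataDir", "/var/solr/zoo_data");
    zkProp.setProperty("clientPort", "2181");
    zkProp.setProperty("tickTime", "2000");
    zkProp.setProperty("initLimit", "10");
    zkProp.setProperty("syncLimit", "5");
    // host:quorumPort:electionPort -- three parts, so an election address is parsed too
    zkProp.setProperty("server.1", "zk1.example.com:2888:3888");
    zkProp.setProperty("server.2", "zk2.example.com:2888:3888");
    zkProp.setProperty("server.3", "zk3.example.com:2888:3888");
    for (String name : zkProp.stringPropertyNames()) {
      System.out.println(name + "=" + zkProp.getProperty(name));
    }
  }
}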
// in solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
private void replicate(String nodeName, SolrCore core, ZkNodeProps leaderprops, String baseUrl)
throws SolrServerException, IOException {
String leaderBaseUrl = leaderprops.get(ZkStateReader.BASE_URL_PROP);
ZkCoreNodeProps leaderCNodeProps = new ZkCoreNodeProps(leaderprops);
String leaderUrl = leaderCNodeProps.getCoreUrl();
log.info("Attempting to replicate from " + leaderUrl);
// if we are the leader, either we are trying to recover faster
// than our ephemeral timed out or we are the only node
if (!leaderBaseUrl.equals(baseUrl)) {
// send commit
commitOnLeader(leaderUrl);
// use rep handler directly, so we can do this sync rather than async
SolrRequestHandler handler = core.getRequestHandler(REPLICATION_HANDLER);
if (handler instanceof LazyRequestHandlerWrapper) {
handler = ((LazyRequestHandlerWrapper)handler).getWrappedHandler();
}
ReplicationHandler replicationHandler = (ReplicationHandler) handler;
if (replicationHandler == null) {
throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE,
"Skipping recovery, no " + REPLICATION_HANDLER + " handler found");
}
ModifiableSolrParams solrParams = new ModifiableSolrParams();
solrParams.set(ReplicationHandler.MASTER_URL, leaderUrl + "replication");
if (isClosed()) retries = INTERRUPTED;
boolean success = replicationHandler.doFetch(solrParams, true); // TODO: look into making sure force=true does not download files we already have
if (!success) {
throw new SolrException(ErrorCode.SERVER_ERROR, "Replication for recovery failed.");
}
// solrcloud_debug
// try {
// RefCounted<SolrIndexSearcher> searchHolder = core.getNewestSearcher(false);
// SolrIndexSearcher searcher = searchHolder.get();
// try {
// System.out.println(core.getCoreDescriptor().getCoreContainer().getZkController().getNodeName() + " replicated "
// + searcher.search(new MatchAllDocsQuery(), 1).totalHits + " from " + leaderUrl + " gen:" + core.getDeletionPolicy().getLatestCommit().getGeneration() + " data:" + core.getDataDir());
// } finally {
// searchHolder.decref();
// }
// } catch (Exception e) {
//
// }
}
}
// in solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
private void commitOnLeader(String leaderUrl) throws MalformedURLException,
SolrServerException, IOException {
HttpSolrServer server = new HttpSolrServer(leaderUrl);
server.setConnectionTimeout(30000);
server.setSoTimeout(30000);
UpdateRequest ureq = new UpdateRequest();
ureq.setParams(new ModifiableSolrParams());
ureq.getParams().set(DistributedUpdateProcessor.COMMIT_END_POINT, true);
ureq.setAction(AbstractUpdateRequest.ACTION.COMMIT, false, true).process(
server);
server.shutdown();
}
// in solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
private void sendPrepRecoveryCmd(String leaderBaseUrl,
String leaderCoreName) throws MalformedURLException, SolrServerException,
IOException {
HttpSolrServer server = new HttpSolrServer(leaderBaseUrl);
server.setConnectionTimeout(45000);
server.setSoTimeout(45000);
WaitForState prepCmd = new WaitForState();
prepCmd.setCoreName(leaderCoreName);
prepCmd.setNodeName(zkController.getNodeName());
prepCmd.setCoreNodeName(coreZkNodeName);
prepCmd.setState(ZkStateReader.RECOVERING);
prepCmd.setCheckLive(true);
prepCmd.setPauseFor(6000);
server.request(prepCmd);
server.shutdown();
}
// in solr/core/src/java/org/apache/solr/cloud/ZkController.java
private String getHostAddress(String host) throws IOException {
if (host == null) {
host = "http://" + InetAddress.getLocalHost().getHostName();
} else {
Matcher m = URL_PREFIX.matcher(host);
if (m.matches()) {
String prefix = m.group(1);
host = prefix + host;
} else {
host = "http://" + host;
}
}
return host;
}
// in solr/core/src/java/org/apache/solr/cloud/ZkController.java
public String readConfigName(String collection) throws KeeperException,
InterruptedException, IOException {
String configName = null;
String path = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection;
if (log.isInfoEnabled()) {
log.info("Load collection config from:" + path);
}
byte[] data = zkClient.getData(path, null, null, true);
if(data != null) {
ZkNodeProps props = ZkNodeProps.load(data);
configName = props.get(CONFIGNAME_PROP);
}
if (configName != null && !zkClient.exists(CONFIGS_ZKNODE + "/" + configName, true)) {
log.error("Specified config does not exist in ZooKeeper:" + configName);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
"Specified config does not exist in ZooKeeper:" + configName);
}
return configName;
}
// in solr/core/src/java/org/apache/solr/cloud/ZkController.java
private void joinElection(CoreDescriptor cd) throws InterruptedException, KeeperException, IOException {
String shardId = cd.getCloudDescriptor().getShardId();
Map<String,String> props = new HashMap<String,String>();
// we only put a subset of props into the leader node
props.put(ZkStateReader.BASE_URL_PROP, getBaseUrl());
props.put(ZkStateReader.CORE_NAME_PROP, cd.getName());
props.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
final String coreZkNodeName = getNodeName() + "_" + cd.getName();
ZkNodeProps ourProps = new ZkNodeProps(props);
String collection = cd.getCloudDescriptor()
.getCollectionName();
ElectionContext context = new ShardLeaderElectionContext(leaderElector, shardId,
collection, coreZkNodeName, ourProps, this, cc);
leaderElector.setup(context);
electionContexts.put(coreZkNodeName, context);
leaderElector.joinElection(context);
}
// in solr/core/src/java/org/apache/solr/cloud/ZkController.java
private boolean checkRecovery(String coreName, final CoreDescriptor desc,
boolean recoverReloadedCores, final boolean isLeader,
final CloudDescriptor cloudDesc, final String collection,
final String shardZkNodeName, String shardId, ZkNodeProps leaderProps,
SolrCore core, CoreContainer cc) throws InterruptedException,
KeeperException, IOException, ExecutionException {
if (SKIP_AUTO_RECOVERY) {
log.warn("Skipping recovery according to sys prop solrcloud.skip.autorecovery");
return false;
}
boolean doRecovery = true;
if (!isLeader) {
if (core.isReloaded() && !recoverReloadedCores) {
doRecovery = false;
}
if (doRecovery) {
log.info("Core needs to recover:" + core.getName());
core.getUpdateHandler().getSolrCoreState().doRecovery(cc, coreName);
return true;
}
} else {
log.info("I am the leader, no recovery necessary");
}
return false;
}
// in solr/core/src/java/org/apache/solr/cloud/ZkController.java
public void uploadToZK(File dir, String zkPath) throws IOException, KeeperException, InterruptedException {
uploadToZK(zkClient, dir, zkPath);
}
// in solr/core/src/java/org/apache/solr/cloud/ZkController.java
public void uploadConfigDir(File dir, String configName) throws IOException, KeeperException, InterruptedException {
uploadToZK(zkClient, dir, ZkController.CONFIGS_ZKNODE + "/" + configName);
}
// in solr/core/src/java/org/apache/solr/cloud/ZkController.java
public void createCollectionZkNode(CloudDescriptor cd) throws KeeperException, InterruptedException, IOException {
String collection = cd.getCollectionName();
log.info("Check for collection zkNode:" + collection);
String collectionPath = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection;
try {
if(!zkClient.exists(collectionPath, true)) {
log.info("Creating collection in ZooKeeper:" + collection);
SolrParams params = cd.getParams();
try {
Map<String,String> collectionProps = new HashMap<String,String>();
// TODO: if collection.configName isn't set, and there isn't already a conf in zk, just use that?
String defaultConfigName = System.getProperty(COLLECTION_PARAM_PREFIX+CONFIGNAME_PROP, collection);
// params passed in - currently only done via core admin (create core command).
if (params != null) {
Iterator<String> iter = params.getParameterNamesIterator();
while (iter.hasNext()) {
String paramName = iter.next();
if (paramName.startsWith(COLLECTION_PARAM_PREFIX)) {
collectionProps.put(paramName.substring(COLLECTION_PARAM_PREFIX.length()), params.get(paramName));
}
}
// if the config name wasn't passed in, use the default
if (!collectionProps.containsKey(CONFIGNAME_PROP))
getConfName(collection, collectionPath, collectionProps);
} else if(System.getProperty("bootstrap_confdir") != null) {
// if we are bootstrapping a collection, default the config for
// a new collection to the collection we are bootstrapping
log.info("Setting config for collection:" + collection + " to " + defaultConfigName);
Properties sysProps = System.getProperties();
for (String sprop : System.getProperties().stringPropertyNames()) {
if (sprop.startsWith(COLLECTION_PARAM_PREFIX)) {
collectionProps.put(sprop.substring(COLLECTION_PARAM_PREFIX.length()), sysProps.getProperty(sprop));
}
}
// if the config name wasn't passed in, use the default
if (!collectionProps.containsKey(CONFIGNAME_PROP))
collectionProps.put(CONFIGNAME_PROP, defaultConfigName);
} else if (Boolean.getBoolean("bootstrap_conf")) {
// the conf name should be the collection name of this core
collectionProps.put(CONFIGNAME_PROP, cd.getCollectionName());
} else {
getConfName(collection, collectionPath, collectionProps);
}
ZkNodeProps zkProps = new ZkNodeProps(collectionProps);
zkClient.makePath(collectionPath, ZkStateReader.toJSON(zkProps), CreateMode.PERSISTENT, null, true);
// ping that there is a new collection
zkClient.setData(ZkStateReader.COLLECTIONS_ZKNODE, (byte[])null, true);
} catch (KeeperException e) {
// it's okay if the node already exists
if (e.code() != KeeperException.Code.NODEEXISTS) {
throw e;
}
}
} else {
log.info("Collection zkNode exists");
}
} catch (KeeperException e) {
// it's okay if someone else beats us to creating the node
if (e.code() != KeeperException.Code.NODEEXISTS) {
throw e;
}
}
}
// in solr/core/src/java/org/apache/solr/cloud/ZkController.java
public static void uploadToZK(SolrZkClient zkClient, File dir, String zkPath) throws IOException, KeeperException, InterruptedException {
File[] files = dir.listFiles();
if (files == null) {
throw new IllegalArgumentException("Illegal directory: " + dir);
}
for(File file : files) {
if (!file.getName().startsWith(".")) {
if (!file.isDirectory()) {
zkClient.makePath(zkPath + "/" + file.getName(), file, false, true);
} else {
uploadToZK(zkClient, file, zkPath + "/" + file.getName());
}
}
}
}
// in solr/core/src/java/org/apache/solr/cloud/ZkController.java
public static void uploadConfigDir(SolrZkClient zkClient, File dir, String configName) throws IOException, KeeperException, InterruptedException {
uploadToZK(zkClient, dir, ZkController.CONFIGS_ZKNODE + "/" + configName);
}
// in solr/core/src/java/org/apache/solr/cloud/ZkController.java
public static void bootstrapConf(SolrZkClient zkClient, Config cfg, String solrHome) throws IOException,
KeeperException, InterruptedException {
NodeList nodes = (NodeList)cfg.evaluate("solr/cores/core", XPathConstants.NODESET);
for (int i=0; i<nodes.getLength(); i++) {
Node node = nodes.item(i);
String rawName = DOMUtil.getAttr(node, "name", null);
String instanceDir = DOMUtil.getAttr(node, "instanceDir", null);
File idir = new File(instanceDir);
if (!idir.isAbsolute()) {
idir = new File(solrHome, instanceDir);
}
String confName = DOMUtil.getAttr(node, "collection", null);
if (confName == null) {
confName = rawName;
}
ZkController.uploadConfigDir(zkClient, new File(idir, "conf"), confName);
}
}
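// Worked example (added; the solr.xml snippet is illustrative): given an entry such as
//   <cores ...><core name="core1" instanceDir="core1" collection="myconf"/></cores>
// bootstrapConf uploads <solrHome>/core1/conf into ZooKeeper under CONFIGS_ZKNODE + "/myconf".
// When the "collection" attribute is absent, the core's own name ("core1") becomes the config name.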
// in solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
private void checkIfIamLeader(final int seq, final ElectionContext context, boolean replacement) throws KeeperException,
InterruptedException, IOException {
// get all other numbers...
final String holdElectionPath = context.electionPath + ELECTION_NODE;
List<String> seqs = zkClient.getChildren(holdElectionPath, null, true);
sortSeqs(seqs);
List<Integer> intSeqs = getSeqs(seqs);
if (seq <= intSeqs.get(0)) {
runIamLeaderProcess(context, replacement);
} else {
// I am not the leader - watch the node below me
int i = 1;
for (; i < intSeqs.size(); i++) {
int s = intSeqs.get(i);
if (seq < s) {
// we found who we come before - watch the guy in front
break;
}
}
int index = i - 2;
if (index < 0) {
log.warn("Our node is no longer in line to be leader");
return;
}
try {
zkClient.getData(holdElectionPath + "/" + seqs.get(index),
new Watcher() {
@Override
public void process(WatchedEvent event) {
// am I the next leader?
try {
checkIfIamLeader(seq, context, true);
} catch (InterruptedException e) {
// Restore the interrupted status
Thread.currentThread().interrupt();
log.warn("", e);
} catch (IOException e) {
log.warn("", e);
} catch (Exception e) {
log.warn("", e);
}
}
}, null, true);
} catch (KeeperException.SessionExpiredException e) {
throw e;
} catch (KeeperException e) {
// we couldn't set our watch - the node before us may already be down?
// we need to check if we are the leader again
checkIfIamLeader(seq, context, true);
}
}
}
// in solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
protected void runIamLeaderProcess(final ElectionContext context, boolean weAreReplacement) throws KeeperException,
InterruptedException, IOException {
context.runLeaderProcess(weAreReplacement);
}
// in solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
public int joinElection(ElectionContext context) throws KeeperException, InterruptedException, IOException {
final String shardsElectZkPath = context.electionPath + LeaderElector.ELECTION_NODE;
long sessionId = zkClient.getSolrZooKeeper().getSessionId();
String id = sessionId + "-" + context.id;
String leaderSeqPath = null;
boolean cont = true;
int tries = 0;
while (cont) {
try {
leaderSeqPath = zkClient.create(shardsElectZkPath + "/" + id + "-n_", null,
CreateMode.EPHEMERAL_SEQUENTIAL, false);
context.leaderSeqPath = leaderSeqPath;
cont = false;
} catch (ConnectionLossException e) {
// we don't know if we made our node or not...
List<String> entries = zkClient.getChildren(shardsElectZkPath, null, true);
boolean foundId = false;
for (String entry : entries) {
String nodeId = getNodeId(entry);
if (id.equals(nodeId)) {
// we did create our node...
foundId = true;
break;
}
}
if (!foundId) {
throw e;
}
} catch (KeeperException.NoNodeException e) {
// we must have failed in creating the election node - someone else must
// be working on it, let's try again
if (tries++ > 9) {
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
"", e);
}
cont = true;
Thread.sleep(50);
}
}
int seq = getSeq(leaderSeqPath);
checkIfIamLeader(seq, context, false);
return seq;
}
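// Illustrative sketch (added; not part of the original source): ZooKeeper appends a zero-padded,
// monotonically increasing counter to EPHEMERAL_SEQUENTIAL nodes, so a node created from the
// "<sessionId>-<id>-n_" prefix above ends up with a name like "12345-core1-n_0000000003", carrying
// sequence number 3. The node holding the lowest sequence becomes leader, and every other node
// watches only the node directly ahead of it (see checkIfIamLeader above), avoiding a thundering
// herd when the current leader disappears. A minimal parse of that trailing counter, assuming the
// "-n_<digits>" suffix shown above, could look like this:
static int parseSeq(String leaderSeqPath) {
  // read the digits after the last "-n_" marker as the sequence number
  int idx = leaderSeqPath.lastIndexOf("-n_");
  return Integer.parseInt(leaderSeqPath.substring(idx + "-n_".length()));
}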
// in solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
private void deleteAll() throws IOException {
SolrCore.log.info(core.getLogId()+"REMOVING ALL DOCUMENTS FROM INDEX");
solrCoreState.getIndexWriter(core).deleteAll();
}
// in solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
protected void rollbackWriter() throws IOException {
numDocsPending.set(0);
solrCoreState.rollbackIndexWriter(core);
}
// in solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
@Override
public int addDoc(AddUpdateCommand cmd) throws IOException {
IndexWriter writer = solrCoreState.getIndexWriter(core);
addCommands.incrementAndGet();
addCommandsCumulative.incrementAndGet();
int rc=-1;
// if there is no ID field, don't overwrite
if( idField == null ) {
cmd.overwrite = false;
}
try {
if (cmd.overwrite) {
Term updateTerm;
Term idTerm = new Term(idField.getName(), cmd.getIndexedId());
boolean del = false;
if (cmd.updateTerm == null) {
updateTerm = idTerm;
} else {
del = true;
updateTerm = cmd.updateTerm;
}
Document luceneDocument = cmd.getLuceneDocument();
// SolrCore.verbose("updateDocument",updateTerm,luceneDocument,writer);
writer.updateDocument(updateTerm, luceneDocument);
// SolrCore.verbose("updateDocument",updateTerm,"DONE");
if(del) { // ensure id remains unique
BooleanQuery bq = new BooleanQuery();
bq.add(new BooleanClause(new TermQuery(updateTerm), Occur.MUST_NOT));
bq.add(new BooleanClause(new TermQuery(idTerm), Occur.MUST));
writer.deleteDocuments(bq);
}
} else {
// allow duplicates
writer.addDocument(cmd.getLuceneDocument());
}
// Add to the transaction log *after* successfully adding to the index, if there was no error.
// This ordering ensures that if we log it, it's definitely been added to the index.
// This also ensures that if a commit sneaks in between, we know everything in a particular
// log version was definitely committed.
if (ulog != null) ulog.add(cmd);
if ((cmd.getFlags() & UpdateCommand.IGNORE_AUTOCOMMIT) == 0) {
commitTracker.addedDocument( -1 );
softCommitTracker.addedDocument( cmd.commitWithin );
}
rc = 1;
} finally {
if (rc!=1) {
numErrors.incrementAndGet();
numErrorsCumulative.incrementAndGet();
} else {
numDocsPending.incrementAndGet();
}
}
return rc;
}
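// Clarifying note (added): when cmd.updateTerm is set (for example by the de-duplication processor
// shown further below, which keys updates on a signature field), writer.updateDocument(updateTerm, doc)
// replaces any documents sharing that term, and the BooleanQuery delete above then removes documents
// that still carry the same unique id (idTerm, MUST) but not the new update term (updateTerm, MUST_NOT),
// keeping the unique key unique even though the update itself was keyed on a different term.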
// in solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
@Override
public void delete(DeleteUpdateCommand cmd) throws IOException {
deleteByIdCommands.incrementAndGet();
deleteByIdCommandsCumulative.incrementAndGet();
IndexWriter writer = solrCoreState.getIndexWriter(core);
Term deleteTerm = new Term(idField.getName(), cmd.getIndexedId());
// SolrCore.verbose("deleteDocuments",deleteTerm,writer);
writer.deleteDocuments(deleteTerm);
// SolrCore.verbose("deleteDocuments",deleteTerm,"DONE");
if (ulog != null) ulog.delete(cmd);
updateDeleteTrackers(cmd);
}
// in solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
@Override
public void deleteByQuery(DeleteUpdateCommand cmd) throws IOException {
deleteByQueryCommands.incrementAndGet();
deleteByQueryCommandsCumulative.incrementAndGet();
boolean madeIt=false;
try {
Query q;
try {
// TODO: move this higher in the stack?
QParser parser = QParser.getParser(cmd.query, "lucene", cmd.req);
q = parser.getQuery();
q = QueryUtils.makeQueryable(q);
// peer-sync can cause older deleteByQueries to be executed and could
// delete newer documents. We prevent this by adding a clause restricting
// version.
if ((cmd.getFlags() & UpdateCommand.PEER_SYNC) != 0) {
BooleanQuery bq = new BooleanQuery();
bq.add(q, Occur.MUST);
SchemaField sf = core.getSchema().getField(VersionInfo.VERSION_FIELD);
ValueSource vs = sf.getType().getValueSource(sf, null);
ValueSourceRangeFilter filt = new ValueSourceRangeFilter(vs, null, Long.toString(Math.abs(cmd.version)), true, true);
FunctionRangeQuery range = new FunctionRangeQuery(filt);
bq.add(range, Occur.MUST);
q = bq;
}
} catch (ParseException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
boolean delAll = MatchAllDocsQuery.class == q.getClass();
//
// synchronized to prevent deleteByQuery from running during the "open new searcher"
// part of a commit. DBQ needs to signal that a fresh reader will be needed for
// a realtime view of the index. When a new searcher is opened after a DBQ, that
// flag can be cleared. If those things happen concurrently, it's not thread safe.
//
synchronized (this) {
if (delAll) {
deleteAll();
} else {
solrCoreState.getIndexWriter(core).deleteDocuments(q);
}
if (ulog != null) ulog.deleteByQuery(cmd);
}
madeIt = true;
updateDeleteTrackers(cmd);
} finally {
if (!madeIt) {
numErrors.incrementAndGet();
numErrorsCumulative.incrementAndGet();
}
}
}
// in solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
@Override
public int mergeIndexes(MergeIndexesCommand cmd) throws IOException {
mergeIndexesCommands.incrementAndGet();
int rc;
log.info("start " + cmd);
IndexReader[] readers = cmd.readers;
if (readers != null && readers.length > 0) {
solrCoreState.getIndexWriter(core).addIndexes(readers);
rc = 1;
} else {
rc = 0;
}
log.info("end_mergeIndexes");
// TODO: consider soft commit issues
if (rc == 1 && commitTracker.getTimeUpperBound() > 0) {
commitTracker.scheduleCommitWithin(commitTracker.getTimeUpperBound());
} else if (rc == 1 && softCommitTracker.getTimeUpperBound() > 0) {
softCommitTracker.scheduleCommitWithin(softCommitTracker.getTimeUpperBound());
}
return rc;
}
// in solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
public void prepareCommit(CommitUpdateCommand cmd) throws IOException {
boolean error=true;
try {
log.info("start "+cmd);
IndexWriter writer = solrCoreState.getIndexWriter(core);
writer.prepareCommit();
log.info("end_prepareCommit");
error=false;
}
finally {
if (error) numErrors.incrementAndGet();
}
}
// in solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
@Override
public void commit(CommitUpdateCommand cmd) throws IOException {
if (cmd.prepareCommit) {
prepareCommit(cmd);
return;
}
IndexWriter writer = solrCoreState.getIndexWriter(core);
if (cmd.optimize) {
optimizeCommands.incrementAndGet();
} else {
commitCommands.incrementAndGet();
if (cmd.expungeDeletes) expungeDeleteCommands.incrementAndGet();
}
Future[] waitSearcher = null;
if (cmd.waitSearcher) {
waitSearcher = new Future[1];
}
boolean error=true;
try {
// only allow one hard commit to proceed at once
if (!cmd.softCommit) {
commitLock.lock();
}
log.info("start "+cmd);
// We must cancel pending commits *before* we actually execute the commit.
if (cmd.openSearcher) {
// we can cancel any pending soft commits if this commit will open a new searcher
softCommitTracker.cancelPendingCommit();
}
if (!cmd.softCommit && (cmd.openSearcher || !commitTracker.getOpenSearcher())) {
// cancel a pending hard commit if this commit is of equal or greater "strength"...
// If the autoCommit has openSearcher=true, then this commit must have openSearcher=true
// to cancel.
commitTracker.cancelPendingCommit();
}
if (cmd.optimize) {
writer.forceMerge(cmd.maxOptimizeSegments);
} else if (cmd.expungeDeletes) {
writer.forceMergeDeletes();
}
if (!cmd.softCommit) {
synchronized (this) { // sync is currently needed to prevent preCommit from being called between preSoft and postSoft... see postSoft comments.
if (ulog != null) ulog.preCommit(cmd);
}
// SolrCore.verbose("writer.commit() start writer=",writer);
final Map<String,String> commitData = new HashMap<String,String>();
commitData.put(SolrIndexWriter.COMMIT_TIME_MSEC_KEY, String.valueOf(System.currentTimeMillis()));
writer.commit(commitData);
// SolrCore.verbose("writer.commit() end");
numDocsPending.set(0);
callPostCommitCallbacks();
} else {
callPostSoftCommitCallbacks();
}
if (cmd.optimize) {
callPostOptimizeCallbacks();
}
if (cmd.softCommit) {
// ulog.preSoftCommit();
synchronized (this) {
if (ulog != null) ulog.preSoftCommit(cmd);
core.getSearcher(true, false, waitSearcher, true);
if (ulog != null) ulog.postSoftCommit(cmd);
}
// ulog.postSoftCommit();
} else {
synchronized (this) {
if (ulog != null) ulog.preSoftCommit(cmd);
if (cmd.openSearcher) {
core.getSearcher(true, false, waitSearcher);
} else {
// force open a new realtime searcher so realtime-get and versioning code can see the latest
RefCounted<SolrIndexSearcher> searchHolder = core.openNewSearcher(true, true);
searchHolder.decref();
}
if (ulog != null) ulog.postSoftCommit(cmd);
}
if (ulog != null) ulog.postCommit(cmd); // postCommit currently means new searcher has
// also been opened
}
// reset commit tracking
if (cmd.softCommit) {
softCommitTracker.didCommit();
} else {
commitTracker.didCommit();
}
log.info("end_commit_flush");
error=false;
}
finally {
if (!cmd.softCommit) {
commitLock.unlock();
}
addCommands.set(0);
deleteByIdCommands.set(0);
deleteByQueryCommands.set(0);
if (error) numErrors.incrementAndGet();
}
// if we are supposed to wait for the searcher to be registered, then we should do it
// outside any synchronized block so that other update operations can proceed.
if (waitSearcher!=null && waitSearcher[0] != null) {
try {
waitSearcher[0].get();
} catch (InterruptedException e) {
SolrException.log(log,e);
} catch (ExecutionException e) {
SolrException.log(log,e);
}
}
}
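// Note (added): the two paths above differ mainly in durability. A soft commit only opens a new
// searcher (making recent changes visible), while a hard commit also calls writer.commit(commitData),
// stamping COMMIT_TIME_MSEC_KEY into the Lucene commit's user data; and only hard commits take
// commitLock, so soft commits are not serialized behind an in-flight hard commit.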
// in solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
@Override
public void newIndexWriter() throws IOException {
solrCoreState.newIndexWriter(core);
}
// in solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
@Override
public void rollback(RollbackUpdateCommand cmd) throws IOException {
rollbackCommands.incrementAndGet();
boolean error=true;
try {
log.info("start "+cmd);
rollbackWriter();
//callPostRollbackCallbacks();
// reset commit tracking
commitTracker.didRollback();
softCommitTracker.didRollback();
log.info("end_rollback");
error=false;
}
finally {
addCommandsCumulative.set(
addCommandsCumulative.get() - addCommands.getAndSet( 0 ) );
deleteByIdCommandsCumulative.set(
deleteByIdCommandsCumulative.get() - deleteByIdCommands.getAndSet( 0 ) );
deleteByQueryCommandsCumulative.set(
deleteByQueryCommandsCumulative.get() - deleteByQueryCommands.getAndSet( 0 ) );
if (error) numErrors.incrementAndGet();
}
}
// in solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
@Override
public void close() throws IOException {
log.info("closing " + this);
commitTracker.close();
softCommitTracker.close();
numDocsPending.set(0);
solrCoreState.decref(this);
}
// in solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
@Override
public void closeWriter(IndexWriter writer) throws IOException {
boolean clearRequestInfo = false;
commitLock.lock();
try {
SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
SolrQueryResponse rsp = new SolrQueryResponse();
if (SolrRequestInfo.getRequestInfo() == null) {
clearRequestInfo = true;
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp)); // important for debugging
}
if (!commitOnClose) {
if (writer != null) {
writer.rollback();
}
// we shouldn't close the transaction logs either, but leaving them open
// means we can't delete them on windows (needed for tests)
if (ulog != null) ulog.close(false);
return;
}
// do a commit before we quit?
boolean tryToCommit = writer != null && ulog != null && ulog.hasUncommittedChanges() && ulog.getState() == UpdateLog.State.ACTIVE;
try {
if (tryToCommit) {
CommitUpdateCommand cmd = new CommitUpdateCommand(req, false);
cmd.openSearcher = false;
cmd.waitSearcher = false;
cmd.softCommit = false;
// TODO: keep other commit callbacks from being called?
// this.commit(cmd); // too many test failures using this method... is it because of callbacks?
synchronized (this) {
ulog.preCommit(cmd);
}
// todo: refactor this shared code (or figure out why a real CommitUpdateCommand can't be used)
final Map<String,String> commitData = new HashMap<String,String>();
commitData.put(SolrIndexWriter.COMMIT_TIME_MSEC_KEY, String.valueOf(System.currentTimeMillis()));
writer.commit(commitData);
synchronized (this) {
ulog.postCommit(cmd);
}
}
} catch (Throwable th) {
log.error("Error in final commit", th);
}
// we went through the normal process to commit, so we don't have to artificially
// cap any ulog files.
try {
if (ulog != null) ulog.close(false);
} catch (Throwable th) {
log.error("Error closing log files", th);
}
if (writer != null) writer.close();
} finally {
commitLock.unlock();
if (clearRequestInfo) SolrRequestInfo.clearRequestInfo();
}
}
// in solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
// TODO: check for id field?
int hash = 0;
if (zkEnabled) {
zkCheck();
hash = hash(cmd);
nodes = setupRequest(hash);
} else {
isLeader = getNonZkLeaderAssumption(req);
}
boolean dropCmd = false;
if (!forwardToLeader) {
dropCmd = versionAdd(cmd);
}
if (dropCmd) {
// TODO: do we need to add anything to the response?
return;
}
ModifiableSolrParams params = null;
if (nodes != null) {
params = new ModifiableSolrParams(req.getParams());
params.set(DISTRIB_UPDATE_PARAM,
(isLeader ?
DistribPhase.FROMLEADER.toString() :
DistribPhase.TOLEADER.toString()));
params.remove("commit"); // this will be distributed from the local commit
cmdDistrib.distribAdd(cmd, nodes, params);
}
// TODO: what to do when no idField?
if (returnVersions && rsp != null && idField != null) {
if (addsResponse == null) {
addsResponse = new NamedList<String>();
rsp.add("adds",addsResponse);
}
if (scratch == null) scratch = new CharsRef();
idField.getType().indexedToReadable(cmd.getIndexedId(), scratch);
addsResponse.add(scratch.toString(), cmd.getVersion());
}
// TODO: keep track of errors? needs to be done at a higher level though since
// an id may fail before it gets to this processor.
// Given that, it may also make sense to move the version reporting out of this
// processor too.
}
// in solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
private void doLocalAdd(AddUpdateCommand cmd) throws IOException {
super.processAdd(cmd);
}
// in solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
private void doLocalDelete(DeleteUpdateCommand cmd) throws IOException {
super.processDelete(cmd);
}
// in solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
private boolean versionAdd(AddUpdateCommand cmd) throws IOException {
BytesRef idBytes = cmd.getIndexedId();
if (vinfo == null || idBytes == null) {
super.processAdd(cmd);
return false;
}
// This is only the hash for the bucket, and must be based only on the uniqueKey (i.e. do not use a pluggable hash here)
int bucketHash = Hash.murmurhash3_x86_32(idBytes.bytes, idBytes.offset, idBytes.length, 0);
// at this point, there is an update we need to try and apply.
// we may or may not be the leader.
// Find any existing version in the document
// TODO: don't reuse update commands any more!
long versionOnUpdate = cmd.getVersion();
if (versionOnUpdate == 0) {
SolrInputField versionField = cmd.getSolrInputDocument().getField(VersionInfo.VERSION_FIELD);
if (versionField != null) {
Object o = versionField.getValue();
versionOnUpdate = o instanceof Number ? ((Number) o).longValue() : Long.parseLong(o.toString());
} else {
// Find the version
String versionOnUpdateS = req.getParams().get(VERSION_FIELD);
versionOnUpdate = versionOnUpdateS == null ? 0 : Long.parseLong(versionOnUpdateS);
}
}
boolean isReplay = (cmd.getFlags() & UpdateCommand.REPLAY) != 0;
boolean leaderLogic = isLeader && !isReplay;
VersionBucket bucket = vinfo.bucket(bucketHash);
vinfo.lockForUpdate();
try {
synchronized (bucket) {
// we obtain the version when synchronized and then do the add so we can ensure that
// if version1 < version2 then version1 is actually added before version2.
// even if we don't store the version field, synchronizing on the bucket
// will enable us to know what version happened first, and thus enable
// realtime-get to work reliably.
// TODO: if versions aren't stored, do we need to set on the cmd anyway for some reason?
// there may be other reasons in the future for a version on the commands
if (versionsStored) {
long bucketVersion = bucket.highest;
if (leaderLogic) {
boolean updated = getUpdatedDocument(cmd);
if (updated && versionOnUpdate == -1) {
versionOnUpdate = 1; // implied "doc must exist" for now...
}
if (versionOnUpdate != 0) {
Long lastVersion = vinfo.lookupVersion(cmd.getIndexedId());
long foundVersion = lastVersion == null ? -1 : lastVersion;
if ( versionOnUpdate == foundVersion || (versionOnUpdate < 0 && foundVersion < 0) || (versionOnUpdate==1 && foundVersion > 0) ) {
// we're ok if versions match, or if both are negative (all missing docs are equal), or if cmd
// specified it must exist (versionOnUpdate==1) and it does.
} else {
throw new SolrException(ErrorCode.CONFLICT, "version conflict for " + cmd.getPrintableId() + " expected=" + versionOnUpdate + " actual=" + foundVersion);
}
}
long version = vinfo.getNewClock();
cmd.setVersion(version);
cmd.getSolrInputDocument().setField(VersionInfo.VERSION_FIELD, version);
bucket.updateHighest(version);
} else {
// The leader forwarded us this update.
cmd.setVersion(versionOnUpdate);
if (ulog.getState() != UpdateLog.State.ACTIVE && (cmd.getFlags() & UpdateCommand.REPLAY) == 0) {
// we're not in an active state, and this update isn't from a replay, so buffer it.
cmd.setFlags(cmd.getFlags() | UpdateCommand.BUFFERING);
ulog.add(cmd);
return true;
}
// if we aren't the leader, then we need to check that updates were not re-ordered
if (bucketVersion != 0 && bucketVersion < versionOnUpdate) {
// we're OK... this update has a version higher than anything we've seen
// in this bucket so far, so we know that no reordering has yet occurred.
bucket.updateHighest(versionOnUpdate);
} else {
// there have been updates higher than the current update. we need to check
// the specific version for this id.
Long lastVersion = vinfo.lookupVersion(cmd.getIndexedId());
if (lastVersion != null && Math.abs(lastVersion) >= versionOnUpdate) {
// This update is a repeat, or was reordered. We need to drop this update.
return true;
}
}
}
}
doLocalAdd(cmd);
} // end synchronized (bucket)
} finally {
vinfo.unlockForUpdate();
}
return false;
}
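// Worked example of the optimistic-concurrency check above (added for clarity):
//   update sent with _version_=12345 -> applied only if the stored version is exactly 12345
//   update sent with _version_=1     -> applied only if some version of the document already exists
//   update sent with _version_=-1    -> applied only if the document does not exist yet
//     (except for atomic updates, where the versionOnUpdate == -1 branch above rewrites -1 to the
//      "document must exist" case)
// Any other combination makes the leader throw SolrException(ErrorCode.CONFLICT) instead of indexing.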
// in solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
boolean getUpdatedDocument(AddUpdateCommand cmd) throws IOException {
SolrInputDocument sdoc = cmd.getSolrInputDocument();
boolean update = false;
for (SolrInputField sif : sdoc.values()) {
if (sif.getValue() instanceof Map) {
update = true;
break;
}
}
if (!update) return false;
BytesRef id = cmd.getIndexedId();
SolrInputDocument oldDoc = RealTimeGetComponent.getInputDocument(cmd.getReq().getCore(), id);
if (oldDoc == null) {
// not found... allow this in the future (depending on the details of the update, or if the user explicitly sets it).
// could also just not change anything here and let the optimistic locking throw the error
throw new SolrException(ErrorCode.CONFLICT, "Document not found for update. id=" + cmd.getPrintableId());
}
oldDoc.remove(VERSION_FIELD);
for (SolrInputField sif : sdoc.values()) {
Object val = sif.getValue();
if (val instanceof Map) {
for (Entry<String,Object> entry : ((Map<String,Object>) val).entrySet()) {
String key = entry.getKey();
Object fieldVal = entry.getValue();
if ("add".equals(key)) {
oldDoc.addField( sif.getName(), fieldVal, sif.getBoost());
} else if ("set".equals(key)) {
oldDoc.setField(sif.getName(), fieldVal, sif.getBoost());
} else if ("inc".equals(key)) {
SolrInputField numericField = oldDoc.get(sif.getName());
if (numericField == null) {
oldDoc.setField(sif.getName(), fieldVal, sif.getBoost());
} else {
// TODO: fieldtype needs externalToObject?
String oldValS = numericField.getFirstValue().toString();
SchemaField sf = cmd.getReq().getSchema().getField(sif.getName());
BytesRef term = new BytesRef();
sf.getType().readableToIndexed(oldValS, term);
Object oldVal = sf.getType().toObject(sf, term);
String fieldValS = fieldVal.toString();
Number result;
if (oldVal instanceof Long) {
result = ((Long) oldVal).longValue() + Long.parseLong(fieldValS);
} else if (oldVal instanceof Float) {
result = ((Float) oldVal).floatValue() + Float.parseFloat(fieldValS);
} else if (oldVal instanceof Double) {
result = ((Double) oldVal).doubleValue() + Double.parseDouble(fieldValS);
} else {
// int, short, byte
result = ((Integer) oldVal).intValue() + Integer.parseInt(fieldValS);
}
oldDoc.setField(sif.getName(), result, sif.getBoost());
}
}
}
} else {
// normal fields are treated as a "set"
oldDoc.put(sif.getName(), sif);
}
}
cmd.solrDoc = oldDoc;
return true;
}
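// Illustrative sketch (added; field names are hypothetical): a client-side document that would take
// the atomic-update path above, because its field values are Maps keyed by "set"/"add"/"inc".
SolrInputDocument atomic = new SolrInputDocument();
atomic.addField("id", "doc1");                                                     // uniqueKey of the stored document
atomic.addField("price", java.util.Collections.singletonMap("inc", 10));           // numeric increment
atomic.addField("tags", java.util.Collections.singletonMap("add", "sale"));        // append a value
atomic.addField("title", java.util.Collections.singletonMap("set", "New title"));  // replace the value
// getUpdatedDocument() loads the stored document via RealTimeGetComponent, applies each operation,
// and swaps the merged document into cmd.solrDoc before normal indexing continues.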
// in solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
@Override
public void processDelete(DeleteUpdateCommand cmd) throws IOException {
if (!cmd.isDeleteById()) {
doDeleteByQuery(cmd);
return;
}
int hash = 0;
if (zkEnabled) {
zkCheck();
hash = hash(cmd);
nodes = setupRequest(hash);
} else {
isLeader = getNonZkLeaderAssumption(req);
}
boolean dropCmd = false;
if (!forwardToLeader) {
dropCmd = versionDelete(cmd);
}
if (dropCmd) {
// TODO: do we need to add anything to the response?
return;
}
ModifiableSolrParams params = null;
if (nodes != null) {
params = new ModifiableSolrParams(req.getParams());
params.set(DISTRIB_UPDATE_PARAM,
(isLeader ?
DistribPhase.FROMLEADER.toString() :
DistribPhase.TOLEADER.toString()));
params.remove("commit"); // we already will have forwarded this from our local commit
cmdDistrib.distribDelete(cmd, nodes, params);
}
// cmd.getIndexedId() == null when delete by query
// TODO: what to do when no idField?
if (returnVersions && rsp != null && cmd.getIndexedId() != null && idField != null) {
if (deleteResponse == null) {
deleteResponse = new NamedList<String>();
rsp.add("deletes",deleteResponse);
}
if (scratch == null) scratch = new CharsRef();
idField.getType().indexedToReadable(cmd.getIndexedId(), scratch);
deleteResponse.add(scratch.toString(), cmd.getVersion()); // we're returning the version of the delete.. not the version of the doc we deleted.
}
}
// in solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
public void doDeleteByQuery(DeleteUpdateCommand cmd) throws IOException {
// even in non zk mode, tests simulate updates from a leader
if(!zkEnabled) {
isLeader = getNonZkLeaderAssumption(req);
} else {
zkCheck();
}
// NONE: we are the first to receive this deleteByQuery
// - it must be forwarded to the leader of every shard
// TO: we are a leader receiving a forwarded deleteByQuery... we must:
// - block all updates (use VersionInfo)
// - flush *all* updates going to our replicas
// - forward the DBQ to our replicas and wait for the response
// - log + execute the local DBQ
// FROM: we are a replica receiving a DBQ from our leader
// - log + execute the local DBQ
DistribPhase phase =
DistribPhase.parseParam(req.getParams().get(DISTRIB_UPDATE_PARAM));
if (zkEnabled && DistribPhase.NONE == phase) {
boolean leaderForAnyShard = false; // start off by assuming we are not a leader for any shard
Map<String,Slice> slices = zkController.getCloudState().getSlices(collection);
if (slices == null) {
throw new SolrException(ErrorCode.BAD_REQUEST,
"Cannot find collection:" + collection + " in "
+ zkController.getCloudState().getCollections());
}
ModifiableSolrParams params = new ModifiableSolrParams(req.getParams());
params.set(DISTRIB_UPDATE_PARAM, DistribPhase.TOLEADER.toString());
List<Node> leaders = new ArrayList<Node>(slices.size());
for (Map.Entry<String,Slice> sliceEntry : slices.entrySet()) {
String sliceName = sliceEntry.getKey();
ZkNodeProps leaderProps;
try {
leaderProps = zkController.getZkStateReader().getLeaderProps(collection, sliceName);
} catch (InterruptedException e) {
throw new SolrException(ErrorCode.SERVER_ERROR, "Exception finding leader for shard " + sliceName, e);
}
// TODO: What if leaders changed in the meantime?
// should we send out slice-at-a-time and if a node returns "hey, I'm not a leader" (or we get an error because it went down) then look up the new leader?
// Am I the leader for this slice?
ZkCoreNodeProps coreLeaderProps = new ZkCoreNodeProps(leaderProps);
String leaderNodeName = coreLeaderProps.getCoreNodeName();
String coreName = req.getCore().getName();
String coreNodeName = zkController.getNodeName() + "_" + coreName;
isLeader = coreNodeName.equals(leaderNodeName);
if (isLeader) {
// don't forward to ourself
leaderForAnyShard = true;
} else {
leaders.add(new StdNode(coreLeaderProps));
}
}
params.remove("commit"); // this will be distributed from the local commit
cmdDistrib.distribDelete(cmd, leaders, params);
if (!leaderForAnyShard) {
return;
}
// change the phase to TOLEADER so we look up and forward to our own replicas (if any)
phase = DistribPhase.TOLEADER;
}
List<Node> replicas = null;
if (zkEnabled && DistribPhase.TOLEADER == phase) {
// This core should be a leader
replicas = setupRequest();
}
if (vinfo == null) {
super.processDelete(cmd);
return;
}
// at this point, there is an update we need to try and apply.
// we may or may not be the leader.
// Find the version
long versionOnUpdate = cmd.getVersion();
if (versionOnUpdate == 0) {
String versionOnUpdateS = req.getParams().get(VERSION_FIELD);
versionOnUpdate = versionOnUpdateS == null ? 0 : Long.parseLong(versionOnUpdateS);
}
versionOnUpdate = Math.abs(versionOnUpdate); // normalize to positive version
boolean isReplay = (cmd.getFlags() & UpdateCommand.REPLAY) != 0;
boolean leaderLogic = isLeader && !isReplay;
if (!leaderLogic && versionOnUpdate==0) {
throw new SolrException(ErrorCode.BAD_REQUEST, "missing _version_ on update from leader");
}
vinfo.blockUpdates();
try {
if (versionsStored) {
if (leaderLogic) {
long version = vinfo.getNewClock();
cmd.setVersion(-version);
// TODO update versions in all buckets
doLocalDelete(cmd);
} else {
cmd.setVersion(-versionOnUpdate);
if (ulog.getState() != UpdateLog.State.ACTIVE && (cmd.getFlags() & UpdateCommand.REPLAY) == 0) {
// we're not in an active state, and this update isn't from a replay, so buffer it.
cmd.setFlags(cmd.getFlags() | UpdateCommand.BUFFERING);
ulog.deleteByQuery(cmd);
return;
}
doLocalDelete(cmd);
}
}
// since we don't know which documents were deleted, the easiest thing to do is to invalidate
// all real-time caches (i.e. UpdateLog) which involves also getting a new version of the IndexReader
// (so cache misses will see up-to-date data)
} finally {
vinfo.unblockUpdates();
}
// TODO: need to handle reorders to replicas somehow
// forward to all replicas
if (leaderLogic && replicas != null) {
ModifiableSolrParams params = new ModifiableSolrParams(req.getParams());
params.set(VERSION_FIELD, Long.toString(cmd.getVersion()));
params.set(DISTRIB_UPDATE_PARAM, DistribPhase.FROMLEADER.toString());
cmdDistrib.distribDelete(cmd, replicas, params);
cmdDistrib.finish();
}
if (returnVersions && rsp != null) {
if (deleteByQueryResponse == null) {
deleteByQueryResponse = new NamedList<String>();
rsp.add("deleteByQuery",deleteByQueryResponse);
}
deleteByQueryResponse.add(cmd.getQuery(), cmd.getVersion());
}
}
// in solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
private boolean versionDelete(DeleteUpdateCommand cmd) throws IOException {
BytesRef idBytes = cmd.getIndexedId();
if (vinfo == null || idBytes == null) {
super.processDelete(cmd);
return false;
}
// This is only the hash for the bucket, and must be based only on the uniqueKey (i.e. do not use a pluggable hash here)
int bucketHash = Hash.murmurhash3_x86_32(idBytes.bytes, idBytes.offset, idBytes.length, 0);
// at this point, there is an update we need to try and apply.
// we may or may not be the leader.
// Find the version
long versionOnUpdate = cmd.getVersion();
if (versionOnUpdate == 0) {
String versionOnUpdateS = req.getParams().get(VERSION_FIELD);
versionOnUpdate = versionOnUpdateS == null ? 0 : Long.parseLong(versionOnUpdateS);
}
long signedVersionOnUpdate = versionOnUpdate;
versionOnUpdate = Math.abs(versionOnUpdate); // normalize to positive version
boolean isReplay = (cmd.getFlags() & UpdateCommand.REPLAY) != 0;
boolean leaderLogic = isLeader && !isReplay;
if (!leaderLogic && versionOnUpdate==0) {
throw new SolrException(ErrorCode.BAD_REQUEST, "missing _version_ on update from leader");
}
VersionBucket bucket = vinfo.bucket(bucketHash);
vinfo.lockForUpdate();
try {
synchronized (bucket) {
if (versionsStored) {
long bucketVersion = bucket.highest;
if (leaderLogic) {
if (signedVersionOnUpdate != 0) {
Long lastVersion = vinfo.lookupVersion(cmd.getIndexedId());
long foundVersion = lastVersion == null ? -1 : lastVersion;
if ( (signedVersionOnUpdate == foundVersion) || (signedVersionOnUpdate < 0 && foundVersion < 0) || (signedVersionOnUpdate == 1 && foundVersion > 0) ) {
// we're ok if versions match, or if both are negative (all missing docs are equal), or if cmd
// specified it must exist (versionOnUpdate==1) and it does.
} else {
throw new SolrException(ErrorCode.CONFLICT, "version conflict for " + cmd.getId() + " expected=" + signedVersionOnUpdate + " actual=" + foundVersion);
}
}
long version = vinfo.getNewClock();
cmd.setVersion(-version);
bucket.updateHighest(version);
} else {
cmd.setVersion(-versionOnUpdate);
if (ulog.getState() != UpdateLog.State.ACTIVE && (cmd.getFlags() & UpdateCommand.REPLAY) == 0) {
// we're not in an active state, and this update isn't from a replay, so buffer it.
cmd.setFlags(cmd.getFlags() | UpdateCommand.BUFFERING);
ulog.delete(cmd);
return true;
}
// if we aren't the leader, then we need to check that updates were not re-ordered
if (bucketVersion != 0 && bucketVersion < versionOnUpdate) {
// we're OK... this update has a version higher than anything we've seen
// in this bucket so far, so we know that no reordering has yet occurred.
bucket.updateHighest(versionOnUpdate);
} else {
// there have been updates higher than the current update. we need to check
// the specific version for this id.
Long lastVersion = vinfo.lookupVersion(cmd.getIndexedId());
if (lastVersion != null && Math.abs(lastVersion) >= versionOnUpdate) {
// This update is a repeat, or was reordered. We need to drop this update.
return true;
}
}
}
}
doLocalDelete(cmd);
return false;
} // end synchronized (bucket)
} finally {
vinfo.unlockForUpdate();
}
}
// in solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
@Override
public void processCommit(CommitUpdateCommand cmd) throws IOException {
if (zkEnabled) {
zkCheck();
}
if (vinfo != null) {
vinfo.lockForUpdate();
}
try {
if (ulog == null || ulog.getState() == UpdateLog.State.ACTIVE || (cmd.getFlags() & UpdateCommand.REPLAY) != 0) {
super.processCommit(cmd);
} else {
log.info("Ignoring commit while not ACTIVE - state: " + ulog.getState() + " replay:" + (cmd.getFlags() & UpdateCommand.REPLAY));
}
} finally {
if (vinfo != null) {
vinfo.unlockForUpdate();
}
}
// TODO: we should consider this? commit everyone in the current collection
if (zkEnabled) {
ModifiableSolrParams params = new ModifiableSolrParams(req.getParams());
if (!params.getBool(COMMIT_END_POINT, false)) {
params.set(COMMIT_END_POINT, true);
String nodeName = req.getCore().getCoreDescriptor().getCoreContainer()
.getZkController().getNodeName();
String shardZkNodeName = nodeName + "_" + req.getCore().getName();
List<Node> nodes = getCollectionUrls(req, req.getCore().getCoreDescriptor()
.getCloudDescriptor().getCollectionName(), shardZkNodeName);
if (nodes != null) {
cmdDistrib.distribCommit(cmd, nodes, params);
finish();
}
}
}
}
// in solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
@Override
public void finish() throws IOException {
doFinish();
if (next != null && nodes == null) next.finish();
}
// in solr/core/src/java/org/apache/solr/update/processor/RunUpdateProcessorFactory.java
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
updateHandler.addDoc(cmd);
super.processAdd(cmd);
changesSinceCommit = true;
}
// in solr/core/src/java/org/apache/solr/update/processor/RunUpdateProcessorFactory.java
@Override
public void processDelete(DeleteUpdateCommand cmd) throws IOException {
if( cmd.isDeleteById()) {
updateHandler.delete(cmd);
}
else {
updateHandler.deleteByQuery(cmd);
}
super.processDelete(cmd);
changesSinceCommit = true;
}
// in solr/core/src/java/org/apache/solr/update/processor/RunUpdateProcessorFactory.java
@Override
public void processMergeIndexes(MergeIndexesCommand cmd) throws IOException {
updateHandler.mergeIndexes(cmd);
super.processMergeIndexes(cmd);
}
// in solr/core/src/java/org/apache/solr/update/processor/RunUpdateProcessorFactory.java
@Override
public void processCommit(CommitUpdateCommand cmd) throws IOException
{
updateHandler.commit(cmd);
super.processCommit(cmd);
changesSinceCommit = false;
}
// in solr/core/src/java/org/apache/solr/update/processor/RunUpdateProcessorFactory.java
@Override
public void processRollback(RollbackUpdateCommand cmd) throws IOException
{
updateHandler.rollback(cmd);
super.processRollback(cmd);
changesSinceCommit = false;
}
// in solr/core/src/java/org/apache/solr/update/processor/RunUpdateProcessorFactory.java
@Override
public void finish() throws IOException {
if (changesSinceCommit && updateHandler.getUpdateLog() != null) {
updateHandler.getUpdateLog().finish(null);
}
super.finish();
}
// in solr/core/src/java/org/apache/solr/update/processor/UpdateRequestProcessor.java
public void processAdd(AddUpdateCommand cmd) throws IOException {
if (next != null) next.processAdd(cmd);
}
// in solr/core/src/java/org/apache/solr/update/processor/UpdateRequestProcessor.java
public void processDelete(DeleteUpdateCommand cmd) throws IOException {
if (next != null) next.processDelete(cmd);
}
// in solr/core/src/java/org/apache/solr/update/processor/UpdateRequestProcessor.java
public void processMergeIndexes(MergeIndexesCommand cmd) throws IOException {
if (next != null) next.processMergeIndexes(cmd);
}
// in solr/core/src/java/org/apache/solr/update/processor/UpdateRequestProcessor.java
public void processCommit(CommitUpdateCommand cmd) throws IOException
{
if (next != null) next.processCommit(cmd);
}
// in solr/core/src/java/org/apache/solr/update/processor/UpdateRequestProcessor.java
public void processRollback(RollbackUpdateCommand cmd) throws IOException
{
if (next != null) next.processRollback(cmd);
}
// in solr/core/src/java/org/apache/solr/update/processor/UpdateRequestProcessor.java
public void finish() throws IOException {
if (next != null) next.finish();
}
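// Minimal sketch (added; class and field names are hypothetical) of a custom processor built on the
// chaining contract shown above: each hook does its own work, then hands the command to the next link.
public class TimestampingProcessor extends UpdateRequestProcessor {
  public TimestampingProcessor(UpdateRequestProcessor next) {
    super(next);
  }
  @Override
  public void processAdd(AddUpdateCommand cmd) throws IOException {
    // stamp the incoming document, then delegate so the rest of the chain (and eventually the
    // processor that talks to the UpdateHandler) still runs
    cmd.getSolrInputDocument().setField("indexed_at_dt", new java.util.Date());
    super.processAdd(cmd);
  }
}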
// in solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessor.java
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
final SolrInputDocument doc = cmd.getSolrInputDocument();
// make a copy we can iterate over while mutating the doc
final Collection<String> fieldNames
= new ArrayList<String>(doc.getFieldNames());
for (final String fname : fieldNames) {
if (! selector.shouldMutate(fname)) continue;
final SolrInputField src = doc.get(fname);
SolrInputField dest = null;
try {
dest = mutate(src);
} catch (SolrException e) {
String msg = "Unable to mutate field '"+fname+"': "+e.getMessage();
SolrException.log(log, msg, e);
throw new SolrException(BAD_REQUEST, msg, e);
}
if (null == dest) {
doc.remove(fname);
} else {
// semantics of what happens if dest has a different name are hard
// we could treat it as a copy, or a rename
// for now, don't allow it.
if (! fname.equals(dest.getName()) ) {
throw new SolrException(SERVER_ERROR,
"mutute returned field with different name: "
+ fname + " => " + dest.getName());
}
doc.put(dest.getName(), dest);
}
}
super.processAdd(cmd);
}
// in solr/core/src/java/org/apache/solr/update/processor/URLClassifyProcessor.java
@Override
public void processAdd(AddUpdateCommand command) throws IOException {
if (isEnabled()) {
SolrInputDocument document = command.getSolrInputDocument();
if (document.containsKey(urlFieldname)) {
String url = (String) document.getFieldValue(urlFieldname);
try {
URL normalizedURL = getNormalizedURL(url);
document.setField(lengthFieldname, length(normalizedURL));
document.setField(levelsFieldname, levels(normalizedURL));
document.setField(toplevelpageFieldname, isTopLevelPage(normalizedURL) ? 1 : 0);
document.setField(landingpageFieldname, isLandingPage(normalizedURL) ? 1 : 0);
if (domainFieldname != null) {
document.setField(domainFieldname, normalizedURL.getHost());
}
if (canonicalUrlFieldname != null) {
document.setField(canonicalUrlFieldname, getCanonicalUrl(normalizedURL));
}
log.debug(document.toString());
} catch (MalformedURLException e) {
log.warn("cannot get the normalized url for \"" + url + "\" due to " + e.getMessage());
} catch (URISyntaxException e) {
log.warn("cannot get the normalized url for \"" + url + "\" due to " + e.getMessage());
}
}
}
super.processAdd(command);
}
// in solr/core/src/java/org/apache/solr/update/processor/SignatureUpdateProcessorFactory.java
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
if (enabled) {
SolrInputDocument doc = cmd.getSolrInputDocument();
List<String> currDocSigFields = null;
if (sigFields == null || sigFields.size() == 0) {
Collection<String> docFields = doc.getFieldNames();
currDocSigFields = new ArrayList<String>(docFields.size());
currDocSigFields.addAll(docFields);
Collections.sort(currDocSigFields);
} else {
currDocSigFields = sigFields;
}
Signature sig = req.getCore().getResourceLoader().newInstance(signatureClass, Signature.class);
sig.init(params);
for (String field : currDocSigFields) {
SolrInputField f = doc.getField(field);
if (f != null) {
sig.add(field);
Object o = f.getValue();
if (o instanceof Collection) {
for (Object oo : (Collection)o) {
sig.add(String.valueOf(oo));
}
} else {
sig.add(String.valueOf(o));
}
}
}
byte[] signature = sig.getSignature();
char[] arr = new char[signature.length<<1];
for (int i=0; i<signature.length; i++) {
int b = signature[i];
int idx = i<<1;
arr[idx]= StrUtils.HEX_DIGITS[(b >> 4) & 0xf];
arr[idx+1]= StrUtils.HEX_DIGITS[b & 0xf];
}
String sigString = new String(arr);
doc.addField(signatureField, sigString);
if (overwriteDupes) {
cmd.updateTerm = new Term(signatureField, sigString);
}
}
if (next != null)
next.processAdd(cmd);
}
// in solr/core/src/java/org/apache/solr/update/processor/LogUpdateProcessorFactory.java
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
if (logDebug) { log.debug("PRE_UPDATE " + cmd.toString()); }
// call delegate first so we can log things like the version that get set later
if (next != null) next.processAdd(cmd);
// Add a list of added id's to the response
if (adds == null) {
adds = new ArrayList<String>();
toLog.add("add",adds);
}
if (adds.size() < maxNumToLog) {
long version = cmd.getVersion();
String msg = cmd.getPrintableId();
if (version != 0) msg = msg + " (" + version + ')';
adds.add(msg);
}
numAdds++;
}
// in solr/core/src/java/org/apache/solr/update/processor/LogUpdateProcessorFactory.java
@Override
public void processDelete( DeleteUpdateCommand cmd ) throws IOException {
if (logDebug) { log.debug("PRE_UPDATE " + cmd.toString()); }
if (next != null) next.processDelete(cmd);
if (cmd.isDeleteById()) {
if (deletes == null) {
deletes = new ArrayList<String>();
toLog.add("delete",deletes);
}
if (deletes.size() < maxNumToLog) {
long version = cmd.getVersion();
String msg = cmd.getId();
if (version != 0) msg = msg + " (" + version + ')';
deletes.add(msg);
}
} else {
if (toLog.size() < maxNumToLog) {
long version = cmd.getVersion();
String msg = cmd.query;
if (version != 0) msg = msg + " (" + version + ')';
toLog.add("deleteByQuery", msg);
}
}
numDeletes++;
}
// in solr/core/src/java/org/apache/solr/update/processor/LogUpdateProcessorFactory.java
@Override
public void processMergeIndexes(MergeIndexesCommand cmd) throws IOException {
if (logDebug) { log.debug("PRE_UPDATE " + cmd.toString()); }
if (next != null) next.processMergeIndexes(cmd);
toLog.add("mergeIndexes", cmd.toString());
}
// in solr/core/src/java/org/apache/solr/update/processor/LogUpdateProcessorFactory.java
@Override
public void processCommit( CommitUpdateCommand cmd ) throws IOException {
if (logDebug) { log.debug("PRE_UPDATE " + cmd.toString()); }
if (next != null) next.processCommit(cmd);
final String msg = cmd.optimize ? "optimize" : "commit";
toLog.add(msg, "");
}
// in solr/core/src/java/org/apache/solr/update/processor/LogUpdateProcessorFactory.java
@Override
public void processRollback( RollbackUpdateCommand cmd ) throws IOException {
if (logDebug) { log.debug("PRE_UPDATE " + cmd.toString()); }
if (next != null) next.processRollback(cmd);
toLog.add("rollback", "");
}
// in solr/core/src/java/org/apache/solr/update/processor/LogUpdateProcessorFactory.java
@Override
public void finish() throws IOException {
if (logDebug) { log.debug("PRE_UPDATE finish()"); }
if (next != null) next.finish();
// LOG A SUMMARY WHEN ALL DONE (INFO LEVEL)
NamedList<Object> stdLog = rsp.getToLog();
StringBuilder sb = new StringBuilder(req.getCore().getLogId());
for (int i=0; i<stdLog.size(); i++) {
String name = stdLog.getName(i);
Object val = stdLog.getVal(i);
if (name != null) {
sb.append(name).append('=');
}
sb.append(val).append(' ');
}
stdLog.clear(); // make it so SolrCore.exec won't log this again
// if id lists were truncated, show how many more there were
if (adds != null && numAdds > maxNumToLog) {
adds.add("... (" + numAdds + " adds)");
}
if (deletes != null && numDeletes > maxNumToLog) {
deletes.add("... (" + numDeletes + " deletes)");
}
long elapsed = rsp.getEndTime() - req.getStartTime();
sb.append(toLog).append(" 0 ").append(elapsed);
log.info(sb.toString());
}
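// Illustrative shape of the summary line assembled above (added; the exact fields depend on what the
// handler put into rsp.getToLog()):
//   [collection1] {add=[doc1 (1425465426845696000), doc2 (1425465426847793152), ... (25 adds)]} 0 42
// i.e. the core's log id (typically the core name in brackets), any name=value pairs from the
// response's toLog list, the truncated add/delete summaries, the literal "0", and finally the
// elapsed time in milliseconds.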
// in solr/core/src/java/org/apache/solr/update/processor/UniqFieldsUpdateProcessorFactory.java
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
if(fields != null){
SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
List<Object> uniqList = new ArrayList<Object>();
for (String field : fields) {
uniqList.clear();
Collection<Object> col = solrInputDocument.getFieldValues(field);
if (col != null) {
for (Object o : col) {
if(!uniqList.contains(o))
uniqList.add(o);
}
solrInputDocument.remove(field);
for (Object o : uniqList) {
solrInputDocument.addField(field, o);
}
}
}
}
super.processAdd(cmd);
}
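// Example of the de-duplication above (added): for a configured field holding the values
// ["red", "blue", "red"], the document is rewritten to carry ["red", "blue"]; first-occurrence
// order is preserved because uniqList is filled in the original iteration order.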
// in solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
@Override
public synchronized IndexWriter getIndexWriter(SolrCore core) throws IOException {
if (indexWriter == null) {
indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2", false, false);
}
return indexWriter;
}
// in solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
@Override
public synchronized void newIndexWriter(SolrCore core) throws IOException {
if (indexWriter != null) {
indexWriter.close();
}
indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2",
false, true);
}
// in solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
@Override
public void decref(IndexWriterCloser closer) throws IOException {
synchronized (this) {
refCnt--;
if (refCnt == 0) {
try {
if (closer != null) {
closer.closeWriter(indexWriter);
} else if (indexWriter != null) {
indexWriter.close();
}
} catch (Throwable t) {
log.error("Error during shutdown of writer.", t);
}
try {
directoryFactory.close();
} catch (Throwable t) {
log.error("Error during shutdown of directory factory.", t);
}
try {
cancelRecovery();
} catch (Throwable t) {
log.error("Error cancelling recovery", t);
}
closed = true;
}
}
}
// in solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
@Override
public synchronized void rollbackIndexWriter(SolrCore core) throws IOException {
indexWriter.rollback();
newIndexWriter(core);
}
// in solr/core/src/java/org/apache/solr/update/DefaultSolrCoreState.java
protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name,
boolean removeAllExisting, boolean forceNewDirectory) throws IOException {
return new SolrIndexWriter(name, core.getNewIndexDir(),
core.getDirectoryFactory(), removeAllExisting, core.getSchema(),
core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec(), forceNewDirectory);
}
// in solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java
private static InfoStream toInfoStream(SolrIndexConfig config) throws IOException {
String infoStreamFile = config.infoStreamFile;
if (infoStreamFile != null) {
File f = new File(infoStreamFile);
File parent = f.getParentFile();
if (parent != null) parent.mkdirs();
FileOutputStream fos = new FileOutputStream(f, true);
return new PrintStreamInfoStream(new PrintStream(fos, true));
} else {
return InfoStream.NO_OUTPUT;
}
}
// in solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java
@Override
public void close() throws IOException {
log.debug("Closing Writer " + name);
Directory directory = getDirectory();
final InfoStream infoStream = isClosed ? null : getConfig().getInfoStream();
try {
super.close();
if(infoStream != null) {
infoStream.close();
}
} finally {
isClosed = true;
directoryFactory.release(directory);
numCloses.incrementAndGet();
}
}
// in solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java
@Override
public void rollback() throws IOException {
try {
super.rollback();
} finally {
isClosed = true;
}
}
// in solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java
public void distribDelete(DeleteUpdateCommand cmd, List<Node> urls, ModifiableSolrParams params) throws IOException {
checkResponses(false);
// delete-by-id and delete-by-query currently follow the same path
doDelete(cmd, urls, params);
}
// in solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java
public void distribAdd(AddUpdateCommand cmd, List<Node> nodes, ModifiableSolrParams params) throws IOException {
checkResponses(false);
// make sure any pending deletes are flushed
flushDeletes(1);
// TODO: this is brittle
// need to make a clone since these commands may be reused
AddUpdateCommand clone = new AddUpdateCommand(null);
clone.solrDoc = cmd.solrDoc;
clone.commitWithin = cmd.commitWithin;
clone.overwrite = cmd.overwrite;
clone.setVersion(cmd.getVersion());
AddRequest addRequest = new AddRequest();
addRequest.cmd = clone;
addRequest.params = params;
for (Node node : nodes) {
List<AddRequest> alist = adds.get(node);
if (alist == null) {
alist = new ArrayList<AddRequest>(2);
adds.put(node, alist);
}
alist.add(addRequest);
}
flushAdds(maxBufferedAddsPerServer);
}
// in solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java
public void distribCommit(CommitUpdateCommand cmd, List<Node> nodes,
ModifiableSolrParams params) throws IOException {
// Wait for all outstanding responses to make sure that a commit
// can't sneak in ahead of adds or deletes we already sent.
// We could do this on a per-server basis, but it's more complex
// and this solution will lead to commits happening closer together.
checkResponses(true);
// currently, we don't try to piggyback on outstanding adds or deletes
UpdateRequestExt ureq = new UpdateRequestExt();
ureq.setParams(params);
addCommit(ureq, cmd);
for (Node node : nodes) {
submit(ureq, node);
}
// if the command wanted to block until everything was committed,
// then do that here.
if (cmd.waitSearcher) {
checkResponses(true);
}
}
// in solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java
private void doDelete(DeleteUpdateCommand cmd, List<Node> nodes,
ModifiableSolrParams params) throws IOException {
flushAdds(1);
DeleteUpdateCommand clonedCmd = clone(cmd);
DeleteRequest deleteRequest = new DeleteRequest();
deleteRequest.cmd = clonedCmd;
deleteRequest.params = params;
for (Node node : nodes) {
List<DeleteRequest> dlist = deletes.get(node);
if (dlist == null) {
dlist = new ArrayList<DeleteRequest>(2);
deletes.put(node, dlist);
}
dlist.add(deleteRequest);
}
flushDeletes(maxBufferedDeletesPerServer);
}
// in solr/core/src/java/org/apache/solr/update/TransactionLog.java
@Override
public Object resolve(Object o, JavaBinCodec codec) throws IOException {
if (o instanceof BytesRef) {
BytesRef br = (BytesRef)o;
codec.writeByteArray(br.bytes, br.offset, br.length);
return null;
}
return o;
}
// in solr/core/src/java/org/apache/solr/update/TransactionLog.java
@Override
public void writeExternString(String s) throws IOException {
if (s == null) {
writeTag(NULL);
return;
}
// no need to synchronize globalStringMap - it's only updated before the first record is written to the log
Integer idx = globalStringMap.get(s);
if (idx == null) {
// write a normal string
writeStr(s);
} else {
// write the extern string
writeTag(EXTERN_STRING, idx);
}
}
// in solr/core/src/java/org/apache/solr/update/TransactionLog.java
@Override
public String readExternString(FastInputStream fis) throws IOException {
int idx = readSize(fis);
if (idx != 0) {// idx != 0 is the index of the extern string
// no need to synchronize globalStringList - it's only updated before the first record is written to the log
return globalStringList.get(idx - 1);
} else {// idx == 0 means it has a string value
// this shouldn't happen with this codec subclass.
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Corrupt transaction log");
}
}
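// Illustrative sketch (not the actual JavaBinCodec subclass): the idea behind writeExternString/readExternString
// above, expressed with plain DataOutputStream/DataInputStream. Strings already present in a shared table are
// written as an index (1-based, so 0 can mean "inline string"); unknown strings are written inline. Names are hypothetical.
import java.io.*;
import java.util.*;

class ExternStringCodec {
  private final List<String> table;                         // shared table, written once in the log header
  private final Map<String, Integer> index = new HashMap<String, Integer>();

  ExternStringCodec(List<String> table) {
    this.table = table;
    for (int i = 0; i < table.size(); i++) index.put(table.get(i), i + 1);
  }

  void write(DataOutputStream out, String s) throws IOException {
    Integer idx = index.get(s);
    out.writeInt(idx == null ? 0 : idx);
    if (idx == null) out.writeUTF(s);                       // not in the table: write it inline
  }

  String read(DataInputStream in) throws IOException {
    int idx = in.readInt();
    return idx == 0 ? in.readUTF() : table.get(idx - 1);
  }
}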
// in solr/core/src/java/org/apache/solr/update/TransactionLog.java
public boolean endsWithCommit() throws IOException {
long size;
synchronized (this) {
fos.flush();
size = fos.size();
}
// the end of the file should have the end message (added during a commit) plus a 4 byte size
byte[] buf = new byte[ END_MESSAGE.length() ];
long pos = size - END_MESSAGE.length() - 4;
if (pos < 0) return false;
ChannelFastInputStream is = new ChannelFastInputStream(channel, pos);
is.read(buf);
for (int i=0; i<buf.length; i++) {
if (buf[i] != END_MESSAGE.charAt(i)) return false;
}
return true;
}
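// Illustrative sketch (plain JDK, hypothetical helper): checking whether a log file ends with a known
// marker followed by a 4-byte record length, which is what endsWithCommit() does above.
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;

final class TailMarker {
  static boolean endsWithMarker(RandomAccessFile raf, String marker) throws IOException {
    byte[] expected = marker.getBytes(StandardCharsets.US_ASCII);
    long pos = raf.length() - expected.length - 4;   // marker plus the trailing 4-byte length
    if (pos < 0) return false;
    byte[] actual = new byte[expected.length];
    raf.seek(pos);
    raf.readFully(actual);
    return java.util.Arrays.equals(expected, actual);
  }
}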
// in solr/core/src/java/org/apache/solr/update/TransactionLog.java
public void rollback(long pos) throws IOException {
synchronized (this) {
assert snapshot_size == pos;
fos.flush();
raf.setLength(pos);
fos.setWritten(pos);
assert fos.size() == pos;
numRecords = snapshot_numRecords;
}
}
// in solr/core/src/java/org/apache/solr/update/TransactionLog.java
private void readHeader(FastInputStream fis) throws IOException {
// read existing header
fis = fis != null ? fis : new ChannelFastInputStream(channel, 0);
LogCodec codec = new LogCodec();
Map header = (Map)codec.unmarshal(fis);
fis.readInt(); // skip size
// needed to read other records
synchronized (this) {
globalStringList = (List<String>)header.get("strings");
globalStringMap = new HashMap<String, Integer>(globalStringList.size());
for (int i=0; i<globalStringList.size(); i++) {
globalStringMap.put( globalStringList.get(i), i+1);
}
}
}
// in solr/core/src/java/org/apache/solr/update/TransactionLog.java
private void writeLogHeader(LogCodec codec) throws IOException {
long pos = fos.size();
assert pos == 0;
Map header = new LinkedHashMap<String,Object>();
header.put("SOLR_TLOG",1); // a magic string + version number
header.put("strings",globalStringList);
codec.marshal(header, fos);
endRecord(pos);
}
// in solr/core/src/java/org/apache/solr/update/TransactionLog.java
private void endRecord(long startRecordPosition) throws IOException {
fos.writeInt((int)(fos.size() - startRecordPosition));
numRecords++;
}
// in solr/core/src/java/org/apache/solr/update/TransactionLog.java
public ReverseReader getReverseReader() throws IOException {
return new ReverseReader();
}
// in solr/core/src/java/org/apache/solr/update/TransactionLog.java
public Object next() throws IOException, InterruptedException {
long pos = fis.position();
synchronized (TransactionLog.this) {
if (trace) {
log.trace("Reading log record. pos="+pos+" currentSize="+fos.size());
}
if (pos >= fos.size()) {
return null;
}
fos.flushBuffer();
}
if (pos == 0) {
readHeader(fis);
// shouldn't currently happen - header and first record are currently written at the same time
synchronized (TransactionLog.this) {
if (fis.position() >= fos.size()) {
return null;
}
pos = fis.position();
}
}
Object o = codec.readVal(fis);
// skip over record size
int size = fis.readInt();
assert size == fis.position() - pos - 4;
return o;
}
// in solr/core/src/java/org/apache/solr/update/TransactionLog.java
@Override
public SolrInputDocument readSolrInputDocument(FastInputStream dis) throws IOException {
// Given that the SolrInputDocument is last in an add record, it's OK to just skip
// reading it completely.
return null;
}
// in solr/core/src/java/org/apache/solr/update/TransactionLog.java
public Object next() throws IOException {
if (prevPos <= 0) return null;
long endOfThisRecord = prevPos;
int thisLength = nextLength;
long recordStart = prevPos - thisLength; // back up to the beginning of the next record
prevPos = recordStart - 4; // back up 4 more to read the length of the next record
if (prevPos <= 0) return null; // this record is the header
long bufferPos = fis.getBufferPos();
if (prevPos >= bufferPos) {
// nothing to do... we're within the current buffer
} else {
// Position buffer so that this record is at the end.
// For small records, this will cause subsequent calls to next() to be within the buffer.
long seekPos = endOfThisRecord - fis.getBufferSize();
seekPos = Math.min(seekPos, prevPos); // seek to the start of the record if it's larger than the block size.
seekPos = Math.max(seekPos, 0);
fis.seek(seekPos);
fis.peek(); // cause buffer to be filled
}
fis.seek(prevPos);
nextLength = fis.readInt(); // this is the length of the *next* record (i.e. closer to the beginning)
// TODO: optionally skip document data
Object o = codec.readVal(fis);
// assert fis.position() == prevPos + 4 + thisLength; // this is only true if we read all the data (and we currently skip reading SolrInputDocument)
return o;
}
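// Illustrative sketch (not the real ReverseReader): walking length-suffixed records backwards. Each record
// body is followed by a 4-byte big-endian length, so the reader seeks to (end - 4), reads the length, then
// jumps back over the body to find the previous record. File layout and helper names are hypothetical.
import java.io.IOException;
import java.io.RandomAccessFile;

final class ReverseRecordReader {
  static void readBackwards(RandomAccessFile raf) throws IOException {
    long pos = raf.length();
    while (pos > 4) {
      raf.seek(pos - 4);
      int len = raf.readInt();            // length of the record body that ends at pos - 4
      long start = pos - 4 - len;
      if (start < 0) break;               // malformed, or we ran past the start of the file
      byte[] record = new byte[len];
      raf.seek(start);
      raf.readFully(record);
      // ... decode the record here ...
      pos = start;                        // continue with the record before this one
    }
  }
}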
// in solr/core/src/java/org/apache/solr/update/TransactionLog.java
@Override
public int readWrappedStream(byte[] target, int offset, int len) throws IOException {
ByteBuffer bb = ByteBuffer.wrap(target, offset, len);
int ret = ch.read(bb, readFromStream);
return ret;
}
// in solr/core/src/java/org/apache/solr/update/TransactionLog.java
public void seek(long position) throws IOException {
if (position <= readFromStream && position >= getBufferPos()) {
// seek within buffer
pos = (int)(position - getBufferPos());
} else {
// long currSize = ch.size(); // not needed - underlying read should handle (unless read never done)
// if (position > currSize) throw new EOFException("Read past EOF: seeking to " + position + " on file of size " + currSize + " file=" + ch);
readFromStream = position;
end = pos = 0;
}
assert position() == position;
}
// in solr/core/src/java/org/apache/solr/update/TransactionLog.java
@Override
public void close() throws IOException {
ch.close();
}
// in solr/core/src/java/org/apache/solr/core/StandardDirectoryFactory.java
@Override
protected Directory create(String path) throws IOException {
return FSDirectory.open(new File(path));
}
// in solr/core/src/java/org/apache/solr/core/RAMDirectoryFactory.java
@Override
protected Directory create(String path) throws IOException {
return new RAMDirectory();
}
// in solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
@Override
public void close() throws IOException {
synchronized (this) {
for (CacheValue val : byDirectoryCache.values()) {
val.directory.close();
}
byDirectoryCache.clear();
byPathCache.clear();
}
}
// in solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
private void close(Directory directory) throws IOException {
synchronized (this) {
CacheValue cacheValue = byDirectoryCache.get(directory);
if (cacheValue == null) {
throw new IllegalArgumentException("Unknown directory: " + directory
+ " " + byDirectoryCache);
}
cacheValue.refCnt--;
if (cacheValue.refCnt == 0 && cacheValue.doneWithDir) {
directory.close();
byDirectoryCache.remove(directory);
byPathCache.remove(cacheValue.path);
}
}
}
// in solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
@Override
public final Directory get(String path, String rawLockType)
throws IOException {
return get(path, rawLockType, false);
}
// in solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
@Override
public final Directory get(String path, String rawLockType, boolean forceNew)
throws IOException {
String fullPath = new File(path).getAbsolutePath();
synchronized (this) {
CacheValue cacheValue = byPathCache.get(fullPath);
Directory directory = null;
if (cacheValue != null) {
directory = cacheValue.directory;
if (forceNew) {
cacheValue.doneWithDir = true;
if (cacheValue.refCnt == 0) {
close(cacheValue.directory);
}
}
}
if (directory == null || forceNew) {
directory = create(fullPath);
CacheValue newCacheValue = new CacheValue();
newCacheValue.directory = directory;
newCacheValue.path = fullPath;
injectLockFactory(directory, path, rawLockType);
byDirectoryCache.put(directory, newCacheValue);
byPathCache.put(fullPath, newCacheValue);
} else {
cacheValue.refCnt++;
}
return directory;
}
}
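// Illustrative sketch (generic JDK version of the CachingDirectoryFactory pattern above, hypothetical names):
// a cache that hands out shared instances keyed by path, counts references, and only closes an instance once
// every holder has released it and it has been marked done.
import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Callable;

class RefCountedCache<V extends Closeable> {
  private static final class Entry<V> { V value; int refCnt = 1; boolean doneWithValue; }
  private final Map<String, Entry<V>> byPath = new HashMap<String, Entry<V>>();

  synchronized V acquire(String path, Callable<V> factory) throws Exception {
    Entry<V> e = byPath.get(path);
    if (e == null) {
      e = new Entry<V>();
      e.value = factory.call();           // first holder: create and cache
      byPath.put(path, e);
    } else {
      e.refCnt++;                         // additional holder: share the cached instance
    }
    return e.value;
  }

  synchronized void doneWith(String path) {
    Entry<V> e = byPath.get(path);
    if (e != null) e.doneWithValue = true;
  }

  synchronized void release(String path) throws IOException {
    Entry<V> e = byPath.get(path);
    if (e == null) throw new IllegalArgumentException("Unknown path: " + path);
    e.refCnt--;
    if (e.refCnt == 0 && e.doneWithValue) {
      e.value.close();
      byPath.remove(path);
    }
  }
}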
// in solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
@Override
public void release(Directory directory) throws IOException {
if (directory == null) {
throw new NullPointerException();
}
close(directory);
}
// in solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
private static Directory injectLockFactory(Directory dir, String lockPath,
String rawLockType) throws IOException {
if (null == rawLockType) {
// we default to "simple" for backwards compatibility
log.warn("No lockType configured for " + dir + " assuming 'simple'");
rawLockType = "simple";
}
final String lockType = rawLockType.toLowerCase(Locale.ENGLISH).trim();
if ("simple".equals(lockType)) {
// multiple SimpleFSLockFactory instances should be OK
dir.setLockFactory(new SimpleFSLockFactory(lockPath));
} else if ("native".equals(lockType)) {
dir.setLockFactory(new NativeFSLockFactory(lockPath));
} else if ("single".equals(lockType)) {
if (!(dir.getLockFactory() instanceof SingleInstanceLockFactory)) dir
.setLockFactory(new SingleInstanceLockFactory());
} else if ("none".equals(lockType)) {
// Recipe for disaster
log.error("CONFIGURATION WARNING: locks are disabled on " + dir);
dir.setLockFactory(NoLockFactory.getNoLockFactory());
} else {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Unrecognized lockType: " + rawLockType);
}
return dir;
}
// in solr/core/src/java/org/apache/solr/core/NIOFSDirectoryFactory.java
@Override
protected Directory create(String path) throws IOException {
return new NIOFSDirectory(new File(path));
}
// in solr/core/src/java/org/apache/solr/core/MMapDirectoryFactory.java
@Override
protected Directory create(String path) throws IOException {
MMapDirectory mapDirectory = new MMapDirectory(new File(path));
try {
mapDirectory.setUseUnmap(unmapHack);
} catch (Exception e) {
log.warn("Unmap not supported on this JVM, continuing on without setting unmap", e);
}
mapDirectory.setMaxChunkSize(maxChunk);
return mapDirectory;
}
// in solr/core/src/java/org/apache/solr/core/SolrCore.java
public SolrCore reload(SolrResourceLoader resourceLoader) throws IOException,
ParserConfigurationException, SAXException {
// TODO - what if indexwriter settings have changed
SolrConfig config = new SolrConfig(resourceLoader,
getSolrConfig().getName(), null);
IndexSchema schema = new IndexSchema(config,
getSchema().getResourceName(), null);
updateHandler.incref();
SolrCore core = new SolrCore(getName(), null, config,
schema, coreDescriptor, updateHandler);
return core;
}
// in solr/core/src/java/org/apache/solr/core/SolrCore.java
public SolrIndexSearcher newSearcher(String name) throws IOException {
return new SolrIndexSearcher(this, getNewIndexDir(), schema, getSolrConfig().indexConfig, name, false, directoryFactory);
}
// in solr/core/src/java/org/apache/solr/core/SolrCore.java
public RefCounted<SolrIndexSearcher> getSearcher(boolean forceNew, boolean returnSearcher, final Future[] waitSearcher) throws IOException {
return getSearcher(forceNew, returnSearcher, waitSearcher, false);
}
// in solr/core/src/java/org/apache/solr/core/SolrCore.java
public RefCounted<SolrIndexSearcher> getSearcher(boolean forceNew, boolean returnSearcher, final Future[] waitSearcher, boolean updateHandlerReopens) throws IOException {
// it may take some time to open an index.... we may need to make
// sure that two threads aren't trying to open one at the same time
// if it isn't necessary.
synchronized (searcherLock) {
// see if we can return the current searcher
if (_searcher!=null && !forceNew) {
if (returnSearcher) {
_searcher.incref();
return _searcher;
} else {
return null;
}
}
// check to see if we can wait for someone else's searcher to be set
if (onDeckSearchers>0 && !forceNew && _searcher==null) {
try {
searcherLock.wait();
} catch (InterruptedException e) {
log.info(SolrException.toStr(e));
}
}
// check again: see if we can return right now
if (_searcher!=null && !forceNew) {
if (returnSearcher) {
_searcher.incref();
return _searcher;
} else {
return null;
}
}
// At this point, we know we need to open a new searcher...
// first: increment count to signal other threads that we are
// opening a new searcher.
onDeckSearchers++;
if (onDeckSearchers < 1) {
// should never happen... just a sanity check
log.error(logid+"ERROR!!! onDeckSearchers is " + onDeckSearchers);
onDeckSearchers=1; // reset
} else if (onDeckSearchers > maxWarmingSearchers) {
onDeckSearchers--;
String msg="Error opening new searcher. exceeded limit of maxWarmingSearchers="+maxWarmingSearchers + ", try again later.";
log.warn(logid+""+ msg);
// HTTP 503==service unavailable, or 409==Conflict
throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,msg);
} else if (onDeckSearchers > 1) {
log.warn(logid+"PERFORMANCE WARNING: Overlapping onDeckSearchers=" + onDeckSearchers);
}
}
// a signal to decrement onDeckSearchers if something goes wrong.
final boolean[] decrementOnDeckCount=new boolean[]{true};
RefCounted<SolrIndexSearcher> currSearcherHolder = null; // searcher we are autowarming from
RefCounted<SolrIndexSearcher> searchHolder = null;
boolean success = false;
openSearcherLock.lock();
try {
searchHolder = openNewSearcher(updateHandlerReopens, false);
// the searchHolder will be incremented once already (and it will eventually be assigned to _searcher when registered)
// increment it again if we are going to return it to the caller.
if (returnSearcher) {
searchHolder.incref();
}
final RefCounted<SolrIndexSearcher> newSearchHolder = searchHolder;
final SolrIndexSearcher newSearcher = newSearchHolder.get();
boolean alreadyRegistered = false;
synchronized (searcherLock) {
if (_searcher == null) {
// if there isn't a current searcher then we may
// want to register this one before warming is complete instead of waiting.
if (solrConfig.useColdSearcher) {
registerSearcher(newSearchHolder);
decrementOnDeckCount[0]=false;
alreadyRegistered=true;
}
} else {
// get a reference to the current searcher for purposes of autowarming.
currSearcherHolder=_searcher;
currSearcherHolder.incref();
}
}
final SolrIndexSearcher currSearcher = currSearcherHolder==null ? null : currSearcherHolder.get();
Future future=null;
// warm the new searcher based on the current searcher.
// should this go before the other event handlers or after?
if (currSearcher != null) {
future = searcherExecutor.submit(
new Callable() {
public Object call() throws Exception {
try {
newSearcher.warm(currSearcher);
} catch (Throwable e) {
SolrException.log(log,e);
}
return null;
}
}
);
}
if (currSearcher==null && firstSearcherListeners.size() > 0) {
future = searcherExecutor.submit(
new Callable() {
public Object call() throws Exception {
try {
for (SolrEventListener listener : firstSearcherListeners) {
listener.newSearcher(newSearcher,null);
}
} catch (Throwable e) {
SolrException.log(log,null,e);
}
return null;
}
}
);
}
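// Illustrative sketch (plain JDK, hypothetical names): the admission-control idea in getSearcher() above --
// count searchers that are warming ("on deck"), briefly wait for an in-flight one to register, and reject
// new requests once a configured limit is exceeded.
final class WarmingGate {
  private final Object lock = new Object();
  private final int maxWarming;
  private int onDeck = 0;

  WarmingGate(int maxWarming) { this.maxWarming = maxWarming; }

  void enter() throws InterruptedException {
    synchronized (lock) {
      if (onDeck > 0) {
        lock.wait(1000);                 // give an in-flight searcher a chance to register first
      }
      if (onDeck >= maxWarming) {
        throw new IllegalStateException("exceeded limit of maxWarmingSearchers=" + maxWarming);
      }
      onDeck++;
    }
  }

  void exit() {
    synchronized (lock) {
      onDeck--;
      lock.notifyAll();                  // wake anyone waiting for a searcher to register
    }
  }
}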
// in solr/core/src/java/org/apache/solr/core/SolrCore.java
private void registerSearcher(RefCounted<SolrIndexSearcher> newSearcherHolder) throws IOException {
synchronized (searcherLock) {
try {
if (_searcher != null) {
_searcher.decref(); // dec refcount for this._searcher
_searcher=null;
}
_searcher = newSearcherHolder;
SolrIndexSearcher newSearcher = newSearcherHolder.get();
/***
// a searcher may have been warming asynchronously while the core was being closed.
// if this happens, just close the searcher.
if (isClosed()) {
// NOTE: this should not happen now - see close() for details.
// *BUT* if we left it enabled, this could still happen before
// close() stopped the executor - so disable this test for now.
log.error("Ignoring searcher register on closed core:" + newSearcher);
_searcher.decref();
}
***/
newSearcher.register(); // register subitems (caches)
log.info(logid+"Registered new searcher " + newSearcher);
} catch (Throwable e) {
// an exception in register() shouldn't be fatal.
log(e);
} finally {
// wake up anyone waiting for a searcher
// even in the face of errors.
onDeckSearchers--;
searcherLock.notifyAll();
}
}
}
// in solr/core/src/java/org/apache/solr/core/SolrCore.java
@Override
public void write(Writer writer, SolrQueryRequest request, SolrQueryResponse response) throws IOException {
getWrappedWriter().write(writer, request, response);
}
// in solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java
public List<String> getLines(String resource) throws IOException {
return getLines(resource, UTF_8);
}
// in solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java
public List<String> getLines(String resource,
String encoding) throws IOException {
return getLines(resource, Charset.forName(encoding));
}
// in solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java
public List<String> getLines(String resource, Charset charset) throws IOException{
BufferedReader input = null;
ArrayList<String> lines;
try {
input = new BufferedReader(new InputStreamReader(openResource(resource),
charset.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT)));
lines = new ArrayList<String>();
for (String word=null; (word=input.readLine())!=null;) {
// skip initial bom marker
if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF')
word = word.substring(1);
// skip comments
if (word.startsWith("#")) continue;
word=word.trim();
// skip blank lines
if (word.length()==0) continue;
lines.add(word);
}
}
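// Illustrative sketch (plain JDK, hypothetical helper): the same line-reading policy as getLines() above --
// decode with a strict charset decoder, drop a leading BOM, and skip '#' comments and blank lines.
import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.util.ArrayList;
import java.util.List;

final class WordListReader {
  static List<String> readLines(InputStream in, Charset charset) throws IOException {
    BufferedReader reader = new BufferedReader(new InputStreamReader(in,
        charset.newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT)));
    try {
      List<String> lines = new ArrayList<String>();
      String line;
      while ((line = reader.readLine()) != null) {
        if (lines.isEmpty() && line.length() > 0 && line.charAt(0) == '\uFEFF') {
          line = line.substring(1);            // strip the BOM on the first line only
        }
        if (line.startsWith("#")) continue;    // comment line
        line = line.trim();
        if (line.isEmpty()) continue;          // blank line
        lines.add(line);
      }
      return lines;
    } finally {
      reader.close();
    }
  }
}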
// in solr/core/src/java/org/apache/solr/core/CoreContainer.java
public CoreContainer initialize() throws IOException,
ParserConfigurationException, SAXException {
CoreContainer cores = null;
String solrHome = SolrResourceLoader.locateSolrHome();
File fconf = new File(solrHome, containerConfigFilename == null ? "solr.xml"
: containerConfigFilename);
log.info("looking for solr.xml: " + fconf.getAbsolutePath());
cores = new CoreContainer(solrHome);
if (fconf.exists()) {
cores.load(solrHome, fconf);
} else {
log.info("no solr.xml file found - using default");
cores.load(solrHome, new InputSource(new ByteArrayInputStream(DEF_SOLR_XML.getBytes("UTF-8"))));
cores.configFile = fconf;
}
containerConfigFilename = cores.getConfigFile().getName();
return cores;
}
// in solr/core/src/java/org/apache/solr/core/CoreContainer.java
public void load(String dir, File configFile ) throws ParserConfigurationException, IOException, SAXException {
this.configFile = configFile;
this.load(dir, new InputSource(configFile.toURI().toASCIIString()));
}
// in solr/core/src/java/org/apache/solr/core/CoreContainer.java
public void load(String dir, InputSource cfgis)
throws ParserConfigurationException, IOException, SAXException {
if (null == dir) {
// don't rely on SolrResourceLoader(), determine explicitly first
dir = SolrResourceLoader.locateSolrHome();
}
log.info("Loading CoreContainer using Solr Home: '{}'", dir);
this.loader = new SolrResourceLoader(dir);
solrHome = loader.getInstanceDir();
Config cfg = new Config(loader, null, cfgis, null, false);
// keep orig config for persist to consult
try {
this.cfg = new Config(loader, null, copyDoc(cfg.getDocument()));
} catch (TransformerException e) {
throw new SolrException(ErrorCode.SERVER_ERROR, "", e);
}
cfg.substituteProperties();
// Initialize Logging
if(cfg.getBool("solr/logging/@enabled",true)) {
String slf4jImpl = null;
String fname = cfg.get("solr/logging/watcher/@class", null);
try {
slf4jImpl = StaticLoggerBinder.getSingleton().getLoggerFactoryClassStr();
if(fname==null) {
if( slf4jImpl.indexOf("Log4j") > 0) {
log.warn("Log watching is not yet implemented for log4j" );
}
else if( slf4jImpl.indexOf("JDK") > 0) {
fname = "JUL";
}
}
}
catch(Throwable ex) {
log.warn("Unable to read SLF4J version. LogWatcher will be disabled: "+ex);
}
// Now load the framework
if(fname!=null) {
if("JUL".equalsIgnoreCase(fname)) {
logging = new JulWatcher(slf4jImpl);
}
// else if( "Log4j".equals(fname) ) {
// logging = new Log4jWatcher(slf4jImpl);
// }
else {
try {
logging = loader.newInstance(fname, LogWatcher.class);
}
catch (Throwable e) {
log.warn("Unable to load LogWatcher", e);
}
}
if( logging != null ) {
ListenerConfig v = new ListenerConfig();
v.size = cfg.getInt("solr/logging/watcher/@size",50);
v.threshold = cfg.get("solr/logging/watcher/@threshold",null);
if(v.size>0) {
log.info("Registering Log Listener");
logging.registerListener(v, this);
}
}
}
}
String dcoreName = cfg.get("solr/cores/@defaultCoreName", null);
if(dcoreName != null && !dcoreName.isEmpty()) {
defaultCoreName = dcoreName;
}
persistent = cfg.getBool("solr/@persistent", false);
libDir = cfg.get("solr/@sharedLib", null);
zkHost = cfg.get("solr/@zkHost" , null);
adminPath = cfg.get("solr/cores/@adminPath", null);
shareSchema = cfg.getBool("solr/cores/@shareSchema", DEFAULT_SHARE_SCHEMA);
zkClientTimeout = cfg.getInt("solr/cores/@zkClientTimeout", DEFAULT_ZK_CLIENT_TIMEOUT);
hostPort = cfg.get("solr/cores/@hostPort", DEFAULT_HOST_PORT);
hostContext = cfg.get("solr/cores/@hostContext", DEFAULT_HOST_CONTEXT);
host = cfg.get("solr/cores/@host", null);
if(shareSchema){
indexSchemaCache = new ConcurrentHashMap<String ,IndexSchema>();
}
adminHandler = cfg.get("solr/cores/@adminHandler", null );
managementPath = cfg.get("solr/cores/@managementPath", null );
zkClientTimeout = Integer.parseInt(System.getProperty("zkClientTimeout", Integer.toString(zkClientTimeout)));
initZooKeeper(zkHost, zkClientTimeout);
if (libDir != null) {
File f = FileUtils.resolvePath(new File(dir), libDir);
log.info( "loading shared library: "+f.getAbsolutePath() );
libLoader = SolrResourceLoader.createClassLoader(f, null);
}
if (adminPath != null) {
if (adminHandler == null) {
coreAdminHandler = new CoreAdminHandler(this);
} else {
coreAdminHandler = this.createMultiCoreHandler(adminHandler);
}
}
try {
containerProperties = readProperties(cfg, ((NodeList) cfg.evaluate(DEFAULT_HOST_CONTEXT, XPathConstants.NODESET)).item(0));
} catch (Throwable e) {
SolrException.log(log,null,e);
}
NodeList nodes = (NodeList)cfg.evaluate("solr/cores/core", XPathConstants.NODESET);
for (int i=0; i<nodes.getLength(); i++) {
Node node = nodes.item(i);
try {
String rawName = DOMUtil.getAttr(node, "name", null);
if (null == rawName) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Each core in solr.xml must have a 'name'");
}
String name = rawName;
CoreDescriptor p = new CoreDescriptor(this, name, DOMUtil.getAttr(node, "instanceDir", null));
// deal with optional settings
String opt = DOMUtil.getAttr(node, "config", null);
if (opt != null) {
p.setConfigName(opt);
}
opt = DOMUtil.getAttr(node, "schema", null);
if (opt != null) {
p.setSchemaName(opt);
}
if (zkController != null) {
opt = DOMUtil.getAttr(node, "shard", null);
if (opt != null && opt.length() > 0) {
p.getCloudDescriptor().setShardId(opt);
}
opt = DOMUtil.getAttr(node, "collection", null);
if (opt != null) {
p.getCloudDescriptor().setCollectionName(opt);
}
opt = DOMUtil.getAttr(node, "roles", null);
if(opt != null){
p.getCloudDescriptor().setRoles(opt);
}
}
opt = DOMUtil.getAttr(node, "properties", null);
if (opt != null) {
p.setPropertiesName(opt);
}
opt = DOMUtil.getAttr(node, CoreAdminParams.DATA_DIR, null);
if (opt != null) {
p.setDataDir(opt);
}
p.setCoreProperties(readProperties(cfg, node));
SolrCore core = create(p);
register(name, core, false);
// track original names
coreToOrigName.put(core, rawName);
}
catch (Throwable ex) {
SolrException.log(log,null,ex);
}
}
}
// in solr/core/src/java/org/apache/solr/core/CoreContainer.java
public SolrCore create(CoreDescriptor dcore) throws ParserConfigurationException, IOException, SAXException {
// Make the instanceDir relative to the cores instanceDir if not absolute
File idir = new File(dcore.getInstanceDir());
if (!idir.isAbsolute()) {
idir = new File(solrHome, dcore.getInstanceDir());
}
String instanceDir = idir.getPath();
log.info("Creating SolrCore '{}' using instanceDir: {}",
dcore.getName(), instanceDir);
// Initialize the solr config
SolrResourceLoader solrLoader = null;
SolrConfig config = null;
String zkConfigName = null;
if(zkController == null) {
solrLoader = new SolrResourceLoader(instanceDir, libLoader, getCoreProps(instanceDir, dcore.getPropertiesName(),dcore.getCoreProperties()));
config = new SolrConfig(solrLoader, dcore.getConfigName(), null);
} else {
try {
String collection = dcore.getCloudDescriptor().getCollectionName();
zkController.createCollectionZkNode(dcore.getCloudDescriptor());
zkConfigName = zkController.readConfigName(collection);
if (zkConfigName == null) {
log.error("Could not find config name for collection:" + collection);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
"Could not find config name for collection:" + collection);
}
solrLoader = new ZkSolrResourceLoader(instanceDir, zkConfigName, libLoader, getCoreProps(instanceDir, dcore.getPropertiesName(),dcore.getCoreProperties()), zkController);
config = getSolrConfigFromZk(zkConfigName, dcore.getConfigName(), solrLoader);
} catch (KeeperException e) {
log.error("", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
"", e);
} catch (InterruptedException e) {
// Restore the interrupted status
Thread.currentThread().interrupt();
log.error("", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
"", e);
}
}
IndexSchema schema = null;
if (indexSchemaCache != null) {
if (zkController != null) {
File schemaFile = new File(dcore.getSchemaName());
if (!schemaFile.isAbsolute()) {
schemaFile = new File(solrLoader.getInstanceDir() + "conf"
+ File.separator + dcore.getSchemaName());
}
if (schemaFile.exists()) {
String key = schemaFile.getAbsolutePath()
+ ":"
+ new SimpleDateFormat("yyyyMMddHHmmss", Locale.US).format(new Date(
schemaFile.lastModified()));
schema = indexSchemaCache.get(key);
if (schema == null) {
log.info("creating new schema object for core: " + dcore.name);
schema = new IndexSchema(config, dcore.getSchemaName(), null);
indexSchemaCache.put(key, schema);
} else {
log.info("re-using schema object for core: " + dcore.name);
}
}
} else {
// TODO: handle caching from ZooKeeper - perhaps using ZooKeepers versioning
// Don't like this cache though - how does it empty as last modified changes?
}
}
if(schema == null){
if(zkController != null) {
try {
schema = getSchemaFromZk(zkConfigName, dcore.getSchemaName(), config, solrLoader);
} catch (KeeperException e) {
log.error("", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
"", e);
} catch (InterruptedException e) {
// Restore the interrupted status
Thread.currentThread().interrupt();
log.error("", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
"", e);
}
} else {
schema = new IndexSchema(config, dcore.getSchemaName(), null);
}
}
SolrCore core = new SolrCore(dcore.getName(), null, config, schema, dcore);
if (zkController == null && core.getUpdateHandler().getUpdateLog() != null) {
// always kick off recovery if we are in standalone mode.
core.getUpdateHandler().getUpdateLog().recoverFromLog();
}
return core;
}
// in solr/core/src/java/org/apache/solr/core/CoreContainer.java
public void reload(String name) throws ParserConfigurationException, IOException, SAXException {
name= checkDefault(name);
SolrCore core;
synchronized(cores) {
core = cores.get(name);
}
if (core == null)
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "No such core: " + name );
CoreDescriptor cd = core.getCoreDescriptor();
File instanceDir = new File(cd.getInstanceDir());
if (!instanceDir.isAbsolute()) {
instanceDir = new File(getSolrHome(), cd.getInstanceDir());
}
log.info("Reloading SolrCore '{}' using instanceDir: {}",
cd.getName(), instanceDir.getAbsolutePath());
SolrResourceLoader solrLoader;
if(zkController == null) {
solrLoader = new SolrResourceLoader(instanceDir.getAbsolutePath(), libLoader, getCoreProps(instanceDir.getAbsolutePath(), cd.getPropertiesName(),cd.getCoreProperties()));
} else {
try {
String collection = cd.getCloudDescriptor().getCollectionName();
zkController.createCollectionZkNode(cd.getCloudDescriptor());
String zkConfigName = zkController.readConfigName(collection);
if (zkConfigName == null) {
log.error("Could not find config name for collection:" + collection);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
"Could not find config name for collection:" + collection);
}
solrLoader = new ZkSolrResourceLoader(instanceDir.getAbsolutePath(), zkConfigName, libLoader, getCoreProps(instanceDir.getAbsolutePath(), cd.getPropertiesName(),cd.getCoreProperties()), zkController);
} catch (KeeperException e) {
log.error("", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
"", e);
} catch (InterruptedException e) {
// Restore the interrupted status
Thread.currentThread().interrupt();
log.error("", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
"", e);
}
}
SolrCore newCore = core.reload(solrLoader);
// keep core to orig name link
String origName = coreToOrigName.remove(core);
if (origName != null) {
coreToOrigName.put(newCore, origName);
}
register(name, newCore, false);
}
// in solr/core/src/java/org/apache/solr/core/CoreContainer.java
private SolrConfig getSolrConfigFromZk(String zkConfigName, String solrConfigFileName,
SolrResourceLoader resourceLoader) throws IOException,
ParserConfigurationException, SAXException, KeeperException,
InterruptedException {
byte[] config = zkController.getConfigFileData(zkConfigName, solrConfigFileName);
InputSource is = new InputSource(new ByteArrayInputStream(config));
is.setSystemId(SystemIdResolver.createSystemIdFromResourceName(solrConfigFileName));
SolrConfig cfg = solrConfigFileName == null ? new SolrConfig(
resourceLoader, SolrConfig.DEFAULT_CONF_FILE, is) : new SolrConfig(
resourceLoader, solrConfigFileName, is);
return cfg;
}
// in solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java
public void onInit(List commits) throws IOException {
log.info("SolrDeletionPolicy.onInit: commits:" + str(commits));
updateCommits((List<IndexCommit>) commits);
}
// in solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java
public void onCommit(List commits) throws IOException {
log.info("SolrDeletionPolicy.onCommit: commits:" + str(commits));
updateCommits((List<IndexCommit>) commits);
}
// in solr/core/src/java/org/apache/solr/core/IndexDeletionPolicyWrapper.java
public void onInit(List list) throws IOException {
List<IndexCommitWrapper> wrapperList = wrap(list);
deletionPolicy.onInit(wrapperList);
updateCommitPoints(wrapperList);
cleanReserves();
}
// in solr/core/src/java/org/apache/solr/core/IndexDeletionPolicyWrapper.java
public void onCommit(List list) throws IOException {
List<IndexCommitWrapper> wrapperList = wrap(list);
deletionPolicy.onCommit(wrapperList);
updateCommitPoints(wrapperList);
cleanReserves();
}
// in solr/core/src/java/org/apache/solr/core/IndexDeletionPolicyWrapper.java
@Override
public Collection getFileNames() throws IOException {
return delegate.getFileNames();
}
// in solr/core/src/java/org/apache/solr/core/IndexDeletionPolicyWrapper.java
@Override
public Map getUserData() throws IOException {
return delegate.getUserData();
}
// in solr/core/src/java/org/apache/solr/core/IndexDeletionPolicyWrapper.java
public static long getCommitTimestamp(IndexCommit commit) throws IOException {
final Map<String,String> commitData = commit.getUserData();
String commitTime = commitData.get(SolrIndexWriter.COMMIT_TIME_MSEC_KEY);
if (commitTime != null) {
return Long.parseLong(commitTime);
} else {
return 0;
}
}
// in solr/core/src/java/org/apache/solr/core/SolrXMLSerializer.java
void persist(Writer w, SolrXMLDef solrXMLDef) throws IOException {
w.write("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n");
w.write("<solr");
Map<String,String> rootSolrAttribs = solrXMLDef.solrAttribs;
Set<String> solrAttribKeys = rootSolrAttribs.keySet();
for (String key : solrAttribKeys) {
String value = rootSolrAttribs.get(key);
writeAttribute(w, key, value);
}
w.write(">\n");
Properties containerProperties = solrXMLDef.containerProperties;
if (containerProperties != null && !containerProperties.isEmpty()) {
writeProperties(w, containerProperties, " ");
}
w.write(INDENT + "<cores");
Map<String,String> coresAttribs = solrXMLDef.coresAttribs;
Set<String> coreAttribKeys = coresAttribs.keySet();
for (String key : coreAttribKeys) {
String value = coresAttribs.get(key);
writeAttribute(w, key, value);
}
w.write(">\n");
for (SolrCoreXMLDef coreDef : solrXMLDef.coresDefs) {
persist(w, coreDef);
}
w.write(INDENT + "</cores>\n");
w.write("</solr>\n");
}
// in solr/core/src/java/org/apache/solr/core/SolrXMLSerializer.java
private void persist(Writer w, SolrCoreXMLDef coreDef) throws IOException {
w.write(INDENT + INDENT + "<core");
Set<String> keys = coreDef.coreAttribs.keySet();
for (String key : keys) {
writeAttribute(w, key, coreDef.coreAttribs.get(key));
}
Properties properties = coreDef.coreProperties;
if (properties == null || properties.isEmpty()) w.write("/>\n"); // core
else {
w.write(">\n");
writeProperties(w, properties, " ");
w.write(INDENT + INDENT + "</core>\n");
}
}
// in solr/core/src/java/org/apache/solr/core/SolrXMLSerializer.java
private void writeProperties(Writer w, Properties props, String indent)
throws IOException {
for (Map.Entry<Object,Object> entry : props.entrySet()) {
w.write(indent + "<property");
writeAttribute(w, "name", entry.getKey());
writeAttribute(w, "value", entry.getValue());
w.write("/>\n");
}
}
// in solr/core/src/java/org/apache/solr/core/SolrXMLSerializer.java
private void writeAttribute(Writer w, String name, Object value)
throws IOException {
if (value == null) return;
w.write(" ");
w.write(name);
w.write("=\"");
XML.escapeAttributeValue(value.toString(), w);
w.write("\"");
}
// in solr/core/src/java/org/apache/solr/core/SolrXMLSerializer.java
private static void fileCopy(File src, File dest) throws IOException {
IOException xforward = null;
FileInputStream fis = null;
FileOutputStream fos = null;
FileChannel fcin = null;
FileChannel fcout = null;
try {
fis = new FileInputStream(src);
fos = new FileOutputStream(dest);
fcin = fis.getChannel();
fcout = fos.getChannel();
// do the file copy 32Mb at a time
final int MB32 = 32 * 1024 * 1024;
long size = fcin.size();
long position = 0;
while (position < size) {
position += fcin.transferTo(position, MB32, fcout);
}
} catch (IOException xio) {
xforward = xio;
} finally {
if (fis != null) try {
fis.close();
fis = null;
} catch (IOException xio) {}
if (fos != null) try {
fos.close();
fos = null;
} catch (IOException xio) {}
if (fcin != null && fcin.isOpen()) try {
fcin.close();
fcin = null;
} catch (IOException xio) {}
if (fcout != null && fcout.isOpen()) try {
fcout.close();
fcout = null;
} catch (IOException xio) {}
}
if (xforward != null) {
throw xforward;
}
}
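// Illustrative sketch (not from the source tree): the same chunked FileChannel copy as fileCopy() above,
// written with Java 7 try-with-resources so the streams and channels are closed without the explicit
// cleanup code. Class and method names are hypothetical.
import java.io.*;
import java.nio.channels.FileChannel;

final class NioCopy {
  static void copy(File src, File dest) throws IOException {
    try (FileInputStream fis = new FileInputStream(src);
         FileOutputStream fos = new FileOutputStream(dest);
         FileChannel in = fis.getChannel();
         FileChannel out = fos.getChannel()) {
      final long CHUNK = 32L * 1024 * 1024;   // copy 32MB per transferTo() call
      long size = in.size();
      long position = 0;
      while (position < size) {
        position += in.transferTo(position, CHUNK, out);
      }
    }
  }
}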
// in solr/core/src/java/org/apache/solr/core/SimpleFSDirectoryFactory.java
@Override
protected Directory create(String path) throws IOException {
return new SimpleFSDirectory(new File(path));
}
// in solr/core/src/java/org/apache/solr/core/NRTCachingDirectoryFactory.java
@Override
protected Directory create(String path) throws IOException {
return new NRTCachingDirectory(FSDirectory.open(new File(path)), 4, 48);
}
// in solr/core/src/java/org/apache/solr/core/StandardIndexReaderFactory.java
@Override
public DirectoryReader newReader(Directory indexDir, SolrCore core) throws IOException {
return DirectoryReader.open(indexDir, termInfosIndexDivisor);
}
// in solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
public static int numDocs(SolrIndexSearcher s, Query q, Query f)
throws IOException {
return (null == f) ? s.getDocSet(q).size() : s.numDocs(q,f);
}
// in solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
public static void optimizePreFetchDocs(ResponseBuilder rb,
DocList docs,
Query query,
SolrQueryRequest req,
SolrQueryResponse res) throws IOException {
SolrIndexSearcher searcher = req.getSearcher();
if(!searcher.enableLazyFieldLoading) {
// nothing to do
return;
}
ReturnFields returnFields = res.getReturnFields();
if(returnFields.getLuceneFieldNames() != null) {
Set<String> fieldFilter = returnFields.getLuceneFieldNames();
if (rb.doHighlights) {
// copy return fields list
fieldFilter = new HashSet<String>(fieldFilter);
// add highlight fields
SolrHighlighter highlighter = HighlightComponent.getHighlighter(req.getCore());
for (String field: highlighter.getHighlightFields(query, req, null))
fieldFilter.add(field);
// fetch unique key if one exists.
SchemaField keyField = req.getSearcher().getSchema().getUniqueKeyField();
if(null != keyField)
fieldFilter.add(keyField.getName());
}
// get documents
DocIterator iter = docs.iterator();
for (int i=0; i<docs.size(); i++) {
searcher.doc(iter.nextDoc(), fieldFilter);
}
}
}
// in solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
public static NamedList doStandardDebug(SolrQueryRequest req,
String userQuery,
Query query,
DocList results, boolean dbgQuery, boolean dbgResults)
throws IOException {
NamedList dbg = null;
dbg = new SimpleOrderedMap();
SolrIndexSearcher searcher = req.getSearcher();
IndexSchema schema = req.getSchema();
boolean explainStruct
= req.getParams().getBool(CommonParams.EXPLAIN_STRUCT, false);
if (dbgQuery) {
/* userQuery may have been pre-processed .. expose that */
dbg.add("rawquerystring", req.getParams().get(CommonParams.Q));
dbg.add("querystring", userQuery);
/* QueryParsing.toString isn't perfect, use it to see converted
* values, use regular toString to see any attributes of the
* underlying Query it may have missed.
*/
dbg.add("parsedquery", QueryParsing.toString(query, schema));
dbg.add("parsedquery_toString", query.toString());
}
if (dbgResults) {
NamedList<Explanation> explain
= getExplanations(query, results, searcher, schema);
dbg.add("explain", explainStruct ?
explanationsToNamedLists(explain) :
explanationsToStrings(explain));
String otherQueryS = req.getParams().get(CommonParams.EXPLAIN_OTHER);
if (otherQueryS != null && otherQueryS.length() > 0) {
DocList otherResults = doSimpleQuery
(otherQueryS, req, 0, 10);
dbg.add("otherQuery", otherQueryS);
NamedList<Explanation> explainO
= getExplanations(query, otherResults, searcher, schema);
dbg.add("explainOther", explainStruct ?
explanationsToNamedLists(explainO) :
explanationsToStrings(explainO));
}
}
return dbg;
}
// in solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
public static NamedList<Explanation> getExplanations
(Query query,
DocList docs,
SolrIndexSearcher searcher,
IndexSchema schema) throws IOException {
NamedList<Explanation> explainList = new SimpleOrderedMap<Explanation>();
DocIterator iterator = docs.iterator();
for (int i=0; i<docs.size(); i++) {
int id = iterator.nextDoc();
Document doc = searcher.doc(id);
String strid = schema.printableUniqueKey(doc);
explainList.add(strid, searcher.explain(query, id) );
}
return explainList;
}
// in solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
public static DocList doSimpleQuery(String sreq,
SolrQueryRequest req,
int start, int limit) throws IOException {
List<String> commands = StrUtils.splitSmart(sreq,';');
String qs = commands.size() >= 1 ? commands.get(0) : "";
try {
Query query = QParser.getParser(qs, null, req).getQuery();
// If the first non-query, non-filter command is a simple sort on an indexed field, then
// we can use the Lucene sort ability.
Sort sort = null;
if (commands.size() >= 2) {
sort = QueryParsing.parseSort(commands.get(1), req);
}
DocList results = req.getSearcher().getDocList(query,(DocSet)null, sort, start, limit);
return results;
} catch (ParseException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing query: " + qs);
}
}
// in solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
public boolean regenerateItem(SolrIndexSearcher newSearcher,
SolrCache newCache,
SolrCache oldCache,
Object oldKey,
Object oldVal)
throws IOException {
newCache.put(oldKey,oldVal);
return true;
}
// in solr/core/src/java/org/apache/solr/util/SolrPluginUtils.java
public static SolrDocumentList docListToSolrDocumentList(
DocList docs,
SolrIndexSearcher searcher,
Set<String> fields,
Map<SolrDocument, Integer> ids ) throws IOException
{
IndexSchema schema = searcher.getSchema();
SolrDocumentList list = new SolrDocumentList();
list.setNumFound(docs.matches());
list.setMaxScore(docs.maxScore());
list.setStart(docs.offset());
DocIterator dit = docs.iterator();
while (dit.hasNext()) {
int docid = dit.nextDoc();
Document luceneDoc = searcher.doc(docid, fields);
SolrDocument doc = new SolrDocument();
for( IndexableField field : luceneDoc) {
if (null == fields || fields.contains(field.name())) {
SchemaField sf = schema.getField( field.name() );
doc.addField( field.name(), sf.getType().toObject( field ) );
}
}
if (docs.hasScores() && (null == fields || fields.contains("score"))) {
doc.addField("score", dit.score());
}
list.add( doc );
if( ids != null ) {
ids.put( doc, new Integer(docid) );
}
}
return list;
}
// in solr/core/src/java/org/apache/solr/util/FastWriter.java
@Override
public void write(int c) throws IOException {
write((char)c);
}
// in solr/core/src/java/org/apache/solr/util/FastWriter.java
public void write(char c) throws IOException {
if (pos >= buf.length) {
sink.write(buf,0,pos);
pos=0;
}
buf[pos++] = c;
}
// in solr/core/src/java/org/apache/solr/util/FastWriter.java
@Override
public FastWriter append(char c) throws IOException {
if (pos >= buf.length) {
sink.write(buf,0,pos);
pos=0;
}
buf[pos++] = c;
return this;
}
// in solr/core/src/java/org/apache/solr/util/FastWriter.java
@Override
public void write(char cbuf[], int off, int len) throws IOException {
int space = buf.length - pos;
if (len < space) {
System.arraycopy(cbuf, off, buf, pos, len);
pos += len;
} else if (len<BUFSIZE) {
// if the data to write is small enough, buffer it.
System.arraycopy(cbuf, off, buf, pos, space);
sink.write(buf, 0, buf.length);
pos = len-space;
System.arraycopy(cbuf, off+space, buf, 0, pos);
} else {
sink.write(buf,0,pos); // flush
pos=0;
// don't buffer, just write to sink
sink.write(cbuf, off, len);
}
}
// in solr/core/src/java/org/apache/solr/util/FastWriter.java
@Override
public void write(String str, int off, int len) throws IOException {
int space = buf.length - pos;
if (len < space) {
str.getChars(off, off+len, buf, pos);
pos += len;
} else if (len<BUFSIZE) {
// if the data to write is small enough, buffer it.
str.getChars(off, off+space, buf, pos);
sink.write(buf, 0, buf.length);
str.getChars(off+space, off+len, buf, 0);
pos = len-space;
} else {
sink.write(buf,0,pos); // flush
pos=0;
// don't buffer, just write to sink
sink.write(str, off, len);
}
}
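// Illustrative usage sketch (assuming the single-argument FastWriter(Writer) constructor from the same
// Solr class): small writes are coalesced into the internal buffer while writes of BUFSIZE or more bypass
// it and go straight to the sink, so callers only need one flush/close at the end.
import java.io.*;
import org.apache.solr.util.FastWriter;

final class FastWriterDemo {
  static void writeResponse(OutputStream rawOut) throws IOException {
    Writer sink = new OutputStreamWriter(rawOut, "UTF-8");
    FastWriter out = new FastWriter(sink);        // buffers small writes, passes large ones through
    try {
      out.write("{\"status\":");                  // small writes: land in the buffer
      out.write("0}");
      char[] large = new char[1 << 20];           // large write: goes straight to the sink
      out.write(large, 0, large.length);
    } finally {
      out.close();                                // flushes the buffer and closes the sink
    }
  }
}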
// in solr/core/src/java/org/apache/solr/util/FastWriter.java
@Override
public void flush() throws IOException {
sink.write(buf,0,pos);
pos=0;
sink.flush();
}
// in solr/core/src/java/org/apache/solr/util/FastWriter.java
@Override
public void close() throws IOException {
flush();
sink.close();
}
// in solr/core/src/java/org/apache/solr/util/FastWriter.java
public void flushBuffer() throws IOException {
sink.write(buf, 0, pos);
pos=0;
}
// in solr/core/src/java/org/apache/solr/util/SystemIdResolver.java
URI resolveRelativeURI(String baseURI, String systemId) throws IOException,URISyntaxException {
URI uri;
// special case for backwards compatibility: if a relative systemId starts with "/", we convert it to an absolute solrres: URI
if (systemId.startsWith("/")) {
uri = new URI(RESOURCE_LOADER_URI_SCHEME, RESOURCE_LOADER_AUTHORITY_ABSOLUTE, "/", null, null).resolve(systemId);
} else {
// simply parse as URI
uri = new URI(systemId);
}
// do relative resolving
if (baseURI != null ) {
uri = new URI(baseURI).resolve(uri);
}
return uri;
}
// in solr/core/src/java/org/apache/solr/util/SystemIdResolver.java
public InputSource resolveEntity(String name, String publicId, String baseURI, String systemId) throws IOException {
if (systemId == null)
return null;
try {
final URI uri = resolveRelativeURI(baseURI, systemId);
// check schema and resolve with ResourceLoader
if (RESOURCE_LOADER_URI_SCHEME.equals(uri.getScheme())) {
String path = uri.getPath(), authority = uri.getAuthority();
if (!RESOURCE_LOADER_AUTHORITY_ABSOLUTE.equals(authority)) {
path = path.substring(1);
}
try {
final InputSource is = new InputSource(loader.openResource(path));
is.setSystemId(uri.toASCIIString());
is.setPublicId(publicId);
return is;
} catch (RuntimeException re) {
// unfortunately XInclude fallback only works with IOException, but openResource() never throws that one
throw (IOException) (new IOException(re.getMessage()).initCause(re));
}
} else {
// resolve all other URIs using the standard resolver
return null;
}
} catch (URISyntaxException use) {
log.warn("An URI systax problem occurred during resolving SystemId, falling back to default resolver", use);
return null;
}
}
// in solr/core/src/java/org/apache/solr/util/SystemIdResolver.java
public InputSource resolveEntity(String publicId, String systemId) throws IOException {
return resolveEntity(null, publicId, null, systemId);
}
// in solr/core/src/java/org/apache/solr/util/xslt/TransformerProvider.java
public synchronized Transformer getTransformer(SolrConfig solrConfig, String filename,int cacheLifetimeSeconds) throws IOException {
// For now, the Templates are blindly reloaded once cacheExpires is over.
// It'd be better to check the file modification time to reload only if needed.
if(lastTemplates!=null && filename.equals(lastFilename) && System.currentTimeMillis() < cacheExpires) {
if(log.isDebugEnabled()) {
log.debug("Using cached Templates:" + filename);
}
} else {
lastTemplates = getTemplates(solrConfig.getResourceLoader(), filename,cacheLifetimeSeconds);
}
Transformer result = null;
try {
result = lastTemplates.newTransformer();
} catch(TransformerConfigurationException tce) {
log.error(getClass().getName(), "getTransformer", tce);
final IOException ioe = new IOException("newTransformer fails ( " + lastFilename + ")");
ioe.initCause(tce);
throw ioe;
}
return result;
}
// in solr/core/src/java/org/apache/solr/util/xslt/TransformerProvider.java
private Templates getTemplates(ResourceLoader loader, String filename,int cacheLifetimeSeconds) throws IOException {
Templates result = null;
lastFilename = null;
try {
if(log.isDebugEnabled()) {
log.debug("compiling XSLT templates:" + filename);
}
final String fn = "xslt/" + filename;
final TransformerFactory tFactory = TransformerFactory.newInstance();
tFactory.setURIResolver(new SystemIdResolver(loader).asURIResolver());
tFactory.setErrorListener(xmllog);
final StreamSource src = new StreamSource(loader.openResource(fn),
SystemIdResolver.createSystemIdFromResourceName(fn));
try {
result = tFactory.newTemplates(src);
} finally {
// some XML parsers are broken and don't close the byte stream (but they should according to spec)
IOUtils.closeQuietly(src.getInputStream());
}
} catch (Exception e) {
log.error(getClass().getName(), "newTemplates", e);
final IOException ioe = new IOException("Unable to initialize Templates '" + filename + "'");
ioe.initCause(e);
throw ioe;
}
lastFilename = filename;
lastTemplates = result;
cacheExpires = System.currentTimeMillis() + (cacheLifetimeSeconds * 1000);
return result;
}
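// Illustrative sketch (plain JAXP, hypothetical helper): compile an XSLT file into a Templates object once
// and reuse it, creating a cheap Transformer per use -- the caching idea behind TransformerProvider above.
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamSource;
import java.io.File;

final class CachedXslt {
  private Templates templates;            // thread-safe, reusable compiled stylesheet
  private long expiresAtMillis;

  synchronized Transformer get(File xslt, long cacheLifetimeMillis) throws TransformerException {
    long now = System.currentTimeMillis();
    if (templates == null || now >= expiresAtMillis) {
      templates = TransformerFactory.newInstance().newTemplates(new StreamSource(xslt));
      expiresAtMillis = now + cacheLifetimeMillis;
    }
    return templates.newTransformer();    // Transformer itself is not thread-safe; create one per use
  }
}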
// in solr/core/src/java/org/apache/solr/util/SuggestMissingFactories.java
public static void main(String[] args) throws ClassNotFoundException, IOException, NoSuchMethodException {
final File[] files = new File[args.length];
for (int i = 0; i < args.length; i++) {
files[i] = new File(args[i]);
}
final FindClasses finder = new FindClasses(files);
final ClassLoader cl = finder.getClassLoader();
final Class TOKENSTREAM
= cl.loadClass("org.apache.lucene.analysis.TokenStream");
final Class TOKENIZER
= cl.loadClass("org.apache.lucene.analysis.Tokenizer");
final Class TOKENFILTER
= cl.loadClass("org.apache.lucene.analysis.TokenFilter");
final Class TOKENIZERFACTORY
= cl.loadClass("org.apache.solr.analysis.TokenizerFactory");
final Class TOKENFILTERFACTORY
= cl.loadClass("org.apache.solr.analysis.TokenFilterFactory");
final HashSet<Class> result
= new HashSet<Class>(finder.findExtends(TOKENIZER));
result.addAll(finder.findExtends(TOKENFILTER));
result.removeAll(finder.findMethodReturns
(finder.findExtends(TOKENIZERFACTORY),
"create",
Reader.class).values());
result.removeAll(finder.findMethodReturns
(finder.findExtends(TOKENFILTERFACTORY),
"create",
TOKENSTREAM).values());
for (final Class c : result) {
System.out.println(c.getName());
}
}
// in solr/core/src/java/org/apache/solr/util/SuggestMissingFactories.java
public static void main(String[] args)
throws ClassNotFoundException, IOException, NoSuchMethodException {
FindClasses finder = new FindClasses(new File(args[1]));
ClassLoader cl = finder.getClassLoader();
Class clazz = cl.loadClass(args[0]);
if (args.length == 2) {
System.out.println("Finding all extenders of " + clazz.getName());
for (Class c : finder.findExtends(clazz)) {
System.out.println(c.getName());
}
} else {
String methName = args[2];
System.out.println("Finding all extenders of " + clazz.getName() +
" with method: " + methName);
Class[] methArgs = new Class[args.length-3];
for (int i = 3; i < args.length; i++) {
methArgs[i-3] = cl.loadClass(args[i]);
}
Map<Class,Class> map = finder.findMethodReturns
(finder.findExtends(clazz),methName, methArgs);
for (Class key : map.keySet()) {
System.out.println(key.getName() + " => " + map.get(key).getName());
}
}
}
// in solr/core/src/java/org/apache/solr/util/SimplePostTool.java
private static void pipe(InputStream source, OutputStream dest) throws IOException {
byte[] buf = new byte[1024];
int read = 0;
while ( (read = source.read(buf) ) >= 0) {
if (null != dest) dest.write(buf, 0, read);
}
if (null != dest) dest.flush();
}
// in solr/core/src/java/org/apache/solr/util/FileUtils.java
public static void copyFile(File src , File destination) throws IOException {
FileChannel in = null;
FileChannel out = null;
try {
in = new FileInputStream(src).getChannel();
out = new FileOutputStream(destination).getChannel();
in.transferTo(0, in.size(), out);
} finally {
try { if (in != null) in.close(); } catch (IOException e) {}
try { if (out != null) out.close(); } catch (IOException e) {}
}
}
// in solr/core/src/java/org/apache/solr/util/FileUtils.java
public static void sync(File fullFile) throws IOException {
if (fullFile == null || !fullFile.exists())
throw new FileNotFoundException("File does not exist " + fullFile);
boolean success = false;
int retryCount = 0;
IOException exc = null;
while(!success && retryCount < 5) {
retryCount++;
RandomAccessFile file = null;
try {
try {
file = new RandomAccessFile(fullFile, "rw");
file.getFD().sync();
success = true;
} finally {
if (file != null)
file.close();
}
} catch (IOException ioe) {
if (exc == null)
exc = ioe;
try {
// Pause 5 msec
Thread.sleep(5);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
}
}
}
if (!success)
// Throw original exception
throw exc;
}
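// Illustrative sketch (plain JDK, hypothetical helper): the same durability idea as sync() above, using
// FileChannel.force(true) instead of RandomAccessFile.getFD().sync(), with the identical bounded-retry loop.
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;

final class Fsync {
  static void fsync(File file) throws IOException {
    IOException last = null;
    for (int attempt = 0; attempt < 5; attempt++) {
      try (RandomAccessFile raf = new RandomAccessFile(file, "rw");
           FileChannel ch = raf.getChannel()) {
        ch.force(true);                   // flush file data and metadata to the storage device
        return;
      } catch (IOException ioe) {
        if (last == null) last = ioe;
        try {
          Thread.sleep(5);                // brief pause before retrying
        } catch (InterruptedException ie) {
          Thread.currentThread().interrupt();
        }
      }
    }
    throw last;                           // all retries failed: rethrow the original exception
  }
}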
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java
@Override
public final boolean incrementToken() throws IOException {
assert !enableChecks || (streamState == State.RESET || streamState == State.INCREMENT)
: "incrementToken() called while in wrong state: " + streamState;
clearAttributes();
for (;;) {
int startOffset = off;
int cp = readCodePoint();
if (cp < 0) {
break;
} else if (isTokenChar(cp)) {
int endOffset;
do {
char chars[] = Character.toChars(normalize(cp));
for (int i = 0; i < chars.length; i++)
termAtt.append(chars[i]);
endOffset = off;
if (termAtt.length() >= maxTokenLength) {
break;
}
cp = readCodePoint();
} while (cp >= 0 && isTokenChar(cp));
int correctedStartOffset = correctOffset(startOffset);
int correctedEndOffset = correctOffset(endOffset);
assert correctedStartOffset >= 0;
assert correctedEndOffset >= 0;
assert correctedStartOffset >= lastOffset;
lastOffset = correctedStartOffset;
assert correctedEndOffset >= correctedStartOffset;
offsetAtt.setOffset(correctedStartOffset, correctedEndOffset);
streamState = State.INCREMENT;
return true;
}
}
streamState = State.INCREMENT_FALSE;
return false;
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java
protected int readCodePoint() throws IOException {
int ch = input.read();
if (ch < 0) {
return ch;
} else {
assert !Character.isLowSurrogate((char) ch);
off++;
if (Character.isHighSurrogate((char) ch)) {
int ch2 = input.read();
if (ch2 >= 0) {
off++;
assert Character.isLowSurrogate((char) ch2);
return Character.toCodePoint((char) ch, (char) ch2);
}
}
return ch;
}
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java
@Override
public void reset() throws IOException {
super.reset();
state = runAutomaton.getInitialState();
lastOffset = off = 0;
assert !enableChecks || streamState != State.RESET : "double reset()";
streamState = State.RESET;
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java
@Override
public void close() throws IOException {
super.close();
// in some exceptional cases (e.g. TestIndexWriterExceptions) a test can prematurely close()
// these tests should disable this check, by default we check the normal workflow.
// TODO: investigate the CachingTokenFilter "double-close"... for now we ignore this
assert !enableChecks || streamState == State.END || streamState == State.CLOSE : "close() called in wrong state: " + streamState;
streamState = State.CLOSE;
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java
@Override
public void reset(Reader input) throws IOException {
super.reset(input);
assert !enableChecks || streamState == State.CLOSE : "setReader() called in wrong state: " + streamState;
streamState = State.SETREADER;
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenizer.java
@Override
public void end() throws IOException {
int finalOffset = correctOffset(off);
offsetAtt.setOffset(finalOffset, finalOffset);
// some tokenizers, such as limiting tokenizers, call end() before incrementToken() returns false.
// these tests should disable this check (in general you should consume the entire stream)
try {
assert !enableChecks || streamState == State.INCREMENT_FALSE : "end() called before incrementToken() returned false!";
} finally {
streamState = State.END;
}
}
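// Illustrative sketch (the standard Lucene 4.x TokenStream consumer workflow that the MockTokenizer asserts
// above enforce): reset() before the first incrementToken(), end() only after incrementToken() has returned
// false, then close(). The field name and analyzer are arbitrary assumptions for the example.
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

final class ConsumeTokens {
  static void printTokens(Analyzer analyzer, String text) throws IOException {
    TokenStream ts = analyzer.tokenStream("field", new StringReader(text));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    try {
      ts.reset();                              // required before the first incrementToken()
      while (ts.incrementToken()) {
        System.out.println(term.toString());
      }
      ts.end();                                // only after incrementToken() has returned false
    } finally {
      ts.close();
    }
  }
}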
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockVariableLengthPayloadFilter.java
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
random.nextBytes(bytes);
payload.setData(bytes, 0, random.nextInt(MAXLENGTH));
payloadAtt.setPayload(payload);
return true;
} else {
return false;
}
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/LookaheadTokenFilter.java
protected void insertToken() throws IOException {
if (tokenPending) {
positions.get(inputPos).add(captureState());
tokenPending = false;
}
assert !insertPending;
insertPending = true;
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/LookaheadTokenFilter.java
protected void afterPosition() throws IOException {
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/LookaheadTokenFilter.java
protected boolean peekToken() throws IOException {
if (DEBUG) {
System.out.println("LTF.peekToken inputPos=" + inputPos + " outputPos=" + outputPos + " tokenPending=" + tokenPending);
}
assert !end;
assert inputPos == -1 || outputPos <= inputPos;
if (tokenPending) {
positions.get(inputPos).add(captureState());
tokenPending = false;
}
final boolean gotToken = input.incrementToken();
if (DEBUG) {
System.out.println(" input.incrToken() returned " + gotToken);
}
if (gotToken) {
inputPos += posIncAtt.getPositionIncrement();
assert inputPos >= 0;
if (DEBUG) {
System.out.println(" now inputPos=" + inputPos);
}
final Position startPosData = positions.get(inputPos);
final Position endPosData = positions.get(inputPos + posLenAtt.getPositionLength());
final int startOffset = offsetAtt.startOffset();
if (startPosData.startOffset == -1) {
startPosData.startOffset = startOffset;
} else {
// Make sure our input isn't messing up offsets:
assert startPosData.startOffset == startOffset: "prev startOffset=" + startPosData.startOffset + " vs new startOffset=" + startOffset + " inputPos=" + inputPos;
}
final int endOffset = offsetAtt.endOffset();
if (endPosData.endOffset == -1) {
endPosData.endOffset = endOffset;
} else {
// Make sure our input isn't messing up offsets:
assert endPosData.endOffset == endOffset: "prev endOffset=" + endPosData.endOffset + " vs new endOffset=" + endOffset + " inputPos=" + inputPos;
}
tokenPending = true;
} else {
end = true;
}
return gotToken;
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/LookaheadTokenFilter.java
protected boolean nextToken() throws IOException {
//System.out.println(" nextToken: tokenPending=" + tokenPending);
if (DEBUG) {
System.out.println("LTF.nextToken inputPos=" + inputPos + " outputPos=" + outputPos + " tokenPending=" + tokenPending);
}
Position posData = positions.get(outputPos);
// While loop here in case we have to
// skip over a hole from the input:
while (true) {
//System.out.println(" check buffer @ outputPos=" +
//outputPos + " inputPos=" + inputPos + " nextRead=" +
//posData.nextRead + " vs size=" +
//posData.inputTokens.size());
// See if we have a previously buffered token to
// return at the current position:
if (posData.nextRead < posData.inputTokens.size()) {
if (DEBUG) {
System.out.println(" return previously buffered token");
}
// This position has buffered tokens to serve up:
if (tokenPending) {
positions.get(inputPos).add(captureState());
tokenPending = false;
}
restoreState(positions.get(outputPos).nextState());
//System.out.println(" return!");
return true;
}
if (inputPos == -1 || outputPos == inputPos) {
// No more buffered tokens:
// We may still get input tokens at this position
//System.out.println(" break buffer");
if (tokenPending) {
// Fast path: just return token we had just incr'd,
// without having captured/restored its state:
if (DEBUG) {
System.out.println(" pass-through: return pending token");
}
tokenPending = false;
return true;
} else if (end || !peekToken()) {
if (DEBUG) {
System.out.println(" END");
}
return false;
}
} else {
if (posData.startOffset != -1) {
// This position had at least one token leaving
if (DEBUG) {
System.out.println(" call afterPosition");
}
afterPosition();
if (insertPending) {
// Subclass inserted a token at this same
// position:
if (DEBUG) {
System.out.println(" return inserted token");
}
assert insertedTokenConsistent();
insertPending = false;
return true;
}
}
// Done with this position; move on:
outputPos++;
if (DEBUG) {
System.out.println(" next position: outputPos=" + outputPos);
}
positions.freeBefore(outputPos);
posData = positions.get(outputPos);
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/LookaheadTokenFilter.java
@Override
public void reset() throws IOException {
super.reset();
positions.reset();
inputPos = -1;
outputPos = 0;
tokenPending = false;
end = false;
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java
public static void assertVocabulary(Analyzer a, InputStream voc, InputStream out)
throws IOException {
BufferedReader vocReader = new BufferedReader(
new InputStreamReader(voc, "UTF-8"));
BufferedReader outputReader = new BufferedReader(
new InputStreamReader(out, "UTF-8"));
String inputWord = null;
while ((inputWord = vocReader.readLine()) != null) {
String expectedWord = outputReader.readLine();
Assert.assertNotNull(expectedWord);
BaseTokenStreamTestCase.checkOneTermReuse(a, inputWord, expectedWord);
}
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java
public static void assertVocabulary(Analyzer a, InputStream vocOut)
throws IOException {
BufferedReader vocReader = new BufferedReader(
new InputStreamReader(vocOut, "UTF-8"));
String inputLine = null;
while ((inputLine = vocReader.readLine()) != null) {
if (inputLine.startsWith("#") || inputLine.trim().length() == 0)
continue; /* skip comments and blank lines */
String words[] = inputLine.split("\t");
BaseTokenStreamTestCase.checkOneTermReuse(a, words[0], words[1]);
}
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java
public static void assertVocabulary(Analyzer a, File zipFile, String voc, String out)
throws IOException {
ZipFile zip = new ZipFile(zipFile);
InputStream v = zip.getInputStream(zip.getEntry(voc));
InputStream o = zip.getInputStream(zip.getEntry(out));
assertVocabulary(a, v, o);
v.close();
o.close();
zip.close();
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java
public static void assertVocabulary(Analyzer a, File zipFile, String vocOut)
throws IOException {
ZipFile zip = new ZipFile(zipFile);
InputStream vo = zip.getInputStream(zip.getEntry(vocOut));
assertVocabulary(a, vo);
vo.close();
zip.close();
}
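The single-stream overload above expects tab-separated "input<TAB>expected" lines, skipping lines that start with '#' or are blank. A minimal usage sketch, assuming MockAnalyzer's defaults (whitespace tokenization plus lowercasing); the vocabulary data is made up for illustration:
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.VocabularyAssert;

public class VocabularySketch {
  public static void checkTinyVocabulary() throws Exception {
    String data = "# tiny in-memory vocabulary\n"
                + "FOO\tfoo\n"
                + "Bar\tbar\n";
    InputStream vocOut = new ByteArrayInputStream(data.getBytes("UTF-8"));
    // each left-hand word must analyze to exactly the right-hand term
    VocabularyAssert.assertVocabulary(new MockAnalyzer(new Random(42)), vocOut);
  }
}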
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockPayloadAnalyzer.java
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
payloadAttr.setPayload(new Payload(("pos: " + pos).getBytes()));
int posIncr;
if (i % 2 == 1) {
posIncr = 1;
} else {
posIncr = 0;
}
posIncrAttr.setPositionIncrement(posIncr);
pos += posIncr;
i++;
return true;
} else {
return false;
}
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockPayloadAnalyzer.java
@Override
public void reset() throws IOException {
super.reset();
i = 0;
pos = 0;
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockRandomLookaheadTokenFilter.java
@Override
protected void afterPosition() throws IOException {
if (!end && random.nextInt(4) == 2) {
peekToken();
}
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockRandomLookaheadTokenFilter.java
@Override
public boolean incrementToken() throws IOException {
if (DEBUG) {
System.out.println("\n" + Thread.currentThread().getName() + ": MRLTF.incrToken");
}
if (!end) {
while (true) {
if (random.nextInt(3) == 1) {
if (!peekToken()) {
if (DEBUG) {
System.out.println(" peek; inputPos=" + inputPos + " END");
}
break;
}
if (DEBUG) {
System.out.println(" peek; inputPos=" + inputPos + " token=" + termAtt);
}
} else {
if (DEBUG) {
System.out.println(" done peek");
}
break;
}
}
}
final boolean result = nextToken();
if (result) {
if (DEBUG) {
System.out.println(" return nextToken token=" + termAtt);
}
} else {
if (DEBUG) {
System.out.println(" return nextToken END");
}
}
return result;
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockRandomLookaheadTokenFilter.java
@Override
public void reset() throws IOException {
super.reset();
random.setSeed(seed);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/CannedTokenStream.java
@Override
public boolean incrementToken() throws IOException {
if (upto < tokens.length) {
final Token token = tokens[upto++];
// TODO: can we just capture/restoreState so
// we get all attrs...?
clearAttributes();
termAtt.setEmpty();
termAtt.append(token.toString());
posIncrAtt.setPositionIncrement(token.getPositionIncrement());
posLengthAtt.setPositionLength(token.getPositionLength());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
payloadAtt.setPayload(token.getPayload());
return true;
} else {
return false;
}
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockHoleInjectingTokenFilter.java
@Override
public void reset() throws IOException {
super.reset();
random = new Random(randomSeed);
maxPos = -1;
pos = -1;
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockHoleInjectingTokenFilter.java
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
final int posInc = posIncAtt.getPositionIncrement();
int nextPos = pos + posInc;
// Carefully inject a hole only where it won't mess up
// the graph:
if (posInc > 0 && maxPos <= nextPos && random.nextInt(5) == 3) {
final int holeSize = _TestUtil.nextInt(random, 1, 5);
posIncAtt.setPositionIncrement(posInc + holeSize);
nextPos += holeSize;
}
pos = nextPos;
maxPos = Math.max(maxPos, pos + posLenAtt.getPositionLength());
return true;
} else {
return false;
}
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockTokenFilter.java
@Override
public boolean incrementToken() throws IOException {
// TODO: fix me when posInc=false, to work like FilteringTokenFilter in that case and not return
// initial token with posInc=0 ever
// return the first non-stop word found
int skippedPositions = 0;
while (input.incrementToken()) {
if (!filter.run(termAtt.buffer(), 0, termAtt.length())) {
if (enablePositionIncrements) {
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
}
return true;
}
skippedPositions += posIncrAtt.getPositionIncrement();
}
// reached EOS -- return false
return false;
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/EmptyTokenizer.java
@Override
public boolean incrementToken() throws IOException {
return false;
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset,
boolean offsetsAreCorrect) throws IOException {
assertNotNull(output);
CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = null;
if (startOffsets != null || endOffsets != null || finalOffset != null) {
assertTrue("has no OffsetAttribute", ts.hasAttribute(OffsetAttribute.class));
offsetAtt = ts.getAttribute(OffsetAttribute.class);
}
TypeAttribute typeAtt = null;
if (types != null) {
assertTrue("has no TypeAttribute", ts.hasAttribute(TypeAttribute.class));
typeAtt = ts.getAttribute(TypeAttribute.class);
}
PositionIncrementAttribute posIncrAtt = null;
if (posIncrements != null) {
assertTrue("has no PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class));
posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class);
}
PositionLengthAttribute posLengthAtt = null;
if (posLengths != null) {
assertTrue("has no PositionLengthAttribute", ts.hasAttribute(PositionLengthAttribute.class));
posLengthAtt = ts.getAttribute(PositionLengthAttribute.class);
}
// Maps position to the start/end offset:
final Map<Integer,Integer> posToStartOffset = new HashMap<Integer,Integer>();
final Map<Integer,Integer> posToEndOffset = new HashMap<Integer,Integer>();
ts.reset();
int pos = -1;
int lastStartOffset = 0;
for (int i = 0; i < output.length; i++) {
// extra safety to enforce that the state is not preserved, and also assign bogus values
ts.clearAttributes();
termAtt.setEmpty().append("bogusTerm");
if (offsetAtt != null) offsetAtt.setOffset(14584724,24683243);
if (typeAtt != null) typeAtt.setType("bogusType");
if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
if (posLengthAtt != null) posLengthAtt.setPositionLength(45987653);
checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before
assertTrue("token "+i+" does not exist", ts.incrementToken());
assertTrue("clearAttributes() was not called correctly in TokenStream chain", checkClearAtt.getAndResetClearCalled());
assertEquals("term "+i, output[i], termAtt.toString());
if (startOffsets != null)
assertEquals("startOffset "+i, startOffsets[i], offsetAtt.startOffset());
if (endOffsets != null)
assertEquals("endOffset "+i, endOffsets[i], offsetAtt.endOffset());
if (types != null)
assertEquals("type "+i, types[i], typeAtt.type());
if (posIncrements != null)
assertEquals("posIncrement "+i, posIncrements[i], posIncrAtt.getPositionIncrement());
if (posLengths != null)
assertEquals("posLength "+i, posLengths[i], posLengthAtt.getPositionLength());
// we can enforce some basic things about a few attributes even if the caller doesn't check:
if (offsetAtt != null) {
final int startOffset = offsetAtt.startOffset();
final int endOffset = offsetAtt.endOffset();
assertTrue("startOffset must be >= 0", startOffset >= 0);
assertTrue("endOffset must be >= 0", endOffset >= 0);
assertTrue("endOffset must be >= startOffset, got startOffset=" + startOffset + ",endOffset=" + endOffset,
endOffset >= startOffset);
if (finalOffset != null) {
assertTrue("startOffset must be <= finalOffset", startOffset <= finalOffset.intValue());
assertTrue("endOffset must be <= finalOffset: got endOffset=" + endOffset + " vs finalOffset=" + finalOffset.intValue(),
endOffset <= finalOffset.intValue());
}
if (offsetsAreCorrect) {
assertTrue("offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset, offsetAtt.startOffset() >= lastStartOffset);
lastStartOffset = offsetAtt.startOffset();
}
if (offsetsAreCorrect && posLengthAtt != null && posIncrAtt != null) {
// Validate offset consistency in the graph, ie
// all tokens leaving from a certain pos have the
// same startOffset, and all tokens arriving to a
// certain pos have the same endOffset:
final int posInc = posIncrAtt.getPositionIncrement();
pos += posInc;
final int posLength = posLengthAtt.getPositionLength();
if (!posToStartOffset.containsKey(pos)) {
// First time we've seen a token leaving from this position:
posToStartOffset.put(pos, startOffset);
//System.out.println(" + s " + pos + " -> " + startOffset);
} else {
// We've seen a token leaving from this position
// before; verify the startOffset is the same:
//System.out.println(" + vs " + pos + " -> " + startOffset);
assertEquals("pos=" + pos + " posLen=" + posLength + " token=" + termAtt, posToStartOffset.get(pos).intValue(), startOffset);
}
final int endPos = pos + posLength;
if (!posToEndOffset.containsKey(endPos)) {
// First time we've seen a token arriving to this position:
posToEndOffset.put(endPos, endOffset);
//System.out.println(" + e " + endPos + " -> " + endOffset);
} else {
// We've seen a token arriving to this position
// before; verify the endOffset is the same:
//System.out.println(" + ve " + endPos + " -> " + endOffset);
assertEquals("pos=" + pos + " posLen=" + posLength + " token=" + termAtt, posToEndOffset.get(endPos).intValue(), endOffset);
}
}
}
if (posIncrAtt != null) {
if (i == 0) {
assertTrue("first posIncrement must be >= 1", posIncrAtt.getPositionIncrement() >= 1);
} else {
assertTrue("posIncrement must be >= 0", posIncrAtt.getPositionIncrement() >= 0);
}
}
if (posLengthAtt != null) {
assertTrue("posLength must be >= 1", posLengthAtt.getPositionLength() >= 1);
}
}
assertFalse("TokenStream has more tokens than expected (expected count=" + output.length + ")", ts.incrementToken());
ts.end();
if (finalOffset != null) {
assertEquals("finalOffset ", finalOffset.intValue(), offsetAtt.endOffset());
}
if (offsetAtt != null) {
assertTrue("finalOffset must be >= 0", offsetAtt.endOffset() >= 0);
}
ts.close();
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset) throws IOException {
assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, true);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], Integer finalOffset) throws IOException {
assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, null, finalOffset);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, null, null);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertTokenStreamContents(TokenStream ts, String[] output) throws IOException {
assertTokenStreamContents(ts, output, null, null, null, null, null, null);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertTokenStreamContents(TokenStream ts, String[] output, String[] types) throws IOException {
assertTokenStreamContents(ts, output, null, null, types, null, null, null);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertTokenStreamContents(TokenStream ts, String[] output, int[] posIncrements) throws IOException {
assertTokenStreamContents(ts, output, null, null, null, posIncrements, null, null);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[]) throws IOException {
assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null, null, null);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], Integer finalOffset) throws IOException {
assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, null, null, finalOffset);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, null, null);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements, Integer finalOffset) throws IOException {
assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, null, finalOffset);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements, int[] posLengths, Integer finalOffset) throws IOException {
assertTokenStreamContents(ts, output, startOffsets, endOffsets, null, posIncrements, posLengths, finalOffset);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, null, input.length());
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[]) throws IOException {
assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length());
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], boolean offsetsAreCorrect) throws IOException {
assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length(), offsetsAreCorrect);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertAnalyzesTo(Analyzer a, String input, String[] output) throws IOException {
assertAnalyzesTo(a, input, output, null, null, null, null, null);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, String[] types) throws IOException {
assertAnalyzesTo(a, input, output, null, null, types, null, null);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int[] posIncrements) throws IOException {
assertAnalyzesTo(a, input, output, null, null, null, posIncrements, null);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertAnalyzesToPositions(Analyzer a, String input, String[] output, int[] posIncrements, int[] posLengths) throws IOException {
assertAnalyzesTo(a, input, output, null, null, null, posIncrements, posLengths);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[]) throws IOException {
assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, null, null);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
assertAnalyzesTo(a, input, output, startOffsets, endOffsets, null, posIncrements, null);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, null, input.length());
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output) throws IOException {
assertAnalyzesToReuse(a, input, output, null, null, null, null);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, String[] types) throws IOException {
assertAnalyzesToReuse(a, input, output, null, null, types, null);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int[] posIncrements) throws IOException {
assertAnalyzesToReuse(a, input, output, null, null, null, posIncrements);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[]) throws IOException {
assertAnalyzesToReuse(a, input, output, startOffsets, endOffsets, null, null);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], int[] posIncrements) throws IOException {
assertAnalyzesToReuse(a, input, output, startOffsets, endOffsets, null, posIncrements);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void checkOneTerm(Analyzer a, final String input, final String expected) throws IOException {
assertAnalyzesTo(a, input, new String[]{expected});
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void checkOneTermReuse(Analyzer a, final String input, final String expected) throws IOException {
assertAnalyzesToReuse(a, input, new String[]{expected});
}
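A sketch of how these helpers are typically called from a test; the class name and expected tokens are illustrative, and MockAnalyzer is assumed to use its defaults (whitespace tokenizer plus lowercase filter):
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockAnalyzer;

public class SimpleAnalysisSketch extends BaseTokenStreamTestCase {
  public void testWhitespaceLowercase() throws Exception {
    Analyzer a = new MockAnalyzer(new Random(7));
    // terms only
    assertAnalyzesTo(a, "Quick Brown FOX", new String[] {"quick", "brown", "fox"});
    // terms plus start/end offsets; the final offset is also checked against input.length()
    assertAnalyzesTo(a, "Quick Brown FOX",
        new String[] {"quick", "brown", "fox"},
        new int[] {0, 6, 12},
        new int[] {5, 11, 15});
    // single-term convenience wrappers
    checkOneTerm(a, "LUCENE", "lucene");
    checkOneTermReuse(a, "LUCENE", "lucene");
  }
}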
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void checkRandomData(Random random, Analyzer a, int iterations) throws IOException {
checkRandomData(random, a, iterations, 20, false, true);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength) throws IOException {
checkRandomData(random, a, iterations, maxWordLength, false, true);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void checkRandomData(Random random, Analyzer a, int iterations, boolean simple) throws IOException {
checkRandomData(random, a, iterations, 20, simple, true);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean simple) throws IOException {
checkRandomData(random, a, iterations, maxWordLength, simple, true);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean simple, boolean offsetsAreCorrect) throws IOException {
long seed = random.nextLong();
boolean useCharFilter = random.nextBoolean();
checkRandomData(new Random(seed), a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect);
// now test with multiple threads: note we do the EXACT same thing we did before in each thread,
// so this should only really fail from another thread if it's an actual thread problem
int numThreads = _TestUtil.nextInt(random, 2, 4);
AnalysisThread threads[] = new AnalysisThread[numThreads];
for (int i = 0; i < threads.length; i++) {
threads[i] = new AnalysisThread(seed, a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect);
}
for (int i = 0; i < threads.length; i++) {
threads[i].start();
}
for (int i = 0; i < threads.length; i++) {
try {
threads[i].join();
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
for (int i = 0; i < threads.length; i++) {
if (threads[i].failed) {
throw new RuntimeException("some thread(s) failed");
}
}
}
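A brief usage sketch for the random-data check: hammer an analyzer with random (and occasionally real line-file) text, which also exercises the multi-threaded re-run above. The seed and iteration counts below are arbitrary:
import java.util.Random;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockAnalyzer;

public class RandomBlastSketch extends BaseTokenStreamTestCase {
  public void testRandomStrings() throws Exception {
    Random random = new Random(13);
    // 1000 iterations with the default max word length of 20
    checkRandomData(random, new MockAnalyzer(random), 1000);
    // longer "words" put more stress on offsets and surrogate handling
    checkRandomData(random, new MockAnalyzer(random), 200, 8192);
  }
}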
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
private static void checkRandomData(Random random, Analyzer a, int iterations, int maxWordLength, boolean useCharFilter, boolean simple, boolean offsetsAreCorrect) throws IOException {
final LineFileDocs docs = new LineFileDocs(random);
try {
for (int i = 0; i < iterations; i++) {
String text;
if (random.nextInt(10) == 7) {
// real data from linedocs
text = docs.nextDoc().get("body");
if (text.length() > maxWordLength) {
// Take a random slice from the text...:
int startPos = random.nextInt(text.length() - maxWordLength);
if (startPos > 0 && Character.isLowSurrogate(text.charAt(startPos))) {
// Take care not to split up a surrogate pair:
startPos--;
assert Character.isHighSurrogate(text.charAt(startPos));
}
int endPos = startPos + maxWordLength - 1;
if (Character.isHighSurrogate(text.charAt(endPos))) {
// Take care not to split up a surrogate pair:
endPos--;
}
text = text.substring(startPos, 1+endPos);
}
} else {
// synthetic
text = randomAnalysisString(random, maxWordLength, simple);
}
try {
checkAnalysisConsistency(random, a, useCharFilter, text, offsetsAreCorrect);
} catch (Throwable t) {
// TODO: really we should pass a random seed to
// checkAnalysisConsistency then print it here too:
System.err.println("TEST FAIL: useCharFilter=" + useCharFilter + " text='" + escape(text) + "'");
Rethrow.rethrow(t);
}
}
} finally {
IOUtils.closeWhileHandlingException(docs);
}
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void checkAnalysisConsistency(Random random, Analyzer a, boolean useCharFilter, String text) throws IOException {
checkAnalysisConsistency(random, a, useCharFilter, text, true);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
public static void checkAnalysisConsistency(Random random, Analyzer a, boolean useCharFilter, String text, boolean offsetsAreCorrect) throws IOException {
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": NOTE: BaseTokenStreamTestCase: get first token stream now text=" + text);
}
int remainder = random.nextInt(10);
Reader reader = new StringReader(text);
TokenStream ts = a.tokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader);
assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = ts.hasAttribute(OffsetAttribute.class) ? ts.getAttribute(OffsetAttribute.class) : null;
PositionIncrementAttribute posIncAtt = ts.hasAttribute(PositionIncrementAttribute.class) ? ts.getAttribute(PositionIncrementAttribute.class) : null;
PositionLengthAttribute posLengthAtt = ts.hasAttribute(PositionLengthAttribute.class) ? ts.getAttribute(PositionLengthAttribute.class) : null;
TypeAttribute typeAtt = ts.hasAttribute(TypeAttribute.class) ? ts.getAttribute(TypeAttribute.class) : null;
List<String> tokens = new ArrayList<String>();
List<String> types = new ArrayList<String>();
List<Integer> positions = new ArrayList<Integer>();
List<Integer> positionLengths = new ArrayList<Integer>();
List<Integer> startOffsets = new ArrayList<Integer>();
List<Integer> endOffsets = new ArrayList<Integer>();
ts.reset();
// First pass: save away "correct" tokens
while (ts.incrementToken()) {
tokens.add(termAtt.toString());
if (typeAtt != null) types.add(typeAtt.type());
if (posIncAtt != null) positions.add(posIncAtt.getPositionIncrement());
if (posLengthAtt != null) positionLengths.add(posLengthAtt.getPositionLength());
if (offsetAtt != null) {
startOffsets.add(offsetAtt.startOffset());
endOffsets.add(offsetAtt.endOffset());
}
}
ts.end();
ts.close();
// verify reusing is "reproducible" and also get the normal tokenstream sanity checks
if (!tokens.isEmpty()) {
// KWTokenizer (for example) can produce a token
// even when input is length 0:
if (text.length() != 0) {
// (Optional) second pass: do something evil:
final int evilness = random.nextInt(50);
if (evilness == 17) {
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": NOTE: BaseTokenStreamTestCase: re-run analysis w/ exception");
}
// Throw an errant exception from the Reader:
MockReaderWrapper evilReader = new MockReaderWrapper(random, new StringReader(text));
evilReader.throwExcAfterChar(random.nextInt(text.length()+1));
reader = evilReader;
try {
// NOTE: some Tokenizers go and read characters
// when you call .setReader(Reader), eg
// PatternTokenizer. This is a bit
// iffy... (really, they should only
// pull from the Reader when you call
// .incrementToken(), I think?), but we
// currently allow it, so, we must call
// a.tokenStream inside the try since we may
// hit the exc on init:
ts = a.tokenStream("dummy", useCharFilter ? new MockCharFilter(evilReader, remainder) : evilReader);
ts.reset();
while (ts.incrementToken());
fail("did not hit exception");
} catch (RuntimeException re) {
assertTrue(MockReaderWrapper.isMyEvilException(re));
}
try {
ts.end();
} catch (AssertionError ae) {
// Catch & ignore MockTokenizer's
// anger...
if ("end() called before incrementToken() returned false!".equals(ae.getMessage())) {
// OK
} else {
throw ae;
}
}
ts.close();
} else if (evilness == 7) {
// Only consume a subset of the tokens:
final int numTokensToRead = random.nextInt(tokens.size());
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": NOTE: BaseTokenStreamTestCase: re-run analysis, only consuming " + numTokensToRead + " of " + tokens.size() + " tokens");
}
reader = new StringReader(text);
ts = a.tokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader);
ts.reset();
for(int tokenCount=0;tokenCount<numTokensToRead;tokenCount++) {
assertTrue(ts.incrementToken());
}
try {
ts.end();
} catch (AssertionError ae) {
// Catch & ignore MockTokenizer's
// anger...
if ("end() called before incrementToken() returned false!".equals(ae.getMessage())) {
// OK
} else {
throw ae;
}
}
ts.close();
}
}
}
// Final pass: verify clean tokenization matches
// results from first pass:
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": NOTE: BaseTokenStreamTestCase: re-run analysis; " + tokens.size() + " tokens");
}
reader = new StringReader(text);
if (random.nextInt(30) == 7) {
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": NOTE: BaseTokenStreamTestCase: using spoon-feed reader");
}
reader = new MockReaderWrapper(random, reader);
}
ts = a.tokenStream("dummy", useCharFilter ? new MockCharFilter(reader, remainder) : reader);
if (typeAtt != null && posIncAtt != null && posLengthAtt != null && offsetAtt != null) {
// offset + pos + posLength + type
assertTokenStreamContents(ts,
tokens.toArray(new String[tokens.size()]),
toIntArray(startOffsets),
toIntArray(endOffsets),
types.toArray(new String[types.size()]),
toIntArray(positions),
toIntArray(positionLengths),
text.length(),
offsetsAreCorrect);
} else if (typeAtt != null && posIncAtt != null && offsetAtt != null) {
// offset + pos + type
assertTokenStreamContents(ts,
tokens.toArray(new String[tokens.size()]),
toIntArray(startOffsets),
toIntArray(endOffsets),
types.toArray(new String[types.size()]),
toIntArray(positions),
null,
text.length(),
offsetsAreCorrect);
} else if (posIncAtt != null && posLengthAtt != null && offsetAtt != null) {
// offset + pos + posLength
assertTokenStreamContents(ts,
tokens.toArray(new String[tokens.size()]),
toIntArray(startOffsets),
toIntArray(endOffsets),
null,
toIntArray(positions),
toIntArray(positionLengths),
text.length(),
offsetsAreCorrect);
} else if (posIncAtt != null && offsetAtt != null) {
// offset + pos
assertTokenStreamContents(ts,
tokens.toArray(new String[tokens.size()]),
toIntArray(startOffsets),
toIntArray(endOffsets),
null,
toIntArray(positions),
null,
text.length(),
offsetsAreCorrect);
} else if (offsetAtt != null) {
// offset
assertTokenStreamContents(ts,
tokens.toArray(new String[tokens.size()]),
toIntArray(startOffsets),
toIntArray(endOffsets),
null,
null,
null,
text.length(),
offsetsAreCorrect);
} else {
// terms only
assertTokenStreamContents(ts,
tokens.toArray(new String[tokens.size()]));
}
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
protected String toDot(Analyzer a, String inputText) throws IOException {
final StringWriter sw = new StringWriter();
final TokenStream ts = a.tokenStream("field", new StringReader(inputText));
ts.reset();
new TokenStreamToDot(inputText, ts, new PrintWriter(sw)).toDot();
return sw.toString();
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
protected void toDotFile(Analyzer a, String inputText, String localFileName) throws IOException {
Writer w = new OutputStreamWriter(new FileOutputStream(localFileName), "UTF-8");
final TokenStream ts = a.tokenStream("field", new StringReader(inputText));
ts.reset();
new TokenStreamToDot(inputText, ts, new PrintWriter(w)).toDot();
w.close();
}
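A small sketch of dumping a token graph to GraphViz; the output file name is hypothetical, and the result can be rendered with e.g. "dot -Tpng tokens.dot -o tokens.png":
import java.util.Random;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockAnalyzer;

public class TokenGraphDotSketch extends BaseTokenStreamTestCase {
  public void testDumpTokenGraph() throws Exception {
    // toDot() returns the DOT source as a String; toDotFile() writes it to disk
    String dot = toDot(new MockAnalyzer(new Random(3)), "fast wi fi network is down");
    System.out.println(dot);
    toDotFile(new MockAnalyzer(new Random(3)), "fast wi fi network is down", "tokens.dot");
  }
}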
// in lucene/test-framework/src/java/org/apache/lucene/analysis/TokenStreamToDot.java
public void toDot() throws IOException {
in.reset();
writeHeader();
// TODO: is there some way to tell dot that it should
// make the "main path" a straight line and have the
// non-sausage arcs not affect node placement...
int pos = -1;
int lastEndPos = -1;
while (in.incrementToken()) {
final boolean isFirst = pos == -1;
int posInc = posIncAtt.getPositionIncrement();
if (isFirst && posInc == 0) {
// TODO: hmm are TS's still allowed to do this...?
System.err.println("WARNING: first posInc was 0; correcting to 1");
posInc = 1;
}
if (posInc > 0) {
// New node:
pos += posInc;
writeNode(pos, Integer.toString(pos));
}
if (posInc > 1) {
// Gap!
writeArc(lastEndPos, pos, null, "dotted");
}
if (isFirst) {
writeNode(-1, null);
writeArc(-1, pos, null, null);
}
String arcLabel = termAtt.toString();
if (offsetAtt != null) {
final int startOffset = offsetAtt.startOffset();
final int endOffset = offsetAtt.endOffset();
//System.out.println("start=" + startOffset + " end=" + endOffset + " len=" + inputText.length());
if (inputText != null) {
arcLabel += " / " + inputText.substring(startOffset, endOffset);
} else {
arcLabel += " / " + startOffset + "-" + endOffset;
}
}
writeArc(pos, pos + posLengthAtt.getPositionLength(), arcLabel, null);
lastEndPos = pos + posLengthAtt.getPositionLength();
}
in.end();
if (lastEndPos != -1) {
// TODO: should we output any final text (from end
// offsets) on this arc...?
writeNode(-2, null);
writeArc(lastEndPos, -2, null, null);
}
writeTrailer();
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockCharFilter.java
@Override
public void close() throws IOException {
in.close();
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockCharFilter.java
@Override
public int read() throws IOException {
// we have a buffered character: add an offset correction and return it
if (bufferedCh >= 0) {
int ch = bufferedCh;
bufferedCh = -1;
currentOffset++;
addOffCorrectMap(currentOffset, delta-1);
delta--;
return ch;
}
// otherwise actually read one
int ch = in.read();
if (ch < 0)
return ch;
currentOffset++;
if ((ch % 10) != remainder || Character.isHighSurrogate((char)ch) || Character.isLowSurrogate((char)ch)) {
return ch;
}
// we will double this character, so buffer it.
bufferedCh = ch;
return ch;
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockCharFilter.java
@Override
public int read(char[] cbuf, int off, int len) throws IOException {
int numRead = 0;
for (int i = off; i < off + len; i++) {
int c = read();
if (c == -1) break;
cbuf[i] = (char) c;
numRead++;
}
return numRead == 0 ? -1 : numRead;
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java
private void assertMatches(IndexSearcher searcher, Query query, Sort sort,
String expectedResult) throws IOException {
ScoreDoc[] result = searcher.search(query, null, 1000, sort).scoreDocs;
StringBuilder buff = new StringBuilder(10);
int n = result.length;
for (int i = 0 ; i < n ; ++i) {
Document doc = searcher.doc(result[i].doc);
IndexableField[] v = doc.getFields("tracer");
for (int j = 0 ; j < v.length ; ++j) {
buff.append(v[j].stringValue());
}
}
assertEquals(expectedResult, buff.toString());
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
@Override
public boolean incrementToken() throws IOException {
if (!input.incrementToken()) {
return false;
}
int startOffset = 0;
int endOffset = 0;
int posLen = 0;
if (posIncAtt != null) {
pos += posIncAtt.getPositionIncrement();
if (pos == -1) {
throw new IllegalStateException("first posInc must be > 0");
}
}
// System.out.println(" got token=" + termAtt + " pos=" + pos);
if (offsetAtt != null) {
startOffset = offsetAtt.startOffset();
endOffset = offsetAtt.endOffset();
if (startOffset < 0) {
throw new IllegalStateException(name + ": startOffset=" + startOffset + " is < 0");
}
if (endOffset < 0) {
throw new IllegalStateException(name + ": endOffset=" + endOffset + " is < 0");
}
if (endOffset < startOffset) {
throw new IllegalStateException(name + ": startOffset=" + startOffset + " is > endOffset=" + endOffset + " pos=" + pos + "; token=" + termAtt);
}
if (offsetsAreCorrect && offsetAtt.startOffset() < lastStartOffset) {
throw new IllegalStateException(name + ": offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset=" + lastStartOffset);
}
lastStartOffset = offsetAtt.startOffset();
}
posLen = posLenAtt == null ? 1 : posLenAtt.getPositionLength();
if (offsetAtt != null && posIncAtt != null && offsetsAreCorrect) {
if (!posToStartOffset.containsKey(pos)) {
// First time we've seen a token leaving from this position:
posToStartOffset.put(pos, startOffset);
//System.out.println(" + s " + pos + " -> " + startOffset);
} else {
// We've seen a token leaving from this position
// before; verify the startOffset is the same:
//System.out.println(" + vs " + pos + " -> " + startOffset);
final int oldStartOffset = posToStartOffset.get(pos);
if (oldStartOffset != startOffset) {
throw new IllegalStateException(name + ": inconsistent startOffset at pos=" + pos + ": " + oldStartOffset + " vs " + startOffset + "; token=" + termAtt);
}
}
final int endPos = pos + posLen;
if (!posToEndOffset.containsKey(endPos)) {
// First time we've seen a token arriving to this position:
posToEndOffset.put(endPos, endOffset);
//System.out.println(" + e " + endPos + " -> " + endOffset);
} else {
// We've seen a token arriving to this position
// before; verify the endOffset is the same:
//System.out.println(" + ve " + endPos + " -> " + endOffset);
final int oldEndOffset = posToEndOffset.get(endPos);
if (oldEndOffset != endOffset) {
throw new IllegalStateException(name + ": inconsistent endOffset at pos=" + endPos + ": " + oldEndOffset + " vs " + endOffset + "; token=" + termAtt);
}
}
}
return true;
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
@Override
public void end() throws IOException {
super.end();
// TODO: what else to validate
// TODO: check that endOffset is >= max(endOffset)
// we've seen
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/ValidatingTokenFilter.java
@Override
public void reset() throws IOException {
super.reset();
pos = -1;
posToStartOffset.clear();
posToEndOffset.clear();
lastStartOffset = 0;
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockFixedLengthPayloadFilter.java
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
random.nextBytes(bytes);
payloadAtt.setPayload(payload);
return true;
} else {
return false;
}
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockReaderWrapper.java
@Override
public void close() throws IOException {
in.close();
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockReaderWrapper.java
@Override
public int read(char[] cbuf, int off, int len) throws IOException {
if (throwExcNext || (excAtChar != -1 && readSoFar >= excAtChar)) {
throw new RuntimeException("fake exception now!");
}
final int read;
final int realLen;
if (len == 1) {
realLen = 1;
} else {
// Spoon-feed: intentionally maybe return less than
// the consumer asked for
realLen = _TestUtil.nextInt(random, 1, len);
}
if (excAtChar != -1) {
final int left = excAtChar - readSoFar;
assert left != 0;
read = in.read(cbuf, off, Math.min(realLen, left));
assert read != -1;
readSoFar += read;
} else {
read = in.read(cbuf, off, realLen);
}
return read;
}
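A minimal sketch of the wrapper in isolation: reads are spoon-fed (possibly fewer chars than requested) and a RuntimeException is injected once the chosen number of characters has been read, mirroring the "evil reader" use in checkAnalysisConsistency above. The input string and seed are arbitrary:
import java.io.StringReader;
import java.util.Random;
import org.apache.lucene.analysis.MockReaderWrapper;

public class EvilReaderSketch {
  public static void main(String[] args) throws Exception {
    MockReaderWrapper evil = new MockReaderWrapper(new Random(5), new StringReader("hello world"));
    evil.throwExcAfterChar(5); // blow up after the 5th character has been read
    char[] buf = new char[4];
    try {
      while (evil.read(buf, 0, buf.length) != -1) {
        // each read may return fewer chars than requested (spoon-feeding)
      }
    } catch (RuntimeException e) {
      System.out.println("got injected exception: " + e.getMessage());
    } finally {
      evil.close();
    }
  }
}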
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockGraphTokenFilter.java
@Override
protected void afterPosition() throws IOException {
if (DEBUG) {
System.out.println("MockGraphTF.afterPos");
}
if (random.nextInt(7) == 5) {
final int posLength = _TestUtil.nextInt(random, 1, 5);
if (DEBUG) {
System.out.println(" do insert! posLen=" + posLength);
}
final Position posEndData = positions.get(outputPos + posLength);
// Look ahead as needed until we figure out the right
// endOffset:
while(!end && posEndData.endOffset == -1 && inputPos <= (outputPos + posLength)) {
if (!peekToken()) {
break;
}
}
if (posEndData.endOffset != -1) {
// Notify super class that we are injecting a token:
insertToken();
clearAttributes();
posLenAtt.setPositionLength(posLength);
termAtt.append(_TestUtil.randomUnicodeString(random));
posIncAtt.setPositionIncrement(0);
offsetAtt.setOffset(positions.get(outputPos).startOffset,
posEndData.endOffset);
if (DEBUG) {
System.out.println(" inject: outputPos=" + outputPos + " startOffset=" + offsetAtt.startOffset() +
" endOffset=" + offsetAtt.endOffset() +
" posLength=" + posLenAtt.getPositionLength());
}
// TODO: set TypeAtt too?
} else {
// Either 1) the tokens ended before our posLength,
// or 2) our posLength ended inside a hole from the
// input. In either case we just skip the inserted
// token.
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockGraphTokenFilter.java
@Override
public void reset() throws IOException {
super.reset();
// NOTE: must be "deterministically random" because
// BaseTokenStreamTestCase pulls tokens twice on the
// same input and asserts they are the same:
this.random = new Random(seed);
}
// in lucene/test-framework/src/java/org/apache/lucene/analysis/MockGraphTokenFilter.java
@Override
public boolean incrementToken() throws IOException {
if (DEBUG) {
System.out.println("MockGraphTF.incr inputPos=" + inputPos + " outputPos=" + outputPos);
}
return nextToken();
}
// in lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
public static void checkExplanations (final Query q, final IndexSearcher s) throws IOException {
CheckHits.checkExplanations(q, null, s, true);
}
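A rough sketch of running the QueryUtils sanity checks against a tiny in-memory index; the field name, text and query are made up, and the index setup below uses the stock Lucene 4.x API (TextField, Version.LUCENE_40, RAMDirectory) rather than the randomized test fixtures:
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class QueryUtilsSketch {
  public static void run() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_40, new MockAnalyzer(new Random(11))));
    Document doc = new Document();
    doc.add(new TextField("body", "quick brown fox", Field.Store.NO));
    w.addDocument(doc);
    w.close();

    DirectoryReader r = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(r);
    Query q = new TermQuery(new Term("body", "fox"));
    // explanation and advance()/nextDoc() consistency checks from this class
    QueryUtils.checkExplanations(q, searcher);
    QueryUtils.checkSkipTo(q, searcher);
    QueryUtils.checkFirstSkipTo(q, searcher);
    r.close();
    dir.close();
  }
}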
// in lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
public static void purgeFieldCache(IndexReader r) throws IOException {
// this is just a hack, to get an atomic reader that contains all subreaders for insanity checks
FieldCache.DEFAULT.purge(SlowCompositeReaderWrapper.wrap(r));
}
// in lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
public static IndexSearcher wrapUnderlyingReader(Random random, final IndexSearcher s, final int edge)
throws IOException {
IndexReader r = s.getIndexReader();
// we can't put deleted docs before the nested reader, because
// it will throw off the docIds
IndexReader[] readers = new IndexReader[] {
edge < 0 ? r : emptyReaders[0],
emptyReaders[0],
new FCInvisibleMultiReader(edge < 0 ? emptyReaders[4] : emptyReaders[0],
emptyReaders[0],
0 == edge ? r : emptyReaders[0]),
0 < edge ? emptyReaders[0] : emptyReaders[7],
emptyReaders[0],
new FCInvisibleMultiReader(0 < edge ? emptyReaders[0] : emptyReaders[5],
emptyReaders[0],
0 < edge ? r : emptyReaders[0])
};
IndexSearcher out = LuceneTestCase.newSearcher(new FCInvisibleMultiReader(readers));
out.setSimilarity(s.getSimilarity());
return out;
}
// in lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
private static DirectoryReader makeEmptyIndex(Random random, final int numDeletedDocs)
throws IOException {
Directory d = new MockDirectoryWrapper(random, new RAMDirectory());
IndexWriter w = new IndexWriter(d, new IndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)));
for (int i = 0; i < numDeletedDocs; i++) {
w.addDocument(new Document());
}
w.commit();
w.deleteDocuments( new MatchAllDocsQuery() );
_TestUtil.keepFullyDeletedSegments(w);
w.commit();
if (0 < numDeletedDocs)
Assert.assertTrue("writer has no deletions", w.hasDeletions());
Assert.assertEquals("writer is missing some deleted docs",
numDeletedDocs, w.maxDoc());
Assert.assertEquals("writer has non-deleted docs",
0, w.numDocs());
w.close();
DirectoryReader r = DirectoryReader.open(d);
Assert.assertEquals("reader has wrong number of deleted docs",
numDeletedDocs, r.numDeletedDocs());
return r;
}
// in lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
public static void checkSkipTo(final Query q, final IndexSearcher s) throws IOException {
//System.out.println("Checking "+q);
final AtomicReaderContext[] readerContextArray = s.getTopReaderContext().leaves();
if (s.createNormalizedWeight(q).scoresDocsOutOfOrder()) return; // in this case order of skipTo() might differ from that of next().
final int skip_op = 0;
final int next_op = 1;
final int orders [][] = {
{next_op},
{skip_op},
{skip_op, next_op},
{next_op, skip_op},
{skip_op, skip_op, next_op, next_op},
{next_op, next_op, skip_op, skip_op},
{skip_op, skip_op, skip_op, next_op, next_op},
};
for (int k = 0; k < orders.length; k++) {
final int order[] = orders[k];
// System.out.print("Order:");for (int i = 0; i < order.length; i++)
// System.out.print(order[i]==skip_op ? " skip()":" next()");
// System.out.println();
final int opidx[] = { 0 };
final int lastDoc[] = {-1};
// FUTURE: ensure scorer.doc()==-1
final float maxDiff = 1e-5f;
final AtomicReader lastReader[] = {null};
s.search(q, new Collector() {
private Scorer sc;
private Scorer scorer;
private int leafPtr;
@Override
public void setScorer(Scorer scorer) throws IOException {
this.sc = scorer;
}
@Override
public void collect(int doc) throws IOException {
float score = sc.score();
lastDoc[0] = doc;
try {
if (scorer == null) {
Weight w = s.createNormalizedWeight(q);
AtomicReaderContext context = readerContextArray[leafPtr];
scorer = w.scorer(context, true, false, context.reader().getLiveDocs());
}
int op = order[(opidx[0]++) % order.length];
// System.out.println(op==skip_op ?
// "skip("+(sdoc[0]+1)+")":"next()");
boolean more = op == skip_op ? scorer.advance(scorer.docID() + 1) != DocIdSetIterator.NO_MORE_DOCS
: scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
int scorerDoc = scorer.docID();
float scorerScore = scorer.score();
float scorerScore2 = scorer.score();
float scoreDiff = Math.abs(score - scorerScore);
float scorerDiff = Math.abs(scorerScore2 - scorerScore);
if (!more || doc != scorerDoc || scoreDiff > maxDiff
|| scorerDiff > maxDiff) {
StringBuilder sbord = new StringBuilder();
for (int i = 0; i < order.length; i++)
sbord.append(order[i] == skip_op ? " skip()" : " next()");
throw new RuntimeException("ERROR matching docs:" + "\n\t"
+ (doc != scorerDoc ? "--> " : "") + "doc=" + doc + ", scorerDoc=" + scorerDoc
+ "\n\t" + (!more ? "--> " : "") + "tscorer.more=" + more
+ "\n\t" + (scoreDiff > maxDiff ? "--> " : "")
+ "scorerScore=" + scorerScore + " scoreDiff=" + scoreDiff
+ " maxDiff=" + maxDiff + "\n\t"
+ (scorerDiff > maxDiff ? "--> " : "") + "scorerScore2="
+ scorerScore2 + " scorerDiff=" + scorerDiff
+ "\n\thitCollector.doc=" + doc + " score=" + score
+ "\n\t Scorer=" + scorer + "\n\t Query=" + q + " "
+ q.getClass().getName() + "\n\t Searcher=" + s
+ "\n\t Order=" + sbord + "\n\t Op="
+ (op == skip_op ? " skip()" : " next()"));
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
// confirm that skipping beyond the last doc, on the
// previous reader, hits NO_MORE_DOCS
if (lastReader[0] != null) {
final AtomicReader previousReader = lastReader[0];
IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader);
indexSearcher.setSimilarity(s.getSimilarity());
Weight w = indexSearcher.createNormalizedWeight(q);
AtomicReaderContext ctx = (AtomicReaderContext)indexSearcher.getTopReaderContext();
Scorer scorer = w.scorer(ctx, true, false, ctx.reader().getLiveDocs());
if (scorer != null) {
boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
}
leafPtr++;
}
lastReader[0] = context.reader();
assert readerContextArray[leafPtr].reader() == context.reader();
this.scorer = null;
lastDoc[0] = -1;
}
@Override
public boolean acceptsDocsOutOfOrder() {
return true;
}
});
if (lastReader[0] != null) {
// confirm that skipping beyond the last doc, on the
// previous reader, hits NO_MORE_DOCS
final AtomicReader previousReader = lastReader[0];
IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader, false);
indexSearcher.setSimilarity(s.getSimilarity());
Weight w = indexSearcher.createNormalizedWeight(q);
AtomicReaderContext ctx = previousReader.getTopReaderContext();
Scorer scorer = w.scorer(ctx, true, false, ctx.reader().getLiveDocs());
if (scorer != null) {
boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
}
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
@Override
public void setScorer(Scorer scorer) throws IOException {
this.sc = scorer;
}
// in lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
@Override
public void collect(int doc) throws IOException {
float score = sc.score();
lastDoc[0] = doc;
try {
if (scorer == null) {
Weight w = s.createNormalizedWeight(q);
AtomicReaderContext context = readerContextArray[leafPtr];
scorer = w.scorer(context, true, false, context.reader().getLiveDocs());
}
int op = order[(opidx[0]++) % order.length];
// System.out.println(op==skip_op ?
// "skip("+(sdoc[0]+1)+")":"next()");
boolean more = op == skip_op ? scorer.advance(scorer.docID() + 1) != DocIdSetIterator.NO_MORE_DOCS
: scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
int scorerDoc = scorer.docID();
float scorerScore = scorer.score();
float scorerScore2 = scorer.score();
float scoreDiff = Math.abs(score - scorerScore);
float scorerDiff = Math.abs(scorerScore2 - scorerScore);
if (!more || doc != scorerDoc || scoreDiff > maxDiff
|| scorerDiff > maxDiff) {
StringBuilder sbord = new StringBuilder();
for (int i = 0; i < order.length; i++)
sbord.append(order[i] == skip_op ? " skip()" : " next()");
throw new RuntimeException("ERROR matching docs:" + "\n\t"
+ (doc != scorerDoc ? "--> " : "") + "doc=" + doc + ", scorerDoc=" + scorerDoc
+ "\n\t" + (!more ? "--> " : "") + "tscorer.more=" + more
+ "\n\t" + (scoreDiff > maxDiff ? "--> " : "")
+ "scorerScore=" + scorerScore + " scoreDiff=" + scoreDiff
+ " maxDiff=" + maxDiff + "\n\t"
+ (scorerDiff > maxDiff ? "--> " : "") + "scorerScore2="
+ scorerScore2 + " scorerDiff=" + scorerDiff
+ "\n\thitCollector.doc=" + doc + " score=" + score
+ "\n\t Scorer=" + scorer + "\n\t Query=" + q + " "
+ q.getClass().getName() + "\n\t Searcher=" + s
+ "\n\t Order=" + sbord + "\n\t Op="
+ (op == skip_op ? " skip()" : " next()"));
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
// in lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
// confirm that skipping beyond the last doc, on the
// previous reader, hits NO_MORE_DOCS
if (lastReader[0] != null) {
final AtomicReader previousReader = lastReader[0];
IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader);
indexSearcher.setSimilarity(s.getSimilarity());
Weight w = indexSearcher.createNormalizedWeight(q);
AtomicReaderContext ctx = (AtomicReaderContext)indexSearcher.getTopReaderContext();
Scorer scorer = w.scorer(ctx, true, false, ctx.reader().getLiveDocs());
if (scorer != null) {
boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
}
leafPtr++;
}
lastReader[0] = context.reader();
assert readerContextArray[leafPtr].reader() == context.reader();
this.scorer = null;
lastDoc[0] = -1;
}
// in lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
public static void checkFirstSkipTo(final Query q, final IndexSearcher s) throws IOException {
//System.out.println("checkFirstSkipTo: "+q);
final float maxDiff = 1e-3f;
final int lastDoc[] = {-1};
final AtomicReader lastReader[] = {null};
final AtomicReaderContext[] context = s.getTopReaderContext().leaves();
s.search(q,new Collector() {
private Scorer scorer;
private int leafPtr;
private Bits liveDocs;
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
@Override
public void collect(int doc) throws IOException {
float score = scorer.score();
try {
long startMS = System.currentTimeMillis();
for (int i=lastDoc[0]+1; i<=doc; i++) {
Weight w = s.createNormalizedWeight(q);
Scorer scorer = w.scorer(context[leafPtr], true, false, liveDocs);
Assert.assertTrue("query collected "+doc+" but skipTo("+i+") says no more docs!",scorer.advance(i) != DocIdSetIterator.NO_MORE_DOCS);
Assert.assertEquals("query collected "+doc+" but skipTo("+i+") got to "+scorer.docID(),doc,scorer.docID());
float skipToScore = scorer.score();
Assert.assertEquals("unstable skipTo("+i+") score!",skipToScore,scorer.score(),maxDiff);
Assert.assertEquals("query assigned doc "+doc+" a score of <"+score+"> but skipTo("+i+") has <"+skipToScore+">!",score,skipToScore,maxDiff);
// Hurry things along if they are going slow (eg
// if you got SimpleText codec this will kick in):
if (i < doc && System.currentTimeMillis() - startMS > 5) {
i = doc-1;
}
}
lastDoc[0] = doc;
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
// confirm that skipping beyond the last doc, on the
// previous reader, hits NO_MORE_DOCS
if (lastReader[0] != null) {
final AtomicReader previousReader = lastReader[0];
IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader);
indexSearcher.setSimilarity(s.getSimilarity());
Weight w = indexSearcher.createNormalizedWeight(q);
Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), true, false, previousReader.getLiveDocs());
if (scorer != null) {
boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
}
leafPtr++;
}
lastReader[0] = context.reader();
lastDoc[0] = -1;
liveDocs = context.reader().getLiveDocs();
}
@Override
public boolean acceptsDocsOutOfOrder() {
return false;
}
});
if (lastReader[0] != null) {
// confirm that skipping beyond the last doc, on the
// previous reader, hits NO_MORE_DOCS
final AtomicReader previousReader = lastReader[0];
IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader);
indexSearcher.setSimilarity(s.getSimilarity());
Weight w = indexSearcher.createNormalizedWeight(q);
Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), true, false, previousReader.getLiveDocs());
if (scorer != null) {
boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
}
}
}
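For orientation, here is a minimal sketch of how a test typically drives these consistency checks end to end. It assumes a LuceneTestCase context (newDirectory(), newSearcher(), random()), omits imports like the rest of this listing, and the field name and query text are placeholders.
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new TextField("body", "hello world", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
w.close();
IndexSearcher searcher = newSearcher(r);
// exercises the next()/advance() consistency checks, including checkFirstSkipTo:
QueryUtils.check(random(), new TermQuery(new Term("body", "hello")), searcher);
r.close();
dir.close();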
// in lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
// in lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
Override
public void collect(int doc) throws IOException {
float score = scorer.score();
try {
long startMS = System.currentTimeMillis();
for (int i=lastDoc[0]+1; i<=doc; i++) {
Weight w = s.createNormalizedWeight(q);
Scorer scorer = w.scorer(context[leafPtr], true, false, liveDocs);
Assert.assertTrue("query collected "+doc+" but skipTo("+i+") says no more docs!",scorer.advance(i) != DocIdSetIterator.NO_MORE_DOCS);
Assert.assertEquals("query collected "+doc+" but skipTo("+i+") got to "+scorer.docID(),doc,scorer.docID());
float skipToScore = scorer.score();
Assert.assertEquals("unstable skipTo("+i+") score!",skipToScore,scorer.score(),maxDiff);
Assert.assertEquals("query assigned doc "+doc+" a score of <"+score+"> but skipTo("+i+") has <"+skipToScore+">!",score,skipToScore,maxDiff);
// Hurry things along if they are going slow (eg
// if you got SimpleText codec this will kick in):
if (i < doc && System.currentTimeMillis() - startMS > 5) {
i = doc-1;
}
}
lastDoc[0] = doc;
} catch (IOException e) {
throw new RuntimeException(e);
}
}
// in lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
Override
public void setNextReader(AtomicReaderContext context) throws IOException {
// confirm that skipping beyond the last doc, on the
// previous reader, hits NO_MORE_DOCS
if (lastReader[0] != null) {
final AtomicReader previousReader = lastReader[0];
IndexSearcher indexSearcher = LuceneTestCase.newSearcher(previousReader);
indexSearcher.setSimilarity(s.getSimilarity());
Weight w = indexSearcher.createNormalizedWeight(q);
Scorer scorer = w.scorer((AtomicReaderContext)indexSearcher.getTopReaderContext(), true, false, previousReader.getLiveDocs());
if (scorer != null) {
boolean more = scorer.advance(lastDoc[0] + 1) != DocIdSetIterator.NO_MORE_DOCS;
Assert.assertFalse("query's last doc was "+ lastDoc[0] +" but skipTo("+(lastDoc[0]+1)+") got to "+scorer.docID(),more);
}
leafPtr++;
}
lastReader[0] = context.reader();
lastDoc[0] = -1;
liveDocs = context.reader().getLiveDocs();
}
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
void broadcastNodeReopen(int nodeID, long version, IndexSearcher newSearcher) throws IOException {
if (VERBOSE) {
System.out.println("REOPEN: nodeID=" + nodeID + " version=" + version + " maxDoc=" + newSearcher.getIndexReader().maxDoc());
}
// Broadcast new collection stats for this node to all
// other nodes:
for(String field : fieldsToShare) {
final CollectionStatistics stats = newSearcher.collectionStatistics(field);
for (NodeState node : nodes) {
// Don't put my own collection stats into the cache;
// we pull locally:
if (node.myNodeID != nodeID) {
node.collectionStatsCache.put(new FieldAndShardVersion(nodeID, version, field), stats);
}
}
}
for (NodeState node : nodes) {
node.updateNodeVersion(nodeID, version);
}
}
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
TopDocs searchNode(int nodeID, long[] nodeVersions, Query q, Sort sort, int numHits, ScoreDoc searchAfter) throws IOException {
final NodeState.ShardIndexSearcher s = nodes[nodeID].acquire(nodeVersions);
try {
if (sort == null) {
if (searchAfter != null) {
return s.localSearchAfter(searchAfter, q, numHits);
} else {
return s.localSearch(q, numHits);
}
} else {
assert searchAfter == null; // not supported yet
return s.localSearch(q, numHits, sort);
}
} finally {
nodes[nodeID].release(s);
}
}
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
Map<Term,TermStatistics> getNodeTermStats(Set<Term> terms, int nodeID, long version) throws IOException {
final NodeState node = nodes[nodeID];
final Map<Term,TermStatistics> stats = new HashMap<Term,TermStatistics>();
final IndexSearcher s = node.searchers.acquire(version);
if (s == null) {
throw new SearcherExpiredException("node=" + nodeID + " version=" + version);
}
try {
for(Term term : terms) {
final TermContext termContext = TermContext.build(s.getIndexReader().getTopReaderContext(), term, false);
stats.put(term, s.termStatistics(term, termContext));
}
} finally {
node.searchers.release(s);
}
return stats;
}
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
Override
public Query rewrite(Query original) throws IOException {
final Query rewritten = super.rewrite(original);
final Set<Term> terms = new HashSet<Term>();
rewritten.extractTerms(terms);
// Make a single request to remote nodes for term
// stats:
for(int nodeID=0;nodeID<nodeVersions.length;nodeID++) {
if (nodeID == myNodeID) {
continue;
}
final Set<Term> missing = new HashSet<Term>();
for(Term term : terms) {
final TermAndShardVersion key = new TermAndShardVersion(nodeID, nodeVersions[nodeID], term);
if (!termStatsCache.containsKey(key)) {
missing.add(term);
}
}
if (missing.size() != 0) {
for(Map.Entry<Term,TermStatistics> ent : getNodeTermStats(missing, nodeID, nodeVersions[nodeID]).entrySet()) {
final TermAndShardVersion key = new TermAndShardVersion(nodeID, nodeVersions[nodeID], ent.getKey());
termStatsCache.put(key, ent.getValue());
}
}
}
return rewritten;
}
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
Override
public TermStatistics termStatistics(Term term, TermContext context) throws IOException {
assert term != null;
long docFreq = 0;
long totalTermFreq = 0;
for(int nodeID=0;nodeID<nodeVersions.length;nodeID++) {
final TermStatistics subStats;
if (nodeID == myNodeID) {
subStats = super.termStatistics(term, context);
} else {
final TermAndShardVersion key = new TermAndShardVersion(nodeID, nodeVersions[nodeID], term);
subStats = termStatsCache.get(key);
// We pre-cached during rewrite so all terms
// better be here...
assert subStats != null;
}
long nodeDocFreq = subStats.docFreq();
if (docFreq >= 0 && nodeDocFreq >= 0) {
docFreq += nodeDocFreq;
} else {
docFreq = -1;
}
long nodeTotalTermFreq = subStats.totalTermFreq();
if (totalTermFreq >= 0 && nodeTotalTermFreq >= 0) {
totalTermFreq += nodeTotalTermFreq;
} else {
totalTermFreq = -1;
}
}
return new TermStatistics(term.bytes(), docFreq, totalTermFreq);
}
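The statistics merge above treats a negative value as "unknown" and makes it sticky: once any shard reports -1, the merged total stays -1 rather than becoming a misleading partial sum. A one-line restatement of that rule (the helper name is mine, purely illustrative):
static long addStat(long mergedSoFar, long nodeValue) {
  // -1 means "statistic not available"; it must poison the whole sum
  return (mergedSoFar >= 0 && nodeValue >= 0) ? mergedSoFar + nodeValue : -1;
}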
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
Override
public CollectionStatistics collectionStatistics(String field) throws IOException {
// TODO: we could compute this on init and cache,
// since we are re-inited whenever any nodes have a
// new reader
long docCount = 0;
long sumTotalTermFreq = 0;
long sumDocFreq = 0;
long maxDoc = 0;
for(int nodeID=0;nodeID<nodeVersions.length;nodeID++) {
final FieldAndShardVersion key = new FieldAndShardVersion(nodeID, nodeVersions[nodeID], field);
final CollectionStatistics nodeStats;
if (nodeID == myNodeID) {
nodeStats = super.collectionStatistics(field);
} else {
nodeStats = collectionStatsCache.get(key);
}
if (nodeStats == null) {
System.out.println("coll stats myNodeID=" + myNodeID + ": " + collectionStatsCache.keySet());
}
// Collection stats are pre-shared on reopen, so,
// we better not have a cache miss:
assert nodeStats != null: "myNodeID=" + myNodeID + " nodeID=" + nodeID + " version=" + nodeVersions[nodeID] + " field=" + field;
long nodeDocCount = nodeStats.docCount();
if (docCount >= 0 && nodeDocCount >= 0) {
docCount += nodeDocCount;
} else {
docCount = -1;
}
long nodeSumTotalTermFreq = nodeStats.sumTotalTermFreq();
if (sumTotalTermFreq >= 0 && nodeSumTotalTermFreq >= 0) {
sumTotalTermFreq += nodeSumTotalTermFreq;
} else {
sumTotalTermFreq = -1;
}
long nodeSumDocFreq = nodeStats.sumDocFreq();
if (sumDocFreq >= 0 && nodeSumDocFreq >= 0) {
sumDocFreq += nodeSumDocFreq;
} else {
sumDocFreq = -1;
}
assert nodeStats.maxDoc() >= 0;
maxDoc += nodeStats.maxDoc();
}
return new CollectionStatistics(field, maxDoc, docCount, sumTotalTermFreq, sumDocFreq);
}
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
Override
public TopDocs search(Query query, int numHits) throws IOException {
final TopDocs[] shardHits = new TopDocs[nodeVersions.length];
for(int nodeID=0;nodeID<nodeVersions.length;nodeID++) {
if (nodeID == myNodeID) {
// My node; run using local shard searcher we
// already acquired:

shardHits[nodeID] = localSearch(query, numHits);
} else {
shardHits[nodeID] = searchNode(nodeID, nodeVersions, query, null, numHits, null);
}
}
// Merge:
return TopDocs.merge(null, numHits, shardHits);
}
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
public TopDocs localSearch(Query query, int numHits) throws IOException {
return super.search(query, numHits);
}
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
Override
public TopDocs searchAfter(ScoreDoc after, Query query, int numHits) throws IOException {
final TopDocs[] shardHits = new TopDocs[nodeVersions.length];
ScoreDoc shardAfter = new ScoreDoc(after.doc, after.score);
for(int nodeID=0;nodeID<nodeVersions.length;nodeID++) {
if (nodeID < after.shardIndex) {
// If score is tied then no docs in this shard
// should be collected:
shardAfter.doc = Integer.MAX_VALUE;
} else if (nodeID == after.shardIndex) {
// If score is tied then we break according to
// docID (like normal):
shardAfter.doc = after.doc;
} else {
// If score is tied then all docs in this shard
// should be collected, because they come after
// the previous bottom:
shardAfter.doc = -1;
}
if (nodeID == myNodeID) {
// My node; run using local shard searcher we
// already acquired:
shardHits[nodeID] = localSearchAfter(shardAfter, query, numHits);
} else {
shardHits[nodeID] = searchNode(nodeID, nodeVersions, query, null, numHits, shardAfter);
}
//System.out.println(" node=" + nodeID + " totHits=" + shardHits[nodeID].totalHits);
}
// Merge:
return TopDocs.merge(null, numHits, shardHits);
}
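The shardAfter bookkeeping above exists so that deep paging works across shards. As a usage sketch, a caller might page through results like this (the helper name and page sizes are illustrative; it works against any IndexSearcher, including the sharded one above):
static void pageThrough(IndexSearcher searcher, Query query) throws IOException {
  ScoreDoc after = null;
  for (int page = 0; page < 3; page++) {
    // first page uses search(); later pages resume from the last hit seen
    TopDocs td = (after == null) ? searcher.search(query, 10)
                                 : searcher.searchAfter(after, query, 10);
    if (td.scoreDocs.length == 0) {
      break;
    }
    after = td.scoreDocs[td.scoreDocs.length - 1];
  }
}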
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
public TopDocs localSearchAfter(ScoreDoc after, Query query, int numHits) throws IOException {
return super.searchAfter(after, query, numHits);
}
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
Override
public TopFieldDocs search(Query query, int numHits, Sort sort) throws IOException {
assert sort != null;
final TopDocs[] shardHits = new TopDocs[nodeVersions.length];
for(int nodeID=0;nodeID<nodeVersions.length;nodeID++) {
if (nodeID == myNodeID) {
// My node; run using local shard searcher we
// already acquired:
shardHits[nodeID] = localSearch(query, numHits, sort);
} else {
shardHits[nodeID] = searchNode(nodeID, nodeVersions, query, sort, numHits, null);
}
}
// Merge:
return (TopFieldDocs) TopDocs.merge(sort, numHits, shardHits);
}
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
public TopFieldDocs localSearch(Query query, int numHits, Sort sort) throws IOException {
return super.search(query, numHits, sort);
}
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
public void updateNodeVersion(int nodeID, long version) throws IOException {
currentNodeVersions[nodeID] = version;
if (currentShardSearcher != null) {
currentShardSearcher.getIndexReader().decRef();
}
currentShardSearcher = new ShardIndexSearcher(currentNodeVersions.clone(),
mgr.acquire().getIndexReader(),
myNodeID);
}
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
public void release(ShardIndexSearcher s) throws IOException {
s.getIndexReader().decRef();
}
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
public void reopen() throws IOException {
final IndexSearcher before = mgr.acquire();
mgr.release(before);
mgr.maybeRefresh();
final IndexSearcher after = mgr.acquire();
try {
if (after != before) {
// New searcher was opened
final long version = searchers.record(after);
searchers.prune(new SearcherLifetimeManager.PruneByAge(maxSearcherAgeSeconds));
broadcastNodeReopen(myNodeID, version, after);
}
} finally {
mgr.release(after);
}
}
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
Override
public void close() throws IOException {
if (currentShardSearcher != null) {
currentShardSearcher.getIndexReader().decRef();
}
searchers.close();
mgr.close();
writer.close();
dir.close();
}
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
protected void start(String baseDirName, int numNodes, double runTimeSec, int maxSearcherAgeSeconds) throws IOException {
endTimeNanos = System.nanoTime() + (long) (runTimeSec*1000000000);
this.maxSearcherAgeSeconds = maxSearcherAgeSeconds;
nodes = new NodeState[numNodes];
for(int nodeID=0;nodeID<numNodes;nodeID++) {
nodes[nodeID] = new NodeState(random(), baseDirName, nodeID, numNodes);
}
long[] nodeVersions = new long[nodes.length];
for(int nodeID=0;nodeID<numNodes;nodeID++) {
final IndexSearcher s = nodes[nodeID].mgr.acquire();
try {
nodeVersions[nodeID] = nodes[nodeID].searchers.record(s);
} finally {
nodes[nodeID].mgr.release(s);
}
}
for(int nodeID=0;nodeID<numNodes;nodeID++) {
final IndexSearcher s = nodes[nodeID].mgr.acquire();
assert nodeVersions[nodeID] == nodes[nodeID].searchers.record(s);
assert s != null;
try {
broadcastNodeReopen(nodeID, nodeVersions[nodeID], s);
} finally {
nodes[nodeID].mgr.release(s);
}
}
changeIndicesThread = new ChangeIndices();
changeIndicesThread.start();
}
// in lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
protected void finish() throws InterruptedException, IOException {
changeIndicesThread.join();
for(NodeState node : nodes) {
node.close();
}
}
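Taken together, start() and finish() bracket a test's lifetime: start() builds the per-node indices, records the initial searcher versions, broadcasts collection stats and kicks off the background index-changing thread; finish() joins that thread and closes the nodes. A skeleton subclass might look like this (class, method and directory names are placeholders; the query code is elided):
public class TestSimpleSharding extends ShardSearchingTestBase {
  public void testSharding() throws Exception {
    start("shardtest", 3, 5.0, 30);  // base dir name, 3 nodes, ~5s of index churn, 30s searcher lifetime
    // acquire NodeState.ShardIndexSearchers from nodes[n], run queries against them, then release them
    finish();                        // joins the index-changing thread and closes all nodes
  }
}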
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
public static void checkNoMatchExplanations(Query q, String defaultFieldName,
IndexSearcher searcher, int[] results)
throws IOException {
String d = q.toString(defaultFieldName);
Set<Integer> ignore = new TreeSet<Integer>();
for (int i = 0; i < results.length; i++) {
ignore.add(Integer.valueOf(results[i]));
}
int maxDoc = searcher.getIndexReader().maxDoc();
for (int doc = 0; doc < maxDoc; doc++) {
if (ignore.contains(Integer.valueOf(doc))) continue;
Explanation exp = searcher.explain(q, doc);
Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null",
exp);
Assert.assertFalse("Explanation of [["+d+"]] for #"+doc+
" doesn't indicate non-match: " + exp.toString(),
exp.isMatch());
}
}
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
public static void checkHitCollector(Random random, Query query, String defaultFieldName,
IndexSearcher searcher, int[] results)
throws IOException {
QueryUtils.check(random,query,searcher);
Set<Integer> correct = new TreeSet<Integer>();
for (int i = 0; i < results.length; i++) {
correct.add(Integer.valueOf(results[i]));
}
final Set<Integer> actual = new TreeSet<Integer>();
final Collector c = new SetCollector(actual);
searcher.search(query, c);
Assert.assertEquals("Simple: " + query.toString(defaultFieldName),
correct, actual);
for (int i = -1; i < 2; i++) {
actual.clear();
IndexSearcher s = QueryUtils.wrapUnderlyingReader
(random, searcher, i);
s.search(query, c);
Assert.assertEquals("Wrap Reader " + i + ": " +
query.toString(defaultFieldName),
correct, actual);
}
}
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
Override
public void setScorer(Scorer scorer) throws IOException {}
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
public static void checkHits(
Random random,
Query query,
String defaultFieldName,
IndexSearcher searcher,
int[] results)
throws IOException {
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
Set<Integer> correct = new TreeSet<Integer>();
for (int i = 0; i < results.length; i++) {
correct.add(Integer.valueOf(results[i]));
}
Set<Integer> actual = new TreeSet<Integer>();
for (int i = 0; i < hits.length; i++) {
actual.add(Integer.valueOf(hits[i].doc));
}
Assert.assertEquals(query.toString(defaultFieldName), correct, actual);
QueryUtils.check(random, query,searcher, LuceneTestCase.rarely(random));
}
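A typical call site, for reference; this fragment assumes an existing IndexSearcher named searcher and a LuceneTestCase context for random(), and the field name and expected doc ids are placeholders:
// assert that the query matches exactly docs 0, 2 and 5, then run the
// QueryUtils sanity checks that checkHits() triggers internally
CheckHits.checkHits(random(), new TermQuery(new Term("field", "value")),
                    "field", searcher, new int[] {0, 2, 5});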
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
public static void checkDocIds(String mes, int[] results, ScoreDoc[] hits)
throws IOException {
Assert.assertEquals(mes + " nr of hits", hits.length, results.length);
for (int i = 0; i < results.length; i++) {
Assert.assertEquals(mes + " doc nrs for hit " + i, results[i], hits[i].doc);
}
}
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
public static void checkHitsQuery(
Query query,
ScoreDoc[] hits1,
ScoreDoc[] hits2,
int[] results)
throws IOException {
checkDocIds("hits1", results, hits1);
checkDocIds("hits2", results, hits2);
checkEqual(query, hits1, hits2);
}
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
public static void checkEqual(Query query, ScoreDoc[] hits1, ScoreDoc[] hits2) throws IOException {
final float scoreTolerance = 1.0e-6f;
if (hits1.length != hits2.length) {
Assert.fail("Unequal lengths: hits1="+hits1.length+",hits2="+hits2.length);
}
for (int i = 0; i < hits1.length; i++) {
if (hits1[i].doc != hits2[i].doc) {
Assert.fail("Hit " + i + " docnumbers don't match\n"
+ hits2str(hits1, hits2,0,0)
+ "for query:" + query.toString());
}
if ((hits1[i].doc != hits2[i].doc)
|| Math.abs(hits1[i].score - hits2[i].score) > scoreTolerance)
{
Assert.fail("Hit " + i + ", doc nrs " + hits1[i].doc + " and " + hits2[i].doc
+ "\nunequal : " + hits1[i].score
+ "\n and: " + hits2[i].score
+ "\nfor query:" + query.toString());
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
public static String hits2str(ScoreDoc[] hits1, ScoreDoc[] hits2, int start, int end) throws IOException {
StringBuilder sb = new StringBuilder();
int len1=hits1==null ? 0 : hits1.length;
int len2=hits2==null ? 0 : hits2.length;
if (end<=0) {
end = Math.max(len1,len2);
}
sb.append("Hits length1=").append(len1).append("\tlength2=").append(len2);
sb.append('\n');
for (int i=start; i<end; i++) {
sb.append("hit=").append(i).append(':');
if (i<len1) {
sb.append(" doc").append(hits1[i].doc).append('=').append(hits1[i].score);
} else {
sb.append(" ");
}
sb.append(",\t");
if (i<len2) {
sb.append(" doc").append(hits2[i].doc).append('=').append(hits2[i].score);
}
sb.append('\n');
}
return sb.toString();
}
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
public static void checkExplanations(Query query,
String defaultFieldName,
IndexSearcher searcher) throws IOException {
checkExplanations(query, defaultFieldName, searcher, false);
}
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
public static void checkExplanations(Query query,
String defaultFieldName,
IndexSearcher searcher,
boolean deep) throws IOException {
searcher.search(query,
new ExplanationAsserter
(query, defaultFieldName, searcher, deep));
}
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
protected void checkExplanations(Query q) throws IOException {
super.search(q, null,
new ExplanationAsserter
(q, null, this));
}
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
Override
public TopFieldDocs search(Query query,
Filter filter,
int n,
Sort sort) throws IOException {
checkExplanations(query);
return super.search(query,filter,n,sort);
}
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
Override
public void search(Query query, Collector results) throws IOException {
checkExplanations(query);
super.search(query, results);
}
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
Override
public void search(Query query, Filter filter, Collector results) throws IOException {
checkExplanations(query);
super.search(query, filter, results);
}
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
Override
public TopDocs search(Query query, Filter filter,
int n) throws IOException {
checkExplanations(query);
return super.search(query,filter, n);
}
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
// in lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
Override
public void collect(int doc) throws IOException {
Explanation exp = null;
doc = doc + base;
try {
exp = s.explain(q, doc);
} catch (IOException e) {
throw new RuntimeException
("exception in hitcollector of [["+d+"]] for #"+doc, e);
}
Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null", exp);
verifyExplanation(d,doc,scorer.score(),deep,exp);
Assert.assertTrue("Explanation of [["+d+"]] for #"+ doc +
" does not indicate match: " + exp.toString(),
exp.isMatch());
}
// in lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java
Override
public Weight createNormalizedWeight(Query query) throws IOException {
final Weight w = super.createNormalizedWeight(query);
return new Weight() {
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
return w.explain(context, doc);
}
@Override
public Query getQuery() {
return w.getQuery();
}
@Override
public void normalize(float norm, float topLevelBoost) {
throw new IllegalStateException("Weight already normalized.");
}
@Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
Scorer scorer = w.scorer(context, scoreDocsInOrder, topScorer, acceptDocs);
if (scorer != null) {
// check that scorer obeys disi contract for docID() before next()/advance
try {
int docid = scorer.docID();
assert docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS;
} catch (UnsupportedOperationException ignored) {
// from a top-level BS1
assert topScorer;
}
}
return scorer;
}
@Override
public float getValueForNormalization() throws IOException {
throw new IllegalStateException("Weight already normalized.");
}
@Override
public boolean scoresDocsOutOfOrder() {
return w.scoresDocsOutOfOrder();
}
};
}
// in lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java
Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
return w.explain(context, doc);
}
// in lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
Scorer scorer = w.scorer(context, scoreDocsInOrder, topScorer, acceptDocs);
if (scorer != null) {
// check that scorer obeys disi contract for docID() before next()/advance
try {
int docid = scorer.docID();
assert docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS;
} catch (UnsupportedOperationException ignored) {
// from a top-level BS1
assert topScorer;
}
}
return scorer;
}
// in lucene/test-framework/src/java/org/apache/lucene/search/AssertingIndexSearcher.java
Override
public float getValueForNormalization() throws IOException {
throw new IllegalStateException("Weight already normalized.");
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexInputWrapper.java
Override
public void close() throws IOException {
try {
// turn on the following to look for leaks closing inputs,
// after fixing TestTransactions
// dir.maybeThrowDeterministicException();
} finally {
closed = true;
delegate.close();
// Pending resolution on LUCENE-686 we may want to
// remove the conditional check so we also track that
// all clones get closed:
if (!isClone) {
dir.removeIndexInput(this, name);
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexInputWrapper.java
Override
public void seek(long pos) throws IOException {
ensureOpen();
delegate.seek(pos);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexInputWrapper.java
Override
public byte readByte() throws IOException {
ensureOpen();
return delegate.readByte();
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexInputWrapper.java
Override
public void readBytes(byte[] b, int offset, int len) throws IOException {
ensureOpen();
delegate.readBytes(b, offset, len);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexInputWrapper.java
Override
public void copyBytes(IndexOutput out, long numBytes) throws IOException {
ensureOpen();
delegate.copyBytes(out, numBytes);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexInputWrapper.java
Override
public void readBytes(byte[] b, int offset, int len, boolean useBuffer)
throws IOException {
ensureOpen();
delegate.readBytes(b, offset, len, useBuffer);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexInputWrapper.java
Override
public short readShort() throws IOException {
ensureOpen();
return delegate.readShort();
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexInputWrapper.java
Override
public int readInt() throws IOException {
ensureOpen();
return delegate.readInt();
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexInputWrapper.java
Override
public long readLong() throws IOException {
ensureOpen();
return delegate.readLong();
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexInputWrapper.java
Override
public String readString() throws IOException {
ensureOpen();
return delegate.readString();
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexInputWrapper.java
Override
public Map<String,String> readStringStringMap() throws IOException {
ensureOpen();
return delegate.readStringStringMap();
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexInputWrapper.java
Override
public int readVInt() throws IOException {
ensureOpen();
return delegate.readVInt();
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexInputWrapper.java
Override
public long readVLong() throws IOException {
ensureOpen();
return delegate.readVLong();
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
Override
public synchronized void sync(Collection<String> names) throws IOException {
maybeYield();
maybeThrowDeterministicException();
if (crashed) {
throw new IOException("cannot sync after crash");
}
unSyncedFiles.removeAll(names);
if (LuceneTestCase.rarely(randomState) || delegate instanceof NRTCachingDirectory) {
// don't wear out our hardware so much in tests.
delegate.sync(names);
}
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
public synchronized final long sizeInBytes() throws IOException {
if (delegate instanceof RAMDirectory)
return ((RAMDirectory) delegate).sizeInBytes();
else {
// hack
long size = 0;
for (String file : delegate.listAll())
size += delegate.fileLength(file);
return size;
}
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
public synchronized void crash() throws IOException {
crashed = true;
openFiles = new HashMap<String,Integer>();
openFilesForWrite = new HashSet<String>();
openFilesDeleted = new HashSet<String>();
Iterator<String> it = unSyncedFiles.iterator();
unSyncedFiles = new HashSet<String>();
// first force-close all files, so we can corrupt on windows etc.
// clone the file map, as these guys want to remove themselves on close.
Map<Closeable,Exception> m = new IdentityHashMap<Closeable,Exception>(openFileHandles);
for (Closeable f : m.keySet())
try {
f.close();
} catch (Exception ignored) {}
while(it.hasNext()) {
String name = it.next();
int damage = randomState.nextInt(5);
String action = null;
if (damage == 0) {
action = "deleted";
deleteFile(name, true);
} else if (damage == 1) {
action = "zeroed";
// Zero out file entirely
long length = fileLength(name);
byte[] zeroes = new byte[256];
long upto = 0;
IndexOutput out = delegate.createOutput(name, LuceneTestCase.newIOContext(randomState));
while(upto < length) {
final int limit = (int) Math.min(length-upto, zeroes.length);
out.writeBytes(zeroes, 0, limit);
upto += limit;
}
out.close();
} else if (damage == 2) {
action = "partially truncated";
// Partially Truncate the file:
// First, make temp file and copy only half this
// file over:
String tempFileName;
while (true) {
tempFileName = ""+randomState.nextInt();
if (!delegate.fileExists(tempFileName)) {
break;
}
}
final IndexOutput tempOut = delegate.createOutput(tempFileName, LuceneTestCase.newIOContext(randomState));
IndexInput in = delegate.openInput(name, LuceneTestCase.newIOContext(randomState));
tempOut.copyBytes(in, in.length()/2);
tempOut.close();
in.close();
// Delete original and copy bytes back:
deleteFile(name, true);
final IndexOutput out = delegate.createOutput(name, LuceneTestCase.newIOContext(randomState));
in = delegate.openInput(tempFileName, LuceneTestCase.newIOContext(randomState));
out.copyBytes(in, in.length());
out.close();
in.close();
deleteFile(tempFileName, true);
} else if (damage == 3) {
// The file survived intact:
action = "didn't change";
} else {
action = "fully truncated";
// Totally truncate the file to zero bytes
deleteFile(name, true);
IndexOutput out = delegate.createOutput(name, LuceneTestCase.newIOContext(randomState));
out.setLength(0);
out.close();
}
if (LuceneTestCase.VERBOSE) {
System.out.println("MockDirectoryWrapper: " + action + " unsynced file: " + name);
}
}
}
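To see crash() in context, here is a sketch of how a crash-recovery test typically uses it. It assumes LuceneTestCase helpers, omits imports, and deliberately abandons the writer, since that is what the "crashed" process would have done.
MockDirectoryWrapper dir = (MockDirectoryWrapper) newDirectory();
dir.setCheckIndexOnClose(false);
IndexWriter w = new IndexWriter(dir,
    newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
Document doc = new Document();
doc.add(new TextField("body", "committed", Field.Store.NO));
w.addDocument(doc);
w.commit();               // synced: survives the crash
w.addDocument(doc);       // not committed: may be lost or corrupted
dir.crash();              // zero/truncate/delete unsynced files, force-close handles
dir.clearCrash();
DirectoryReader r = DirectoryReader.open(dir);
// only the committed generation is guaranteed to be visible here
r.close();
dir.close();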
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
public synchronized void clearCrash() throws IOException {
crashed = false;
openLocks.clear();
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
public void resetMaxUsedSizeInBytes() throws IOException {
this.maxUsedSize = getRecomputedActualSizeInBytes();
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
void maybeThrowIOException() throws IOException {
maybeThrowIOException(null);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
void maybeThrowIOException(String message) throws IOException {
if (randomIOExceptionRate > 0.0) {
int number = Math.abs(randomState.nextInt() % 1000);
if (number < randomIOExceptionRate*1000) {
if (LuceneTestCase.VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": MockDirectoryWrapper: now throw random exception" + (message == null ? "" : " (" + message + ")"));
new Throwable().printStackTrace(System.out);
}
throw new IOException("a random IOException" + (message == null ? "" : "(" + message + ")"));
}
}
}
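The random exception rate is normally configured from the test itself; as far as I recall the setter is setRandomIOExceptionRate, so treat the exact name as an assumption. A minimal sketch:
MockDirectoryWrapper dir = (MockDirectoryWrapper) newDirectory();
dir.setRandomIOExceptionRate(0.05);  // ~5% of eligible operations throw an injected IOException
// exercise the code under test; it must leave the index consistent despite the injected failures
dir.setRandomIOExceptionRate(0.0);   // switch fault injection back off before closing
dir.close();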
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
Override
public synchronized void deleteFile(String name) throws IOException {
maybeYield();
deleteFile(name, false);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
private synchronized void deleteFile(String name, boolean forced) throws IOException {
maybeYield();
maybeThrowDeterministicException();
if (crashed && !forced)
throw new IOException("cannot delete after crash");
if (unSyncedFiles.contains(name))
unSyncedFiles.remove(name);
if (!forced && noDeleteOpenFile) {
if (openFiles.containsKey(name)) {
openFilesDeleted.add(name);
throw fillOpenTrace(new IOException("MockDirectoryWrapper: file \"" + name + "\" is still open: cannot delete"), name, true);
} else {
openFilesDeleted.remove(name);
}
}
delegate.deleteFile(name);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
Override
public synchronized IndexOutput createOutput(String name, IOContext context) throws IOException {
maybeYield();
if (failOnCreateOutput) {
maybeThrowDeterministicException();
}
if (crashed)
throw new IOException("cannot createOutput after crash");
init();
synchronized(this) {
if (preventDoubleWrite && createdFiles.contains(name) && !name.equals("segments.gen"))
throw new IOException("file \"" + name + "\" was already written to");
}
if (noDeleteOpenFile && openFiles.containsKey(name))
throw new IOException("MockDirectoryWrapper: file \"" + name + "\" is still open: cannot overwrite");
if (crashed)
throw new IOException("cannot createOutput after crash");
unSyncedFiles.add(name);
createdFiles.add(name);
if (delegate instanceof RAMDirectory) {
RAMDirectory ramdir = (RAMDirectory) delegate;
RAMFile file = new RAMFile(ramdir);
RAMFile existing = ramdir.fileMap.get(name);
// Enforce write once:
if (existing!=null && !name.equals("segments.gen") && preventDoubleWrite)
throw new IOException("file " + name + " already exists");
else {
if (existing!=null) {
ramdir.sizeInBytes.getAndAdd(-existing.sizeInBytes);
existing.directory = null;
}
ramdir.fileMap.put(name, file);
}
}
//System.out.println(Thread.currentThread().getName() + ": MDW: create " + name);
IndexOutput io = new MockIndexOutputWrapper(this, delegate.createOutput(name, LuceneTestCase.newIOContext(randomState)), name);
addFileHandle(io, name, Handle.Output);
openFilesForWrite.add(name);
// throttling REALLY slows down tests, so don't do it very often for SOMETIMES.
if (throttling == Throttling.ALWAYS ||
(throttling == Throttling.SOMETIMES && randomState.nextInt(50) == 0)) {
if (LuceneTestCase.VERBOSE) {
System.out.println("MockDirectoryWrapper: throttling indexOutput");
}
return throttledOutput.newFromDelegate(io);
} else {
return io;
}
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
Override
public synchronized IndexInput openInput(String name, IOContext context) throws IOException {
maybeYield();
if (failOnOpenInput) {
maybeThrowDeterministicException();
}
if (!delegate.fileExists(name)) {
throw new FileNotFoundException(name + " in dir=" + delegate);
}
// cannot open a file for input if it's still open for
// output, except for segments.gen and segments_N
if (openFilesForWrite.contains(name) && !name.startsWith("segments")) {
throw fillOpenTrace(new IOException("MockDirectoryWrapper: file \"" + name + "\" is still open for writing"), name, false);
}
IndexInput ii = new MockIndexInputWrapper(this, name, delegate.openInput(name, LuceneTestCase.newIOContext(randomState)));
addFileHandle(ii, name, Handle.Input);
return ii;
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
public synchronized final long getRecomputedSizeInBytes() throws IOException {
if (!(delegate instanceof RAMDirectory))
return sizeInBytes();
long size = 0;
for(final RAMFile file: ((RAMDirectory)delegate).fileMap.values()) {
size += file.getSizeInBytes();
}
return size;
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
public final synchronized long getRecomputedActualSizeInBytes() throws IOException {
if (!(delegate instanceof RAMDirectory))
return sizeInBytes();
long size = 0;
for (final RAMFile file : ((RAMDirectory)delegate).fileMap.values())
size += file.length;
return size;
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
Override
public synchronized void close() throws IOException {
maybeYield();
if (openFiles == null) {
openFiles = new HashMap<String,Integer>();
openFilesDeleted = new HashSet<String>();
}
if (noDeleteOpenFile && openFiles.size() > 0) {
// print the first one as it's very verbose otherwise
Exception cause = null;
Iterator<Exception> stacktraces = openFileHandles.values().iterator();
if (stacktraces.hasNext())
cause = stacktraces.next();
// RuntimeException instead of IOException because
// super() does not throw IOException currently:
throw new RuntimeException("MockDirectoryWrapper: cannot close: there are still open files: " + openFiles, cause);
}
if (noDeleteOpenFile && openLocks.size() > 0) {
throw new RuntimeException("MockDirectoryWrapper: cannot close: there are still open locks: " + openLocks);
}
open = false;
if (checkIndexOnClose) {
if (indexPossiblyExists(this)) {
if (LuceneTestCase.VERBOSE) {
System.out.println("\nNOTE: MockDirectoryWrapper: now crash");
}
crash(); // corrupt any unsynced files
if (LuceneTestCase.VERBOSE) {
System.out.println("\nNOTE: MockDirectoryWrapper: now run CheckIndex");
}
_TestUtil.checkIndex(this, crossCheckTermVectorsOnClose);
if (assertNoUnreferencedFilesOnClose) {
// now look for unreferenced files:
String[] startFiles = listAll();
new IndexWriter(this, new IndexWriterConfig(LuceneTestCase.TEST_VERSION_CURRENT, null)).rollback();
String[] endFiles = listAll();
Arrays.sort(startFiles);
Arrays.sort(endFiles);
if (!Arrays.equals(startFiles, endFiles)) {
assert false : "unreferenced files: before delete:\n " + Arrays.toString(startFiles) + "\n after delete:\n " + Arrays.toString(endFiles);
}
DirectoryReader ir1 = DirectoryReader.open(this);
int numDocs1 = ir1.numDocs();
ir1.close();
new IndexWriter(this, new IndexWriterConfig(LuceneTestCase.TEST_VERSION_CURRENT, null)).close();
DirectoryReader ir2 = DirectoryReader.open(this);
int numDocs2 = ir2.numDocs();
ir2.close();
assert numDocs1 == numDocs2 : "numDocs changed after opening/closing IW: before=" + numDocs1 + " after=" + numDocs2;
}
}
}
delegate.close();
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
private boolean indexPossiblyExists(Directory d) throws IOException {
String files[];
try {
files = d.listAll();
} catch (IOException ex) {
// this means directory doesn't exist, which is ok. return false
return false;
}
for (String f : files) {
if (f.startsWith("segments_")) {
return true;
}
}
return false;
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
public void eval(MockDirectoryWrapper dir) throws IOException { }
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
synchronized void maybeThrowDeterministicException() throws IOException {
if (failures != null) {
for(int i = 0; i < failures.size(); i++) {
failures.get(i).eval(this);
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
Override
public synchronized String[] listAll() throws IOException {
maybeYield();
return delegate.listAll();
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
Override
public synchronized boolean fileExists(String name) throws IOException {
maybeYield();
return delegate.fileExists(name);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
Override
public synchronized long fileLength(String name) throws IOException {
maybeYield();
return delegate.fileLength(name);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
Override
public synchronized void clearLock(String name) throws IOException {
maybeYield();
delegate.clearLock(name);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
Override
public synchronized void setLockFactory(LockFactory lockFactory) throws IOException {
maybeYield();
delegate.setLockFactory(lockFactory);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
Override
public synchronized void copy(Directory to, String src, String dest, IOContext context) throws IOException {
maybeYield();
// randomize the IOContext here?
delegate.copy(to, src, dest, context);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
Override
public IndexInputSlicer createSlicer(final String name, IOContext context)
throws IOException {
maybeYield();
if (!delegate.fileExists(name)) {
throw new FileNotFoundException(name);
}
// cannot open a file for input if it's still open for
// output, except for segments.gen and segments_N
if (openFilesForWrite.contains(name) && !name.startsWith("segments")) {
throw fillOpenTrace(new IOException("MockDirectoryWrapper: file \"" + name + "\" is still open for writing"), name, false);
}
final IndexInputSlicer delegateHandle = delegate.createSlicer(name, context);
final IndexInputSlicer handle = new IndexInputSlicer() {
private boolean isClosed;
@Override
public void close() throws IOException {
if (!isClosed) {
delegateHandle.close();
MockDirectoryWrapper.this.removeOpenFile(this, name);
isClosed = true;
}
}
@Override
public IndexInput openSlice(String sliceDescription, long offset, long length) throws IOException {
maybeYield();
IndexInput ii = new MockIndexInputWrapper(MockDirectoryWrapper.this, name, delegateHandle.openSlice(sliceDescription, offset, length));
addFileHandle(ii, name, Handle.Input);
return ii;
}
@Override
public IndexInput openFullSlice() throws IOException {
maybeYield();
IndexInput ii = new MockIndexInputWrapper(MockDirectoryWrapper.this, name, delegateHandle.openFullSlice());
addFileHandle(ii, name, Handle.Input);
return ii;
}
};
addFileHandle(handle, name, Handle.Slice);
return handle;
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
Override
public void close() throws IOException {
if (!isClosed) {
delegateHandle.close();
MockDirectoryWrapper.this.removeOpenFile(this, name);
isClosed = true;
}
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
Override
public IndexInput openSlice(String sliceDescription, long offset, long length) throws IOException {
maybeYield();
IndexInput ii = new MockIndexInputWrapper(MockDirectoryWrapper.this, name, delegateHandle.openSlice(sliceDescription, offset, length));
addFileHandle(ii, name, Handle.Input);
return ii;
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockDirectoryWrapper.java
Override
public IndexInput openFullSlice() throws IOException {
maybeYield();
IndexInput ii = new MockIndexInputWrapper(MockDirectoryWrapper.this, name, delegateHandle.openFullSlice());
addFileHandle(ii, name, Handle.Input);
return ii;
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockLockFactoryWrapper.java
Override
public void clearLock(String lockName) throws IOException {
delegate.clearLock(lockName);
dir.openLocks.remove(lockName);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockLockFactoryWrapper.java
Override
public boolean obtain() throws IOException {
if (delegateLock.obtain()) {
dir.openLocks.add(name);
return true;
} else {
return false;
}
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockLockFactoryWrapper.java
Override
public void release() throws IOException {
delegateLock.release();
dir.openLocks.remove(name);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockLockFactoryWrapper.java
Override
public boolean isLocked() throws IOException {
return delegateLock.isLocked();
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java
Override
public void close() throws IOException {
try {
dir.maybeThrowDeterministicException();
} finally {
delegate.close();
if (dir.trackDiskUsage) {
// Now compute actual disk usage & track the maxUsedSize
// in the MockDirectoryWrapper:
long size = dir.getRecomputedActualSizeInBytes();
if (size > dir.maxUsedSize) {
dir.maxUsedSize = size;
}
}
dir.removeIndexOutput(this, name);
}
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java
Override
public void flush() throws IOException {
dir.maybeThrowDeterministicException();
delegate.flush();
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java
Override
public void writeByte(byte b) throws IOException {
singleByte[0] = b;
writeBytes(singleByte, 0, 1);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java
Override
public void writeBytes(byte[] b, int offset, int len) throws IOException {
long freeSpace = dir.maxSize == 0 ? 0 : dir.maxSize - dir.sizeInBytes();
long realUsage = 0;
if (dir.rateLimiter != null && len >= 10) {
dir.rateLimiter.pause(len);
}
// If MockRAMDir crashed since we were opened, then
// don't write anything:
if (dir.crashed)
throw new IOException("MockRAMDirectory was crashed; cannot write to " + name);
// Enforce disk full:
if (dir.maxSize != 0 && freeSpace <= len) {
// Compute the real disk free. This will greatly slow
// down our test but makes it more accurate:
realUsage = dir.getRecomputedActualSizeInBytes();
freeSpace = dir.maxSize - realUsage;
}
if (dir.maxSize != 0 && freeSpace <= len) {
if (freeSpace > 0) {
realUsage += freeSpace;
delegate.writeBytes(b, offset, (int) freeSpace);
}
if (realUsage > dir.maxUsedSize) {
dir.maxUsedSize = realUsage;
}
String message = "fake disk full at " + dir.getRecomputedActualSizeInBytes() + " bytes when writing " + name + " (file length=" + delegate.length();
if (freeSpace > 0) {
message += "; wrote " + freeSpace + " of " + len + " bytes";
}
message += ")";
if (LuceneTestCase.VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": MDW: now throw fake disk full");
new Throwable().printStackTrace(System.out);
}
throw new IOException(message);
} else {
if (dir.randomState.nextInt(200) == 0) {
final int half = len/2;
delegate.writeBytes(b, offset, half);
Thread.yield();
delegate.writeBytes(b, offset+half, len-half);
} else {
delegate.writeBytes(b, offset, len);
}
}
dir.maybeThrowDeterministicException();
if (first) {
// Maybe throw random exception; only do this on first
// write to a new file:
first = false;
dir.maybeThrowIOException(name);
}
}
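The disk-full branch above is driven by an external size cap. A sketch of how a test might arm it follows; the setter and getter names are from MockDirectoryWrapper's public API as I recall them, so treat them as assumptions.
MockDirectoryWrapper dir = (MockDirectoryWrapper) newDirectory();
dir.setMaxSizeInBytes(64 * 1024);  // writes beyond ~64 KB hit the fake "disk full" IOException
dir.setTrackDiskUsage(true);       // record the peak usage reached before the failure
// index documents until the fake disk-full IOException is thrown, then recover
long peak = dir.getMaxUsedSizeInBytes();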
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java
Override
public void seek(long pos) throws IOException {
delegate.seek(pos);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java
Override
public long length() throws IOException {
return delegate.length();
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java
Override
public void setLength(long length) throws IOException {
delegate.setLength(length);
}
// in lucene/test-framework/src/java/org/apache/lucene/store/MockIndexOutputWrapper.java
Override
public void copyBytes(DataInput input, long numBytes) throws IOException {
delegate.copyBytes(input, numBytes);
// TODO: we may need to check disk full here as well
dir.maybeThrowDeterministicException();
}
// in lucene/test-framework/src/java/org/apache/lucene/store/_TestHelper.java
public static boolean isSimpleFSIndexInputOpen(IndexInput is)
throws IOException
{
if (isSimpleFSIndexInput(is)) {
SimpleFSIndexInput fis = (SimpleFSIndexInput) is;
return fis.isFDValid();
} else {
return false;
}
}
// in lucene/test-framework/src/java/org/apache/lucene/index/ThreadedIndexingAndSearchingTestCase.java
public void runTest(String testName) throws Exception {
failed.set(false);
addCount.set(0);
delCount.set(0);
packCount.set(0);
final long t0 = System.currentTimeMillis();
Random random = new Random(random().nextLong());
final LineFileDocs docs = new LineFileDocs(random, true);
final File tempDir = _TestUtil.getTempDir(testName);
dir = newFSDirectory(tempDir);
((MockDirectoryWrapper) dir).setCheckIndexOnClose(false); // don't double-checkIndex, we do it ourselves.
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())).setInfoStream(new FailOnNonBulkMergesInfoStream());
if (LuceneTestCase.TEST_NIGHTLY) {
// newIWConfig makes smallish max seg size, which
// results in tons and tons of segments for this test
// when run nightly:
MergePolicy mp = conf.getMergePolicy();
if (mp instanceof TieredMergePolicy) {
((TieredMergePolicy) mp).setMaxMergedSegmentMB(5000.);
} else if (mp instanceof LogByteSizeMergePolicy) {
((LogByteSizeMergePolicy) mp).setMaxMergeMB(1000.);
} else if (mp instanceof LogMergePolicy) {
((LogMergePolicy) mp).setMaxMergeDocs(100000);
}
}
conf.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
@Override
public void warm(AtomicReader reader) throws IOException {
if (VERBOSE) {
System.out.println("TEST: now warm merged reader=" + reader);
}
final int maxDoc = reader.maxDoc();
final Bits liveDocs = reader.getLiveDocs();
int sum = 0;
final int inc = Math.max(1, maxDoc/50);
for(int docID=0;docID<maxDoc;docID += inc) {
if (liveDocs == null || liveDocs.get(docID)) {
final Document doc = reader.document(docID);
sum += doc.getFields().size();
}
}
IndexSearcher searcher = newSearcher(reader);
sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits;
if (VERBOSE) {
System.out.println("TEST: warm visited " + sum + " fields");
}
}
});
writer = new IndexWriter(dir, conf);
_TestUtil.reduceOpenFiles(writer);
final ExecutorService es = random().nextBoolean() ? null : Executors.newCachedThreadPool(new NamedThreadFactory(testName));
doAfterWriter(es);
final int NUM_INDEX_THREADS = _TestUtil.nextInt(random(), 2, 4);
final int RUN_TIME_SEC = LuceneTestCase.TEST_NIGHTLY ? 300 : RANDOM_MULTIPLIER;
final Set<String> delIDs = Collections.synchronizedSet(new HashSet<String>());
final Set<String> delPackIDs = Collections.synchronizedSet(new HashSet<String>());
final List<SubDocs> allSubDocs = Collections.synchronizedList(new ArrayList<SubDocs>());
final long stopTime = System.currentTimeMillis() + RUN_TIME_SEC*1000;
final Thread[] indexThreads = launchIndexingThreads(docs, NUM_INDEX_THREADS, stopTime, delIDs, delPackIDs, allSubDocs);
if (VERBOSE) {
System.out.println("TEST: DONE start indexing threads [" + (System.currentTimeMillis()-t0) + " ms]");
}
// Let index build up a bit
Thread.sleep(100);
doSearching(es, stopTime);
if (VERBOSE) {
System.out.println("TEST: all searching done [" + (System.currentTimeMillis()-t0) + " ms]");
}
for(int thread=0;thread<indexThreads.length;thread++) {
indexThreads[thread].join();
}
if (VERBOSE) {
System.out.println("TEST: done join indexing threads [" + (System.currentTimeMillis()-t0) + " ms]; addCount=" + addCount + " delCount=" + delCount);
}
final IndexSearcher s = getFinalSearcher();
if (VERBOSE) {
System.out.println("TEST: finalSearcher=" + s);
}
assertFalse(failed.get());
boolean doFail = false;
// Verify: make sure delIDs are in fact deleted:
for(String id : delIDs) {
final TopDocs hits = s.search(new TermQuery(new Term("docid", id)), 1);
if (hits.totalHits != 0) {
System.out.println("doc id=" + id + " is supposed to be deleted, but got " + hits.totalHits + " hits; first docID=" + hits.scoreDocs[0].doc);
doFail = true;
}
}
// Verify: make sure delPackIDs are in fact deleted:
for(String id : delPackIDs) {
final TopDocs hits = s.search(new TermQuery(new Term("packID", id)), 1);
if (hits.totalHits != 0) {
System.out.println("packID=" + id + " is supposed to be deleted, but got " + hits.totalHits + " matches");
doFail = true;
}
}
// Verify: make sure each group of sub-docs are still in docID order:
for(SubDocs subDocs : allSubDocs) {
TopDocs hits = s.search(new TermQuery(new Term("packID", subDocs.packID)), 20);
if (!subDocs.deleted) {
// We sort by relevance but the scores should be identical so sort falls back to by docID:
if (hits.totalHits != subDocs.subIDs.size()) {
System.out.println("packID=" + subDocs.packID + ": expected " + subDocs.subIDs.size() + " hits but got " + hits.totalHits);
doFail = true;
} else {
int lastDocID = -1;
int startDocID = -1;
for(ScoreDoc scoreDoc : hits.scoreDocs) {
final int docID = scoreDoc.doc;
if (lastDocID != -1) {
assertEquals(1+lastDocID, docID);
} else {
startDocID = docID;
}
lastDocID = docID;
final Document doc = s.doc(docID);
assertEquals(subDocs.packID, doc.get("packID"));
}
lastDocID = startDocID - 1;
for(String subID : subDocs.subIDs) {
hits = s.search(new TermQuery(new Term("docid", subID)), 1);
assertEquals(1, hits.totalHits);
final int docID = hits.scoreDocs[0].doc;
if (lastDocID != -1) {
assertEquals(1+lastDocID, docID);
}
lastDocID = docID;
}
}
} else {
// Pack was deleted -- make sure its docs are
// deleted. We can't verify packID is deleted
// because we can re-use packID for update:
for(String subID : subDocs.subIDs) {
assertEquals(0, s.search(new TermQuery(new Term("docid", subID)), 1).totalHits);
}
}
}
// Verify: make sure all not-deleted docs are in fact
// not deleted:
final int endID = Integer.parseInt(docs.nextDoc().get("docid"));
docs.close();
for(int id=0;id<endID;id++) {
String stringID = ""+id;
if (!delIDs.contains(stringID)) {
final TopDocs hits = s.search(new TermQuery(new Term("docid", stringID)), 1);
if (hits.totalHits != 1) {
System.out.println("doc id=" + stringID + " is not supposed to be deleted, but got hitCount=" + hits.totalHits);
doFail = true;
}
}
}
assertFalse(doFail);
assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), s.getIndexReader().numDocs());
releaseSearcher(s);
writer.commit();
assertEquals("index=" + writer.segString() + " addCount=" + addCount + " delCount=" + delCount, addCount.get() - delCount.get(), writer.numDocs());
doClose();
writer.close(false);
// Cannot shutdown until after writer is closed because
// writer has merged segment warmer that uses IS to run
// searches, and that IS may be using this es!
if (es != null) {
es.shutdown();
es.awaitTermination(1, TimeUnit.SECONDS);
}
_TestUtil.checkIndex(dir);
dir.close();
_TestUtil.rmDir(tempDir);
if (VERBOSE) {
System.out.println("TEST: done [" + (System.currentTimeMillis()-t0) + " ms]");
}
}
// in lucene/test-framework/src/java/org/apache/lucene/index/ThreadedIndexingAndSearchingTestCase.java
Override
public void warm(AtomicReader reader) throws IOException {
if (VERBOSE) {
System.out.println("TEST: now warm merged reader=" + reader);
}
final int maxDoc = reader.maxDoc();
final Bits liveDocs = reader.getLiveDocs();
int sum = 0;
final int inc = Math.max(1, maxDoc/50);
for(int docID=0;docID<maxDoc;docID += inc) {
if (liveDocs == null || liveDocs.get(docID)) {
final Document doc = reader.document(docID);
sum += doc.getFields().size();
}
}
IndexSearcher searcher = newSearcher(reader);
sum += searcher.search(new TermQuery(new Term("body", "united")), 10).totalHits;
if (VERBOSE) {
System.out.println("TEST: warm visited " + sum + " fields");
}
}
// in lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
Override
public Fields getTermVectors(int docID) throws IOException {
Fields f = super.getTermVectors(docID);
if (f == null) {
return null;
}
f = new FieldFilterFields(f);
// we need to check for emptiness, so we can return null:
return (f.iterator().next() == null) ? null : f;
}
// in lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
Override
public void document(final int docID, final StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
super.document(docID, new StoredFieldVisitor() {
@Override
public void binaryField(FieldInfo fieldInfo, byte[] value, int offset, int length) throws IOException {
visitor.binaryField(fieldInfo, value, offset, length);
}
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
visitor.stringField(fieldInfo, value);
}
@Override
public void intField(FieldInfo fieldInfo, int value) throws IOException {
visitor.intField(fieldInfo, value);
}
@Override
public void longField(FieldInfo fieldInfo, long value) throws IOException {
visitor.longField(fieldInfo, value);
}
@Override
public void floatField(FieldInfo fieldInfo, float value) throws IOException {
visitor.floatField(fieldInfo, value);
}
@Override
public void doubleField(FieldInfo fieldInfo, double value) throws IOException {
visitor.doubleField(fieldInfo, value);
}
@Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
return hasField(fieldInfo.name) ? visitor.needsField(fieldInfo) : Status.NO;
}
});
}
// in lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
Override
public void binaryField(FieldInfo fieldInfo, byte[] value, int offset, int length) throws IOException {
visitor.binaryField(fieldInfo, value, offset, length);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
visitor.stringField(fieldInfo, value);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
Override
public void intField(FieldInfo fieldInfo, int value) throws IOException {
visitor.intField(fieldInfo, value);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
Override
public void longField(FieldInfo fieldInfo, long value) throws IOException {
visitor.longField(fieldInfo, value);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
Override
public void floatField(FieldInfo fieldInfo, float value) throws IOException {
visitor.floatField(fieldInfo, value);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
Override
public void doubleField(FieldInfo fieldInfo, double value) throws IOException {
visitor.doubleField(fieldInfo, value);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
return hasField(fieldInfo.name) ? visitor.needsField(fieldInfo) : Status.NO;
}
// in lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
Override
public Fields fields() throws IOException {
final Fields f = super.fields();
return (f == null) ? null : new FieldFilterFields(f);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
Override
public DocValues docValues(String field) throws IOException {
return hasField(field) ? super.docValues(field) : null;
}
// in lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
Override
public DocValues normValues(String field) throws IOException {
return hasField(field) ? super.normValues(field) : null;
}
// in lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
Override
public int size() throws IOException {
// TODO: add faster implementation!
int c = 0;
final FieldsEnum it = iterator();
while (it.next() != null) {
c++;
}
return c;
}
// in lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
Override
public FieldsEnum iterator() throws IOException {
return new FilterFieldsEnum(super.iterator()) {
@Override
public String next() throws IOException {
String f;
while ((f = super.next()) != null) {
if (hasField(f)) return f;
}
return null;
}
};
}
// in lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
Override
public String next() throws IOException {
String f;
while ((f = super.next()) != null) {
if (hasField(f)) return f;
}
return null;
}
// in lucene/test-framework/src/java/org/apache/lucene/index/FieldFilterAtomicReader.java
Override
public Terms terms(String field) throws IOException {
return hasField(field) ? super.terms(field) : null;
}
// in lucene/test-framework/src/java/org/apache/lucene/index/DocHelper.java
public static SegmentInfoPerCommit writeDoc(Random random, Directory dir, Document doc) throws IOException
{
return writeDoc(random, dir, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), null, doc);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/DocHelper.java
public static SegmentInfoPerCommit writeDoc(Random random, Directory dir, Analyzer analyzer, Similarity similarity, Document doc) throws IOException {
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig( /* LuceneTestCase.newIndexWriterConfig(random, */
TEST_VERSION_CURRENT, analyzer).setSimilarity(similarity));
//writer.setUseCompoundFile(false);
writer.addDocument(doc);
writer.commit();
SegmentInfoPerCommit info = writer.newestSegment();
writer.close();
return info;
}
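// Illustrative sketch (not from the sources above): a typical use of
// DocHelper.writeDoc in a test -- write a single document into a fresh
// directory and get back the resulting SegmentInfoPerCommit. Field setup
// is elided because the exact Field constructors are not shown here.
static SegmentInfoPerCommit writeSingleDoc(Directory dir) throws IOException {
  Random random = new Random(0);
  Document doc = new Document();
  // ... add fields to doc here (Field constructors omitted) ...
  return DocHelper.writeDoc(random, dir, doc);
}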
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public <T extends IndexableField> void addDocument(final Iterable<T> doc) throws IOException {
addDocument(doc, w.getAnalyzer());
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public <T extends IndexableField> void addDocument(final Iterable<T> doc, Analyzer a) throws IOException {
if (doDocValues && doc instanceof Document) {
randomPerDocFieldValues((Document) doc);
}
if (r.nextInt(5) == 3) {
// TODO: maybe, we should simply buffer up added docs
// (but we need to clone them), and only when
// getReader, commit, etc. are called, we do an
// addDocuments? Would be better testing.
w.addDocuments(new Iterable<Iterable<T>>() {
@Override
public Iterator<Iterable<T>> iterator() {
return new Iterator<Iterable<T>>() {
boolean done;
@Override
public boolean hasNext() {
return !done;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
@Override
public Iterable<T> next() {
if (done) {
throw new IllegalStateException();
}
done = true;
return doc;
}
};
}
}, a);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
private void maybeCommit() throws IOException {
if (docCount++ == flushAt) {
if (LuceneTestCase.VERBOSE) {
System.out.println("RIW.add/updateDocument: now doing a commit at docCount=" + docCount);
}
w.commit();
flushAt += _TestUtil.nextInt(r, (int) (flushAtFactor * 10), (int) (flushAtFactor * 1000));
if (flushAtFactor < 2e6) {
// gradually but exponentially increase time b/w flushes
flushAtFactor *= 1.05;
}
switchDoDocValues();
}
}
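// Illustrative sketch (not part of RandomIndexWriter): how the commit
// threshold grows under the maybeCommit logic above. The factor is
// multiplied by 1.05 after each commit (capped at 2e6), so the gap
// between commits grows roughly exponentially. Names here are
// hypothetical; this just replays the same arithmetic with java.util.Random.
static void printCommitSchedule(Random r, int commits) {
  int flushAt = 0;
  double flushAtFactor = 1.0;
  for (int i = 0; i < commits; i++) {
    int lo = (int) (flushAtFactor * 10);
    int hi = (int) (flushAtFactor * 1000);
    flushAt += lo + r.nextInt(hi - lo + 1); // same bounds as _TestUtil.nextInt(r, lo, hi)
    if (flushAtFactor < 2e6) {
      flushAtFactor *= 1.05;
    }
    System.out.println("commit " + i + " at docCount=" + flushAt);
  }
}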
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public void addDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs) throws IOException {
w.addDocuments(docs);
maybeCommit();
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public void updateDocuments(Term delTerm, Iterable<? extends Iterable<? extends IndexableField>> docs) throws IOException {
w.updateDocuments(delTerm, docs);
maybeCommit();
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public <T extends IndexableField> void updateDocument(Term t, final Iterable<T> doc) throws IOException {
if (doDocValues) {
randomPerDocFieldValues((Document) doc);
}
if (r.nextInt(5) == 3) {
w.updateDocuments(t, new Iterable<Iterable<T>>() {
@Override
public Iterator<Iterable<T>> iterator() {
return new Iterator<Iterable<T>>() {
boolean done;
@Override
public boolean hasNext() {
return !done;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
@Override
public Iterable<T> next() {
if (done) {
throw new IllegalStateException();
}
done = true;
return doc;
}
};
}
});
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public void addIndexes(Directory... dirs) throws CorruptIndexException, IOException {
w.addIndexes(dirs);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public void addIndexes(IndexReader... readers) throws CorruptIndexException, IOException {
w.addIndexes(readers);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
w.deleteDocuments(term);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public void deleteDocuments(Query q) throws CorruptIndexException, IOException {
w.deleteDocuments(q);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public void commit() throws CorruptIndexException, IOException {
w.commit();
switchDoDocValues();
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public int numDocs() throws IOException {
return w.numDocs();
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public void deleteAll() throws IOException {
w.deleteAll();
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public DirectoryReader getReader() throws IOException {
return getReader(true);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public void forceMergeDeletes(boolean doWait) throws IOException {
w.forceMergeDeletes(doWait);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public void forceMergeDeletes() throws IOException {
w.forceMergeDeletes();
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
private void doRandomForceMerge() throws IOException {
if (doRandomForceMerge) {
final int segCount = w.getSegmentCount();
if (r.nextBoolean() || segCount == 0) {
// full forceMerge
w.forceMerge(1);
} else {
// partial forceMerge
final int limit = _TestUtil.nextInt(r, 1, segCount);
w.forceMerge(limit);
assert !doRandomForceMergeAssert || w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount();
}
}
switchDoDocValues();
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public DirectoryReader getReader(boolean applyDeletions) throws IOException {
getReaderCalled = true;
if (r.nextInt(20) == 2) {
doRandomForceMerge();
}
if (!applyDeletions || r.nextBoolean()) {
if (LuceneTestCase.VERBOSE) {
System.out.println("RIW.getReader: use NRT reader");
}
if (r.nextInt(5) == 1) {
w.commit();
}
return w.getReader(applyDeletions);
} else {
if (LuceneTestCase.VERBOSE) {
System.out.println("RIW.getReader: open new reader");
}
w.commit();
switchDoDocValues();
if (r.nextBoolean()) {
return DirectoryReader.open(w.getDirectory(), _TestUtil.nextInt(r, 1, 10));
} else {
return w.getReader(applyDeletions);
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public void close() throws IOException {
// if someone isn't using getReader() API, we want to be sure to
// forceMerge since presumably they might open a reader on the dir.
if (getReaderCalled == false && r.nextInt(8) == 2) {
doRandomForceMerge();
}
w.close();
}
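// Illustrative sketch (not from the sources above): the usual
// RandomIndexWriter lifecycle in a test -- add documents, pull a reader,
// then close writer, reader and directory. The RandomIndexWriter(Random,
// Directory) constructor is assumed here; addDocument, getReader and
// close appear in the snippets above. Field setup is elided.
static void indexAndSearch(Random random) throws IOException {
  Directory dir = LuceneTestCase.newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random, dir);
  Document doc = new Document();
  // ... add fields to doc ...
  w.addDocument(doc);
  DirectoryReader r = w.getReader();
  w.close();
  // ... run searches against r ...
  r.close();
  dir.close();
}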
// in lucene/test-framework/src/java/org/apache/lucene/index/RandomIndexWriter.java
public void forceMerge(int maxSegmentCount) throws IOException {
w.forceMerge(maxSegmentCount);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java
Override
public MergeSpecification findForcedMerges(
SegmentInfos segmentInfos, int maxSegmentCount, Map<SegmentInfoPerCommit,Boolean> segmentsToMerge)
throws CorruptIndexException, IOException {
final List<SegmentInfoPerCommit> eligibleSegments = new ArrayList<SegmentInfoPerCommit>();
for(SegmentInfoPerCommit info : segmentInfos) {
if (segmentsToMerge.containsKey(info)) {
eligibleSegments.add(info);
}
}
//System.out.println("MRMP: findMerges sis=" + segmentInfos + " eligible=" + eligibleSegments);
MergeSpecification mergeSpec = null;
if (eligibleSegments.size() > 1 || (eligibleSegments.size() == 1 && eligibleSegments.get(0).hasDeletions())) {
mergeSpec = new MergeSpecification();
// Already shuffled having come out of a set but
// shuffle again for good measure:
Collections.shuffle(eligibleSegments, random);
int upto = 0;
while(upto < eligibleSegments.size()) {
int max = Math.min(10, eligibleSegments.size()-upto);
int inc = max <= 2 ? max : _TestUtil.nextInt(random, 2, max);
mergeSpec.add(new OneMerge(eligibleSegments.subList(upto, upto+inc)));
upto += inc;
}
}
if (mergeSpec != null) {
for(OneMerge merge : mergeSpec.merges) {
for(SegmentInfoPerCommit info : merge.segments) {
assert segmentsToMerge.containsKey(info);
}
}
}
return mergeSpec;
}
// in lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java
Override
public MergeSpecification findForcedDeletesMerges(
SegmentInfos segmentInfos)
throws CorruptIndexException, IOException {
return findMerges(segmentInfos);
}
// in lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java
Override
public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
// 80% of the time we create CFS:
return random.nextInt(5) != 1;
}
// in lucene/test-framework/src/java/org/apache/lucene/index/AlcoholicMergePolicy.java
Override
//@BlackMagic(level=Voodoo);
protected long size(SegmentInfoPerCommit info) throws IOException {
int hourOfDay = calendar.get(Calendar.HOUR_OF_DAY);
if (hourOfDay < 6 ||
hourOfDay > 20 ||
// it's 5 o'clock somewhere
random.nextInt(23) == 5) {
Drink[] values = Drink.values();
// pick a random drink during the day
return values[random.nextInt(values.length)].drunkFactor * info.sizeInBytes();
}
return info.sizeInBytes();
}
// in lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
public static void rmDir(File dir) throws IOException {
if (dir.exists()) {
if (dir.isFile()) {
// not a directory: delete the file and return, since listFiles()
// below would return null for a non-directory
if (!dir.delete()) {
throw new IOException("could not delete " + dir);
}
return;
}
for (File f : dir.listFiles()) {
if (f.isDirectory()) {
rmDir(f);
} else {
if (!f.delete()) {
throw new IOException("could not delete " + f);
}
}
}
if (!dir.delete()) {
throw new IOException("could not delete " + dir);
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
public static void unzip(File zipName, File destDir) throws IOException {
ZipFile zipFile = new ZipFile(zipName);
Enumeration<? extends ZipEntry> entries = zipFile.entries();
rmDir(destDir);
destDir.mkdir();
LuceneTestCase.closeAfterSuite(new CloseableFile(destDir));
while (entries.hasMoreElements()) {
ZipEntry entry = entries.nextElement();
InputStream in = zipFile.getInputStream(entry);
File targetFile = new File(destDir, entry.getName());
if (entry.isDirectory()) {
// allow unzipping with directory structure
targetFile.mkdirs();
} else {
if (targetFile.getParentFile()!=null) {
// be on the safe side: do not rely on directories always being extracted
// before their children (this usually holds, but is it guaranteed?)
targetFile.getParentFile().mkdirs();
}
OutputStream out = new BufferedOutputStream(new FileOutputStream(targetFile));
byte[] buffer = new byte[8192];
int len;
while((len = in.read(buffer)) >= 0) {
out.write(buffer, 0, len);
}
in.close();
out.close();
}
}
zipFile.close();
}
// in lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
public static CheckIndex.Status checkIndex(Directory dir) throws IOException {
return checkIndex(dir, true);
}
// in lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
public static CheckIndex.Status checkIndex(Directory dir, boolean crossCheckTermVectors) throws IOException {
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.setCrossCheckTermVectors(crossCheckTermVectors);
checker.setInfoStream(new PrintStream(bos), false);
CheckIndex.Status indexStatus = checker.checkIndex(null);
if (indexStatus == null || indexStatus.clean == false) {
System.out.println("CheckIndex failed");
System.out.println(bos.toString());
throw new RuntimeException("CheckIndex failed");
} else {
if (LuceneTestCase.INFOSTREAM) {
System.out.println(bos.toString());
}
return indexStatus;
}
}
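// Illustrative sketch (not from the sources above): _TestUtil.checkIndex
// is typically called at the end of a test; it throws a RuntimeException
// (after dumping the CheckIndex output) if the index is not clean, so the
// test fails loudly on corruption.
static CheckIndex.Status verifyIndex(Directory dir) throws IOException {
  CheckIndex.Status status = _TestUtil.checkIndex(dir);
  // if we get here, status.clean is true; otherwise checkIndex would have thrown
  return status;
}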
// in lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException {
String[] files = dir.listAll();
if (files.length > 1 || (files.length == 1 && !files[0].equals("write.lock"))) {
return true;
} else {
return false;
}
}
// in lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
public static File createTempFile(String prefix, String suffix, File directory)
throws IOException {
// Calling length() first forces a NullPointerException if prefix is null
if (prefix.length() < 3) {
throw new IllegalArgumentException("prefix must be at least 3 characters");
}
String newSuffix = suffix == null ? ".tmp" : suffix;
File result;
do {
result = genTempFile(prefix, newSuffix, directory);
} while (!result.createNewFile());
return result;
}
// in lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
public static DocsEnum docs(Random random, IndexReader r, String field, BytesRef term, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
final Terms terms = MultiFields.getTerms(r, field);
if (terms == null) {
return null;
}
final TermsEnum termsEnum = terms.iterator(null);
if (!termsEnum.seekExact(term, random.nextBoolean())) {
return null;
}
if (random.nextBoolean()) {
if (random.nextBoolean()) {
// TODO: cast re-use to D&PE if we can...?
DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, true);
if (docsAndPositions == null) {
docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, false);
}
if (docsAndPositions != null) {
return docsAndPositions;
}
}
final DocsEnum docsAndFreqs = termsEnum.docs(liveDocs, reuse, true);
if (docsAndFreqs != null) {
return docsAndFreqs;
}
}
return termsEnum.docs(liveDocs, reuse, needsFreqs);
}
// in lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
public static DocsEnum docs(Random random, TermsEnum termsEnum, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
if (random.nextBoolean()) {
if (random.nextBoolean()) {
// TODO: cast re-use to D&PE if we can...?
DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, true);
if (docsAndPositions == null) {
docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, false);
}
if (docsAndPositions != null) {
return docsAndPositions;
}
}
final DocsEnum docsAndFreqs = termsEnum.docs(liveDocs, null, true);
if (docsAndFreqs != null) {
return docsAndFreqs;
}
}
return termsEnum.docs(liveDocs, null, needsFreqs);
}
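// Illustrative sketch (not from the sources above): draining whichever
// DocsEnum flavour the randomized helper above returns. The field and
// term are hypothetical; NO_MORE_DOCS comes from DocIdSetIterator.
static int countDocs(Random random, IndexReader reader) throws IOException {
  DocsEnum de = _TestUtil.docs(random, reader, "body", new BytesRef("united"), null, null, false);
  if (de == null) {
    return 0; // field or term is absent
  }
  int count = 0;
  while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    count++;
  }
  return count;
}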
// in lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
public static FieldInfos getFieldInfos(SegmentInfo info) throws IOException {
Directory cfsDir = null;
try {
if (info.getUseCompoundFile()) {
cfsDir = new CompoundFileDirectory(info.dir,
IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION),
IOContext.READONCE,
false);
} else {
cfsDir = info.dir;
}
return info.getCodec().fieldInfosFormat().getFieldInfosReader().read(cfsDir,
info.name,
IOContext.READONCE);
} finally {
if (info.getUseCompoundFile() && cfsDir != null) {
cfsDir.close();
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java
public synchronized void close() throws IOException {
if (reader != null) {
reader.close();
reader = null;
}
}
// in lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java
private synchronized void open(Random random) throws IOException {
InputStream is = getClass().getResourceAsStream(path);
boolean needSkip = true;
long size = 0L, seekTo = 0L;
if (is == null) {
// if it's not on the classpath, we load it as an absolute filesystem path (e.g. Hudson's home dir)
File file = new File(path);
size = file.length();
if (path.endsWith(".gz")) {
// if it is a gzip file, we need to use InputStream and slowly skipTo:
is = new FileInputStream(file);
} else {
// optimized seek using RandomAccessFile:
seekTo = randomSeekPos(random, size);
final FileChannel channel = new RandomAccessFile(path, "r").getChannel();
if (LuceneTestCase.VERBOSE) {
System.out.println("TEST: LineFileDocs: file seek to fp=" + seekTo + " on open");
}
channel.position(seekTo);
is = Channels.newInputStream(channel);
needSkip = false;
}
} else {
// if the file comes from Classpath:
size = is.available();
}
if (path.endsWith(".gz")) {
is = new GZIPInputStream(is);
// rough guesstimate of the uncompressed size:
size *= 2.8;
}
// If we only have an InputStream, we need to seek now,
// but this seek is a scan, so very inefficient!!!
if (needSkip) {
seekTo = randomSeekPos(random, size);
if (LuceneTestCase.VERBOSE) {
System.out.println("TEST: LineFileDocs: stream skip to fp=" + seekTo + " on open");
}
is.skip(seekTo);
}
// if we seeked somewhere, read until newline char
if (seekTo > 0L) {
int b;
do {
b = is.read();
} while (b >= 0 && b != 13 && b != 10);
}
CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
reader = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE);
if (seekTo > 0L) {
// read one more line, to make sure we are not inside a Windows linebreak (\r\n):
reader.readLine();
}
}
// in lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java
public synchronized void reset(Random random) throws IOException {
close();
open(random);
id.set(0);
}
// in lucene/test-framework/src/java/org/apache/lucene/util/LineFileDocs.java
public Document nextDoc() throws IOException {
String line;
synchronized(this) {
line = reader.readLine();
if (line == null) {
// Always rewind at end:
if (LuceneTestCase.VERBOSE) {
System.out.println("TEST: LineFileDocs: now rewind file...");
}
close();
open(null);
line = reader.readLine();
}
}
DocState docState = threadDocs.get();
if (docState == null) {
docState = new DocState(useDocValues);
threadDocs.set(docState);
}
int spot = line.indexOf(SEP);
if (spot == -1) {
throw new RuntimeException("line: [" + line + "] is in an invalid format !");
}
int spot2 = line.indexOf(SEP, 1 + spot);
if (spot2 == -1) {
throw new RuntimeException("line: [" + line + "] is in an invalid format !");
}
docState.body.setStringValue(line.substring(1+spot2, line.length()));
final String title = line.substring(0, spot);
docState.title.setStringValue(title);
if (docState.titleDV != null) {
docState.titleDV.setBytesValue(new BytesRef(title));
}
docState.titleTokenized.setStringValue(title);
docState.date.setStringValue(line.substring(1+spot, spot2));
docState.id.setStringValue(Integer.toString(id.getAndIncrement()));
return docState.doc;
}
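// Illustrative sketch (not from the sources above): the parsing in
// nextDoc() implies each line has the shape title SEP date SEP body,
// where SEP is the single separator character defined elsewhere in
// LineFileDocs (its actual value is not shown here; '\t' is assumed
// below purely for illustration).
static String[] parseLine(String line) {
  final char SEP = '\t'; // assumption -- see lead-in above
  int spot = line.indexOf(SEP);
  int spot2 = spot == -1 ? -1 : line.indexOf(SEP, 1 + spot);
  if (spot == -1 || spot2 == -1) {
    throw new IllegalArgumentException("line: [" + line + "] is in an invalid format !");
  }
  String title = line.substring(0, spot);
  String date = line.substring(1 + spot, spot2);
  String body = line.substring(1 + spot2);
  return new String[] { title, date, body };
}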
// in lucene/test-framework/src/java/org/apache/lucene/util/CloseableFile.java
Override
public void close() throws IOException {
if (file.exists()) {
try {
_TestUtil.rmDir(file);
} catch (IOException e) {
// Ignore the exception from rmDir.
}
// Re-check.
if (file.exists()) {
throw new IOException(
"Could not remove: " + file.getAbsolutePath());
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/util/FailOnNonBulkMergesInfoStream.java
Override
public void close() throws IOException {
}
// in lucene/test-framework/src/java/org/apache/lucene/util/NullInfoStream.java
Override
public void close() throws IOException {
}
// in lucene/test-framework/src/java/org/apache/lucene/util/CloseableDirectory.java
Override
public void close() throws IOException {
// We only attempt to check open/closed state if there were no other test
// failures.
try {
if (failureMarker.wasSuccessful() && dir.isOpen()) {
Assert.fail("Directory not closed: " + dir);
}
} finally {
// TODO: perform real close of the delegate: LUCENE-4058
// dir.close();
}
}
// in lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java
Override
public void flush() throws IOException {
sleep(flushDelayMillis);
delegate.flush();
}
// in lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java
Override
public void close() throws IOException {
try {
sleep(closeDelayMillis + getDelay(true));
} finally {
delegate.close();
}
}
// in lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java
Override
public void seek(long pos) throws IOException {
sleep(seekDelayMillis);
delegate.seek(pos);
}
// in lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java
Override
public long length() throws IOException {
return delegate.length();
}
// in lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java
Override
public void writeByte(byte b) throws IOException {
bytes[0] = b;
writeBytes(bytes, 0, 1);
}
// in lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java
Override
public void writeBytes(byte[] b, int offset, int length) throws IOException {
final long before = System.nanoTime();
delegate.writeBytes(b, offset, length);
timeElapsed += System.nanoTime() - before;
pendingBytes += length;
sleep(getDelay(false));
}
// in lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java
Override
public void setLength(long length) throws IOException {
delegate.setLength(length);
}
// in lucene/test-framework/src/java/org/apache/lucene/util/ThrottledIndexOutput.java
Override
public void copyBytes(DataInput input, long numBytes) throws IOException {
delegate.copyBytes(input, numBytes);
}
// in lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
public static MockDirectoryWrapper newDirectory() throws IOException {
return newDirectory(random());
}
// in lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
public static MockDirectoryWrapper newDirectory(Random r) throws IOException {
Directory impl = newDirectoryImpl(r, TEST_DIRECTORY);
MockDirectoryWrapper dir = new MockDirectoryWrapper(r, maybeNRTWrap(r, impl));
closeAfterSuite(new CloseableDirectory(dir, suiteFailureMarker));
dir.setThrottling(TEST_THROTTLING);
if (VERBOSE) {
System.out.println("NOTE: LuceneTestCase.newDirectory: returning " + dir);
}
return dir;
}
// in lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
public static MockDirectoryWrapper newDirectory(Directory d) throws IOException {
return newDirectory(random(), d);
}
// in lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
public static MockDirectoryWrapper newFSDirectory(File f) throws IOException {
return newFSDirectory(f, null);
}
// in lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
public static MockDirectoryWrapper newFSDirectory(File f, LockFactory lf) throws IOException {
String fsdirClass = TEST_DIRECTORY;
if (fsdirClass.equals("random")) {
fsdirClass = RandomPicks.randomFrom(random(), FS_DIRECTORIES);
}
Class<? extends FSDirectory> clazz;
try {
try {
clazz = CommandLineUtil.loadFSDirectoryClass(fsdirClass);
} catch (ClassCastException e) {
// TEST_DIRECTORY is not a sub-class of FSDirectory, so draw one at random
fsdirClass = RandomPicks.randomFrom(random(), FS_DIRECTORIES);
clazz = CommandLineUtil.loadFSDirectoryClass(fsdirClass);
}
Directory fsdir = newFSDirectoryImpl(clazz, f);
MockDirectoryWrapper dir = new MockDirectoryWrapper(
random(), maybeNRTWrap(random(), fsdir));
if (lf != null) {
dir.setLockFactory(lf);
}
closeAfterSuite(new CloseableDirectory(dir, suiteFailureMarker));
dir.setThrottling(TEST_THROTTLING);
return dir;
} catch (Exception e) {
throw new RuntimeException(e);
}
}
// in lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
public static MockDirectoryWrapper newDirectory(Random r, Directory d) throws IOException {
Directory impl = newDirectoryImpl(r, TEST_DIRECTORY);
for (String file : d.listAll()) {
d.copy(impl, file, file, newIOContext(r));
}
MockDirectoryWrapper dir = new MockDirectoryWrapper(r, maybeNRTWrap(r, impl));
closeAfterSuite(new CloseableDirectory(dir, suiteFailureMarker));
dir.setThrottling(TEST_THROTTLING);
return dir;
}
// in lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
private static Directory newFSDirectoryImpl(
Class<? extends FSDirectory> clazz, File file)
throws IOException {
FSDirectory d = null;
try {
d = CommandLineUtil.newFSDirectory(clazz, file);
} catch (Exception e) {
d = FSDirectory.open(file);
}
return d;
}
// in lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
public static IndexReader maybeWrapReader(IndexReader r) throws IOException {
Random random = random();
if (rarely()) {
// TODO: remove this, and fix those tests to wrap before putting slow around:
final boolean wasOriginallyAtomic = r instanceof AtomicReader;
for (int i = 0, c = random.nextInt(6)+1; i < c; i++) {
switch(random.nextInt(4)) {
case 0:
r = SlowCompositeReaderWrapper.wrap(r);
break;
case 1:
// will create no FC insanity in atomic case, as ParallelAtomicReader has own cache key:
r = (r instanceof AtomicReader) ?
new ParallelAtomicReader((AtomicReader) r) :
new ParallelCompositeReader((CompositeReader) r);
break;
case 2:
// Häckidy-Hick-Hack: a standard MultiReader will cause FC insanity, so we use
// QueryUtils' reader with a fake cache key, so insanity checker cannot walk
// along our reader:
r = new FCInvisibleMultiReader(r);
break;
case 3:
final AtomicReader ar = SlowCompositeReaderWrapper.wrap(r);
final List<String> allFields = new ArrayList<String>();
for (FieldInfo fi : ar.getFieldInfos()) {
allFields.add(fi.name);
}
Collections.shuffle(allFields, random);
final int end = allFields.isEmpty() ? 0 : random.nextInt(allFields.size());
final Set<String> fields = new HashSet<String>(allFields.subList(0, end));
// will create no FC insanity as ParallelAtomicReader has own cache key:
r = new ParallelAtomicReader(
new FieldFilterAtomicReader(ar, fields, false),
new FieldFilterAtomicReader(ar, fields, true)
);
break;
default:
fail("should not get here");
}
}
if (wasOriginallyAtomic) {
r = SlowCompositeReaderWrapper.wrap(r);
} else if ((r instanceof CompositeReader) && !(r instanceof FCInvisibleMultiReader)) {
// prevent cache insanity caused by e.g. ParallelCompositeReader, to fix we wrap one more time:
r = new FCInvisibleMultiReader(r);
}
if (VERBOSE) {
System.out.println("maybeWrapReader wrapped: " +r);
}
}
return r;
}
// in lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
public static IndexSearcher newSearcher(IndexReader r) throws IOException {
return newSearcher(r, true);
}
// in lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
public static IndexSearcher newSearcher(IndexReader r, boolean maybeWrap) throws IOException {
Random random = random();
if (usually()) {
if (maybeWrap) {
r = maybeWrapReader(r);
}
IndexSearcher ret = random.nextBoolean() ? new AssertingIndexSearcher(random, r) : new AssertingIndexSearcher(random, r.getTopReaderContext());
ret.setSimilarity(classEnvRule.similarity);
return ret;
} else {
int threads = 0;
final ThreadPoolExecutor ex;
if (random.nextBoolean()) {
ex = null;
} else {
threads = _TestUtil.nextInt(random, 1, 8);
ex = new ThreadPoolExecutor(threads, threads, 0L, TimeUnit.MILLISECONDS,
new LinkedBlockingQueue<Runnable>(),
new NamedThreadFactory("LuceneTestCase"));
// uncomment to intensify LUCENE-3840
// ex.prestartAllCoreThreads();
}
if (ex != null) {
if (VERBOSE) {
System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads");
}
r.addReaderClosedListener(new ReaderClosedListener() {
@Override
public void onClose(IndexReader reader) {
_TestUtil.shutdownExecutorService(ex);
}
});
}
IndexSearcher ret = random.nextBoolean()
? new AssertingIndexSearcher(random, r, ex)
: new AssertingIndexSearcher(random, r.getTopReaderContext(), ex);
ret.setSimilarity(classEnvRule.similarity);
return ret;
}
}
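// Illustrative sketch (not from the sources above): the usual pattern in
// a test is to let the framework pick the searcher flavour (plain,
// wrapped reader, or executor-backed) instead of constructing an
// IndexSearcher directly. Field and term are hypothetical.
static void searchBody(DirectoryReader reader) throws IOException {
  IndexSearcher searcher = LuceneTestCase.newSearcher(reader);
  TopDocs hits = searcher.search(new TermQuery(new Term("body", "united")), 10);
  // ... assert on hits.totalHits ...
}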
// in lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java
protected File getDataFile(String name) throws IOException {
try {
return new File(this.getClass().getResource(name).toURI());
} catch (Exception e) {
throw new IOException("Cannot find resource: " + name);
}
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
Override
public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException {
// Must only use extension, because IW.addIndexes can
// rename segment!
final IntStreamFactory f = delegates.get((Math.abs(salt ^ getExtension(fileName).hashCode())) % delegates.size());
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: read using int factory " + f + " from fileName=" + fileName);
}
return f.openInput(dir, fileName, context);
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
Override
public IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException {
final IntStreamFactory f = delegates.get((Math.abs(salt ^ getExtension(fileName).hashCode())) % delegates.size());
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: write using int factory " + f + " to fileName=" + fileName);
}
return f.createOutput(dir, fileName, context);
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
// we pull this before the seed intentionally, because it's not consumed at runtime
// (the skipInterval is written into the postings header)
int skipInterval = _TestUtil.nextInt(seedRandom, 2, 10);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: skipInterval=" + skipInterval);
}
final long seed = seedRandom.nextLong();
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: writing to seg=" + state.segmentInfo.name + " formatID=" + state.segmentSuffix + " seed=" + seed);
}
final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
final IndexOutput out = state.directory.createOutput(seedFileName, state.context);
try {
out.writeLong(seed);
} finally {
out.close();
}
final Random random = new Random(seed);
random.nextInt(); // consume a random for buffersize
PostingsWriterBase postingsWriter;
if (random.nextBoolean()) {
postingsWriter = new SepPostingsWriter(state, new MockIntStreamFactory(random), skipInterval);
} else {
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: writing Standard postings");
}
postingsWriter = new Lucene40PostingsWriter(state, skipInterval);
}
if (random.nextBoolean()) {
final int totTFCutoff = _TestUtil.nextInt(random, 1, 20);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff);
}
postingsWriter = new PulsingPostingsWriter(totTFCutoff, postingsWriter);
}
final FieldsConsumer fields;
if (random.nextBoolean()) {
// Use BlockTree terms dict
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: writing BlockTree terms dict");
}
// TODO: would be nice to allow 1 but this is very
// slow to write
final int minTermsInBlock = _TestUtil.nextInt(random, 2, 100);
final int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random.nextInt(100));
boolean success = false;
try {
fields = new BlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
success = true;
} finally {
if (!success) {
postingsWriter.close();
}
}
} else {
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: writing Block terms dict");
}
boolean success = false;
final TermsIndexWriterBase indexWriter;
try {
if (random.nextBoolean()) {
state.termIndexInterval = _TestUtil.nextInt(random, 1, 100);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: fixed-gap terms index (tii=" + state.termIndexInterval + ")");
}
indexWriter = new FixedGapTermsIndexWriter(state);
} else {
final VariableGapTermsIndexWriter.IndexTermSelector selector;
final int n2 = random.nextInt(3);
if (n2 == 0) {
final int tii = _TestUtil.nextInt(random, 1, 100);
selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: variable-gap terms index (tii=" + tii + ")");
}
} else if (n2 == 1) {
final int docFreqThresh = _TestUtil.nextInt(random, 2, 100);
final int tii = _TestUtil.nextInt(random, 1, 100);
selector = new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii);
} else {
final long seed2 = random.nextLong();
final int gap = _TestUtil.nextInt(random, 2, 40);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: random-gap terms index (max gap=" + gap + ")");
}
selector = new VariableGapTermsIndexWriter.IndexTermSelector() {
final Random rand = new Random(seed2);
@Override
public boolean isIndexTerm(BytesRef term, TermStats stats) {
return rand.nextInt(gap) == gap/2;
}
@Override
public void newField(FieldInfo fieldInfo) {
}
};
}
indexWriter = new VariableGapTermsIndexWriter(state, selector);
}
success = true;
} finally {
if (!success) {
postingsWriter.close();
}
}
success = false;
try {
fields = new BlockTermsWriter(indexWriter, state, postingsWriter);
success = true;
} finally {
if (!success) {
try {
postingsWriter.close();
} finally {
indexWriter.close();
}
}
}
}
return fields;
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
final IndexInput in = state.dir.openInput(seedFileName, state.context);
final long seed = in.readLong();
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading from seg=" + state.segmentInfo.name + " formatID=" + state.segmentSuffix + " seed=" + seed);
}
in.close();
final Random random = new Random(seed);
int readBufferSize = _TestUtil.nextInt(random, 1, 4096);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: readBufferSize=" + readBufferSize);
}
PostingsReaderBase postingsReader;
if (random.nextBoolean()) {
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading Sep postings");
}
postingsReader = new SepPostingsReader(state.dir, state.fieldInfos, state.segmentInfo,
state.context, new MockIntStreamFactory(random), state.segmentSuffix);
} else {
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading Standard postings");
}
postingsReader = new Lucene40PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
}
if (random.nextBoolean()) {
final int totTFCutoff = _TestUtil.nextInt(random, 1, 20);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff);
}
postingsReader = new PulsingPostingsReader(postingsReader);
}
final FieldsProducer fields;
if (random.nextBoolean()) {
// Use BlockTree terms dict
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading BlockTree terms dict");
}
boolean success = false;
try {
fields = new BlockTreeTermsReader(state.dir,
state.fieldInfos,
state.segmentInfo.name,
postingsReader,
state.context,
state.segmentSuffix,
state.termsIndexDivisor);
success = true;
} finally {
if (!success) {
postingsReader.close();
}
}
} else {
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading Block terms dict");
}
final TermsIndexReaderBase indexReader;
boolean success = false;
try {
final boolean doFixedGap = random.nextBoolean();
// randomness diverges from writer, here:
if (state.termsIndexDivisor != -1) {
state.termsIndexDivisor = _TestUtil.nextInt(random, 1, 10);
}
if (doFixedGap) {
// if termsIndexDivisor is set to -1, we should not touch it. It means a
// test explicitly instructed not to load the terms index.
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: fixed-gap terms index (divisor=" + state.termsIndexDivisor + ")");
}
indexReader = new FixedGapTermsIndexReader(state.dir,
state.fieldInfos,
state.segmentInfo.name,
state.termsIndexDivisor,
BytesRef.getUTF8SortedAsUnicodeComparator(),
state.segmentSuffix, state.context);
} else {
final int n2 = random.nextInt(3);
if (n2 == 1) {
random.nextInt();
} else if (n2 == 2) {
random.nextLong();
}
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: variable-gap terms index (divisor=" + state.termsIndexDivisor + ")");
}
indexReader = new VariableGapTermsIndexReader(state.dir,
state.fieldInfos,
state.segmentInfo.name,
state.termsIndexDivisor,
state.segmentSuffix, state.context);
}
success = true;
} finally {
if (!success) {
postingsReader.close();
}
}
final int termsCacheSize = _TestUtil.nextInt(random, 1, 1024);
success = false;
try {
fields = new BlockTermsReader(indexReader,
state.dir,
state.fieldInfos,
state.segmentInfo.name,
postingsReader,
state.context,
termsCacheSize,
state.segmentSuffix);
success = true;
} finally {
if (!success) {
try {
postingsReader.close();
} finally {
indexReader.close();
}
}
}
}
return fields;
}
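// Illustrative sketch (not from the sources above): the core trick in
// MockRandomPostingsFormat is that the writer persists its random seed
// (writeLong above) and the reader restores it (readLong above), so both
// sides replay the identical sequence of random component choices
// (Sep vs Standard postings, pulsing, BlockTree vs Block terms). A
// stripped-down version of the same idea, with hypothetical names:
static void writeSideChoices(IndexOutput out, long seed) throws IOException {
  out.writeLong(seed);                       // persist the seed next to the data
  Random random = new Random(seed);
  boolean useSep = random.nextBoolean();     // choice #1 made while writing
  // ... encode using the chosen components ...
}
static boolean readSideChoices(IndexInput in) throws IOException {
  Random random = new Random(in.readLong()); // restore the very same seed
  return random.nextBoolean();               // replays choice #1 identically
}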
// in lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java
Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase docs = new Lucene40PostingsWriter(state);
// TODO: should we make the terms index more easily
// pluggable? Ie so that this codec would record which
// index impl was used, and switch on loading?
// Or... you must make a new Codec for this?
TermsIndexWriterBase indexWriter;
boolean success = false;
try {
indexWriter = new FixedGapTermsIndexWriter(state);
success = true;
} finally {
if (!success) {
docs.close();
}
}
success = false;
try {
// Must use BlockTermsWriter (not BlockTree) because
// BlockTree doesn't support ords (yet)...
FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, docs);
success = true;
return ret;
} finally {
if (!success) {
try {
docs.close();
} finally {
indexWriter.close();
}
}
}
}
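// Illustrative sketch (not from the sources above): the recurring
// success-flag idiom in the fieldsConsumer/fieldsProducer methods,
// reduced to its shape. A resource opened earlier is closed only if a
// later step throws; on success it is handed to the returned object,
// which becomes responsible for closing it. openFirst and openSecond
// are hypothetical helpers standing in for the postings writer and the
// terms dictionary above.
static Closeable openBoth() throws IOException {
  final Closeable first = openFirst();       // hypothetical helper
  boolean success = false;
  try {
    final Closeable second = openSecond(first); // hypothetical helper; second takes ownership of first
    success = true;
    return second;
  } finally {
    if (!success) {
      first.close();                         // undo partial construction
    }
  }
}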
// in lucene/test-framework/src/java/org/apache/lucene/codecs/lucene40ords/Lucene40WithOrds.java
Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase postings = new Lucene40PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
TermsIndexReaderBase indexReader;
boolean success = false;
try {
indexReader = new FixedGapTermsIndexReader(state.dir,
state.fieldInfos,
state.segmentInfo.name,
state.termsIndexDivisor,
BytesRef.getUTF8SortedAsUnicodeComparator(),
state.segmentSuffix, state.context);
success = true;
} finally {
if (!success) {
postings.close();
}
}
success = false;
try {
FieldsProducer ret = new BlockTermsReader(indexReader,
state.dir,
state.fieldInfos,
state.segmentInfo.name,
postings,
state.context,
TERMS_CACHE_SIZE,
state.segmentSuffix);
success = true;
return ret;
} finally {
if (!success) {
try {
postings.close();
} finally {
indexReader.close();
}
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java
Override
public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException {
return new FixedIntBlockIndexInput(dir.openInput(fileName, context)) {
@Override
protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) throws IOException {
return new BlockReader() {
public void seek(long pos) {}
public void readBlock() throws IOException {
for(int i=0;i<buffer.length;i++) {
buffer[i] = in.readVInt();
}
}
};
}
};
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java
Override
protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) throws IOException {
return new BlockReader() {
public void seek(long pos) {}
public void readBlock() throws IOException {
for(int i=0;i<buffer.length;i++) {
buffer[i] = in.readVInt();
}
}
};
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java
public void readBlock() throws IOException {
for(int i=0;i<buffer.length;i++) {
buffer[i] = in.readVInt();
}
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java
Override
public IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException {
IndexOutput out = dir.createOutput(fileName, context);
boolean success = false;
try {
FixedIntBlockIndexOutput ret = new FixedIntBlockIndexOutput(out, blockSize) {
@Override
protected void flushBlock() throws IOException {
for(int i=0;i<buffer.length;i++) {
out.writeVInt(buffer[i]);
}
}
};
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(out);
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java
Override
protected void flushBlock() throws IOException {
for(int i=0;i<buffer.length;i++) {
out.writeVInt(buffer[i]);
}
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java
Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase postingsWriter = new SepPostingsWriter(state, new MockIntFactory(blockSize));
boolean success = false;
TermsIndexWriterBase indexWriter;
try {
indexWriter = new FixedGapTermsIndexWriter(state);
success = true;
} finally {
if (!success) {
postingsWriter.close();
}
}
success = false;
try {
FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter);
success = true;
return ret;
} finally {
if (!success) {
try {
postingsWriter.close();
} finally {
indexWriter.close();
}
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockFixedIntBlockPostingsFormat.java
Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase postingsReader = new SepPostingsReader(state.dir,
state.fieldInfos,
state.segmentInfo,
state.context,
new MockIntFactory(blockSize), state.segmentSuffix);
TermsIndexReaderBase indexReader;
boolean success = false;
try {
indexReader = new FixedGapTermsIndexReader(state.dir,
state.fieldInfos,
state.segmentInfo.name,
state.termsIndexDivisor,
BytesRef.getUTF8SortedAsUnicodeComparator(), state.segmentSuffix,
IOContext.DEFAULT);
success = true;
} finally {
if (!success) {
postingsReader.close();
}
}
success = false;
try {
FieldsProducer ret = new BlockTermsReader(indexReader,
state.dir,
state.fieldInfos,
state.segmentInfo.name,
postingsReader,
state.context,
1024,
state.segmentSuffix);
success = true;
return ret;
} finally {
if (!success) {
try {
postingsReader.close();
} finally {
indexReader.close();
}
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java
Override
public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException {
final IndexInput in = dir.openInput(fileName, context);
final int baseBlockSize = in.readInt();
return new VariableIntBlockIndexInput(in) {
@Override
protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) throws IOException {
return new BlockReader() {
public void seek(long pos) {}
public int readBlock() throws IOException {
buffer[0] = in.readVInt();
final int count = buffer[0] <= 3 ? baseBlockSize-1 : 2*baseBlockSize-1;
assert buffer.length >= count: "buffer.length=" + buffer.length + " count=" + count;
for(int i=0;i<count;i++) {
buffer[i+1] = in.readVInt();
}
return 1+count;
}
};
}
};
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java
Override
protected BlockReader getBlockReader(final IndexInput in, final int[] buffer) throws IOException {
return new BlockReader() {
public void seek(long pos) {}
public int readBlock() throws IOException {
buffer[0] = in.readVInt();
final int count = buffer[0] <= 3 ? baseBlockSize-1 : 2*baseBlockSize-1;
assert buffer.length >= count: "buffer.length=" + buffer.length + " count=" + count;
for(int i=0;i<count;i++) {
buffer[i+1] = in.readVInt();
}
return 1+count;
}
};
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java
public int readBlock() throws IOException {
buffer[0] = in.readVInt();
final int count = buffer[0] <= 3 ? baseBlockSize-1 : 2*baseBlockSize-1;
assert buffer.length >= count: "buffer.length=" + buffer.length + " count=" + count;
for(int i=0;i<count;i++) {
buffer[i+1] = in.readVInt();
}
return 1+count;
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java
Override
public IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException {
final IndexOutput out = dir.createOutput(fileName, context);
boolean success = false;
try {
out.writeInt(baseBlockSize);
VariableIntBlockIndexOutput ret = new VariableIntBlockIndexOutput(out, 2*baseBlockSize) {
int pendingCount;
final int[] buffer = new int[2+2*baseBlockSize];
@Override
protected int add(int value) throws IOException {
buffer[pendingCount++] = value;
// silly variable block length int encoder: if
// first value <= 3, we write N vints at once;
// else, 2*N
final int flushAt = buffer[0] <= 3 ? baseBlockSize : 2*baseBlockSize;
// intentionally be non-causal here:
if (pendingCount == flushAt+1) {
for(int i=0;i<flushAt;i++) {
out.writeVInt(buffer[i]);
}
buffer[0] = buffer[flushAt];
pendingCount = 1;
return flushAt;
} else {
return 0;
}
}
};
success = true;
return ret;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(out);
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java
Override
protected int add(int value) throws IOException {
buffer[pendingCount++] = value;
// silly variable block length int encoder: if
// first value <= 3, we write N vints at once;
// else, 2*N
final int flushAt = buffer[0] <= 3 ? baseBlockSize : 2*baseBlockSize;
// intentionally be non-causal here:
if (pendingCount == flushAt+1) {
for(int i=0;i<flushAt;i++) {
out.writeVInt(buffer[i]);
}
buffer[0] = buffer[flushAt];
pendingCount = 1;
return flushAt;
} else {
return 0;
}
}
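// Illustrative sketch (not from the sources above): the block-length rule
// shared by add() above and readBlock() earlier -- a block holds
// baseBlockSize ints when its first value is <= 3, otherwise
// 2*baseBlockSize ints -- so the reader can recover the block length
// from the first value alone.
static int blockLength(int firstValue, int baseBlockSize) {
  return firstValue <= 3 ? baseBlockSize : 2 * baseBlockSize;
}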
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java
Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase postingsWriter = new SepPostingsWriter(state, new MockIntFactory(baseBlockSize));
boolean success = false;
TermsIndexWriterBase indexWriter;
try {
indexWriter = new FixedGapTermsIndexWriter(state);
success = true;
} finally {
if (!success) {
postingsWriter.close();
}
}
success = false;
try {
FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter);
success = true;
return ret;
} finally {
if (!success) {
try {
postingsWriter.close();
} finally {
indexWriter.close();
}
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mockintblock/MockVariableIntBlockPostingsFormat.java
Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase postingsReader = new SepPostingsReader(state.dir,
state.fieldInfos,
state.segmentInfo,
state.context,
new MockIntFactory(baseBlockSize), state.segmentSuffix);
TermsIndexReaderBase indexReader;
boolean success = false;
try {
indexReader = new FixedGapTermsIndexReader(state.dir,
state.fieldInfos,
state.segmentInfo.name,
state.termsIndexDivisor,
BytesRef.getUTF8SortedAsUnicodeComparator(),
state.segmentSuffix, state.context);
success = true;
} finally {
if (!success) {
postingsReader.close();
}
}
success = false;
try {
FieldsProducer ret = new BlockTermsReader(indexReader,
state.dir,
state.fieldInfos,
state.segmentInfo.name,
postingsReader,
state.context,
1024,
state.segmentSuffix);
success = true;
return ret;
} finally {
if (!success) {
try {
postingsReader.close();
} finally {
indexReader.close();
}
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java
Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase docsWriter = new Lucene40PostingsWriter(state);
PostingsWriterBase pulsingWriterInner = new PulsingPostingsWriter(2, docsWriter);
PostingsWriterBase pulsingWriter = new PulsingPostingsWriter(1, pulsingWriterInner);
// Terms dict
boolean success = false;
try {
FieldsConsumer ret = new BlockTreeTermsWriter(state, pulsingWriter,
BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
success = true;
return ret;
} finally {
if (!success) {
pulsingWriter.close();
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java
Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase docsReader = new Lucene40PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
PostingsReaderBase pulsingReaderInner = new PulsingPostingsReader(docsReader);
PostingsReaderBase pulsingReader = new PulsingPostingsReader(pulsingReaderInner);
boolean success = false;
try {
FieldsProducer ret = new BlockTreeTermsReader(
state.dir, state.fieldInfos, state.segmentInfo.name,
pulsingReader,
state.context,
state.segmentSuffix,
state.termsIndexDivisor);
success = true;
return ret;
} finally {
if (!success) {
pulsingReader.close();
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
Override
public long getSumDocFreq() throws IOException {
return sumDocFreq;
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
Override
public int getDocCount() throws IOException {
return docCount;
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
Override
public int freq() throws IOException {
return current.positions.length;
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
Override
public int freq() throws IOException {
return current.positions.length;
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
Override
public FieldsConsumer fieldsConsumer(SegmentWriteState writeState) throws IOException {
final int id = nextID.getAndIncrement();
// TODO -- ok to do this up front instead of
// on close....? should be ok?
// Write our ID:
final String idFileName = IndexFileNames.segmentFileName(writeState.segmentInfo.name, writeState.segmentSuffix, ID_EXTENSION);
IndexOutput out = writeState.directory.createOutput(idFileName, writeState.context);
boolean success = false;
try {
CodecUtil.writeHeader(out, RAM_ONLY_NAME, VERSION_LATEST);
out.writeVInt(id);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(out);
} else {
IOUtils.close(out);
}
}
final RAMPostings postings = new RAMPostings();
final RAMFieldsConsumer consumer = new RAMFieldsConsumer(postings);
synchronized(state) {
state.put(id, postings);
}
return consumer;
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java
Override
public FieldsProducer fieldsProducer(SegmentReadState readState)
throws IOException {
// Load our ID:
final String idFileName = IndexFileNames.segmentFileName(readState.segmentInfo.name, readState.segmentSuffix, ID_EXTENSION);
IndexInput in = readState.dir.openInput(idFileName, readState.context);
boolean success = false;
final int id;
try {
CodecUtil.checkHeader(in, RAM_ONLY_NAME, VERSION_START, VERSION_LATEST);
id = in.readVInt();
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(in);
} else {
IOUtils.close(in);
}
}
synchronized(state) {
return state.get(id);
}
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexOutput.java
Override
public void write(int v) throws IOException {
out.writeVInt(v);
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexOutput.java
Override
public void close() throws IOException {
out.close();
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexOutput.java
Override
public void write(IndexOutput indexOut, boolean absolute)
throws IOException {
if (absolute) {
indexOut.writeVLong(fp);
} else {
indexOut.writeVLong(fp - lastFP);
}
lastFP = fp;
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepDocValuesFormat.java
Override
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
return new SepDocValuesConsumer(state);
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepDocValuesFormat.java
Override
public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
return new SepDocValuesProducer(state);
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntFactory.java
Override
public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException {
return new MockSingleIntIndexInput(dir, fileName, context);
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntFactory.java
Override
public IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException {
return new MockSingleIntIndexOutput(dir, fileName, context);
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexInput.java
Override
public Reader reader() throws IOException {
return new Reader((IndexInput) in.clone());
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexInput.java
Override
public void close() throws IOException {
in.close();
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexInput.java
Override
public int next() throws IOException {
//System.out.println("msii.next() fp=" + in.getFilePointer() + " vs " + in.length());
return in.readVInt();
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexInput.java
Override
public void read(DataInput indexIn, boolean absolute)
throws IOException {
if (absolute) {
fp = indexIn.readVLong();
} else {
fp += indexIn.readVLong();
}
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSingleIntIndexInput.java
Override
public void seek(IntIndexInput.Reader other) throws IOException {
((Reader) other).in.seek(fp);
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java
Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase postingsWriter = new SepPostingsWriter(state, new MockSingleIntFactory());
boolean success = false;
TermsIndexWriterBase indexWriter;
try {
indexWriter = new FixedGapTermsIndexWriter(state);
success = true;
} finally {
if (!success) {
postingsWriter.close();
}
}
success = false;
try {
FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter);
success = true;
return ret;
} finally {
if (!success) {
try {
postingsWriter.close();
} finally {
indexWriter.close();
}
}
}
}
// in lucene/test-framework/src/java/org/apache/lucene/codecs/mocksep/MockSepPostingsFormat.java
Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase postingsReader = new SepPostingsReader(state.dir, state.fieldInfos, state.segmentInfo,
state.context, new MockSingleIntFactory(), state.segmentSuffix);
TermsIndexReaderBase indexReader;
boolean success = false;
try {
indexReader = new FixedGapTermsIndexReader(state.dir,
state.fieldInfos,
state.segmentInfo.name,
state.termsIndexDivisor,
BytesRef.getUTF8SortedAsUnicodeComparator(),
state.segmentSuffix, state.context);
success = true;
} finally {
if (!success) {
postingsReader.close();
}
}
success = false;
try {
FieldsProducer ret = new BlockTermsReader(indexReader,
state.dir,
state.fieldInfos,
state.segmentInfo.name,
postingsReader,
state.context,
1024,
state.segmentSuffix);
success = true;
return ret;
} finally {
if (!success) {
try {
postingsReader.close();
} finally {
indexReader.close();
}
}
}
}
// in lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java
public void execute() throws IOException {
final SortedSet<String> TLDs = getIANARootZoneDatabase();
writeOutput(TLDs);
System.err.println("Wrote " + TLDs.size() + " top level domains to '"
+ outputFile + "'.");
}
// in lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java
private SortedSet<String> getIANARootZoneDatabase() throws IOException {
final SortedSet<String> TLDs = new TreeSet<String>();
final URLConnection connection = tldFileURL.openConnection();
connection.setUseCaches(false);
connection.addRequestProperty("Cache-Control", "no-cache");
connection.connect();
tldFileLastModified = connection.getLastModified();
BufferedReader reader = new BufferedReader
(new InputStreamReader(connection.getInputStream(), "US-ASCII"));
try {
String line;
while (null != (line = reader.readLine())) {
Matcher matcher = TLD_PATTERN_1.matcher(line);
if (matcher.matches()) {
TLDs.add(matcher.group(1).toLowerCase(Locale.US));
} else {
matcher = TLD_PATTERN_2.matcher(line);
if (matcher.matches()) {
TLDs.add(matcher.group(1).toLowerCase(Locale.US));
}
}
}
} finally {
reader.close();
}
return TLDs;
}
// in lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java
private void writeOutput(SortedSet<String> ASCIITLDs) throws IOException {
final DateFormat dateFormat = DateFormat.getDateTimeInstance
(DateFormat.FULL, DateFormat.FULL, Locale.US);
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
final Writer writer = new OutputStreamWriter
(new FileOutputStream(outputFile), "UTF-8");
try {
writer.write(APACHE_LICENSE);
writer.write("// Generated from IANA Root Zone Database <");
writer.write(tldFileURL.toString());
writer.write(">");
writer.write(NL);
if (tldFileLastModified > 0L) {
writer.write("// file version from ");
writer.write(dateFormat.format(tldFileLastModified));
writer.write(NL);
}
writer.write("// generated on ");
writer.write(dateFormat.format(new Date()));
writer.write(NL);
writer.write("// by ");
writer.write(this.getClass().getName());
writer.write(NL);
writer.write(NL);
writer.write("ASCIITLD = \".\" (");
writer.write(NL);
boolean isFirst = true;
for (String ASCIITLD : ASCIITLDs) {
writer.write("\t");
if (isFirst) {
isFirst = false;
writer.write(" ");
} else {
writer.write("| ");
}
writer.write(getCaseInsensitiveRegex(ASCIITLD));
writer.write(NL);
}
writer.write("\t) \".\"? // Accept trailing root (empty) domain");
writer.write(NL);
writer.write(NL);
} finally {
writer.close();
}
}
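// Shape of the generated macro, as implied by the write calls above (the per-TLD alternatives come
// from getCaseInsensitiveRegex, presumably case-insensitive character classes such as [aA][cC]):
//
//   ASCIITLD = "." (
//         <first TLD regex>
//       | <second TLD regex>
//       | ...
//       ) "."? // Accept trailing root (empty) domain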
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java
Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (firstTokenPositioned) {
posIncrAtt.setPositionIncrement(positionIncrement);
} else {
firstTokenPositioned = true;
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java
Override
public void reset() throws IOException {
super.reset();
firstTokenPositioned = false;
}
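// Behavior note: every token after the first has its position increment overwritten with the
// configured value (0 by default), effectively stacking the whole stream at a single position;
// reset() re-arms the "first token" special case.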
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
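// A minimal consumer-side sketch (not from the Lucene sources) of the loop that drives
// incrementToken() in this and the other TokenFilters below; "analyzer" is assumed to be any
// Analyzer whose chain ends in such a filter.
static void printTerms(Analyzer analyzer, String field, String text) throws IOException {
TokenStream ts = analyzer.tokenStream(field, new StringReader(text));
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
ts.reset();                    // mandatory before the first incrementToken()
while (ts.incrementToken()) {  // each call advances the whole filter chain by one token
System.out.println(term.toString()); // stemmed term, unless marked as a keyword
}
ts.end();                      // lets the chain report final offset state
ts.close();
}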
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceFilter.java
Override
public boolean incrementToken() throws IOException {
if (!input.incrementToken()) return false;
m.reset();
if (m.find()) {
// replaceAll/replaceFirst will reset() this previous find.
String transformed = all ? m.replaceAll(replacement) : m.replaceFirst(replacement);
termAtt.setEmpty().append(transformed);
}
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
Override
public boolean incrementToken() throws IOException {
if (index >= str.length()) return false;
clearAttributes();
if (group >= 0) {
// match a specific group
while (matcher.find()) {
index = matcher.start(group);
final int endIndex = matcher.end(group);
if (index == endIndex) continue;
termAtt.setEmpty().append(str, index, endIndex);
offsetAtt.setOffset(correctOffset(index), correctOffset(endIndex));
return true;
}
index = Integer.MAX_VALUE; // mark exhausted
return false;
} else {
// String.split() functionality
while (matcher.find()) {
if (matcher.start() - index > 0) {
// found a non-zero-length token
termAtt.setEmpty().append(str, index, matcher.start());
offsetAtt.setOffset(correctOffset(index), correctOffset(matcher.start()));
index = matcher.end();
return true;
}
index = matcher.end();
}
if (str.length() - index == 0) {
index = Integer.MAX_VALUE; // mark exhausted
return false;
}
termAtt.setEmpty().append(str, index, str.length());
offsetAtt.setOffset(correctOffset(index), correctOffset(str.length()));
index = Integer.MAX_VALUE; // mark exhausted
return true;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
Override
public void end() throws IOException {
final int ofs = correctOffset(str.length());
offsetAtt.setOffset(ofs, ofs);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
Override
public void reset(Reader input) throws IOException {
super.reset(input);
fillBuffer(str, input);
matcher.reset(str);
index = 0;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
private void fillBuffer(StringBuilder sb, Reader input) throws IOException {
int len;
sb.setLength(0);
while ((len = input.read(buffer)) > 0) {
sb.append(buffer, 0, len);
}
}
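// A minimal usage sketch (not from this file); the (Reader, Pattern, group) constructor is assumed
// from the fields used above: group = -1 splits on the pattern (String.split() style), group >= 0
// emits the text of that capturing group for each match.
Tokenizer byDelimiter = new PatternTokenizer(new StringReader("a; b; c"), Pattern.compile(";\\s*"), -1);
// -> "a", "b", "c"
Tokenizer byGroup = new PatternTokenizer(new StringReader("'first' and 'second'"), Pattern.compile("'([^']+)'"), 1);
// -> "first", "second"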
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java
Override
public int read(char[] cbuf, int off, int len) throws IOException {
// Buffer all input on the first call.
if (transformedInput == null) {
StringBuilder buffered = new StringBuilder();
char [] temp = new char [1024];
for (int cnt = input.read(temp); cnt > 0; cnt = input.read(temp)) {
buffered.append(temp, 0, cnt);
}
transformedInput = new StringReader(processPattern(buffered).toString());
}
return transformedInput.read(cbuf, off, len);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
private static Step parseStep(LineNumberReader r, String header) throws IOException {
Matcher matcher = headerPattern.matcher(header);
if (!matcher.find()) {
throw new RuntimeException("Illegal Step header specified at line " + r.getLineNumber());
}
assert matcher.groupCount() == 4;
String name = matcher.group(1);
int min = Integer.parseInt(matcher.group(2));
int type = Integer.parseInt(matcher.group(3));
String suffixes[] = parseList(matcher.group(4));
Rule rules[] = parseRules(r, type);
return new Step(name, rules, min, suffixes);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
private static Rule[] parseRules(LineNumberReader r, int type) throws IOException {
List<Rule> rules = new ArrayList<Rule>();
String line;
while ((line = readLine(r)) != null) {
Matcher matcher = stripPattern.matcher(line);
if (matcher.matches()) {
rules.add(new Rule(matcher.group(1), Integer.parseInt(matcher.group(2)), ""));
} else {
matcher = repPattern.matcher(line);
if (matcher.matches()) {
rules.add(new Rule(matcher.group(1), Integer.parseInt(matcher.group(2)), matcher.group(3)));
} else {
matcher = excPattern.matcher(line);
if (matcher.matches()) {
if (type == 0) {
rules.add(new RuleWithSuffixExceptions(matcher.group(1),
Integer.parseInt(matcher.group(2)),
matcher.group(3),
parseList(matcher.group(4))));
} else {
rules.add(new RuleWithSetExceptions(matcher.group(1),
Integer.parseInt(matcher.group(2)),
matcher.group(3),
parseList(matcher.group(4))));
}
} else {
throw new RuntimeException("Illegal Step rule specified at line " + r.getLineNumber());
}
}
}
if (line.endsWith(";"))
return rules.toArray(new Rule[rules.size()]);
}
return null;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
private static String readLine(LineNumberReader r) throws IOException {
String line = null;
while ((line = r.readLine()) != null) {
line = line.trim();
if (line.length() > 0 && line.charAt(0) != '#')
return line;
}
return line;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
// this stemmer increases word length by 1: worst case '*ã' -> '*ão'
final int len = termAtt.length();
final int newlen = stemmer.stem(termAtt.resizeBuffer(len+1), len);
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
Override
public boolean incrementToken() throws IOException {
// get the next piece of input
if (savedState != null) {
restoreState(savedState);
savedState = null;
saveTermBuffer();
return true;
} else if (!input.incrementToken()) {
return false;
}
/* We build n-grams before and after stopwords.
* When valid, the buffer always contains at least the separator.
* If it's empty, there is nothing before this stopword.
*/
if (lastWasCommon || (isCommon() && buffer.length() > 0)) {
savedState = captureState();
gramToken();
return true;
}
saveTermBuffer();
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
Override
public void reset() throws IOException {
super.reset();
lastWasCommon = false;
savedState = null;
buffer.setLength(0);
}
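// Behavior note: common words are emitted both as themselves and as "_"-joined bigrams with their
// neighbours, e.g. with "the" configured as common, "the quick fox" roughly yields
// "the", "the_quick" (position increment 0), "quick", "fox".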
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java
Override
public void reset() throws IOException {
super.reset();
previous = null;
previousType = null;
exhausted = false;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java
Override
public boolean incrementToken() throws IOException {
while (!exhausted && input.incrementToken()) {
State current = captureState();
if (previous != null && !isGramType()) {
restoreState(previous);
previous = current;
previousType = typeAttribute.type();
if (isGramType()) {
posIncAttribute.setPositionIncrement(1);
}
return true;
}
previous = current;
}
exhausted = true;
if (previous == null || GRAM_TYPE.equals(previousType)) {
return false;
}
restoreState(previous);
previous = null;
if (isGramType()) {
posIncAttribute.setPositionIncrement(1);
}
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
int len = termAtt.length();
if (marker != NOMARKER) {
len++;
termAtt.resizeBuffer(len);
termAtt.buffer()[len - 1] = marker;
}
reverse( matchVersion, termAtt.buffer(), 0, len );
termAtt.setLength(len);
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if(!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if(!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekLowerCaseFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
char[] chArray = termAtt.buffer();
int chLen = termAtt.length();
for (int i = 0; i < chLen;) {
i += Character.toChars(
lowerCase(charUtils.codePointAt(chArray, i)), chArray, i);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianLightStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianMinimalStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
Override
public final boolean incrementToken() throws IOException {
clearAttributes();
termAtt.append( resultToken );
if(resultToken.length() == 0){
posAtt.setPositionIncrement(1);
}
else{
posAtt.setPositionIncrement(0);
}
int length = 0;
boolean added = false;
if( endDelimiter ){
termAtt.append(replacement);
length++;
endDelimiter = false;
added = true;
}
while (true) {
int c = input.read();
if (c >= 0) {
charsRead++;
} else {
if( skipped > skip ) {
length += resultToken.length();
termAtt.setLength(length);
offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition + length));
if( added ){
resultToken.setLength(0);
resultToken.append(termAtt.buffer(), 0, length);
}
return added;
}
else{
return false;
}
}
if( !added ){
added = true;
skipped++;
if( skipped > skip ){
termAtt.append(c == delimiter ? replacement : (char)c);
length++;
}
else {
startPosition++;
}
}
else {
if( c == delimiter ){
if( skipped > skip ){
endDelimiter = true;
break;
}
skipped++;
if( skipped > skip ){
termAtt.append(replacement);
length++;
}
else {
startPosition++;
}
}
else {
if( skipped > skip ){
termAtt.append((char)c);
length++;
}
else {
startPosition++;
}
}
}
}
length += resultToken.length();
termAtt.setLength(length);
offsetAtt.setOffset(correctOffset(startPosition), correctOffset(startPosition+length));
resultToken.setLength(0);
resultToken.append(termAtt.buffer(), 0, length);
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java
Override
public void reset() throws IOException {
super.reset();
resultToken.setLength(0);
charsRead = 0;
endDelimiter = false;
skipped = 0;
startPosition = 0;
}
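// Behavior note (default delimiter '/', skip = 0): "/usr/share/doc" is emitted as
// "/usr", "/usr/share", "/usr/share/doc", with position increment 0 for every token after the first.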
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java
Override
public final boolean incrementToken() throws IOException {
clearAttributes();
if(delimitersCount == -1){
int length = 0;
delimiterPositions.add(0);
while (true) {
int c = input.read();
if( c < 0 ) {
break;
}
length++;
if( c == delimiter ) {
delimiterPositions.add(length);
resultToken.append(replacement);
}
else{
resultToken.append((char)c);
}
}
delimitersCount = delimiterPositions.size();
if( delimiterPositions.get(delimitersCount-1) < length ){
delimiterPositions.add(length);
delimitersCount++;
}
if( resultTokenBuffer.length < resultToken.length() ){
resultTokenBuffer = new char[resultToken.length()];
}
resultToken.getChars(0, resultToken.length(), resultTokenBuffer, 0);
resultToken.setLength(0);
int idx = delimitersCount-1 - skip;
if (idx >= 0) {
// otherwise it's ok, because we will skip and return false
endPosition = delimiterPositions.get(idx);
}
finalOffset = correctOffset(length);
posAtt.setPositionIncrement(1);
}
else{
posAtt.setPositionIncrement(0);
}
while( skipped < delimitersCount-skip-1 ){
int start = delimiterPositions.get(skipped);
termAtt.copyBuffer(resultTokenBuffer, start, endPosition - start);
offsetAtt.setOffset(correctOffset(start), correctOffset(endPosition));
skipped++;
return true;
}
return false;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/path/ReversePathHierarchyTokenizer.java
Override
public void reset() throws IOException {
super.reset();
resultToken.setLength(0);
finalOffset = 0;
endPosition = 0;
skipped = 0;
delimitersCount = -1;
delimiterPositions.clear();
}
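// Behavior note (default delimiter '/', skip = 0): "/usr/share/doc" is emitted as
// "/usr/share/doc", "usr/share/doc", "share/doc", "doc" -- the same path trimmed from the left,
// again with position increment 0 after the first token.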
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/CharFilter.java
Override
public void close() throws IOException {
input.close();
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/CharFilter.java
Override
public int read(char[] cbuf, int off, int len) throws IOException {
return input.read(cbuf, off, len);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/CharFilter.java
Override
public void mark( int readAheadLimit ) throws IOException {
input.mark(readAheadLimit);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/CharFilter.java
Override
public void reset() throws IOException {
input.reset();
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java
Override
public void reset() throws IOException {
super.reset();
buffer.reset(input);
replacement = null;
inputOff = 0;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java
Override
public int read() throws IOException {
//System.out.println("\nread");
while(true) {
if (replacement != null && replacementPointer < replacement.length) {
//System.out.println(" return repl[" + replacementPointer + "]=" + replacement.chars[replacement.offset + replacementPointer]);
return replacement.chars[replacement.offset + replacementPointer++];
}
// TODO: a more efficient approach would be Aho/Corasick's
// algorithm
// (http://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_string_matching_algorithm)
// or this generalization: www.cis.uni-muenchen.de/people/Schulz/Pub/dictle5.ps
//
// I think this would be (almost?) equivalent to 1) adding
// epsilon arcs from all final nodes back to the init
// node in the FST, 2) adding a .* (skip any char)
// loop on the initial node, and 3) determinizing
// that. Then we would not have to restart matching
// at each position.
int lastMatchLen = -1;
CharsRef lastMatch = null;
final int firstCH = buffer.get(inputOff);
if (firstCH != -1) {
FST.Arc<CharsRef> arc = cachedRootArcs.get(Character.valueOf((char) firstCH));
if (arc != null) {
if (!FST.targetHasArcs(arc)) {
// Fast pass for single character match:
assert arc.isFinal();
lastMatchLen = 1;
lastMatch = arc.output;
} else {
int lookahead = 0;
CharsRef output = arc.output;
while (true) {
lookahead++;
if (arc.isFinal()) {
// Match! (to node is final)
lastMatchLen = lookahead;
lastMatch = outputs.add(output, arc.nextFinalOutput);
// Greedy: keep searching to see if there's a
// longer match...
}
if (!FST.targetHasArcs(arc)) {
break;
}
int ch = buffer.get(inputOff + lookahead);
if (ch == -1) {
break;
}
if ((arc = map.findTargetArc(ch, arc, scratchArc, fstReader)) == null) {
// Dead end
break;
}
output = outputs.add(output, arc.output);
}
}
}
}
if (lastMatch != null) {
inputOff += lastMatchLen;
//System.out.println(" match! len=" + lastMatchLen + " repl=" + lastMatch);
final int diff = lastMatchLen - lastMatch.length;
if (diff != 0) {
final int prevCumulativeDiff = getLastCumulativeDiff();
if (diff > 0) {
// Replacement is shorter than matched input:
addOffCorrectMap(inputOff - diff - prevCumulativeDiff, prevCumulativeDiff + diff);
} else {
// Replacement is longer than matched input: remap
// the "extra" chars all back to the same input
// offset:
final int outputStart = inputOff - prevCumulativeDiff;
for(int extraIDX=0;extraIDX<-diff;extraIDX++) {
addOffCorrectMap(outputStart + extraIDX, prevCumulativeDiff - extraIDX - 1);
}
}
}
replacement = lastMatch;
replacementPointer = 0;
} else {
final int ret = buffer.get(inputOff);
if (ret != -1) {
inputOff++;
buffer.freeBefore(inputOff);
}
return ret;
}
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilter.java
Override
public int read(char[] cbuf, int off, int len) throws IOException {
int numRead = 0;
for(int i = off; i < off + len; i++) {
int c = read();
if (c == -1) break;
cbuf[i] = (char) c;
numRead++;
}
return numRead == 0 ? -1 : numRead;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
Override
public int read() throws IOException {
if (outputSegment.isRead()) {
if (zzAtEOF) {
return -1;
}
int ch = nextChar();
++outputCharCount;
return ch;
}
int ch = outputSegment.nextChar();
++outputCharCount;
return ch;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
Override
public int read(char cbuf[], int off, int len) throws IOException {
int i = 0;
for ( ; i < len ; ++i) {
int ch = read();
if (ch == -1) break;
cbuf[off++] = (char)ch;
}
return i > 0 ? i : (len == 0 ? 0 : -1);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
Override
public void close() throws IOException {
yyclose();
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
private boolean zzRefill() throws java.io.IOException {
/* first: make room (if you can) */
if (zzStartRead > 0) {
System.arraycopy(zzBuffer, zzStartRead,
zzBuffer, 0,
zzEndRead-zzStartRead);
/* translate stored positions */
zzEndRead-= zzStartRead;
zzCurrentPos-= zzStartRead;
zzMarkedPos-= zzStartRead;
zzStartRead = 0;
}
/* is the buffer big enough? */
if (zzCurrentPos >= zzBuffer.length) {
/* if not: blow it up */
char newBuffer[] = new char[zzCurrentPos*2];
System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
zzBuffer = newBuffer;
}
/* finally: fill the buffer with new input */
int numRead = zzReader.read(zzBuffer, zzEndRead,
zzBuffer.length-zzEndRead);
if (numRead > 0) {
zzEndRead+= numRead;
return false;
}
// unlikely but not impossible: read 0 characters, but not at end of stream
if (numRead == 0) {
int c = zzReader.read();
if (c == -1) {
return true;
} else {
zzBuffer[zzEndRead++] = (char) c;
return false;
}
}
// numRead < 0
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
private final void yyclose() throws java.io.IOException {
zzAtEOF = true; /* indicate end of file */
zzEndRead = zzStartRead; /* invalidate buffer */
if (zzReader != null)
zzReader.close();
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
private int nextChar() throws java.io.IOException {
int zzInput;
int zzAction;
// cached fields:
int zzCurrentPosL;
int zzMarkedPosL;
int zzEndReadL = zzEndRead;
char [] zzBufferL = zzBuffer;
char [] zzCMapL = ZZ_CMAP;
int [] zzTransL = ZZ_TRANS;
int [] zzRowMapL = ZZ_ROWMAP;
int [] zzAttrL = ZZ_ATTRIBUTE;
while (true) {
zzMarkedPosL = zzMarkedPos;
yychar+= zzMarkedPosL-zzStartRead;
zzAction = -1;
zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
zzState = ZZ_LEXSTATE[zzLexicalState];
// set up zzAction for empty match case:
int zzAttributes = zzAttrL[zzState];
if ( (zzAttributes & 1) == 1 ) {
zzAction = zzState;
}
zzForAction: {
while (true) {
if (zzCurrentPosL < zzEndReadL)
zzInput = zzBufferL[zzCurrentPosL++];
else if (zzAtEOF) {
zzInput = YYEOF;
break zzForAction;
}
else {
// store back cached positions
zzCurrentPos = zzCurrentPosL;
zzMarkedPos = zzMarkedPosL;
boolean eof = zzRefill();
// get translated positions and possibly new buffer
zzCurrentPosL = zzCurrentPos;
zzMarkedPosL = zzMarkedPos;
zzBufferL = zzBuffer;
zzEndReadL = zzEndRead;
if (eof) {
zzInput = YYEOF;
break zzForAction;
}
else {
zzInput = zzBufferL[zzCurrentPosL++];
}
}
int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
if (zzNext == -1) break zzForAction;
zzState = zzNext;
zzAttributes = zzAttrL[zzState];
if ( (zzAttributes & 1) == 1 ) {
zzAction = zzState;
zzMarkedPosL = zzCurrentPosL;
if ( (zzAttributes & 8) == 8 ) break zzForAction;
}
}
}
// store back cached position
zzMarkedPos = zzMarkedPosL;
switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
case 15:
{
}
case 54: break;
case 39:
{ yybegin(STYLE);
}
case 55: break;
case 27:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return BLOCK_LEVEL_START_TAG_REPLACEMENT;
}
case 56: break;
case 30:
{ int length = yylength();
inputSegment.write(zzBuffer, zzStartRead, length);
entitySegment.clear();
char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
entitySegment.append(ch);
outputSegment = entitySegment;
yybegin(CHARACTER_REFERENCE_TAIL);
}
case 57: break;
case 48:
{ inputSegment.clear();
yybegin(YYINITIAL);
// add (previously matched input length) -- current match and substitution handled below
cumulativeDiff += yychar - inputStart;
// position the offset correction at (already output length) -- substitution handled below
int offsetCorrectionPos = outputCharCount;
int returnValue;
if (escapeSTYLE) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
returnValue = outputSegment.nextChar();
} else {
// add (this match length) - (substitution length)
cumulativeDiff += yylength() - 1;
// add (substitution length)
++offsetCorrectionPos;
returnValue = STYLE_REPLACEMENT;
}
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
return returnValue;
}
case 58: break;
case 8:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(START_TAG_TAIL_INCLUDE);
} else {
yybegin(START_TAG_TAIL_SUBSTITUTE);
}
}
case 59: break;
case 2:
{ inputStart = yychar;
inputSegment.clear();
inputSegment.append('<');
yybegin(LEFT_ANGLE_BRACKET);
}
case 60: break;
case 44:
{ restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 61: break;
case 21:
{ previousRestoreState = restoreState;
restoreState = SERVER_SIDE_INCLUDE;
yybegin(SINGLE_QUOTED_STRING);
}
case 62: break;
case 11:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
yybegin(LEFT_ANGLE_BRACKET_SPACE);
}
case 63: break;
case 35:
{ yybegin(SCRIPT);
}
case 64: break;
case 42:
{ restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 65: break;
case 10:
{ inputSegment.append('!'); yybegin(BANG);
}
case 66: break;
case 51:
{ // Handle paired UTF-16 surrogates.
String surrogatePair = yytext();
char highSurrogate = '\u0000';
char lowSurrogate = '\u0000';
try {
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
+ surrogatePair.substring(2, 6) + "'";
}
try { // Low surrogates are in decimal range [56320, 57343]
lowSurrogate = (char)Integer.parseInt(surrogatePair.substring(9, 14));
} catch(Exception e) { // should never happen
assert false: "Exception parsing low surrogate '"
+ surrogatePair.substring(9, 14) + "'";
}
if (Character.isLowSurrogate(lowSurrogate)) {
outputSegment = entitySegment;
outputSegment.clear();
outputSegment.unsafeWrite(lowSurrogate);
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 2;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return highSurrogate;
}
yypushback(surrogatePair.length() - 1); // Consume only '#'
inputSegment.append('#');
yybegin(NUMERIC_CHARACTER);
}
case 67: break;
case 4:
{ yypushback(1);
outputSegment = inputSegment;
outputSegment.restart();
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 68: break;
case 43:
{ restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 69: break;
case 52:
{ // Handle paired UTF-16 surrogates.
String surrogatePair = yytext();
char highSurrogate = '\u0000';
try { // High surrogates are in decimal range [55296, 56319]
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
+ surrogatePair.substring(1, 6) + "'";
}
if (Character.isHighSurrogate(highSurrogate)) {
outputSegment = entitySegment;
outputSegment.clear();
try {
outputSegment.unsafeWrite
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
} catch(Exception e) { // should never happen
assert false: "Exception parsing low surrogate '"
+ surrogatePair.substring(10, 14) + "'";
}
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 2;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return highSurrogate;
}
yypushback(surrogatePair.length() - 1); // Consume only '#'
inputSegment.append('#');
yybegin(NUMERIC_CHARACTER);
}
case 70: break;
case 28:
{ restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
}
case 71: break;
case 50:
{ // Handle paired UTF-16 surrogates.
outputSegment = entitySegment;
outputSegment.clear();
String surrogatePair = yytext();
char highSurrogate = '\u0000';
try {
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
+ surrogatePair.substring(2, 6) + "'";
}
try {
outputSegment.unsafeWrite
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
} catch(Exception e) { // should never happen
assert false: "Exception parsing low surrogate '"
+ surrogatePair.substring(10, 14) + "'";
}
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 2;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return highSurrogate;
}
case 72: break;
case 16:
{ restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
}
case 73: break;
case 22:
{ previousRestoreState = restoreState;
restoreState = SERVER_SIDE_INCLUDE;
yybegin(DOUBLE_QUOTED_STRING);
}
case 74: break;
case 26:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
outputSegment = inputSegment;
yybegin(YYINITIAL);
}
case 75: break;
case 20:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
}
case 76: break;
case 47:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(CDATA);
}
case 77: break;
case 33:
{ yybegin(YYINITIAL);
if (escapeBR) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
return outputSegment.nextChar();
} else {
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.reset();
return BR_START_TAG_REPLACEMENT;
}
}
case 78: break;
case 23:
{ yybegin(restoreState); restoreState = previousRestoreState;
}
case 79: break;
case 32:
{ yybegin(COMMENT);
}
case 80: break;
case 24:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 81: break;
case 3:
{ inputStart = yychar;
inputSegment.clear();
inputSegment.append('&');
yybegin(AMPERSAND);
}
case 82: break;
case 46:
{ yybegin(SCRIPT);
if (escapeSCRIPT) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
inputStart += 1 + yylength();
return outputSegment.nextChar();
}
}
case 83: break;
case 14:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
}
case 84: break;
case 6:
{ int matchLength = yylength();
inputSegment.write(zzBuffer, zzStartRead, matchLength);
if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
String decimalCharRef = yytext();
int codePoint = 0;
try {
codePoint = Integer.parseInt(decimalCharRef);
} catch(Exception e) {
assert false: "Exception parsing code point '" + decimalCharRef + "'";
}
if (codePoint <= 0x10FFFF) {
outputSegment = entitySegment;
outputSegment.clear();
if (codePoint >= Character.MIN_SURROGATE
&& codePoint <= Character.MAX_SURROGATE) {
outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
} else {
outputSegment.setLength
(Character.toChars(codePoint, outputSegment.getArray(), 0));
}
yybegin(CHARACTER_REFERENCE_TAIL);
} else {
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
} else {
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
}
case 85: break;
case 34:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
cumulativeDiff += yychar - inputStart + yylength();
// position the correction at (already output length) [ + (substitution length) = 0]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
}
case 86: break;
case 5:
{ inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
}
case 87: break;
case 13:
{ inputSegment.append(zzBuffer[zzStartRead]);
}
case 88: break;
case 18:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(END_TAG_TAIL_INCLUDE);
} else {
yybegin(END_TAG_TAIL_SUBSTITUTE);
}
}
case 89: break;
case 40:
{ yybegin(SCRIPT_COMMENT);
}
case 90: break;
case 37:
{ // add (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
yybegin(YYINITIAL);
}
case 91: break;
case 12:
{ inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
}
case 92: break;
case 9:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(START_TAG_TAIL_INCLUDE);
} else {
yybegin(START_TAG_TAIL_EXCLUDE);
}
}
case 93: break;
case 49:
{ inputSegment.clear();
yybegin(YYINITIAL);
// add (previously matched input length) -- current match and substitution handled below
cumulativeDiff += yychar - inputStart;
// position at (already output length) -- substitution handled below
int offsetCorrectionPos = outputCharCount;
int returnValue;
if (escapeSCRIPT) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
returnValue = outputSegment.nextChar();
} else {
// add (this match length) - (substitution length)
cumulativeDiff += yylength() - 1;
// add (substitution length)
++offsetCorrectionPos;
returnValue = SCRIPT_REPLACEMENT;
}
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
return returnValue;
}
case 94: break;
case 29:
{ restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
}
case 95: break;
case 17:
{ restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
}
case 96: break;
case 45:
{ yybegin(STYLE);
if (escapeSTYLE) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
inputStart += 1 + yylength();
return outputSegment.nextChar();
}
}
case 97: break;
case 7:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 98: break;
case 19:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(END_TAG_TAIL_INCLUDE);
} else {
yybegin(END_TAG_TAIL_EXCLUDE);
}
}
case 99: break;
case 25:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return BLOCK_LEVEL_END_TAG_REPLACEMENT;
}
case 100: break;
case 31:
{ int matchLength = yylength();
inputSegment.write(zzBuffer, zzStartRead, matchLength);
if (matchLength <= 6) { // 10FFFF: max 6 hex chars
String hexCharRef
= new String(zzBuffer, zzStartRead + 1, matchLength - 1);
int codePoint = 0;
try {
codePoint = Integer.parseInt(hexCharRef, 16);
} catch(Exception e) {
assert false: "Exception parsing hex code point '" + hexCharRef + "'";
}
if (codePoint <= 0x10FFFF) {
outputSegment = entitySegment;
outputSegment.clear();
if (codePoint >= Character.MIN_SURROGATE
&& codePoint <= Character.MAX_SURROGATE) {
outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
} else {
outputSegment.setLength
(Character.toChars(codePoint, outputSegment.getArray(), 0));
}
yybegin(CHARACTER_REFERENCE_TAIL);
} else {
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
} else {
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
}
case 101: break;
case 53:
{ // Handle paired UTF-16 surrogates.
String surrogatePair = yytext();
char highSurrogate = '\u0000';
try { // High surrogates are in decimal range [55296, 56319]
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
+ surrogatePair.substring(1, 6) + "'";
}
if (Character.isHighSurrogate(highSurrogate)) {
char lowSurrogate = '\u0000';
try { // Low surrogates are in decimal range [56320, 57343]
lowSurrogate = (char)Integer.parseInt(surrogatePair.substring(9, 14));
} catch(Exception e) { // should never happen
assert false: "Exception parsing low surrogate '"
+ surrogatePair.substring(9, 14) + "'";
}
if (Character.isLowSurrogate(lowSurrogate)) {
outputSegment = entitySegment;
outputSegment.clear();
outputSegment.unsafeWrite(lowSurrogate);
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 2;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return highSurrogate;
}
}
yypushback(surrogatePair.length() - 1); // Consume only '#'
inputSegment.append('#');
yybegin(NUMERIC_CHARACTER);
}
case 102: break;
case 36:
{ yybegin(YYINITIAL);
if (escapeBR) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
return outputSegment.nextChar();
} else {
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.reset();
return BR_END_TAG_REPLACEMENT;
}
}
case 103: break;
case 38:
{ yybegin(restoreState);
}
case 104: break;
case 41:
{ yybegin(STYLE_COMMENT);
}
case 105: break;
case 1:
{ return zzBuffer[zzStartRead];
}
case 106: break;
default:
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
zzAtEOF = true;
zzDoEOF();
{ return eofReturnValue;
}
}
else {
zzScanError(ZZ_NO_MATCH);
}
}
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java
Override
public final boolean incrementToken() throws IOException {
boolean iOrAfter = false;
if (input.incrementToken()) {
final char[] buffer = termAtt.buffer();
int length = termAtt.length();
for (int i = 0; i < length;) {
final int ch = Character.codePointAt(buffer, i);
iOrAfter = (ch == LATIN_CAPITAL_LETTER_I ||
(iOrAfter && Character.getType(ch) == Character.NON_SPACING_MARK));
if (iOrAfter) { // all the special I turkish handling happens here.
switch(ch) {
// remove COMBINING_DOT_ABOVE to mimic composed lowercase
case COMBINING_DOT_ABOVE:
length = delete(buffer, i, length);
continue;
// i itself, it depends if it is followed by COMBINING_DOT_ABOVE
// if it is, we will make it small i and later remove the dot
case LATIN_CAPITAL_LETTER_I:
if (isBeforeDot(buffer, i + 1, length)) {
buffer[i] = LATIN_SMALL_LETTER_I;
} else {
buffer[i] = LATIN_SMALL_LETTER_DOTLESS_I;
// below is an optimization. no COMBINING_DOT_ABOVE follows,
// so don't waste time calculating Character.getType(), etc
iOrAfter = false;
}
i++;
continue;
}
}
i += Character.toChars(Character.toLowerCase(ch), buffer, i);
}
termAtt.setLength(length);
return true;
} else
return false;
}
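// Behavior note: capital I without a following combining dot becomes dotless "ı", while a dotted
// capital "İ" (or "I" + COMBINING_DOT_ABOVE) becomes plain "i"; e.g.
// "ISPARTA" -> "ısparta", "İSTANBUL" -> "istanbul".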
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
final char[] buffer = termAtt.buffer();
final int length = termAtt.length();
// If no characters actually require rewriting then we
// just return token as-is:
for(int i = 0 ; i < length ; ++i) {
final char c = buffer[i];
if (c >= '\u0080')
{
foldToASCII(buffer, length);
termAtt.copyBuffer(output, 0, outputPos);
break;
}
}
return true;
} else {
return false;
}
}
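// Behavior note: only tokens containing a character at or above U+0080 are folded (hence the early
// scan above); e.g. "résumé" -> "resume", "über" -> "uber", while pure-ASCII tokens pass through unchanged.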
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
public boolean incrementToken() throws IOException {
while (true) {
if (!hasSavedState) {
// process a new input word
if (!input.incrementToken()) {
return false;
}
int termLength = termAttribute.length();
char[] termBuffer = termAttribute.buffer();
accumPosInc += posIncAttribute.getPositionIncrement();
iterator.setText(termBuffer, termLength);
iterator.next();
// word of no delimiters, or protected word: just return it
if ((iterator.current == 0 && iterator.end == termLength) ||
(protWords != null && protWords.contains(termBuffer, 0, termLength))) {
posIncAttribute.setPositionIncrement(accumPosInc);
accumPosInc = 0;
return true;
}
// word of simply delimiters
if (iterator.end == WordDelimiterIterator.DONE && !has(PRESERVE_ORIGINAL)) {
// if the posInc is 1, simply ignore it in the accumulation
if (posIncAttribute.getPositionIncrement() == 1) {
accumPosInc--;
}
continue;
}
saveState();
hasOutputToken = false;
hasOutputFollowingOriginal = !has(PRESERVE_ORIGINAL);
lastConcatCount = 0;
if (has(PRESERVE_ORIGINAL)) {
posIncAttribute.setPositionIncrement(accumPosInc);
accumPosInc = 0;
return true;
}
}
// at the end of the string, output any concatenations
if (iterator.end == WordDelimiterIterator.DONE) {
if (!concat.isEmpty()) {
if (flushConcatenation(concat)) {
return true;
}
}
if (!concatAll.isEmpty()) {
// only if we haven't output this same combo above!
if (concatAll.subwordCount > lastConcatCount) {
concatAll.writeAndClear();
return true;
}
concatAll.clear();
}
// no saved concatenations, on to the next input word
hasSavedState = false;
continue;
}
// word surrounded by delimiters: always output
if (iterator.isSingleWord()) {
generatePart(true);
iterator.next();
return true;
}
int wordType = iterator.type();
// do we already have queued up incompatible concatenations?
if (!concat.isEmpty() && (concat.type & wordType) == 0) {
if (flushConcatenation(concat)) {
hasOutputToken = false;
return true;
}
hasOutputToken = false;
}
// add subwords depending upon options
if (shouldConcatenate(wordType)) {
if (concat.isEmpty()) {
concat.type = wordType;
}
concatenate(concat);
}
// add all subwords (catenateAll)
if (has(CATENATE_ALL)) {
concatenate(concatAll);
}
// if we should output the word or number part
if (shouldGenerateParts(wordType)) {
generatePart(false);
iterator.next();
return true;
}
iterator.next();
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
Override
public void reset() throws IOException {
super.reset();
hasSavedState = false;
concat.clear();
concatAll.clear();
accumPosInc = 0;
}
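// Behavior note (flag-dependent): with word/number part generation and case-change / letter-digit
// splitting enabled, "PowerShot500-XL" yields "Power", "Shot", "500", "XL"; CATENATE_ALL would
// additionally emit "PowerShot500XL", and PRESERVE_ORIGINAL keeps the unsplit token as well.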
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
Override
public boolean accept() throws IOException {
return words.contains(termAtt.buffer(), 0, termAtt.length());
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java
Override
public boolean incrementToken() throws IOException {
if (tokenCount < maxTokenCount && input.incrementToken()) {
tokenCount++;
return true;
}
return false;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LimitTokenCountFilter.java
Override
public void reset() throws IOException {
super.reset();
tokenCount = 0;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
Override
public boolean incrementToken() throws IOException {
if (!input.incrementToken()) return false;
char[] termBuffer = termAtt.buffer();
int termBufferLength = termAtt.length();
char[] backup = null;
if (maxWordCount < DEFAULT_MAX_WORD_COUNT) {
//make a backup in case we exceed the word count
backup = new char[termBufferLength];
System.arraycopy(termBuffer, 0, backup, 0, termBufferLength);
}
if (termBufferLength < maxTokenLength) {
int wordCount = 0;
int lastWordStart = 0;
for (int i = 0; i < termBufferLength; i++) {
char c = termBuffer[i];
if (c <= ' ' || c == '.') {
int len = i - lastWordStart;
if (len > 0) {
processWord(termBuffer, lastWordStart, len, wordCount++);
lastWordStart = i + 1;
i++;
}
}
}
// process the last word
if (lastWordStart < termBufferLength) {
processWord(termBuffer, lastWordStart, termBufferLength - lastWordStart, wordCount++);
}
if (wordCount > maxWordCount) {
termAtt.copyBuffer(backup, 0, termBufferLength);
}
}
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
Override
public final boolean incrementToken() throws IOException {
return suffix.incrementToken();
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
Override
public void reset() throws IOException {
suffix.reset();
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
Override
public void close() throws IOException {
suffix.close();
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAndSuffixAwareTokenFilter.java
Override
public void end() throws IOException {
suffix.end();
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java
Override
public boolean incrementToken() throws IOException {
while (!exhausted && input.incrementToken()) {
char[] term = termAttribute.buffer();
int termLength = termAttribute.length();
lastEndOffset = offsetAttribute.endOffset();
if (termLength > 0 && term[termLength - 1] == '-') {
// a hyphenated word
// capture the state of the first token only
if (savedState == null) {
savedState = captureState();
}
hyphenated.append(term, 0, termLength - 1);
} else if (savedState == null) {
// not part of a hyphenated word.
return true;
} else {
// the final portion of a hyphenated word
hyphenated.append(term, 0, termLength);
unhyphenate();
return true;
}
}
exhausted = true;
if (savedState != null) {
// the final term ends with a hyphen
// add back the hyphen, for backwards compatibility.
hyphenated.append('-');
unhyphenate();
return true;
}
return false;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/HyphenatedWordsFilter.java
Override
public void reset() throws IOException {
super.reset();
hyphenated.setLength(0);
savedState = null;
exhausted = false;
lastEndOffset = 0;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java
Override
public final boolean incrementToken() throws IOException {
if (exhausted) {
return false;
} else {
clearAttributes();
singleToken.copyTo(tokenAtt);
exhausted = true;
return true;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java
Override
public void reset() throws IOException {
exhausted = false;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java
Override
public boolean incrementToken() throws IOException {
if (!input.incrementToken()) return false;
char[] termBuffer = termAtt.buffer();
int len = termAtt.length();
//TODO: Is this the right behavior, or should we return false? Currently a term of " " returns true,
//so this should also return true.
if (len == 0){
return true;
}
int start = 0;
int end = 0;
int endOff = 0;
// eat the first characters
//QUESTION: Should we use Character.isWhitespace() instead?
for (start = 0; start < len && termBuffer[start] <= ' '; start++) {
}
// eat the end characters
for (end = len; end >= start && termBuffer[end - 1] <= ' '; end--) {
endOff++;
}
if (start > 0 || end < len) {
if (start < end) {
termAtt.copyBuffer(termBuffer, start, (end - start));
} else {
termAtt.setEmpty();
}
if (updateOffsets && len == offsetAtt.endOffset() - offsetAtt.startOffset()) {
int newStart = offsetAtt.startOffset()+start;
int newEnd = offsetAtt.endOffset() - (start<end ? endOff:0);
offsetAtt.setOffset(newStart, newEnd);
}
}
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
Override
public boolean incrementToken() throws IOException {
while (input.incrementToken()) {
final char term[] = termAttribute.buffer();
final int length = termAttribute.length();
final int posIncrement = posIncAttribute.getPositionIncrement();
if (posIncrement > 0) {
previous.clear();
}
boolean duplicate = (posIncrement == 0 && previous.contains(term, 0, length));
// clone the term, and add to the set of seen terms.
char saved[] = new char[length];
System.arraycopy(term, 0, saved, 0, length);
previous.add(saved);
if (!duplicate) {
return true;
}
}
return false;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
Override
public void reset() throws IOException {
super.reset();
previous.clear();
}
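// Behavior note: only tokens at the same position (position increment 0) are compared, so a
// synonym-injected stream like "fast", "quick"(posInc 0), "fast"(posInc 0) keeps "fast" and "quick"
// and drops the duplicate "fast"; equal terms at different positions are preserved.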
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
Override
public final boolean incrementToken() throws IOException {
if (!prefixExhausted) {
Token nextToken = getNextPrefixInputToken(reusableToken);
if (nextToken == null) {
prefixExhausted = true;
} else {
previousPrefixToken.reinit(nextToken);
// Make it a deep copy
Payload p = previousPrefixToken.getPayload();
if (p != null) {
previousPrefixToken.setPayload(p.clone());
}
setCurrentToken(nextToken);
return true;
}
}
Token nextToken = getNextSuffixInputToken(reusableToken);
if (nextToken == null) {
return false;
}
nextToken = updateSuffixToken(nextToken, previousPrefixToken);
setCurrentToken(nextToken);
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
private Token getNextPrefixInputToken(Token token) throws IOException {
if (!prefix.incrementToken()) return null;
token.copyBuffer(p_termAtt.buffer(), 0, p_termAtt.length());
token.setPositionIncrement(p_posIncrAtt.getPositionIncrement());
token.setFlags(p_flagsAtt.getFlags());
token.setOffset(p_offsetAtt.startOffset(), p_offsetAtt.endOffset());
token.setType(p_typeAtt.type());
token.setPayload(p_payloadAtt.getPayload());
return token;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
private Token getNextSuffixInputToken(Token token) throws IOException {
if (!suffix.incrementToken()) return null;
token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
token.setPositionIncrement(posIncrAtt.getPositionIncrement());
token.setFlags(flagsAtt.getFlags());
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
token.setType(typeAtt.type());
token.setPayload(payloadAtt.getPayload());
return token;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
Override
public void end() throws IOException {
prefix.end();
suffix.end();
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
Override
public void close() throws IOException {
prefix.close();
suffix.close();
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
Override
public void reset() throws IOException {
super.reset();
if (prefix != null) {
prefixExhausted = false;
prefix.reset();
}
if (suffix != null) {
suffix.reset();
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java
Override
public final boolean incrementToken() throws IOException {
return false;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java
Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (keywordSet.contains(termAtt.buffer(), 0, termAtt.length())) {
keywordAttr.setKeyword(true);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAtt.isKeyword()) { // don't muck with already-keyworded terms
String stem = dictionary.get(termAtt.buffer(), 0, termAtt.length());
if (stem != null) {
termAtt.setEmpty().append(stem);
keywordAtt.setKeyword(true);
}
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
Override
public boolean accept() throws IOException {
final int len = termAtt.length();
return (len >= min && len <= max);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if(!keywordAtt.isKeyword()) {
final int newlen =
stemmer.stem(termAtt.buffer(), termAtt.length(), stemDerivational);
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAtt.isKeyword())
termAtt.setLength(stemmer.stem(termAtt.buffer(), termAtt.length()));
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAtt.isKeyword())
termAtt.setLength(normalizer.normalize(termAtt.buffer(),
termAtt.length()));
return true;
}
return false;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
Override
public boolean incrementToken() throws IOException {
if (hasMoreTokensInClone) {
int start = breaker.current();
int end = breaker.next();
if (end != BreakIterator.DONE) {
clonedToken.copyTo(this);
termAtt.copyBuffer(clonedTermAtt.buffer(), start, end - start);
if (hasIllegalOffsets) {
offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.endOffset());
} else {
offsetAtt.setOffset(clonedOffsetAtt.startOffset() + start, clonedOffsetAtt.startOffset() + end);
}
posAtt.setPositionIncrement(1);
return true;
}
hasMoreTokensInClone = false;
}
if (!input.incrementToken()) {
return false;
}
if (termAtt.length() == 0 || UnicodeBlock.of(termAtt.charAt(0)) != UnicodeBlock.THAI) {
return true;
}
hasMoreTokensInClone = true;
// if length by start + end offsets doesn't match the term text then assume
// this is a synonym and don't adjust the offsets.
hasIllegalOffsets = offsetAtt.endOffset() - offsetAtt.startOffset() != termAtt.length();
// we lazy init the cloned token, as in ctor not all attributes may be added
if (clonedToken == null) {
clonedToken = cloneAttributes();
clonedTermAtt = clonedToken.getAttribute(CharTermAttribute.class);
clonedOffsetAtt = clonedToken.getAttribute(OffsetAttribute.class);
} else {
this.copyTo(clonedToken);
}
// reinit CharacterIterator
charIterator.setText(clonedTermAtt.buffer(), 0, clonedTermAtt.length());
breaker.setText(charIterator);
int end = breaker.next();
if (end != BreakIterator.DONE) {
termAtt.setLength(end);
if (hasIllegalOffsets) {
offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.endOffset());
} else {
offsetAtt.setOffset(clonedOffsetAtt.startOffset(), clonedOffsetAtt.startOffset() + end);
}
// position increment keeps as it is for first token
return true;
}
return false;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
Override
public void reset() throws IOException {
super.reset();
hasMoreTokensInClone = false;
clonedToken = null;
clonedTermAtt = null;
clonedOffsetAtt = null;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
char termBuffer[] = termAtt.buffer();
final int length = termAtt.length();
stemmer.setCurrent(termBuffer, length);
stemmer.stem();
final char finalTerm[] = stemmer.getCurrentBuffer();
final int newLength = stemmer.getCurrentBufferLength();
if (finalTerm != termBuffer)
termAtt.copyBuffer(finalTerm, 0, newLength);
else
termAtt.setLength(newLength);
}
return true;
} else {
return false;
}
}
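// Usage sketch (not from the Lucene/Solr sources): English Snowball stemming over a whitespace-tokenized stream.
// Assumes SnowballFilter(TokenStream, String), which loads the stemmer class by language name.
TokenStream ts = new SnowballFilter(
new WhitespaceTokenizer(Version.LUCENE_40, new StringReader("running runs easily")), "English");
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
ts.reset();
while (ts.incrementToken()) {
System.out.println(term); // roughly: run, run, easili
}
ts.end();
ts.close();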
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
Override
public final boolean incrementToken() throws IOException {
boolean tokenAvailable = false;
int builtGramSize = 0;
if (gramSize.atMinValue() || inputWindow.size() < gramSize.getValue()) {
shiftInputWindow();
gramBuilder.setLength(0);
} else {
builtGramSize = gramSize.getPreviousValue();
}
if (inputWindow.size() >= gramSize.getValue()) {
boolean isAllFiller = true;
InputWindowToken nextToken = null;
Iterator<InputWindowToken> iter = inputWindow.iterator();
for (int gramNum = 1 ;
iter.hasNext() && builtGramSize < gramSize.getValue() ;
++gramNum) {
nextToken = iter.next();
if (builtGramSize < gramNum) {
if (builtGramSize > 0) {
gramBuilder.append(tokenSeparator);
}
gramBuilder.append(nextToken.termAtt.buffer(), 0,
nextToken.termAtt.length());
++builtGramSize;
}
if (isAllFiller && nextToken.isFiller) {
if (gramNum == gramSize.getValue()) {
gramSize.advance();
}
} else {
isAllFiller = false;
}
}
if ( ! isAllFiller && builtGramSize == gramSize.getValue()) {
inputWindow.getFirst().attSource.copyTo(this);
posIncrAtt.setPositionIncrement(isOutputHere ? 0 : 1);
termAtt.setEmpty().append(gramBuilder);
if (gramSize.getValue() > 1) {
typeAtt.setType(tokenType);
noShingleOutput = false;
}
offsetAtt.setOffset(offsetAtt.startOffset(), nextToken.offsetAtt.endOffset());
posLenAtt.setPositionLength(builtGramSize);
isOutputHere = true;
gramSize.advance();
tokenAvailable = true;
}
}
return tokenAvailable;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
private InputWindowToken getNextToken(InputWindowToken target)
throws IOException {
InputWindowToken newTarget = target;
if (numFillerTokensToInsert > 0) {
if (null == target) {
newTarget = new InputWindowToken(nextInputStreamToken.cloneAttributes());
} else {
nextInputStreamToken.copyTo(target.attSource);
}
// A filler token occupies no space
newTarget.offsetAtt.setOffset(newTarget.offsetAtt.startOffset(),
newTarget.offsetAtt.startOffset());
newTarget.termAtt.copyBuffer(FILLER_TOKEN, 0, FILLER_TOKEN.length);
newTarget.isFiller = true;
--numFillerTokensToInsert;
} else if (isNextInputStreamToken) {
if (null == target) {
newTarget = new InputWindowToken(nextInputStreamToken.cloneAttributes());
} else {
nextInputStreamToken.copyTo(target.attSource);
}
isNextInputStreamToken = false;
newTarget.isFiller = false;
} else if (!exhausted && input.incrementToken()) {
if (null == target) {
newTarget = new InputWindowToken(cloneAttributes());
} else {
this.copyTo(target.attSource);
}
if (posIncrAtt.getPositionIncrement() > 1) {
// Each output shingle must contain at least one input token,
// so no more than (maxShingleSize - 1) filler tokens will be inserted.
numFillerTokensToInsert
= Math.min(posIncrAtt.getPositionIncrement() - 1, maxShingleSize - 1);
// Save the current token as the next input stream token
if (null == nextInputStreamToken) {
nextInputStreamToken = cloneAttributes();
} else {
this.copyTo(nextInputStreamToken);
}
isNextInputStreamToken = true;
// A filler token occupies no space
newTarget.offsetAtt.setOffset(offsetAtt.startOffset(), offsetAtt.startOffset());
newTarget.termAtt.copyBuffer(FILLER_TOKEN, 0, FILLER_TOKEN.length);
newTarget.isFiller = true;
--numFillerTokensToInsert;
} else {
newTarget.isFiller = false;
}
} else {
newTarget = null;
exhausted = true;
}
return newTarget;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
private void shiftInputWindow() throws IOException {
InputWindowToken firstToken = null;
if (inputWindow.size() > 0) {
firstToken = inputWindow.removeFirst();
}
while (inputWindow.size() < maxShingleSize) {
if (null != firstToken) { // recycle the firstToken, if available
if (null != getNextToken(firstToken)) {
inputWindow.add(firstToken); // the firstToken becomes the last
firstToken = null;
} else {
break; // end of input stream
}
} else {
InputWindowToken nextToken = getNextToken(null);
if (null != nextToken) {
inputWindow.add(nextToken);
} else {
break; // end of input stream
}
}
}
if (outputUnigramsIfNoShingles && noShingleOutput
&& gramSize.minValue > 1 && inputWindow.size() < minShingleSize) {
gramSize.minValue = 1;
}
gramSize.reset();
isOutputHere = false;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
Override
public void reset() throws IOException {
super.reset();
gramSize.reset();
inputWindow.clear();
nextInputStreamToken = null;
isNextInputStreamToken = false;
numFillerTokensToInsert = 0;
isOutputHere = false;
noShingleOutput = true;
exhausted = false;
if (outputUnigramsIfNoShingles && ! outputUnigrams) {
// Fix up gramSize if minValue was reset for outputUnigramsIfNoShingles
gramSize.minValue = minShingleSize;
}
}
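// Usage sketch (not from the Lucene/Solr sources): word shingles of size 2..3.
// With outputUnigrams left at its default (true) the single words are emitted as well.
ShingleFilter shingles = new ShingleFilter(
new WhitespaceTokenizer(Version.LUCENE_40, new StringReader("please divide this sentence")), 2, 3);
CharTermAttribute term = shingles.addAttribute(CharTermAttribute.class);
shingles.reset();
while (shingles.incrementToken()) {
System.out.println(term); // "please", "please divide", "please divide this", "divide", ...
}
shingles.end();
shingles.close();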
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java
public void consumeAllTokens() throws IOException {
while (incrementToken()) {}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
// capture state lazily - maybe no SinkFilter accepts this state
AttributeSource.State state = null;
for (WeakReference<SinkTokenStream> ref : sinks) {
final SinkTokenStream sink = ref.get();
if (sink != null) {
if (sink.accept(this)) {
if (state == null) {
state = this.captureState();
}
sink.addState(state);
}
}
}
return true;
}
return false;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java
Override
public final void end() throws IOException {
super.end();
AttributeSource.State finalState = captureState();
for (WeakReference<SinkTokenStream> ref : sinks) {
final SinkTokenStream sink = ref.get();
if (sink != null) {
sink.setFinalState(finalState);
}
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java
public void reset() throws IOException {
// nothing to do; can be overridden
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java
Override
public final boolean incrementToken() throws IOException {
// lazy init the iterator
if (it == null) {
it = cachedStates.iterator();
}
if (!it.hasNext()) {
return false;
}
AttributeSource.State state = it.next();
restoreState(state);
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TeeSinkTokenFilter.java
Override
public final void end() throws IOException {
if (finalState != null) {
restoreState(finalState);
}
}
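// Usage sketch (not from the Lucene/Solr sources): one tokenizer feeding a second consumer through a sink.
// The main stream must be consumed first (by indexing it, or via consumeAllTokens()); the sink then
// replays the cached states.
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(
new WhitespaceTokenizer(Version.LUCENE_40, new StringReader("one two three")));
TeeSinkTokenFilter.SinkTokenStream sink = tee.newSinkTokenStream();
tee.reset();
tee.consumeAllTokens();
tee.end();
CharTermAttribute term = sink.addAttribute(CharTermAttribute.class);
while (sink.incrementToken()) {
System.out.println(term); // one, two, three
}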
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/TokenRangeSinkFilter.java
Override
public void reset() throws IOException {
count = 0;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java
public static void main(String[] args) throws IOException, ParseException {
boolean ignoreCase = false;
int offset = 0;
if (args.length < 2) {
System.out.println("usage: HunspellStemmer [-i] <affix location> <dic location>");
System.exit(1);
}
if(args[offset].equals("-i")) {
ignoreCase = true;
System.out.println("Ignoring case. All stems will be returned lowercased");
offset++;
}
InputStream affixInputStream = new FileInputStream(args[offset++]);
InputStream dicInputStream = new FileInputStream(args[offset++]);
HunspellDictionary dictionary = new HunspellDictionary(affixInputStream, dicInputStream, Version.LUCENE_40, ignoreCase);
affixInputStream.close();
dicInputStream.close();
HunspellStemmer stemmer = new HunspellStemmer(dictionary);
Scanner scanner = new Scanner(System.in);
System.out.print("> ");
while (scanner.hasNextLine()) {
String word = scanner.nextLine();
if ("exit".equals(word)) {
break;
}
printStemResults(word, stemmer.stem(word.toCharArray(), word.length()));
System.out.print("> ");
}
}
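// Example invocation (file names are placeholders for real Hunspell affix/dictionary files):
//   java org.apache.lucene.analysis.hunspell.HunspellStemmer -i en_US.aff en_US.dic
// Words typed at the "> " prompt are stemmed and printed; typing "exit" ends the session.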
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java
private void readAffixFile(InputStream affixStream, CharsetDecoder decoder) throws IOException {
prefixes = new CharArrayMap<List<HunspellAffix>>(version, 8, ignoreCase);
suffixes = new CharArrayMap<List<HunspellAffix>>(version, 8, ignoreCase);
BufferedReader reader = new BufferedReader(new InputStreamReader(affixStream, decoder));
String line = null;
while ((line = reader.readLine()) != null) {
if (line.startsWith(ALIAS_KEY)) {
parseAlias(line);
} else if (line.startsWith(PREFIX_KEY)) {
parseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN);
} else if (line.startsWith(SUFFIX_KEY)) {
parseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN);
} else if (line.startsWith(FLAG_KEY)) {
// Assume that the FLAG line comes before any prefixes or suffixes
// Store the strategy so it can be used when parsing the dic file
flagParsingStrategy = getFlagParsingStrategy(line);
}
}
reader.close();
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java
private void parseAffix(CharArrayMap<List<HunspellAffix>> affixes,
String header,
BufferedReader reader,
String conditionPattern) throws IOException {
String args[] = header.split("\\s+");
boolean crossProduct = args[2].equals("Y");
int numLines = Integer.parseInt(args[3]);
for (int i = 0; i < numLines; i++) {
String line = reader.readLine();
String ruleArgs[] = line.split("\\s+");
HunspellAffix affix = new HunspellAffix();
affix.setFlag(flagParsingStrategy.parseFlag(ruleArgs[1]));
affix.setStrip(ruleArgs[2].equals("0") ? "" : ruleArgs[2]);
String affixArg = ruleArgs[3];
int flagSep = affixArg.lastIndexOf('/');
if (flagSep != -1) {
String flagPart = affixArg.substring(flagSep + 1);
if (aliasCount > 0) {
flagPart = getAliasValue(Integer.parseInt(flagPart));
}
char appendFlags[] = flagParsingStrategy.parseFlags(flagPart);
Arrays.sort(appendFlags);
affix.setAppendFlags(appendFlags);
affix.setAppend(affixArg.substring(0, flagSep));
} else {
affix.setAppend(affixArg);
}
String condition = ruleArgs[4];
affix.setCondition(condition, String.format(conditionPattern, condition));
affix.setCrossProduct(crossProduct);
List<HunspellAffix> list = affixes.get(affix.getAppend());
if (list == null) {
list = new ArrayList<HunspellAffix>();
affixes.put(affix.getAppend(), list);
}
list.add(affix);
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java
private String getDictionaryEncoding(InputStream affix) throws IOException, ParseException {
final StringBuilder encoding = new StringBuilder();
for (;;) {
encoding.setLength(0);
int ch;
while ((ch = affix.read()) >= 0) {
if (ch == '\n') {
break;
}
if (ch != '\r') {
encoding.append((char)ch);
}
}
if (
encoding.length() == 0 || encoding.charAt(0) == '#' ||
// this check comes last because it is the least efficient, but it lets lines containing only spaces be skipped:
encoding.toString().trim().length() == 0
) {
if (ch < 0) {
throw new ParseException("Unexpected end of affix file.", 0);
}
continue;
}
if ("SET ".equals(encoding.substring(0, 4))) {
// cleanup the encoding string, too (whitespace)
return encoding.substring(4).trim();
}
throw new ParseException("The first non-comment line in the affix file must "+
"be a 'SET charset', was: '" + encoding +"'", 0);
}
}
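// Format sketch: the first non-comment, non-blank line of the affix file must declare the charset, e.g.
//   SET UTF-8
// and this method returns the trimmed charset name used to decode the rest of the affix and dic files.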
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java
private void readDictionaryFile(InputStream dictionary, CharsetDecoder decoder) throws IOException {
BufferedReader reader = new BufferedReader(new InputStreamReader(dictionary, decoder));
// TODO: don't create millions of strings.
String line = reader.readLine(); // first line is number of entries
int numEntries = Integer.parseInt(line);
// TODO: the flags themselves can be double-chars (long) or also numeric
// either way the trick is to encode them as char... but they must be parsed differently
while ((line = reader.readLine()) != null) {
String entry;
HunspellWord wordForm;
int flagSep = line.lastIndexOf('/');
if (flagSep == -1) {
wordForm = NOFLAGS;
entry = line;
} else {
// note, there can be comments (morph description) after a flag.
// we should really look for any whitespace
int end = line.indexOf('\t', flagSep);
if (end == -1)
end = line.length();
String flagPart = line.substring(flagSep + 1, end);
if (aliasCount > 0) {
flagPart = getAliasValue(Integer.parseInt(flagPart));
}
wordForm = new HunspellWord(flagParsingStrategy.parseFlags(flagPart));
Arrays.sort(wordForm.getFlags());
entry = line.substring(0, flagSep);
if(ignoreCase) {
entry = entry.toLowerCase(Locale.ENGLISH);
}
}
List<HunspellWord> entries = words.get(entry);
if (entries == null) {
entries = new ArrayList<HunspellWord>();
words.put(entry, entries);
}
entries.add(wordForm);
}
}
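// Format sketch: the .dic file starts with the number of entries, followed by one entry per line,
// optionally with flags after a '/':
//   3
//   hello
//   try/B
//   work/AB
// Entries that share the same word accumulate their flag sets under that word in the 'words' map.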
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (buffer != null && !buffer.isEmpty()) {
Stem nextStem = buffer.remove(0);
restoreState(savedState);
posIncAtt.setPositionIncrement(0);
termAtt.copyBuffer(nextStem.getStem(), 0, nextStem.getStemLength());
termAtt.setLength(nextStem.getStemLength());
return true;
}
if (!input.incrementToken()) {
return false;
}
if (keywordAtt.isKeyword()) {
return true;
}
buffer = dedup ? stemmer.uniqueStems(termAtt.buffer(), termAtt.length()) : stemmer.stem(termAtt.buffer(), termAtt.length());
if (buffer.isEmpty()) { // we do not know this word, return it unchanged
return true;
}
Stem stem = buffer.remove(0);
termAtt.copyBuffer(stem.getStem(), 0, stem.getStemLength());
termAtt.setLength(stem.getStemLength());
if (!buffer.isEmpty()) {
savedState = captureState();
}
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java
Override
public void reset() throws IOException {
super.reset();
buffer = null;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
termAtt.setLength(normalizer.normalize(termAtt.buffer(), termAtt.length()));
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishLowerCaseFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
char[] chArray = termAtt.buffer();
int chLen = termAtt.length();
int idx = 0;
if (chLen > 1 && (chArray[0] == 'n' || chArray[0] == 't') && isUpperVowel(chArray[1])) {
chArray = termAtt.resizeBuffer(chLen + 1);
for (int i = chLen; i > 1; i--) {
chArray[i] = chArray[i - 1];
}
chArray[1] = '-';
termAtt.setLength(chLen + 1);
idx = 2;
chLen = chLen + 1;
}
for (int i = idx; i < chLen;) {
i += Character.toChars(Character.toLowerCase(chArray[i]), chArray, i);
}
return true;
} else {
return false;
}
}
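// Behavior sketch: an initial prothetic 'n' or 't' before an uppercase vowel has a hyphen inserted before
// lowercasing, so "nAthair" becomes "n-athair" and "tUisce" becomes "t-uisce"; all other tokens are simply
// lowercased.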
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if(!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
public static CharsRef analyze(Analyzer analyzer, String text, CharsRef reuse) throws IOException {
TokenStream ts = analyzer.tokenStream("", new StringReader(text));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
ts.reset();
reuse.length = 0;
while (ts.incrementToken()) {
int length = termAtt.length();
if (length == 0) {
throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
}
if (posIncAtt.getPositionIncrement() != 1) {
throw new IllegalArgumentException("term: " + text + " analyzed to a token with posinc != 1");
}
reuse.grow(reuse.length + length + 1); /* current + word + separator */
int end = reuse.offset + reuse.length;
if (reuse.length > 0) {
reuse.chars[end++] = SynonymMap.WORD_SEPARATOR;
reuse.length++;
}
System.arraycopy(termAtt.buffer(), 0, reuse.chars, end, length);
reuse.length += length;
}
ts.end();
ts.close();
if (reuse.length == 0) {
throw new IllegalArgumentException("term: " + text + " was completely eliminated by analyzer");
}
return reuse;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
public SynonymMap build() throws IOException {
ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
// TODO: are we using the best sharing options?
org.apache.lucene.util.fst.Builder<BytesRef> builder =
new org.apache.lucene.util.fst.Builder<BytesRef>(FST.INPUT_TYPE.BYTE4, outputs);
BytesRef scratch = new BytesRef(64);
ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput();
final Set<Integer> dedupSet;
if (dedup) {
dedupSet = new HashSet<Integer>();
} else {
dedupSet = null;
}
final byte[] spare = new byte[5];
Set<CharsRef> keys = workingSet.keySet();
CharsRef sortedKeys[] = keys.toArray(new CharsRef[keys.size()]);
Arrays.sort(sortedKeys, CharsRef.getUTF16SortedAsUTF8Comparator());
final IntsRef scratchIntsRef = new IntsRef();
//System.out.println("fmap.build");
for (int keyIdx = 0; keyIdx < sortedKeys.length; keyIdx++) {
CharsRef input = sortedKeys[keyIdx];
MapEntry output = workingSet.get(input);
int numEntries = output.ords.size();
// output size, assume the worst case
int estimatedSize = 5 + numEntries * 5; // numEntries + one ord for each entry
scratch.grow(estimatedSize);
scratchOutput.reset(scratch.bytes, scratch.offset, scratch.bytes.length);
assert scratch.offset == 0;
// now write our output data:
int count = 0;
for (int i = 0; i < numEntries; i++) {
if (dedupSet != null) {
// box once
final Integer ent = output.ords.get(i);
if (dedupSet.contains(ent)) {
continue;
}
dedupSet.add(ent);
}
scratchOutput.writeVInt(output.ords.get(i));
count++;
}
final int pos = scratchOutput.getPosition();
scratchOutput.writeVInt(count << 1 | (output.includeOrig ? 0 : 1));
final int pos2 = scratchOutput.getPosition();
final int vIntLen = pos2-pos;
// Move the count + includeOrig to the front of the byte[]:
System.arraycopy(scratch.bytes, pos, spare, 0, vIntLen);
System.arraycopy(scratch.bytes, 0, scratch.bytes, vIntLen, pos);
System.arraycopy(spare, 0, scratch.bytes, 0, vIntLen);
if (dedupSet != null) {
dedupSet.clear();
}
scratch.length = scratchOutput.getPosition() - scratch.offset;
//System.out.println(" add input=" + input + " output=" + scratch + " offset=" + scratch.offset + " length=" + scratch.length + " count=" + count);
builder.add(Util.toUTF32(input, scratchIntsRef), BytesRef.deepCopyOf(scratch));
}
FST<BytesRef> fst = builder.finish();
return new SynonymMap(fst, words, maxHorizontalContext);
}
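// Usage sketch (not from the Lucene/Solr sources): building a tiny map and applying it with SynonymFilter.
// Builder.join(String[], CharsRef) and the SynonymFilter(TokenStream, SynonymMap, boolean ignoreCase)
// constructor are assumed for this era.
SynonymMap.Builder builder = new SynonymMap.Builder(true); // true = dedup identical rules
builder.add(new CharsRef("usa"),
SynonymMap.Builder.join(new String[] {"united", "states"}, new CharsRef()),
true); // true = keep the original token as well
SynonymMap map = builder.build();
TokenStream ts = new SynonymFilter(
new WhitespaceTokenizer(Version.LUCENE_40, new StringReader("usa")), map, true);
// emits "usa" plus the overlapping multi-word synonym "united" "states"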
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
private void parse() throws IOException {
//System.out.println("\nS: parse");
assert inputSkipCount == 0;
int curNextRead = nextRead;
// Holds the longest match we've seen so far:
BytesRef matchOutput = null;
int matchInputLength = 0;
int matchEndOffset = -1;
BytesRef pendingOutput = fst.outputs.getNoOutput();
fst.getFirstArc(scratchArc);
assert scratchArc.output == fst.outputs.getNoOutput();
int tokenCount = 0;
byToken:
while(true) {
// Pull next token's chars:
final char[] buffer;
final int bufferLen;
//System.out.println(" cycle nextRead=" + curNextRead + " nextWrite=" + nextWrite);
int inputEndOffset = 0;
if (curNextRead == nextWrite) {
// We used up our lookahead buffer of input tokens
// -- pull next real input token:
if (finished) {
break;
} else {
//System.out.println(" input.incrToken");
assert futureInputs[nextWrite].consumed;
// Not correct: a syn match whose output is longer
// than its input can set future inputs keepOrig
// to true:
//assert !futureInputs[nextWrite].keepOrig;
if (input.incrementToken()) {
buffer = termAtt.buffer();
bufferLen = termAtt.length();
final PendingInput input = futureInputs[nextWrite];
lastStartOffset = input.startOffset = offsetAtt.startOffset();
lastEndOffset = input.endOffset = offsetAtt.endOffset();
inputEndOffset = input.endOffset;
//System.out.println(" new token=" + new String(buffer, 0, bufferLen));
if (nextRead != nextWrite) {
capture();
} else {
input.consumed = false;
}
} else {
// No more input tokens
//System.out.println(" set end");
finished = true;
break;
}
}
} else {
// Still in our lookahead
buffer = futureInputs[curNextRead].term.chars;
bufferLen = futureInputs[curNextRead].term.length;
inputEndOffset = futureInputs[curNextRead].endOffset;
//System.out.println(" old token=" + new String(buffer, 0, bufferLen));
}
tokenCount++;
// Run each char in this token through the FST:
int bufUpto = 0;
while(bufUpto < bufferLen) {
final int codePoint = Character.codePointAt(buffer, bufUpto, bufferLen);
if (fst.findTargetArc(ignoreCase ? Character.toLowerCase(codePoint) : codePoint, scratchArc, scratchArc, fstReader) == null) {
//System.out.println(" stop");
break byToken;
}
// Accum the output
pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
//System.out.println(" char=" + buffer[bufUpto] + " output=" + pendingOutput + " arc.output=" + scratchArc.output);
bufUpto += Character.charCount(codePoint);
}
// OK, entire token matched; now see if this is a final
// state:
if (scratchArc.isFinal()) {
matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput);
matchInputLength = tokenCount;
matchEndOffset = inputEndOffset;
//System.out.println(" found matchLength=" + matchInputLength + " output=" + matchOutput);
}
// See if the FST wants to continue matching (ie, needs to
// see the next input token):
if (fst.findTargetArc(SynonymMap.WORD_SEPARATOR, scratchArc, scratchArc, fstReader) == null) {
// No further rules can match here; we're done
// searching for matching rules starting at the
// current input position.
break;
} else {
// More matching is possible -- accum the output (if
// any) of the WORD_SEP arc:
pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
if (nextRead == nextWrite) {
capture();
}
}
curNextRead = rollIncr(curNextRead);
}
if (nextRead == nextWrite && !finished) {
//System.out.println(" skip write slot=" + nextWrite);
nextWrite = rollIncr(nextWrite);
}
if (matchOutput != null) {
//System.out.println(" add matchLength=" + matchInputLength + " output=" + matchOutput);
inputSkipCount = matchInputLength;
addOutput(matchOutput, matchInputLength, matchEndOffset);
} else if (nextRead != nextWrite) {
// Even though we had no match here, we set to 1
// because we need to skip current input token before
// trying to match again:
inputSkipCount = 1;
} else {
assert finished;
}
//System.out.println(" parse done inputSkipCount=" + inputSkipCount + " nextRead=" + nextRead + " nextWrite=" + nextWrite);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
Override
public boolean incrementToken() throws IOException {
//System.out.println("\nS: incrToken inputSkipCount=" + inputSkipCount + " nextRead=" + nextRead + " nextWrite=" + nextWrite);
while(true) {
// First play back any buffered future inputs/outputs
// w/o running parsing again:
while (inputSkipCount != 0) {
// At each position, we first output the original
// token
// TODO: maybe just a PendingState class, holding
// both input & outputs?
final PendingInput input = futureInputs[nextRead];
final PendingOutputs outputs = futureOutputs[nextRead];
//System.out.println(" cycle nextRead=" + nextRead + " nextWrite=" + nextWrite + " inputSkipCount="+ inputSkipCount + " input.keepOrig=" + input.keepOrig + " input.consumed=" + input.consumed + " input.state=" + input.state);
if (!input.consumed && (input.keepOrig || !input.matched)) {
if (input.state != null) {
// Return a previously saved token (because we
// had to lookahead):
restoreState(input.state);
} else {
// Pass-through case: return token we just pulled
// but didn't capture:
assert inputSkipCount == 1: "inputSkipCount=" + inputSkipCount + " nextRead=" + nextRead;
}
input.reset();
if (outputs.count > 0) {
outputs.posIncr = 0;
} else {
nextRead = rollIncr(nextRead);
inputSkipCount--;
}
//System.out.println(" return token=" + termAtt.toString());
return true;
} else if (outputs.upto < outputs.count) {
// Still have pending outputs to replay at this
// position
input.reset();
final int posIncr = outputs.posIncr;
final CharsRef output = outputs.pullNext();
clearAttributes();
termAtt.copyBuffer(output.chars, output.offset, output.length);
typeAtt.setType(TYPE_SYNONYM);
int endOffset = outputs.getLastEndOffset();
if (endOffset == -1) {
endOffset = input.endOffset;
}
offsetAtt.setOffset(input.startOffset, endOffset);
posIncrAtt.setPositionIncrement(posIncr);
posLenAtt.setPositionLength(outputs.getLastPosLength());
if (outputs.count == 0) {
// Done with the buffered input and all outputs at
// this position
nextRead = rollIncr(nextRead);
inputSkipCount--;
}
//System.out.println(" return token=" + termAtt.toString());
return true;
} else {
// Done with the buffered input and all outputs at
// this position
input.reset();
nextRead = rollIncr(nextRead);
inputSkipCount--;
}
}
if (finished && nextRead == nextWrite) {
// End case: if any output syns went beyond end of
// input stream, enumerate them now:
final PendingOutputs outputs = futureOutputs[nextRead];
if (outputs.upto < outputs.count) {
final int posIncr = outputs.posIncr;
final CharsRef output = outputs.pullNext();
futureInputs[nextRead].reset();
if (outputs.count == 0) {
nextWrite = nextRead = rollIncr(nextRead);
}
clearAttributes();
// Keep offset from last input token:
offsetAtt.setOffset(lastStartOffset, lastEndOffset);
termAtt.copyBuffer(output.chars, output.offset, output.length);
typeAtt.setType(TYPE_SYNONYM);
//System.out.println(" set posIncr=" + outputs.posIncr + " outputs=" + outputs);
posIncrAtt.setPositionIncrement(posIncr);
//System.out.println(" return token=" + termAtt.toString());
return true;
} else {
return false;
}
}
// Find new synonym matches:
parse();
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
Override
public void reset() throws IOException {
super.reset();
captureCount = 0;
finished = false;
inputSkipCount = 0;
nextRead = nextWrite = 0;
// In normal usage these resets would not be needed,
// since they reset-as-they-are-consumed, but the app
// may not consume all input tokens (or we might hit an
// exception), in which case we have leftover state
// here:
for (PendingInput input : futureInputs) {
input.reset();
}
for (PendingOutputs output : futureOutputs) {
output.reset();
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/WordnetSynonymParser.java
public void add(Reader in) throws IOException, ParseException {
LineNumberReader br = new LineNumberReader(in);
try {
String line = null;
String lastSynSetID = "";
CharsRef synset[] = new CharsRef[8];
int synsetSize = 0;
while ((line = br.readLine()) != null) {
String synSetID = line.substring(2, 11);
if (!synSetID.equals(lastSynSetID)) {
addInternal(synset, synsetSize);
synsetSize = 0;
}
if (synset.length <= synsetSize+1) {
CharsRef larger[] = new CharsRef[synset.length * 2];
System.arraycopy(synset, 0, larger, 0, synsetSize);
synset = larger;
}
synset[synsetSize] = parseSynonym(line, synset[synsetSize]);
synsetSize++;
lastSynSetID = synSetID;
}
// final synset in the file
addInternal(synset, synsetSize);
} catch (IllegalArgumentException e) {
ParseException ex = new ParseException("Invalid synonym rule at line " + br.getLineNumber(), 0);
ex.initCause(e);
throw ex;
} finally {
br.close();
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/WordnetSynonymParser.java
private CharsRef parseSynonym(String line, CharsRef reuse) throws IOException {
if (reuse == null) {
reuse = new CharsRef(8);
}
int start = line.indexOf('\'')+1;
int end = line.lastIndexOf('\'');
String text = line.substring(start, end).replace("''", "'");
return analyze(analyzer, text, reuse);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/WordnetSynonymParser.java
private void addInternal(CharsRef synset[], int size) throws IOException {
if (size <= 1) {
return; // nothing to do
}
if (expand) {
for (int i = 0; i < size; i++) {
for (int j = 0; j < size; j++) {
add(synset[i], synset[j], false);
}
}
} else {
for (int i = 0; i < size; i++) {
add(synset[i], synset[0], false);
}
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SolrSynonymParser.java
public void add(Reader in) throws IOException, ParseException {
LineNumberReader br = new LineNumberReader(in);
try {
addInternal(br);
} catch (IllegalArgumentException e) {
ParseException ex = new ParseException("Invalid synonym rule at line " + br.getLineNumber(), 0);
ex.initCause(e);
throw ex;
} finally {
br.close();
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SolrSynonymParser.java
private void addInternal(BufferedReader in) throws IOException {
String line = null;
while ((line = in.readLine()) != null) {
if (line.length() == 0 || line.charAt(0) == '#') {
continue; // ignore empty lines and comments
}
CharsRef inputs[];
CharsRef outputs[];
// TODO: we could process this more efficiently.
String sides[] = split(line, "=>");
if (sides.length > 1) { // explicit mapping
if (sides.length != 2) {
throw new IllegalArgumentException("more than one explicit mapping specified on the same line");
}
String inputStrings[] = split(sides[0], ",");
inputs = new CharsRef[inputStrings.length];
for (int i = 0; i < inputs.length; i++) {
inputs[i] = analyze(analyzer, unescape(inputStrings[i]).trim(), new CharsRef());
}
String outputStrings[] = split(sides[1], ",");
outputs = new CharsRef[outputStrings.length];
for (int i = 0; i < outputs.length; i++) {
outputs[i] = analyze(analyzer, unescape(outputStrings[i]).trim(), new CharsRef());
}
} else {
String inputStrings[] = split(line, ",");
inputs = new CharsRef[inputStrings.length];
for (int i = 0; i < inputs.length; i++) {
inputs[i] = analyze(analyzer, unescape(inputStrings[i]).trim(), new CharsRef());
}
if (expand) {
outputs = inputs;
} else {
outputs = new CharsRef[1];
outputs[0] = inputs[0];
}
}
// currently we include the term itself in the map,
// and use includeOrig = false always.
// this is how the existing filter does it, but its actually a bug,
// especially if combined with ignoreCase = true
for (int i = 0; i < inputs.length; i++) {
for (int j = 0; j < outputs.length; j++) {
add(inputs[i], outputs[j], false);
}
}
}
}
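// Usage sketch (not from the Lucene/Solr sources): the Solr-style rules handled above, fed through the parser.
// The (dedup, expand, analyzer) constructor and the inherited build() are assumed for this era; 'analyzer'
// stands for whatever Analyzer should pre-process each synonym (e.g. whitespace + lowercase).
String rules =
"# comments and blank lines are skipped\n" +
"i-pod, ipod, i pod\n" +  // comma list: expanded, or mapped onto the first entry when expand == false
"foo => bar, baz\n";      // explicit mapping
SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer);
parser.add(new StringReader(rules));
SynonymMap map = parser.build();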
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
String type = typeAtt.type();
if (type != null && type.equals("") == false) {
payloadAtt.setPayload(new Payload(type.getBytes("UTF-8")));
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
byte[] data = new byte[8];
PayloadHelper.encodeInt(offsetAtt.startOffset(), data, 0);
PayloadHelper.encodeInt(offsetAtt.endOffset(), data, 4);
Payload payload = new Payload(data);
payAtt.setPayload(payload);
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (typeAtt.type().equals(typeMatch))
payloadAtt.setPayload(thePayload);
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
final char[] buffer = termAtt.buffer();
final int length = termAtt.length();
for (int i = 0; i < length; i++) {
if (buffer[i] == delimiter) {
payAtt.setPayload(encoder.encode(buffer, i + 1, (length - (i + 1))));
termAtt.setLength(i); // simply set a new length
return true;
}
}
// we have not seen the delimiter
payAtt.setPayload(null);
return true;
} else return false;
}
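// Usage sketch (not from the Lucene/Solr sources): "apple|0.5" becomes the term "apple" carrying a float payload.
// Assumes this era's (TokenStream, char, PayloadEncoder) constructor and the FloatEncoder from the same package.
TokenStream ts = new DelimitedPayloadTokenFilter(
new WhitespaceTokenizer(Version.LUCENE_40, new StringReader("apple|0.5 pear")),
'|', new FloatEncoder());
// "apple" gets the 4-byte encoded 0.5 payload; "pear" has no delimiter, so its payload is set to null.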
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
private boolean zzRefill() throws java.io.IOException {
/* first: make room (if you can) */
if (zzStartRead > 0) {
System.arraycopy(zzBuffer, zzStartRead,
zzBuffer, 0,
zzEndRead-zzStartRead);
/* translate stored positions */
zzEndRead-= zzStartRead;
zzCurrentPos-= zzStartRead;
zzMarkedPos-= zzStartRead;
zzStartRead = 0;
}
/* is the buffer big enough? */
if (zzCurrentPos >= zzBuffer.length) {
/* if not: blow it up */
char newBuffer[] = new char[zzCurrentPos*2];
System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
zzBuffer = newBuffer;
}
/* finally: fill the buffer with new input */
int numRead = zzReader.read(zzBuffer, zzEndRead,
zzBuffer.length-zzEndRead);
if (numRead > 0) {
zzEndRead+= numRead;
return false;
}
// unlikely but not impossible: read 0 characters, but not at end of stream
if (numRead == 0) {
int c = zzReader.read();
if (c == -1) {
return true;
} else {
zzBuffer[zzEndRead++] = (char) c;
return false;
}
}
// numRead < 0
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
public final void yyclose() throws java.io.IOException {
zzAtEOF = true; /* indicate end of file */
zzEndRead = zzStartRead; /* invalidate buffer */
if (zzReader != null)
zzReader.close();
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
public int getNextToken() throws java.io.IOException {
int zzInput;
int zzAction;
// cached fields:
int zzCurrentPosL;
int zzMarkedPosL;
int zzEndReadL = zzEndRead;
char [] zzBufferL = zzBuffer;
char [] zzCMapL = ZZ_CMAP;
int [] zzTransL = ZZ_TRANS;
int [] zzRowMapL = ZZ_ROWMAP;
int [] zzAttrL = ZZ_ATTRIBUTE;
while (true) {
zzMarkedPosL = zzMarkedPos;
yychar+= zzMarkedPosL-zzStartRead;
zzAction = -1;
zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
zzState = ZZ_LEXSTATE[zzLexicalState];
// set up zzAction for empty match case:
int zzAttributes = zzAttrL[zzState];
if ( (zzAttributes & 1) == 1 ) {
zzAction = zzState;
}
zzForAction: {
while (true) {
if (zzCurrentPosL < zzEndReadL)
zzInput = zzBufferL[zzCurrentPosL++];
else if (zzAtEOF) {
zzInput = YYEOF;
break zzForAction;
}
else {
// store back cached positions
zzCurrentPos = zzCurrentPosL;
zzMarkedPos = zzMarkedPosL;
boolean eof = zzRefill();
// get translated positions and possibly new buffer
zzCurrentPosL = zzCurrentPos;
zzMarkedPosL = zzMarkedPos;
zzBufferL = zzBuffer;
zzEndReadL = zzEndRead;
if (eof) {
zzInput = YYEOF;
break zzForAction;
}
else {
zzInput = zzBufferL[zzCurrentPosL++];
}
}
int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
if (zzNext == -1) break zzForAction;
zzState = zzNext;
zzAttributes = zzAttrL[zzState];
if ( (zzAttributes & 1) == 1 ) {
zzAction = zzState;
zzMarkedPosL = zzCurrentPosL;
if ( (zzAttributes & 8) == 8 ) break zzForAction;
}
}
}
// store back cached position
zzMarkedPos = zzMarkedPosL;
switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
case 44:
{ numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);/* Break so we don't hit fall-through warning: */ break;
}
case 47: break;
case 37:
{ currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 48: break;
case 16:
{ currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;
}
case 49: break;
case 20:
{ numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 50: break;
case 40:
{ positionInc = 1; return ACRONYM;
}
case 51: break;
case 5:
{ positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
}
case 52: break;
case 36:
{ positionInc = 1; return COMPANY;
}
case 53: break;
case 10:
{ numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
}
case 54: break;
case 15:
{ currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); /* Break so we don't hit fall-through warning: */ break;
}
case 55: break;
case 22:
{ numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}/* Break so we don't hit fall-through warning: */ break;
}
case 56: break;
case 35:
{ positionInc = 1; return NUM;
}
case 57: break;
case 33:
{ positionInc = 1; return APOSTROPHE;
}
case 58: break;
case 21:
{ yybegin(STRING); return currentTokType;/*pipe*/
}
case 59: break;
case 18:
{ /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */
}
case 60: break;
case 2:
{ positionInc = 1; return ALPHANUM;
}
case 61: break;
case 1:
{ numWikiTokensSeen = 0; positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
}
case 62: break;
case 17:
{ yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;
}
case 63: break;
case 39:
{ numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end sub header*/
}
case 64: break;
case 29:
{ currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 65: break;
case 46:
{ numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 66: break;
case 27:
{ numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
}
case 67: break;
case 4:
{ numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
}
case 68: break;
case 38:
{ numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold*/
}
case 69: break;
case 13:
{ currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 70: break;
case 3:
{ positionInc = 1; return CJ;
}
case 71: break;
case 45:
{ currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 72: break;
case 6:
{ yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;
}
case 73: break;
case 11:
{ currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 74: break;
case 25:
{ numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;
}
case 75: break;
case 8:
{ /* Break so we don't hit fall-through warning: */ break;/* ignore */
}
case 76: break;
case 19:
{ yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/
}
case 77: break;
case 43:
{ positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
}
case 78: break;
case 42:
{ numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold italics*/
}
case 79: break;
case 30:
{ yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
}
case 80: break;
case 14:
{ yybegin(STRING); numWikiTokensSeen++; return currentTokType;
}
case 81: break;
case 9:
{ if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
}
case 82: break;
case 7:
{ yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;
}
case 83: break;
case 41:
{ positionInc = 1; return EMAIL;
}
case 84: break;
case 28:
{ currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 85: break;
case 23:
{ numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);/* Break so we don't hit fall-through warning: */ break;
}
case 86: break;
case 34:
{ positionInc = 1; return HOST;
}
case 87: break;
case 32:
{ numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
}
case 88: break;
case 12:
{ currentTokType = ITALICS; numWikiTokensSeen++; yybegin(STRING); return currentTokType;/*italics*/
}
case 89: break;
case 24:
{ numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
}
case 90: break;
case 31:
{ numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end italics*/
}
case 91: break;
case 26:
{ yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;
}
case 92: break;
default:
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
zzAtEOF = true;
return YYEOF;
}
else {
zzScanError(ZZ_NO_MATCH);
}
}
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
Override
public final boolean incrementToken() throws IOException {
if (tokens != null && tokens.hasNext()){
AttributeSource.State state = tokens.next();
restoreState(state);
return true;
}
clearAttributes();
int tokenType = scanner.getNextToken();
if (tokenType == WikipediaTokenizerImpl.YYEOF) {
return false;
}
String type = WikipediaTokenizerImpl.TOKEN_TYPES[tokenType];
if (tokenOutput == TOKENS_ONLY || untokenizedTypes.contains(type) == false){
setupToken();
} else if (tokenOutput == UNTOKENIZED_ONLY && untokenizedTypes.contains(type) == true){
collapseTokens(tokenType);
}
else if (tokenOutput == BOTH){
//collapse into a single token, add it to tokens AND output the individual tokens
//output the untokenized Token first
collapseAndSaveTokens(tokenType, type);
}
int posinc = scanner.getPositionIncrement();
if (first && posinc == 0) {
posinc = 1; // don't emit posinc=0 for the first token!
}
posIncrAtt.setPositionIncrement(posinc);
typeAtt.setType(type);
first = false;
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
private void collapseAndSaveTokens(int tokenType, String type) throws IOException {
//collapse
StringBuilder buffer = new StringBuilder(32);
int numAdded = scanner.setText(buffer);
//TODO: how to know how much whitespace to add
int theStart = scanner.yychar();
int lastPos = theStart + numAdded;
int tmpTokType;
int numSeen = 0;
List<AttributeSource.State> tmp = new ArrayList<AttributeSource.State>();
setupSavedToken(0, type);
tmp.add(captureState());
//while we can get a token and that token is the same type and we have not transitioned to a new wiki-item of the same type
while ((tmpTokType = scanner.getNextToken()) != WikipediaTokenizerImpl.YYEOF && tmpTokType == tokenType && scanner.getNumWikiTokensSeen() > numSeen){
int currPos = scanner.yychar();
//append whitespace
for (int i = 0; i < (currPos - lastPos); i++){
buffer.append(' ');
}
numAdded = scanner.setText(buffer);
setupSavedToken(scanner.getPositionIncrement(), type);
tmp.add(captureState());
numSeen++;
lastPos = currPos + numAdded;
}
//trim the buffer
// TODO: this is inefficient
String s = buffer.toString().trim();
termAtt.setEmpty().append(s);
offsetAtt.setOffset(correctOffset(theStart), correctOffset(theStart + s.length()));
flagsAtt.setFlags(UNTOKENIZED_TOKEN_FLAG);
//The way the loop is written, we will have proceeded to the next token. We need to pushback the scanner to lastPos
if (tmpTokType != WikipediaTokenizerImpl.YYEOF){
scanner.yypushback(scanner.yylength());
}
tokens = tmp.iterator();
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
private void collapseTokens(int tokenType) throws IOException {
//collapse
StringBuilder buffer = new StringBuilder(32);
int numAdded = scanner.setText(buffer);
//TODO: how to know how much whitespace to add
int theStart = scanner.yychar();
int lastPos = theStart + numAdded;
int tmpTokType;
int numSeen = 0;
//while we can get a token and that token is the same type and we have not transitioned to a new wiki-item of the same type
while ((tmpTokType = scanner.getNextToken()) != WikipediaTokenizerImpl.YYEOF && tmpTokType == tokenType && scanner.getNumWikiTokensSeen() > numSeen){
int currPos = scanner.yychar();
//append whitespace
for (int i = 0; i < (currPos - lastPos); i++){
buffer.append(' ');
}
numAdded = scanner.setText(buffer);
numSeen++;
lastPos = currPos + numAdded;
}
//trim the buffer
// TODO: this is inefficient
String s = buffer.toString().trim();
termAtt.setEmpty().append(s);
offsetAtt.setOffset(correctOffset(theStart), correctOffset(theStart + s.length()));
flagsAtt.setFlags(UNTOKENIZED_TOKEN_FLAG);
//The way the loop is written, we will have proceeded to the next token. We need to pushback the scanner to lastPos
if (tmpTokType != WikipediaTokenizerImpl.YYEOF){
scanner.yypushback(scanner.yylength());
} else {
tokens = null;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
Override
public void reset() throws IOException {
super.reset();
tokens = null;
scanner.reset();
first = true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
Override
public void reset(Reader reader) throws IOException {
super.reset(reader);
scanner.yyreset(input);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
Override
public void end() throws IOException {
// set final offset
final int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
this.offsetAtt.setOffset(finalOffset, finalOffset);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
Override
public boolean incrementToken() throws IOException {
clearAttributes();
// if we are just starting, read the whole input
if (!started) {
started = true;
gramSize = minGram;
char[] chars = new char[1024];
charsRead = 0;
// TODO: refactor to a shared readFully somewhere:
while (charsRead < chars.length) {
final int inc = input.read(chars, charsRead, chars.length-charsRead);
if (inc == -1) {
break;
}
charsRead += inc;
}
inStr = new String(chars, 0, charsRead).trim(); // remove any leading/trailing whitespace
if (charsRead == chars.length) {
// Read extra throwaway chars so that on end() we
// report the correct offset:
char[] throwaway = new char[1024];
while(true) {
final int inc = input.read(throwaway, 0, throwaway.length);
if (inc == -1) {
break;
}
charsRead += inc;
}
}
inLen = inStr.length();
if (inLen == 0) {
return false;
}
}
// if the remaining input is too short, we can't generate any n-grams
if (gramSize > inLen) {
return false;
}
// if we have hit the end of our n-gram size range, quit
if (gramSize > maxGram) {
return false;
}
// grab gramSize chars from front or back
int start = side == Side.FRONT ? 0 : inLen - gramSize;
int end = start + gramSize;
termAtt.setEmpty().append(inStr, start, end);
offsetAtt.setOffset(correctOffset(start), correctOffset(end));
gramSize++;
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
Override
public void reset() throws IOException {
super.reset();
started = false;
}
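// Usage sketch (not from the Lucene/Solr sources): front edge n-grams of size 1..3 over "lucene".
// Assumes this era's (Reader, Side, minGram, maxGram) constructor.
Tokenizer edges = new EdgeNGramTokenizer(new StringReader("lucene"), EdgeNGramTokenizer.Side.FRONT, 1, 3);
CharTermAttribute term = edges.addAttribute(CharTermAttribute.class);
edges.reset();
while (edges.incrementToken()) {
System.out.println(term); // l, lu, luc
}
edges.end();
edges.close();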
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
Override
public final boolean incrementToken() throws IOException {
while (true) {
if (curTermBuffer == null) {
if (!input.incrementToken()) {
return false;
} else {
curTermBuffer = termAtt.buffer().clone();
curTermLength = termAtt.length();
curGramSize = minGram;
tokStart = offsetAtt.startOffset();
tokEnd = offsetAtt.endOffset();
// if length by start + end offsets doesn't match the term text then assume
// this is a synonym and don't adjust the offsets.
hasIllegalOffsets = (tokStart + curTermLength) != tokEnd;
}
}
if (curGramSize <= maxGram) {
if (! (curGramSize > curTermLength // if the remaining input is too short, we can't generate any n-grams
|| curGramSize > maxGram)) { // if we have hit the end of our n-gram size range, quit
// grab gramSize chars from front or back
int start = side == Side.FRONT ? 0 : curTermLength - curGramSize;
int end = start + curGramSize;
clearAttributes();
if (hasIllegalOffsets) {
offsetAtt.setOffset(tokStart, tokEnd);
} else {
offsetAtt.setOffset(tokStart + start, tokStart + end);
}
termAtt.copyBuffer(curTermBuffer, start, curGramSize);
curGramSize++;
return true;
}
}
curTermBuffer = null;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
Override
public void reset() throws IOException {
super.reset();
curTermBuffer = null;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
Override
public boolean incrementToken() throws IOException {
clearAttributes();
if (!started) {
started = true;
gramSize = minGram;
char[] chars = new char[1024];
charsRead = 0;
// TODO: refactor to a shared readFully somewhere:
while (charsRead < chars.length) {
int inc = input.read(chars, charsRead, chars.length-charsRead);
if (inc == -1) {
break;
}
charsRead += inc;
}
inStr = new String(chars, 0, charsRead).trim(); // trim leading/trailing whitespace
if (charsRead == chars.length) {
// Read extra throwaway chars so that on end() we
// report the correct offset:
char[] throwaway = new char[1024];
while(true) {
final int inc = input.read(throwaway, 0, throwaway.length);
if (inc == -1) {
break;
}
charsRead += inc;
}
}
inLen = inStr.length();
if (inLen == 0) {
return false;
}
}
if (pos+gramSize > inLen) { // if we hit the end of the string
pos = 0; // reset to beginning of string
gramSize++; // increase n-gram size
if (gramSize > maxGram) // we are done
return false;
if (pos+gramSize > inLen)
return false;
}
int oldPos = pos;
pos++;
termAtt.setEmpty().append(inStr, oldPos, oldPos+gramSize);
offsetAtt.setOffset(correctOffset(oldPos), correctOffset(oldPos+gramSize));
return true;
}
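// --- illustrative sketch (not from the Lucene/Solr sources) ---
// A hypothetical replay of the pos/gramSize bookkeeping in NGramTokenizer.incrementToken()
// above: all grams of one size are emitted before the size is increased, so "abc" with
// minGram=1, maxGram=2 produces "a", "b", "c", then "ab", "bc".
public class NGramSketch {
  public static void main(String[] args) {
    String in = "abc";
    int minGram = 1, maxGram = 2;
    for (int gramSize = minGram; gramSize <= maxGram; gramSize++) {
      for (int pos = 0; pos + gramSize <= in.length(); pos++) {
        System.out.println(in.substring(pos, pos + gramSize)); // a, b, c, ab, bc
      }
    }
  }
}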
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
Override
public void reset() throws IOException {
super.reset();
started = false;
pos = 0;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
Override
public final boolean incrementToken() throws IOException {
while (true) {
if (curTermBuffer == null) {
if (!input.incrementToken()) {
return false;
} else {
curTermBuffer = termAtt.buffer().clone();
curTermLength = termAtt.length();
curGramSize = minGram;
curPos = 0;
tokStart = offsetAtt.startOffset();
tokEnd = offsetAtt.endOffset();
// if length by start + end offsets doesn't match the term text then assume
// this is a synonym and don't adjust the offsets.
hasIllegalOffsets = (tokStart + curTermLength) != tokEnd;
}
}
while (curGramSize <= maxGram) {
while (curPos+curGramSize <= curTermLength) { // while there is input
clearAttributes();
termAtt.copyBuffer(curTermBuffer, curPos, curGramSize);
if (hasIllegalOffsets) {
offsetAtt.setOffset(tokStart, tokEnd);
} else {
offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize);
}
curPos++;
return true;
}
curGramSize++; // increase n-gram size
curPos = 0;
}
curTermBuffer = null;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
Override
public void reset() throws IOException {
super.reset();
curTermBuffer = null;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
Override
public final boolean incrementToken() throws IOException {
clearAttributes();
int posIncr = 1;
while(true) {
int tokenType = scanner.getNextToken();
if (tokenType == StandardTokenizerInterface.YYEOF) {
return false;
}
if (scanner.yylength() <= maxTokenLength) {
posIncrAtt.setPositionIncrement(posIncr);
scanner.getText(termAtt);
final int start = scanner.yychar();
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
typeAtt.setType(StandardTokenizer.TOKEN_TYPES[tokenType]);
return true;
} else
// When we skip a too-long term, we still increment the
// position increment
posIncr++;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
Override
public void reset(Reader reader) throws IOException {
super.reset(reader);
scanner.yyreset(reader);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
private boolean zzRefill() throws java.io.IOException {
/* first: make room (if you can) */
if (zzStartRead > 0) {
System.arraycopy(zzBuffer, zzStartRead,
zzBuffer, 0,
zzEndRead-zzStartRead);
/* translate stored positions */
zzEndRead-= zzStartRead;
zzCurrentPos-= zzStartRead;
zzMarkedPos-= zzStartRead;
zzStartRead = 0;
}
/* is the buffer big enough? */
if (zzCurrentPos >= zzBuffer.length) {
/* if not: blow it up */
char newBuffer[] = new char[zzCurrentPos*2];
System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
zzBuffer = newBuffer;
}
/* finally: fill the buffer with new input */
int numRead = zzReader.read(zzBuffer, zzEndRead,
zzBuffer.length-zzEndRead);
if (numRead > 0) {
zzEndRead+= numRead;
return false;
}
// unlikely but not impossible: read 0 characters, but not at end of stream
if (numRead == 0) {
int c = zzReader.read();
if (c == -1) {
return true;
} else {
zzBuffer[zzEndRead++] = (char) c;
return false;
}
}
// numRead < 0
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
public final void yyclose() throws java.io.IOException {
zzAtEOF = true; /* indicate end of file */
zzEndRead = zzStartRead; /* invalidate buffer */
if (zzReader != null)
zzReader.close();
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
public int getNextToken() throws java.io.IOException {
int zzInput;
int zzAction;
// cached fields:
int zzCurrentPosL;
int zzMarkedPosL;
int zzEndReadL = zzEndRead;
char [] zzBufferL = zzBuffer;
char [] zzCMapL = ZZ_CMAP;
int [] zzTransL = ZZ_TRANS;
int [] zzRowMapL = ZZ_ROWMAP;
int [] zzAttrL = ZZ_ATTRIBUTE;
while (true) {
zzMarkedPosL = zzMarkedPos;
yychar+= zzMarkedPosL-zzStartRead;
zzAction = -1;
zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
zzState = ZZ_LEXSTATE[zzLexicalState];
// set up zzAction for empty match case:
int zzAttributes = zzAttrL[zzState];
if ( (zzAttributes & 1) == 1 ) {
zzAction = zzState;
}
zzForAction: {
while (true) {
if (zzCurrentPosL < zzEndReadL)
zzInput = zzBufferL[zzCurrentPosL++];
else if (zzAtEOF) {
zzInput = YYEOF;
break zzForAction;
}
else {
// store back cached positions
zzCurrentPos = zzCurrentPosL;
zzMarkedPos = zzMarkedPosL;
boolean eof = zzRefill();
// get translated positions and possibly new buffer
zzCurrentPosL = zzCurrentPos;
zzMarkedPosL = zzMarkedPos;
zzBufferL = zzBuffer;
zzEndReadL = zzEndRead;
if (eof) {
zzInput = YYEOF;
break zzForAction;
}
else {
zzInput = zzBufferL[zzCurrentPosL++];
}
}
int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
if (zzNext == -1) break zzForAction;
zzState = zzNext;
zzAttributes = zzAttrL[zzState];
if ( (zzAttributes & 1) == 1 ) {
zzAction = zzState;
zzMarkedPosL = zzCurrentPosL;
if ( (zzAttributes & 8) == 8 ) break zzForAction;
}
}
}
// store back cached position
zzMarkedPos = zzMarkedPosL;
switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
case 10:
{ return EMAIL;
}
case 11: break;
case 2:
{ return ALPHANUM;
}
case 12: break;
case 4:
{ return HOST;
}
case 13: break;
case 8:
{ return ACRONYM_DEP;
}
case 14: break;
case 5:
{ return NUM;
}
case 15: break;
case 1:
{ /* Break so we don't hit fall-through warning: */ break;/* ignore */
}
case 16: break;
case 9:
{ return ACRONYM;
}
case 17: break;
case 7:
{ return COMPANY;
}
case 18: break;
case 6:
{ return APOSTROPHE;
}
case 19: break;
case 3:
{ return CJ;
}
case 20: break;
default:
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
zzAtEOF = true;
return YYEOF;
}
else {
zzScanError(ZZ_NO_MATCH);
}
}
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
private boolean zzRefill() throws java.io.IOException {
/* first: make room (if you can) */
if (zzStartRead > 0) {
System.arraycopy(zzBuffer, zzStartRead,
zzBuffer, 0,
zzEndRead-zzStartRead);
/* translate stored positions */
zzEndRead-= zzStartRead;
zzCurrentPos-= zzStartRead;
zzMarkedPos-= zzStartRead;
zzStartRead = 0;
}
/* is the buffer big enough? */
if (zzCurrentPos >= zzBuffer.length) {
/* if not: blow it up */
char newBuffer[] = new char[zzCurrentPos*2];
System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
zzBuffer = newBuffer;
}
/* finally: fill the buffer with new input */
int numRead = zzReader.read(zzBuffer, zzEndRead,
zzBuffer.length-zzEndRead);
if (numRead > 0) {
zzEndRead+= numRead;
return false;
}
// unlikely but not impossible: read 0 characters, but not at end of stream
if (numRead == 0) {
int c = zzReader.read();
if (c == -1) {
return true;
} else {
zzBuffer[zzEndRead++] = (char) c;
return false;
}
}
// numRead < 0
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
public final void yyclose() throws java.io.IOException {
zzAtEOF = true; /* indicate end of file */
zzEndRead = zzStartRead; /* invalidate buffer */
if (zzReader != null)
zzReader.close();
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
public int getNextToken() throws java.io.IOException {
int zzInput;
int zzAction;
// cached fields:
int zzCurrentPosL;
int zzMarkedPosL;
int zzEndReadL = zzEndRead;
char [] zzBufferL = zzBuffer;
char [] zzCMapL = ZZ_CMAP;
int [] zzTransL = ZZ_TRANS;
int [] zzRowMapL = ZZ_ROWMAP;
int [] zzAttrL = ZZ_ATTRIBUTE;
while (true) {
zzMarkedPosL = zzMarkedPos;
yychar+= zzMarkedPosL-zzStartRead;
zzAction = -1;
zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
zzState = ZZ_LEXSTATE[zzLexicalState];
// set up zzAction for empty match case:
int zzAttributes = zzAttrL[zzState];
if ( (zzAttributes & 1) == 1 ) {
zzAction = zzState;
}
zzForAction: {
while (true) {
if (zzCurrentPosL < zzEndReadL)
zzInput = zzBufferL[zzCurrentPosL++];
else if (zzAtEOF) {
zzInput = YYEOF;
break zzForAction;
}
else {
// store back cached positions
zzCurrentPos = zzCurrentPosL;
zzMarkedPos = zzMarkedPosL;
boolean eof = zzRefill();
// get translated positions and possibly new buffer
zzCurrentPosL = zzCurrentPos;
zzMarkedPosL = zzMarkedPos;
zzBufferL = zzBuffer;
zzEndReadL = zzEndRead;
if (eof) {
zzInput = YYEOF;
break zzForAction;
}
else {
zzInput = zzBufferL[zzCurrentPosL++];
}
}
int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
if (zzNext == -1) break zzForAction;
zzState = zzNext;
zzAttributes = zzAttrL[zzState];
if ( (zzAttributes & 1) == 1 ) {
zzAction = zzState;
zzMarkedPosL = zzCurrentPosL;
if ( (zzAttributes & 8) == 8 ) break zzForAction;
}
}
}
// store back cached position
zzMarkedPos = zzMarkedPosL;
switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
case 11:
// lookahead expression with fixed base length
zzMarkedPos = zzStartRead + 6;
{ return WORD_TYPE;
}
case 12: break;
case 2:
{ return WORD_TYPE;
}
case 13: break;
case 5:
{ return SOUTH_EAST_ASIAN_TYPE;
}
case 14: break;
case 1:
{ /* Break so we don't hit fall-through warning: */ break;/* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
}
case 15: break;
case 10:
{ return URL_TYPE;
}
case 16: break;
case 9:
{ return EMAIL_TYPE;
}
case 17: break;
case 4:
{ return KATAKANA_TYPE;
}
case 18: break;
case 6:
{ return IDEOGRAPHIC_TYPE;
}
case 19: break;
case 8:
{ return HANGUL_TYPE;
}
case 20: break;
case 3:
{ return NUMERIC_TYPE;
}
case 21: break;
case 7:
{ return HIRAGANA_TYPE;
}
case 22: break;
default:
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
zzAtEOF = true;
{
return StandardTokenizerInterface.YYEOF;
}
}
else {
zzScanError(ZZ_NO_MATCH);
}
}
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
final ClassicTokenizer src = new ClassicTokenizer(matchVersion, reader);
src.setMaxTokenLength(maxTokenLength);
TokenStream tok = new ClassicFilter(src);
tok = new LowerCaseFilter(matchVersion, tok);
tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
protected void reset(final Reader reader) throws IOException {
src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
super.reset(reader);
}
};
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
Override
protected void reset(final Reader reader) throws IOException {
src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
super.reset(reader);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
private boolean zzRefill() throws java.io.IOException {
/* first: make room (if you can) */
if (zzStartRead > 0) {
System.arraycopy(zzBuffer, zzStartRead,
zzBuffer, 0,
zzEndRead-zzStartRead);
/* translate stored positions */
zzEndRead-= zzStartRead;
zzCurrentPos-= zzStartRead;
zzMarkedPos-= zzStartRead;
zzStartRead = 0;
}
/* is the buffer big enough? */
if (zzCurrentPos >= zzBuffer.length) {
/* if not: blow it up */
char newBuffer[] = new char[zzCurrentPos*2];
System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
zzBuffer = newBuffer;
}
/* finally: fill the buffer with new input */
int numRead = zzReader.read(zzBuffer, zzEndRead,
zzBuffer.length-zzEndRead);
if (numRead > 0) {
zzEndRead+= numRead;
return false;
}
// unlikely but not impossible: read 0 characters, but not at end of stream
if (numRead == 0) {
int c = zzReader.read();
if (c == -1) {
return true;
} else {
zzBuffer[zzEndRead++] = (char) c;
return false;
}
}
// numRead < 0
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
public final void yyclose() throws java.io.IOException {
zzAtEOF = true; /* indicate end of file */
zzEndRead = zzStartRead; /* invalidate buffer */
if (zzReader != null)
zzReader.close();
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
public int getNextToken() throws java.io.IOException {
int zzInput;
int zzAction;
// cached fields:
int zzCurrentPosL;
int zzMarkedPosL;
int zzEndReadL = zzEndRead;
char [] zzBufferL = zzBuffer;
char [] zzCMapL = ZZ_CMAP;
int [] zzTransL = ZZ_TRANS;
int [] zzRowMapL = ZZ_ROWMAP;
int [] zzAttrL = ZZ_ATTRIBUTE;
while (true) {
zzMarkedPosL = zzMarkedPos;
yychar+= zzMarkedPosL-zzStartRead;
zzAction = -1;
zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
zzState = ZZ_LEXSTATE[zzLexicalState];
// set up zzAction for empty match case:
int zzAttributes = zzAttrL[zzState];
if ( (zzAttributes & 1) == 1 ) {
zzAction = zzState;
}
zzForAction: {
while (true) {
if (zzCurrentPosL < zzEndReadL)
zzInput = zzBufferL[zzCurrentPosL++];
else if (zzAtEOF) {
zzInput = YYEOF;
break zzForAction;
}
else {
// store back cached positions
zzCurrentPos = zzCurrentPosL;
zzMarkedPos = zzMarkedPosL;
boolean eof = zzRefill();
// get translated positions and possibly new buffer
zzCurrentPosL = zzCurrentPos;
zzMarkedPosL = zzMarkedPos;
zzBufferL = zzBuffer;
zzEndReadL = zzEndRead;
if (eof) {
zzInput = YYEOF;
break zzForAction;
}
else {
zzInput = zzBufferL[zzCurrentPosL++];
}
}
int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
if (zzNext == -1) break zzForAction;
zzState = zzNext;
zzAttributes = zzAttrL[zzState];
if ( (zzAttributes & 1) == 1 ) {
zzAction = zzState;
zzMarkedPosL = zzCurrentPosL;
if ( (zzAttributes & 8) == 8 ) break zzForAction;
}
}
}
// store back cached position
zzMarkedPos = zzMarkedPosL;
switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
case 2:
{ return WORD_TYPE;
}
case 9: break;
case 5:
{ return SOUTH_EAST_ASIAN_TYPE;
}
case 10: break;
case 4:
{ return KATAKANA_TYPE;
}
case 11: break;
case 6:
{ return IDEOGRAPHIC_TYPE;
}
case 12: break;
case 8:
{ return HANGUL_TYPE;
}
case 13: break;
case 3:
{ return NUMERIC_TYPE;
}
case 14: break;
case 7:
{ return HIRAGANA_TYPE;
}
case 15: break;
case 1:
{ /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
}
case 16: break;
default:
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
zzAtEOF = true;
{
return StandardTokenizerInterface.YYEOF;
}
}
else {
zzScanError(ZZ_NO_MATCH);
}
}
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
Override
public final boolean incrementToken() throws IOException {
return input.incrementToken(); // TODO: add some niceties for the new grammar
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
Override
public final boolean incrementToken() throws IOException {
clearAttributes();
int posIncr = 1;
while(true) {
int tokenType = scanner.getNextToken();
if (tokenType == StandardTokenizerInterface.YYEOF) {
return false;
}
if (scanner.yylength() <= maxTokenLength) {
posIncrAtt.setPositionIncrement(posIncr);
scanner.getText(termAtt);
final int start = scanner.yychar();
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
typeAtt.setType(TOKEN_TYPES[tokenType]);
return true;
} else
// When we skip a too-long term, we still increment the
// position increment
posIncr++;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
Override
public void reset(Reader reader) throws IOException {
super.reset(reader);
scanner.yyreset(reader);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
final UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(matchVersion, reader);
src.setMaxTokenLength(maxTokenLength);
TokenStream tok = new StandardFilter(matchVersion, src);
tok = new LowerCaseFilter(matchVersion, tok);
tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
protected void reset(final Reader reader) throws IOException {
src.setMaxTokenLength(UAX29URLEmailAnalyzer.this.maxTokenLength);
super.reset(reader);
}
};
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
Override
protected void reset(final Reader reader) throws IOException {
src.setMaxTokenLength(UAX29URLEmailAnalyzer.this.maxTokenLength);
super.reset(reader);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicFilter.java
Override
public final boolean incrementToken() throws java.io.IOException {
if (!input.incrementToken()) {
return false;
}
final char[] buffer = termAtt.buffer();
final int bufferLength = termAtt.length();
final String type = typeAtt.type();
if (type == APOSTROPHE_TYPE && // remove 's
bufferLength >= 2 &&
buffer[bufferLength-2] == '\'' &&
(buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) {
// Strip last 2 characters off
termAtt.setLength(bufferLength - 2);
} else if (type == ACRONYM_TYPE) { // remove dots
int upto = 0;
for(int i=0;i<bufferLength;i++) {
char c = buffer[i];
if (c != '.')
buffer[upto++] = c;
}
termAtt.setLength(upto);
}
return true;
}
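// --- illustrative sketch (not from the Lucene/Solr sources) ---
// ClassicFilter.incrementToken() above trims a trailing "'s" from APOSTROPHE-typed tokens and
// strips dots from ACRONYM-typed tokens, so "O'Reilly's" becomes "O'Reilly" and "I.B.M."
// becomes "IBM". The helpers below are a hypothetical string-based version of those two edits.
public class ClassicFilterSketch {
  static String stripPossessive(String term) {
    int len = term.length();
    if (len >= 2 && term.charAt(len - 2) == '\''
        && (term.charAt(len - 1) == 's' || term.charAt(len - 1) == 'S')) {
      return term.substring(0, len - 2); // drop the trailing 's
    }
    return term;
  }
  static String stripAcronymDots(String term) {
    StringBuilder sb = new StringBuilder(term.length());
    for (int i = 0; i < term.length(); i++) {
      if (term.charAt(i) != '.') sb.append(term.charAt(i)); // keep everything except '.'
    }
    return sb.toString();
  }
  public static void main(String[] args) {
    System.out.println(stripPossessive("O'Reilly's")); // O'Reilly
    System.out.println(stripAcronymDots("I.B.M."));    // IBM
  }
}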
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
src.setMaxTokenLength(maxTokenLength);
TokenStream tok = new StandardFilter(matchVersion, src);
tok = new LowerCaseFilter(matchVersion, tok);
tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
protected void reset(final Reader reader) throws IOException {
src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
super.reset(reader);
}
};
}
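// --- illustrative sketch (not from the Lucene/Solr sources) ---
// A minimal consumer of the chain built by StandardAnalyzer.createComponents() above,
// assuming the usual TokenStream contract (reset, incrementToken until false, end, close).
// The field name and sample text are made up; the Version constant follows the
// Version.LUCENE_CURRENT usage seen elsewhere in these sources.
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
public class StandardAnalyzerSketch {
  public static void main(String[] args) throws Exception {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    TokenStream ts = analyzer.tokenStream("body", new StringReader("The Quick Brown Fox"));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term.toString()); // quick, brown, fox (lowercased, stopword removed)
    }
    ts.end();
    ts.close();
  }
}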
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
Override
protected void reset(final Reader reader) throws IOException {
src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
super.reset(reader);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
Override
public final boolean incrementToken() throws IOException {
clearAttributes();
int posIncr = 1;
while(true) {
int tokenType = scanner.getNextToken();
if (tokenType == StandardTokenizerInterface.YYEOF) {
return false;
}
if (scanner.yylength() <= maxTokenLength) {
posIncrAtt.setPositionIncrement(posIncr);
scanner.getText(termAtt);
final int start = scanner.yychar();
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
if (tokenType == ClassicTokenizer.ACRONYM_DEP) {
typeAtt.setType(ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.HOST]);
termAtt.setLength(termAtt.length() - 1); // remove extra '.'
} else {
typeAtt.setType(ClassicTokenizer.TOKEN_TYPES[tokenType]);
}
return true;
} else
// When we skip a too-long term, we still increment the
// position increment
posIncr++;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
Override
public void reset(Reader reader) throws IOException {
super.reset(reader);
scanner.yyreset(reader);
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
Override
public final boolean incrementToken() throws IOException {
if (!tokens.isEmpty()) {
assert current != null;
CompoundToken token = tokens.removeFirst();
restoreState(current); // keep all other attributes untouched
termAtt.setEmpty().append(token.txt);
offsetAtt.setOffset(token.startOffset, token.endOffset);
posIncAtt.setPositionIncrement(0);
return true;
}
current = null; // not really needed, but for safety
if (input.incrementToken()) {
// Only words longer than minWordSize get processed
if (termAtt.length() >= this.minWordSize) {
decompose();
// only capture the state if we really need it for producing new tokens
if (!tokens.isEmpty()) {
current = captureState();
}
}
// return original token:
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
Override
public void reset() throws IOException {
super.reset();
tokens.clear();
current = null;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
final String term = termAtt.toString();
// Check the exclusion table.
if (!keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains(term))) {
final String s = stemmer.stem(term);
// If not stemmed, don't waste the time adjusting the token.
if ((s != null) && !s.equals(term))
termAtt.setEmpty().append(s);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java
Override
public boolean incrementToken() throws IOException {
if (!input.incrementToken()) {
return false;
}
final char[] buffer = termAtt.buffer();
final int bufferLength = termAtt.length();
if (bufferLength >= 2 &&
(buffer[bufferLength-2] == '\'' ||
buffer[bufferLength-2] == '\u2019' ||
buffer[bufferLength-2] == '\uFF07') &&
(buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) {
termAtt.setLength(bufferLength - 2); // Strip last 2 characters off
}
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishMinimalStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (!input.incrementToken())
return false;
char[] term = termAttribute.buffer();
int len = termAttribute.length();
if ((!keywordAtt.isKeyword()) && stemmer.stem(term, len)) {
termAttribute.setEmpty().append(stemmer.asCharSequence());
}
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilter.java
Override
public final boolean incrementToken() throws IOException {
if (!input.incrementToken())
return false;
if ((!keywordAttr.isKeyword()) && stemmer.stem(termAtt.buffer(), 0, termAtt.length()))
termAtt.copyBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
char[] termBuffer = termAtt.buffer();
int termLength = termAtt.length();
int minPoz = Integer.MAX_VALUE;
for (int i = 0; i < apostrophes.length; i++) {
char apos = apostrophes[i];
// The equivalent of String.indexOf(ch)
for (int poz = 0; poz < termLength ; poz++) {
if (termBuffer[poz] == apos) {
minPoz = Math.min(poz, minPoz);
break;
}
}
}
// An apostrophe has been found. If the prefix is an article strip it off.
if (minPoz != Integer.MAX_VALUE
&& articles.contains(termAtt.buffer(), 0, minPoz)) {
termAtt.copyBuffer(termAtt.buffer(), minPoz + 1, termAtt.length() - (minPoz + 1));
}
return true;
} else {
return false;
}
}
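// --- illustrative sketch (not from the Lucene/Solr sources) ---
// ElisionFilter.incrementToken() above finds the first apostrophe and, if the prefix is a
// configured article (e.g. l', qu'), keeps only the part after it, so "l'avion" becomes
// "avion". The article set below is a hypothetical subset chosen for illustration.
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
public class ElisionSketch {
  static final Set<String> ARTICLES = new HashSet<String>(Arrays.asList("l", "qu", "d"));
  static String strip(String term) {
    int apos = term.indexOf('\'');
    if (apos >= 0 && ARTICLES.contains(term.substring(0, apos))) {
      return term.substring(apos + 1); // drop "article + apostrophe"
    }
    return term;
  }
  public static void main(String[] args) {
    System.out.println(strip("l'avion")); // avion
    System.out.println(strip("avion"));   // avion (unchanged)
  }
}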
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
int newlen = normalizer.normalize(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
return true;
}
return false;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if(!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java
Override
public int read(char[] cbuf, int off, int len) throws IOException {
final int charsRead = super.read(cbuf, off, len);
if (charsRead > 0) {
final int end = off + charsRead;
while (off < end) {
if (cbuf[off] == '\u200C')
cbuf[off] = ' ';
off++;
}
}
return charsRead;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
final int newlen = normalizer.normalize(termAtt.buffer(),
termAtt.length());
termAtt.setLength(newlen);
return true;
}
return false;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianMinimalStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
// this stemmer increases word length by 1: worst case '*çom' -> '*ción'
final int len = termAtt.length();
final int newlen = stemmer.stem(termAtt.resizeBuffer(len+1), len);
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
Override
protected boolean accept() throws IOException {
return !stopWords.contains(termAtt.buffer(), 0, termAtt.length());
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
Override
public final boolean incrementToken() throws IOException {
if (!done) {
clearAttributes();
done = true;
int upto = 0;
char[] buffer = termAtt.buffer();
while (true) {
final int length = input.read(buffer, upto, buffer.length-upto);
if (length == -1) break;
upto += length;
if (upto == buffer.length)
buffer = termAtt.resizeBuffer(1+buffer.length);
}
termAtt.setLength(upto);
finalOffset = correctOffset(upto);
offsetAtt.setOffset(correctOffset(0), finalOffset);
return true;
}
return false;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
Override
public void reset(Reader input) throws IOException {
super.reset(input);
this.done = false;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
final char[] buffer = termAtt.buffer();
final int length = termAtt.length();
for (int i = 0; i < length;) {
i += Character.toChars(
Character.toLowerCase(
charUtils.codePointAt(buffer, i)), buffer, i);
}
return true;
} else
return false;
}
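// --- illustrative sketch (not from the Lucene/Solr sources) ---
// LowerCaseFilter.incrementToken() above lowercases per code point rather than per char so
// that surrogate pairs (code points above U+FFFF) are handled as one unit. The hypothetical
// loop below does the same thing with plain JDK calls on a local char[].
public class LowerCaseSketch {
  public static void main(String[] args) {
    // "ÅBC" followed by U+10400 (DESERET CAPITAL LETTER LONG I), which needs two chars
    char[] buffer = "ÅBC\uD801\uDC00".toCharArray();
    for (int i = 0; i < buffer.length; ) {
      int cp = Character.codePointAt(buffer, i);
      i += Character.toChars(Character.toLowerCase(cp), buffer, i);
    }
    System.out.println(new String(buffer)); // åbc plus the lowercase supplementary letter
  }
}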
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java
Override
protected boolean accept() throws IOException {
return useWhiteList == stopTypes.contains(typeAttribute.type());
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java
protected static CharArraySet loadStopwordSet(final boolean ignoreCase,
final Class<? extends Analyzer> aClass, final String resource,
final String comment) throws IOException {
Reader reader = null;
try {
reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), IOUtils.CHARSET_UTF_8);
return WordlistLoader.getWordSet(reader, comment, new CharArraySet(Version.LUCENE_CURRENT, 16, ignoreCase));
} finally {
IOUtils.close(reader);
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java
protected static CharArraySet loadStopwordSet(File stopwords,
Version matchVersion) throws IOException {
Reader reader = null;
try {
reader = IOUtils.getDecodingReader(stopwords, IOUtils.CHARSET_UTF_8);
return WordlistLoader.getWordSet(reader, matchVersion);
} finally {
IOUtils.close(reader);
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java
protected static CharArraySet loadStopwordSet(Reader stopwords,
Version matchVersion) throws IOException {
try {
return WordlistLoader.getWordSet(stopwords, matchVersion);
} finally {
IOUtils.close(stopwords);
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
Override
public final boolean incrementToken() throws IOException {
clearAttributes();
int length = 0;
int start = -1; // this variable is always initialized
int end = -1;
char[] buffer = termAtt.buffer();
while (true) {
if (bufferIndex >= dataLen) {
offset += dataLen;
if(!charUtils.fill(ioBuffer, input)) { // read supplementary char aware with CharacterUtils
dataLen = 0; // so next offset += dataLen won't decrement offset
if (length > 0) {
break;
} else {
finalOffset = correctOffset(offset);
return false;
}
}
dataLen = ioBuffer.getLength();
bufferIndex = 0;
}
// use CharacterUtils here to support < 3.1 UTF-16 code unit behavior if the char based methods are gone
final int c = charUtils.codePointAt(ioBuffer.getBuffer(), bufferIndex);
final int charCount = Character.charCount(c);
bufferIndex += charCount;
if (isTokenChar(c)) { // if it's a token char
if (length == 0) { // start of token
assert start == -1;
start = offset + bufferIndex - charCount;
end = start;
} else if (length >= buffer.length-1) { // check if a supplementary could run out of bounds
buffer = termAtt.resizeBuffer(2+length); // make sure a supplementary fits in the buffer
}
end += charCount;
length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized
if (length >= MAX_WORD_LEN) // buffer overflow! make sure to check for >= surrogate pair could break == test
break;
} else if (length > 0) // at non-Letter w/ chars
break; // return 'em
}
termAtt.setLength(length);
assert start != -1;
offsetAtt.setOffset(correctOffset(start), finalOffset = correctOffset(end));
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
Override
public void reset(Reader input) throws IOException {
super.reset(input);
bufferIndex = 0;
offset = 0;
dataLen = 0;
finalOffset = 0;
ioBuffer.reset(); // make sure to reset the IO buffer!!
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
protected CharArraySet getWordSet(ResourceLoader loader,
String wordFiles, boolean ignoreCase) throws IOException {
assureMatchVersion();
List<String> files = splitFileNames(wordFiles);
CharArraySet words = null;
if (files.size() > 0) {
// default stopwords list has 35 or so words, but maybe don't make it that
// big to start
words = new CharArraySet(luceneMatchVersion,
files.size() * 10, ignoreCase);
for (String file : files) {
List<String> wlist = loader.getLines(file.trim());
words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist,
ignoreCase));
}
}
return words;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
protected CharArraySet getSnowballWordSet(ResourceLoader loader,
String wordFiles, boolean ignoreCase) throws IOException {
assureMatchVersion();
List<String> files = splitFileNames(wordFiles);
CharArraySet words = null;
if (files.size() > 0) {
// default stopwords list has 35 or so words, but maybe don't make it that
// big to start
words = new CharArraySet(luceneMatchVersion,
files.size() * 10, ignoreCase);
for (String file : files) {
InputStream stream = null;
Reader reader = null;
try {
stream = loader.openResource(file.trim());
CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
reader = new InputStreamReader(stream, decoder);
WordlistLoader.getSnowballWordSet(reader, words);
} finally {
IOUtils.closeWhileHandlingException(reader, stream);
}
}
}
return words;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
Override
public boolean fill(final CharacterBuffer buffer, final Reader reader) throws IOException {
final char[] charBuffer = buffer.buffer;
buffer.offset = 0;
final int offset;
// Install the previously saved ending high surrogate:
if (buffer.lastTrailingHighSurrogate != 0) {
charBuffer[0] = buffer.lastTrailingHighSurrogate;
offset = 1;
} else {
offset = 0;
}
final int read = reader.read(charBuffer,
offset,
charBuffer.length - offset);
if (read == -1) {
buffer.length = offset;
buffer.lastTrailingHighSurrogate = 0;
return offset != 0;
}
assert read > 0;
buffer.length = read + offset;
// If we read only a single char, and that char was a
// high surrogate, read again:
if (buffer.length == 1
&& Character.isHighSurrogate(charBuffer[buffer.length - 1])) {
final int read2 = reader.read(charBuffer,
1,
charBuffer.length - 1);
if (read2 == -1) {
// NOTE: mal-formed input (ended on a high
// surrogate)! Consumer must deal with it...
return true;
}
assert read2 > 0;
buffer.length += read2;
}
if (buffer.length > 1
&& Character.isHighSurrogate(charBuffer[buffer.length - 1])) {
buffer.lastTrailingHighSurrogate = charBuffer[--buffer.length];
} else {
buffer.lastTrailingHighSurrogate = 0;
}
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
Override
public boolean fill(final CharacterBuffer buffer, final Reader reader) throws IOException {
buffer.offset = 0;
final int read = reader.read(buffer.buffer);
if(read == -1)
return false;
buffer.length = read;
return true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
public static CharArraySet getWordSet(Reader reader, CharArraySet result) throws IOException {
BufferedReader br = null;
try {
br = getBufferedReader(reader);
String word = null;
while ((word = br.readLine()) != null) {
result.add(word.trim());
}
}
finally {
IOUtils.close(br);
}
return result;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
public static CharArraySet getWordSet(Reader reader, Version matchVersion) throws IOException {
return getWordSet(reader, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
public static CharArraySet getWordSet(Reader reader, String comment, Version matchVersion) throws IOException {
return getWordSet(reader, comment, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
public static CharArraySet getWordSet(Reader reader, String comment, CharArraySet result) throws IOException {
BufferedReader br = null;
try {
br = getBufferedReader(reader);
String word = null;
while ((word = br.readLine()) != null) {
if (word.startsWith(comment) == false){
result.add(word.trim());
}
}
}
finally {
IOUtils.close(br);
}
return result;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
public static CharArraySet getSnowballWordSet(Reader reader, CharArraySet result)
throws IOException {
BufferedReader br = null;
try {
br = getBufferedReader(reader);
String line = null;
while ((line = br.readLine()) != null) {
int comment = line.indexOf('|');
if (comment >= 0) line = line.substring(0, comment);
String words[] = line.split("\\s+");
for (int i = 0; i < words.length; i++)
if (words[i].length() > 0) result.add(words[i]);
}
} finally {
IOUtils.close(br);
}
return result;
}
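// --- illustrative sketch (not from the Lucene/Solr sources) ---
// Snowball stopword files put several words on one line and use '|' to start a comment,
// which is exactly what getSnowballWordSet() above parses. A hypothetical line and the
// words it contributes:
public class SnowballLineSketch {
  public static void main(String[] args) {
    String line = "der die das | German articles";
    int comment = line.indexOf('|');
    if (comment >= 0) line = line.substring(0, comment);
    for (String w : line.split("\\s+")) {
      if (w.length() > 0) System.out.println(w); // der, die, das
    }
  }
}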
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
public static CharArraySet getSnowballWordSet(Reader reader, Version matchVersion) throws IOException {
return getSnowballWordSet(reader, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
public static CharArrayMap<String> getStemDict(Reader reader, CharArrayMap<String> result) throws IOException {
BufferedReader br = null;
try {
br = getBufferedReader(reader);
String line;
while ((line = br.readLine()) != null) {
String[] wordstem = line.split("\t", 2);
result.put(wordstem[0], wordstem[1]);
}
} finally {
IOUtils.close(br);
}
return result;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java
Override
public final boolean incrementToken() throws IOException {
if (enablePositionIncrements) {
int skippedPositions = 0;
while (input.incrementToken()) {
if (accept()) {
if (skippedPositions != 0) {
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
}
return true;
}
skippedPositions += posIncrAtt.getPositionIncrement();
}
} else {
while (input.incrementToken()) {
if (accept()) {
if (first) {
// first token having posinc=0 is illegal.
if (posIncrAtt.getPositionIncrement() == 0) {
posIncrAtt.setPositionIncrement(1);
}
first = false;
}
return true;
}
}
}
// reached EOS -- return false
return false;
}
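// --- illustrative sketch (not from the Lucene/Solr sources) ---
// With position increments enabled, FilteringTokenFilter.incrementToken() above adds every
// rejected token's increment to the next accepted token, so for "the quick brown" with "the"
// filtered out, "quick" is emitted with increment 2 instead of 1. Hypothetical bookkeeping:
public class PosIncSketch {
  public static void main(String[] args) {
    String[] tokens = {"the", "quick", "brown"};
    boolean[] accept = {false, true, true};
    int skipped = 0;
    for (int i = 0; i < tokens.length; i++) {
      if (accept[i]) {
        System.out.println(tokens[i] + " posInc=" + (1 + skipped)); // quick posInc=2, brown posInc=1
        skipped = 0;
      } else {
        skipped += 1; // each dropped token normally carries an increment of 1
      }
    }
  }
}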
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java
Override
public void reset() throws IOException {
super.reset();
first = true;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
termAtt.setLength(newlen);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanNormalizationFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
int state = N;
char buffer[] = termAtt.buffer();
int length = termAtt.length();
for (int i = 0; i < length; i++) {
final char c = buffer[i];
switch(c) {
case 'a':
case 'o':
state = U;
break;
case 'u':
state = (state == N) ? U : V;
break;
case 'e':
if (state == U)
length = StemmerUtil.delete(buffer, i--, length);
state = V;
break;
case 'i':
case 'q':
case 'y':
state = V;
break;
case 'ä':
buffer[i] = 'a';
state = V;
break;
case 'ö':
buffer[i] = 'o';
state = V;
break;
case 'ü':
buffer[i] = 'u';
state = V;
break;
case 'ß':
buffer[i++] = 's';
buffer = termAtt.resizeBuffer(1+length);
if (i < length)
System.arraycopy(buffer, i, buffer, i+1, (length-i));
buffer[i] = 's';
length++;
state = N;
break;
default:
state = N;
}
}
termAtt.setLength(length);
return true;
} else {
return false;
}
}
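// --- illustrative sketch (not from the Lucene/Solr sources) ---
// GermanNormalizationFilter.incrementToken() above folds umlauts and expands 'ß' to "ss"
// (the U/V state machine additionally drops an 'e' after 'u' in some contexts, which this
// simplified, hypothetical string version ignores): "weißbier" -> "weissbier".
public class GermanFoldSketch {
  public static void main(String[] args) {
    String s = "weißbier";
    String folded = s.replace("ä", "a").replace("ö", "o").replace("ü", "u").replace("ß", "ss");
    System.out.println(folded); // weissbier
  }
}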
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
String term = termAtt.toString();
if (!keywordAttr.isKeyword()) {
String s = stemmer.stem(term);
// If not stemmed, don't waste the time adjusting the token.
if ((s != null) && !s.equals(term))
termAtt.setEmpty().append(s);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java
Override
public boolean incrementToken() throws IOException {
while (true) {
if (hasBufferedBigram()) {
// case 1: we have multiple remaining codepoints buffered,
// so we can emit a bigram here.
flushBigram();
return true;
} else if (doNext()) {
// case 2: look at the token type. should we form any n-grams?
String type = typeAtt.type();
if (type == doHan || type == doHiragana || type == doKatakana || type == doHangul) {
// acceptable CJK type: we form n-grams from these.
// as long as the offsets are aligned, we just add these to our current buffer.
// otherwise, we clear the buffer and start over.
if (offsetAtt.startOffset() != lastEndOffset) { // unaligned, clear queue
if (hasBufferedUnigram()) {
// we have a buffered unigram, and we peeked ahead to see if we could form
// a bigram, but we can't, because the offsets are unaligned. capture the state
// of this peeked data to be revisited next time thru the loop, and dump our unigram.
loneState = captureState();
flushUnigram();
return true;
}
index = 0;
bufferLen = 0;
}
refill();
} else {
// not a CJK type: we just return these as-is.
if (hasBufferedUnigram()) {
// we have a buffered unigram, and we peeked ahead to see if we could form
// a bigram, but we can't, because it's not a CJK type. capture the state
// of this peeked data to be revisited next time thru the loop, and dump our unigram.
loneState = captureState();
flushUnigram();
return true;
}
return true;
}
} else {
// case 3: we have only zero or 1 codepoints buffered,
// so not enough to form a bigram. But, we also have no
// more input. So if we have a buffered codepoint, emit
// a unigram; otherwise, it's the end of the stream.
if (hasBufferedUnigram()) {
flushUnigram(); // flush our remaining unigram
return true;
}
return false;
}
}
}
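// --- illustrative sketch (not from the Lucene/Solr sources) ---
// For an aligned run of CJK code points, CJKBigramFilter.incrementToken() above emits
// overlapping bigrams and falls back to a unigram for a lone character, so a run of three
// Han characters "一二三" becomes "一二" and "二三". A hypothetical sliding window:
public class CJKBigramSketch {
  public static void main(String[] args) {
    String run = "一二三";
    int[] cps = run.codePoints().toArray(); // JDK convenience used only for this sketch
    if (cps.length == 1) {
      System.out.println(run); // lone character -> unigram
    } else {
      for (int i = 0; i + 1 < cps.length; i++) {
        System.out.println(new String(cps, i, 2)); // 一二, 二三
      }
    }
  }
}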
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java
private boolean doNext() throws IOException {
if (loneState != null) {
restoreState(loneState);
loneState = null;
return true;
} else {
if (exhausted) {
return false;
} else if (input.incrementToken()) {
return true;
} else {
exhausted = true;
return false;
}
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java
private void refill() throws IOException {
// compact buffers to keep them smallish if they become large
// just a safety check, but technically we only need the last codepoint
if (bufferLen > 64) {
int last = bufferLen - 1;
buffer[0] = buffer[last];
startOffset[0] = startOffset[last];
endOffset[0] = endOffset[last];
bufferLen = 1;
index -= last;
}
char termBuffer[] = termAtt.buffer();
int len = termAtt.length();
int start = offsetAtt.startOffset();
int end = offsetAtt.endOffset();
int newSize = bufferLen + len;
buffer = ArrayUtil.grow(buffer, newSize);
startOffset = ArrayUtil.grow(startOffset, newSize);
endOffset = ArrayUtil.grow(endOffset, newSize);
lastEndOffset = end;
if (end - start != len) {
// crazy offsets (modified by synonym or charfilter): just preserve
for (int i = 0, cp = 0; i < len; i += Character.charCount(cp)) {
cp = buffer[bufferLen] = Character.codePointAt(termBuffer, i, len);
startOffset[bufferLen] = start;
endOffset[bufferLen] = end;
bufferLen++;
}
} else {
// normal offsets
for (int i = 0, cp = 0, cpLen = 0; i < len; i += cpLen) {
cp = buffer[bufferLen] = Character.codePointAt(termBuffer, i, len);
cpLen = Character.charCount(cp);
startOffset[bufferLen] = start;
start = endOffset[bufferLen] = start + cpLen;
bufferLen++;
}
}
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKBigramFilter.java
Override
public void reset() throws IOException {
super.reset();
bufferLen = 0;
index = 0;
lastEndOffset = 0;
loneState = null;
exhausted = false;
}
// in lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKWidthFilter.java
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
char text[] = termAtt.buffer();
int length = termAtt.length();
for (int i = 0; i < length; i++) {
final char ch = text[i];
if (ch >= 0xFF01 && ch <= 0xFF5E) {
// Fullwidth ASCII variants
text[i] -= 0xFEE0;
} else if (ch >= 0xFF65 && ch <= 0xFF9F) {
// Halfwidth Katakana variants
if ((ch == 0xFF9E || ch == 0xFF9F) && i > 0 && combine(text, i, ch)) {
length = StemmerUtil.delete(text, i--, length);
} else {
text[i] = KANA_NORM[ch - 0xFF65];
}
}
}
termAtt.setLength(length);
return true;
} else {
return false;
}
}
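// --- illustrative sketch (not from the Lucene/Solr sources) ---
// Fullwidth ASCII variants live at U+FF01..U+FF5E and sit exactly 0xFEE0 above their ASCII
// counterparts, which is why CJKWidthFilter.incrementToken() above subtracts 0xFEE0:
// 'Ａ' (U+FF21) folds to 'A' (U+0041). A hypothetical check of that arithmetic:
public class WidthFoldSketch {
  public static void main(String[] args) {
    char fullwidthA = '\uFF21';
    char folded = (char) (fullwidthA - 0xFEE0);
    System.out.println(folded);                     // A
    System.out.println((int) '\uFF21' - (int) 'A'); // 65248 == 0xFEE0
  }
}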
// in lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
Override
public boolean incrementToken() throws IOException {
if (tokenIter == null || !tokenIter.hasNext()) {
// there are no remaining tokens from the current sentence... are there more sentences?
if (input.incrementToken()) {
tokStart = offsetAtt.startOffset();
tokEnd = offsetAtt.endOffset();
// if length by start + end offsets doesn't match the term text then assume
// this is a synonym and don't adjust the offsets.
hasIllegalOffsets = (tokStart + termAtt.length()) != tokEnd;
// a new sentence is available: process it.
tokenBuffer = wordSegmenter.segmentSentence(termAtt.toString(), offsetAtt.startOffset());
tokenIter = tokenBuffer.iterator();
/*
* it should not be possible to have a sentence with 0 words, check just in case.
* returning EOS isn't the best either, but it's the behavior of the original code.
*/
if (!tokenIter.hasNext())
return false;
} else {
return false; // no more sentences, end of stream!
}
}
// WordTokenFilter must clear attributes, as it is creating new tokens.
clearAttributes();
// There are remaining tokens from the current sentence, return the next one.
SegToken nextWord = tokenIter.next();
termAtt.copyBuffer(nextWord.charArray, 0, nextWord.charArray.length);
if (hasIllegalOffsets) {
offsetAtt.setOffset(tokStart, tokEnd);
} else {
offsetAtt.setOffset(nextWord.startOffset, nextWord.endOffset);
}
typeAtt.setType("word");
return true;
}
// in lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
Override
public void reset() throws IOException {
super.reset();
tokenIter = null;
}
// in lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
static CharArraySet loadDefaultStopWordSet() throws IOException {
// make sure it is unmodifiable as we expose it in the outer class
return CharArraySet.unmodifiableSet(WordlistLoader.getWordSet(IOUtils
.getDecodingReader(SmartChineseAnalyzer.class, DEFAULT_STOPWORD_FILE,
IOUtils.CHARSET_UTF_8), STOPWORD_FILE_COMMENT,
Version.LUCENE_CURRENT));
}
// in lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
Override
public boolean incrementToken() throws IOException {
clearAttributes();
buffer.setLength(0);
int ci;
char ch, pch;
boolean atBegin = true;
tokenStart = tokenEnd;
ci = input.read();
ch = (char) ci;
while (true) {
if (ci == -1) {
break;
} else if (PUNCTION.indexOf(ch) != -1) {
// End of a sentence
buffer.append(ch);
tokenEnd++;
break;
} else if (atBegin && Utility.SPACES.indexOf(ch) != -1) {
tokenStart++;
tokenEnd++;
ci = input.read();
ch = (char) ci;
} else {
buffer.append(ch);
atBegin = false;
tokenEnd++;
pch = ch;
ci = input.read();
ch = (char) ci;
// Two spaces, such as CR, LF
if (Utility.SPACES.indexOf(ch) != -1
&& Utility.SPACES.indexOf(pch) != -1) {
// buffer.append(ch);
tokenEnd++;
break;
}
}
}
if (buffer.length() == 0)
return false;
else {
termAtt.setEmpty().append(buffer);
offsetAtt.setOffset(correctOffset(tokenStart), correctOffset(tokenEnd));
typeAtt.setType("sentence");
return true;
}
}
// in lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
Override
public void reset() throws IOException {
super.reset();
tokenStart = tokenEnd = 0;
}
// in lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
Override
public void reset(Reader input) throws IOException {
super.reset(input);
reset();
}
// in lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
Override
public void end() throws IOException {
// set final offset
final int finalOffset = correctOffset(tokenEnd);
offsetAtt.setOffset(finalOffset, finalOffset);
}
// in lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/WordDictionary.java
public void load() throws IOException, ClassNotFoundException {
InputStream input = this.getClass().getResourceAsStream("coredict.mem");
loadFromObjectInputStream(input);
}
// in lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/WordDictionary.java
private void loadFromObjectInputStream(InputStream serialObjectInputStream)
throws IOException, ClassNotFoundException {
ObjectInputStream input = new ObjectInputStream(serialObjectInputStream);
wordIndexTable = (short[]) input.readObject();
charIndexTable = (char[]) input.readObject();
wordItem_charArrayTable = (char[][][]) input.readObject();
wordItem_frequencyTable = (int[][]) input.readObject();
// log.info("load core dict from serialization.");
input.close();
}
// in lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/WordDictionary.java
private int loadMainDataFromFile(String dctFilePath)
throws FileNotFoundException, IOException, UnsupportedEncodingException {
int i, cnt, length, total = 0;
// The file only counted 6763 Chinese characters plus 5 reserved slots 3756~3760.
// The 3756th is used (as a header) to store information.
int[] buffer = new int[3];
byte[] intBuffer = new byte[4];
String tmpword;
RandomAccessFile dctFile = new RandomAccessFile(dctFilePath, "r");
// GB2312 characters 0 - 6768
for (i = GB2312_FIRST_CHAR; i < GB2312_FIRST_CHAR + CHAR_NUM_IN_FILE; i++) {
// if (i == 5231)
// System.out.println(i);
dctFile.read(intBuffer);
// the dictionary was developed for C, and byte order must be converted to work with Java
cnt = ByteBuffer.wrap(intBuffer).order(ByteOrder.LITTLE_ENDIAN).getInt();
if (cnt <= 0) {
wordItem_charArrayTable[i] = null;
wordItem_frequencyTable[i] = null;
continue;
}
wordItem_charArrayTable[i] = new char[cnt][];
wordItem_frequencyTable[i] = new int[cnt];
total += cnt;
int j = 0;
while (j < cnt) {
// wordItemTable[i][j] = new WordItem();
dctFile.read(intBuffer);
buffer[0] = ByteBuffer.wrap(intBuffer).order(ByteOrder.LITTLE_ENDIAN)
.getInt();// frequency
dctFile.read(intBuffer);
buffer[1] = ByteBuffer.wrap(intBuffer).order(ByteOrder.LITTLE_ENDIAN)
.getInt();// length
dctFile.read(intBuffer);
buffer[2] = ByteBuffer.wrap(intBuffer).order(ByteOrder.LITTLE_ENDIAN)
.getInt();// handle
// wordItemTable[i][j].frequency = buffer[0];
wordItem_frequencyTable[i][j] = buffer[0];
length = buffer[1];
if (length > 0) {
byte[] lchBuffer = new byte[length];
dctFile.read(lchBuffer);
tmpword = new String(lchBuffer, "GB2312");
// indexTable[i].wordItems[j].word = tmpword;
// wordItemTable[i][j].charArray = tmpword.toCharArray();
wordItem_charArrayTable[i][j] = tmpword.toCharArray();
} else {
// wordItemTable[i][j].charArray = null;
wordItem_charArrayTable[i][j] = null;
}
// System.out.println(indexTable[i].wordItems[j]);
j++;
}
String str = getCCByGB2312Id(i);
setTableIndex(str.charAt(0), i);
}
dctFile.close();
return total;
}
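// --- illustrative sketch (not from the Lucene/Solr sources) ---
// The dictionary file was written by C code, so loadMainDataFromFile() above reads every
// 32-bit count through a ByteBuffer with LITTLE_ENDIAN byte order. A hypothetical 4-byte
// record and how it decodes:
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
public class LittleEndianSketch {
  public static void main(String[] args) {
    byte[] intBuffer = {0x2A, 0x00, 0x00, 0x00}; // the value 42 stored little-endian
    int cnt = ByteBuffer.wrap(intBuffer).order(ByteOrder.LITTLE_ENDIAN).getInt();
    System.out.println(cnt); // 42
  }
}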
// in lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java
private void loadFromInputStream(InputStream serialObjectInputStream)
throws IOException, ClassNotFoundException {
ObjectInputStream input = new ObjectInputStream(serialObjectInputStream);
bigramHashTable = (long[]) input.readObject();
frequencyTable = (int[]) input.readObject();
// log.info("load bigram dict from serialization.");
input.close();
}
// in lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java
private void load() throws IOException, ClassNotFoundException {
InputStream input = this.getClass().getResourceAsStream("bigramdict.mem");
loadFromInputStream(input);
}
// in lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/hhmm/BigramDictionary.java
public void loadFromFile(String dctFilePath) throws FileNotFoundException,
IOException, UnsupportedEncodingException {
int i, cnt, length, total = 0;
// The file only counted 6763 Chinese characters plus 5 reserved slots 3756~3760.
// The 3756th is used (as a header) to store information.
int[] buffer = new int[3];
byte[] intBuffer = new byte[4];
String tmpword;
RandomAccessFile dctFile = new RandomAccessFile(dctFilePath, "r");
// GB2312 characters 0 - 6768
for (i = GB2312_FIRST_CHAR; i < GB2312_FIRST_CHAR + CHAR_NUM_IN_FILE; i++) {
String currentStr = getCCByGB2312Id(i);
// if (i == 5231)
// System.out.println(i);
dctFile.read(intBuffer);
// the dictionary was developed for C, and byte order must be converted to work with Java
cnt = ByteBuffer.wrap(intBuffer).order(ByteOrder.LITTLE_ENDIAN).getInt();
if (cnt <= 0) {
continue;
}
total += cnt;
int j = 0;
while (j < cnt) {
dctFile.read(intBuffer);
buffer[0] = ByteBuffer.wrap(intBuffer).order(ByteOrder.LITTLE_ENDIAN)
.getInt();// frequency
dctFile.read(intBuffer);
buffer[1] = ByteBuffer.wrap(intBuffer).order(ByteOrder.LITTLE_ENDIAN)
.getInt();// length
dctFile.read(intBuffer);
// buffer[2] = ByteBuffer.wrap(intBuffer).order(
// ByteOrder.LITTLE_ENDIAN).getInt();// handle
length = buffer[1];
if (length > 0) {
byte[] lchBuffer = new byte[length];
dctFile.read(lchBuffer);
tmpword = new String(lchBuffer, "GB2312");
if (i != 3755 + GB2312_FIRST_CHAR) {
tmpword = currentStr + tmpword;
}
char carray[] = tmpword.toCharArray();
long hashId = hash1(carray);
int index = getAvaliableIndex(hashId, carray);
if (index != -1) {
if (bigramHashTable[index] == 0) {
bigramHashTable[index] = hashId;
// bigramStringTable[index] = tmpword;
}
frequencyTable[index] += buffer[0];
}
}
j++;
}
}
dctFile.close();
// log.info("load dictionary done! " + dctFilePath + " total:" + total);
}
// in lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
protected void initializeIterator() throws IOException {
try {
analyzeInput();
} catch (AnalysisEngineProcessException e) {
throw new IOException(e);
}
finalOffset = correctOffset(cas.getDocumentText().length());
Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
iterator = cas.getAnnotationIndex(tokenType).iterator();
}
// in lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
Override
public boolean incrementToken() throws IOException {
if (iterator == null) {
initializeIterator();
}
if (iterator.hasNext()) {
clearAttributes();
AnnotationFS next = iterator.next();
termAttr.append(next.getCoveredText());
offsetAttr.setOffset(correctOffset(next.getBegin()), correctOffset(next.getEnd()));
return true;
} else {
return false;
}
}
// in lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMAAnnotationsTokenizer.java
Override
public void end() throws IOException {
offsetAttr.setOffset(finalOffset, finalOffset);
super.end();
}
// in lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/ae/BasicAEProvider.java
private XMLInputSource getInputSource() throws IOException {
try {
return new XMLInputSource(aePath);
} catch (IOException e) {
return new XMLInputSource(getClass().getResource(aePath));
}
}
// in lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
protected void initializeIterator() throws IOException {
try {
analyzeInput();
} catch (AnalysisEngineProcessException e) {
throw new IOException(e);
}
featurePath = cas.createFeaturePath();
try {
featurePath.initialize(typeAttributeFeaturePath);
} catch (CASException e) {
featurePath = null;
throw new IOException(e);
}
finalOffset = correctOffset(cas.getDocumentText().length());
Type tokenType = cas.getTypeSystem().getType(tokenTypeString);
iterator = cas.getAnnotationIndex(tokenType).iterator();
}
// in lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
Override
public boolean incrementToken() throws IOException {
if (iterator == null) {
initializeIterator();
}
if (iterator.hasNext()) {
clearAttributes();
AnnotationFS next = iterator.next();
termAttr.append(next.getCoveredText());
offsetAttr.setOffset(correctOffset(next.getBegin()), correctOffset(next.getEnd()));
typeAttr.setType(featurePath.getValueAsString(next));
return true;
} else {
return false;
}
}
// in lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/UIMATypeAwareAnnotationsTokenizer.java
Override
public void end() throws IOException {
offsetAttr.setOffset(finalOffset, finalOffset);
super.end();
}
// in lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
protected void analyzeInput() throws AnalysisEngineProcessException, IOException {
cas.reset();
cas.setDocumentText(toString(input));
ae.process(cas);
}
// in lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
private String toString(Reader reader) throws IOException {
StringBuilder stringBuilder = new StringBuilder();
int ch;
while ((ch = reader.read()) > -1) {
stringBuilder.append((char) ch);
}
return stringBuilder.toString();
}
// in lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
Override
public void reset(Reader input) throws IOException {
super.reset(input);
iterator = null;
}
// in lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
Override
public void end() throws IOException {
iterator = null;
}
// in lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAtt.isKeyword() && termAtt.length() > minLength) {
StringBuilder sb = stemmer.stem(termAtt);
if (sb != null) // if we can't stem it, return unchanged
termAtt.setEmpty().append(sb);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelStemmer.java
public static Trie load(InputStream stemmerTable) throws IOException {
DataInputStream in = null;
try {
in = new DataInputStream(new BufferedInputStream(stemmerTable));
String method = in.readUTF().toUpperCase(Locale.ENGLISH);
if (method.indexOf('M') < 0) {
return new org.egothor.stemmer.Trie(in);
} else {
return new org.egothor.stemmer.MultiTrie2(in);
}
} finally {
in.close();
}
}
// in lucene/analysis/stempel/src/java/org/egothor/stemmer/Trie.java
public void store(DataOutput os) throws IOException {
os.writeBoolean(forward);
os.writeInt(root);
os.writeInt(cmds.size());
for (CharSequence cmd : cmds)
os.writeUTF(cmd.toString());
os.writeInt(rows.size());
for (Row row : rows)
row.store(os);
}
// in lucene/analysis/stempel/src/java/org/egothor/stemmer/Row.java
public void store(DataOutput os) throws IOException {
os.writeInt(cells.size());
Iterator<Character> i = cells.keySet().iterator();
for (; i.hasNext();) {
Character c = i.next();
Cell e = at(c);
if (e.cmd < 0 && e.ref < 0) {
continue;
}
os.writeChar(c.charValue());
os.writeInt(e.cmd);
os.writeInt(e.cnt);
os.writeInt(e.ref);
os.writeInt(e.skip);
}
}
// in lucene/analysis/stempel/src/java/org/egothor/stemmer/MultiTrie2.java
Override
public void store(DataOutput os) throws IOException {
super.store(os);
}
// in lucene/analysis/stempel/src/java/org/egothor/stemmer/MultiTrie.java
Override
public void store(DataOutput os) throws IOException {
os.writeBoolean(forward);
os.writeInt(BY);
os.writeInt(tries.size());
for (Trie trie : tries)
trie.store(os);
}
// in lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java
Override
public boolean incrementToken() throws IOException {
for(;;) {
if (!remainingTokens.isEmpty()) {
// clearAttributes(); // not currently necessary
restoreState(remainingTokens.removeFirst());
return true;
}
if (!input.incrementToken()) return false;
int len = termAtt.length();
if (len==0) return true; // pass through zero length terms
int firstAlternativeIncrement = inject ? 0 : posAtt.getPositionIncrement();
String v = termAtt.toString();
String primaryPhoneticValue = encoder.doubleMetaphone(v);
String alternatePhoneticValue = encoder.doubleMetaphone(v, true);
// a flag to lazily save state if needed... this avoids a save/restore when only
// one token will be generated.
boolean saveState=inject;
if (primaryPhoneticValue!=null && primaryPhoneticValue.length() > 0 && !primaryPhoneticValue.equals(v)) {
if (saveState) {
remainingTokens.addLast(captureState());
}
posAtt.setPositionIncrement( firstAlternativeIncrement );
firstAlternativeIncrement = 0;
termAtt.setEmpty().append(primaryPhoneticValue);
saveState = true;
}
if (alternatePhoneticValue!=null && alternatePhoneticValue.length() > 0
&& !alternatePhoneticValue.equals(primaryPhoneticValue)
&& !primaryPhoneticValue.equals(v)) {
if (saveState) {
remainingTokens.addLast(captureState());
saveState = false;
}
posAtt.setPositionIncrement( firstAlternativeIncrement );
termAtt.setEmpty().append(alternatePhoneticValue);
saveState = true;
}
// Just one token to return, so no need to capture/restore
// any state, simply return it.
if (remainingTokens.isEmpty()) {
return true;
}
if (saveState) {
remainingTokens.addLast(captureState());
}
}
}
// in lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/DoubleMetaphoneFilter.java
Override
public void reset() throws IOException {
input.reset();
remainingTokens.clear();
}
// in lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java
Override
public boolean incrementToken() throws IOException {
if (matcher.find()) {
clearAttributes();
termAtt.setEmpty().append(encoded, matcher.start(1), matcher.end(1));
posIncAtt.setPositionIncrement(0);
offsetAtt.setOffset(startOffset, endOffset);
return true;
}
if (input.incrementToken()) {
encoded = (languages == null)
? engine.encode(termAtt.toString())
: engine.encode(termAtt.toString(), languages);
startOffset = offsetAtt.startOffset();
endOffset = offsetAtt.endOffset();
matcher.reset(encoded);
if (matcher.find()) {
termAtt.setEmpty().append(encoded, matcher.start(1), matcher.end(1));
}
return true;
} else {
return false;
}
}
// in lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/BeiderMorseFilter.java
Override
public void reset() throws IOException {
super.reset();
matcher.reset("");
}
// in lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilter.java
Override
public boolean incrementToken() throws IOException {
if( save != null ) {
// clearAttributes(); // not currently necessary
restoreState(save);
save = null;
return true;
}
if (!input.incrementToken()) return false;
// pass through zero-length terms
if (termAtt.length() == 0) return true;
String value = termAtt.toString();
String phonetic = null;
try {
String v = encoder.encode(value).toString();
if (v.length() > 0 && !value.equals(v)) phonetic = v;
} catch (Exception ignored) {} // just use the direct text
if (phonetic == null) return true;
if (!inject) {
// just modify this token
termAtt.setEmpty().append(phonetic);
return true;
}
// We need to return both the original and the phonetic tokens.
// To avoid an orig=captureState(); change_to_phonetic(); saved=captureState(); restoreState(orig) sequence,
// we return the phonetic alternative first.
int origOffset = posAtt.getPositionIncrement();
posAtt.setPositionIncrement(0);
save = captureState();
posAtt.setPositionIncrement(origOffset);
termAtt.setEmpty().append(phonetic);
return true;
}
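A sketch of the emitted stream when inject=true (the surface form and encoded value below are hypothetical, since the concrete Encoder is pluggable): for an input token "smith" with position increment 1, the filter first returns the phonetic form, e.g. "SM0" from a Metaphone-style encoder, carrying the original position increment; the next call to incrementToken restores the saved state and returns the original "smith" with a position increment of 0, so both terms occupy the same position in the index.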
// in lucene/analysis/phonetic/src/java/org/apache/lucene/analysis/phonetic/PhoneticFilter.java
Override
public void reset() throws IOException {
input.reset();
save = null;
}
// in lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/RBBIRuleCompiler.java
static String getRules(File ruleFile) throws IOException {
StringBuilder rules = new StringBuilder();
InputStream in = new FileInputStream(ruleFile);
BufferedReader cin = new BufferedReader(new InputStreamReader(in, "UTF-8"));
String line = null;
while ((line = cin.readLine()) != null) {
if (!line.startsWith("#"))
rules.append(line);
rules.append('\n');
}
cin.close();
in.close();
return rules.toString();
}
// in lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2Filter.java
Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (normalizer.quickCheck(termAtt) != Normalizer.YES) {
buffer.setLength(0);
normalizer.normalize(termAtt, buffer);
termAtt.setEmpty().append(buffer);
}
return true;
} else {
return false;
}
}
// in lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUTransformFilter.java
Override
public boolean incrementToken() throws IOException {
/*
* Wrap around replaceable. clear the positions, and transliterate.
*/
if (input.incrementToken()) {
replaceableAttribute.setText(termAtt);
final int length = termAtt.length();
position.start = 0;
position.limit = length;
position.contextStart = 0;
position.contextLimit = length;
transform.filteredTransliterate(replaceableAttribute, position, false);
return true;
} else {
return false;
}
}
// in lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
Override
public boolean incrementToken() throws IOException {
clearAttributes();
if (length == 0)
refill();
while (!incrementTokenBuffer()) {
refill();
if (length <= 0) // no more bytes to read;
return false;
}
return true;
}
// in lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
Override
public void reset() throws IOException {
super.reset();
breaker.setText(buffer, 0, 0);
length = usableLength = offset = 0;
}
// in lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
Override
public void reset(Reader input) throws IOException {
super.reset(input);
reset();
}
// in lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
Override
public void end() throws IOException {
final int finalOffset = (length < 0) ? offset : offset + length;
offsetAtt.setOffset(correctOffset(finalOffset), correctOffset(finalOffset));
}
// in lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
private void refill() throws IOException {
offset += usableLength;
int leftover = length - usableLength;
System.arraycopy(buffer, usableLength, buffer, 0, leftover);
int requested = buffer.length - leftover;
int returned = read(input, buffer, leftover, requested);
length = returned + leftover;
if (returned < requested) /* reader has been emptied, process the rest */
usableLength = length;
else { /* still more data to be read, find a safe-stopping place */
usableLength = findSafeEnd();
if (usableLength < 0)
usableLength = length; /*
* more than IOBUFFER of text without a space;
* tokens may be truncated
*/
}
breaker.setText(buffer, 0, Math.max(0, usableLength));
}
// in lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
private static int read(Reader input, char[] buffer, int offset, int length) throws IOException {
assert length >= 0 : "length must not be negative: " + length;
int remaining = length;
while ( remaining > 0 ) {
int location = length - remaining;
int count = input.read( buffer, offset + location, remaining );
if ( -1 == count ) { // EOF
break;
}
remaining -= count;
}
return length - remaining;
}
// in lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
Override
public final boolean incrementToken() throws IOException {
if (lemmaListIndex < lemmaList.size()) {
restoreState(current);
posIncrAtt.setPositionIncrement(0);
popNextLemma();
return true;
} else if (this.input.incrementToken()) {
if (lookupSurfaceForm(termAtt) || lookupSurfaceForm(toLowercase(termAtt))) {
current = captureState();
popNextLemma();
} else {
tagAtt.clear();
}
return true;
} else {
return false;
}
}
// in lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
Override
public void reset() throws IOException {
lemmaListIndex = 0;
lemmaList = Collections.emptyList();
super.reset();
}
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
public TokenInfoDictionaryWriter build(String dirname) throws IOException {
FilenameFilter filter = new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
return name.endsWith(".csv");
}
};
ArrayList<File> csvFiles = new ArrayList<File>();
for (File file : new File(dirname).listFiles(filter)) {
csvFiles.add(file);
}
Collections.sort(csvFiles);
return buildDictionary(csvFiles);
}
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
public TokenInfoDictionaryWriter buildDictionary(List<File> csvFiles) throws IOException {
TokenInfoDictionaryWriter dictionary = new TokenInfoDictionaryWriter(10 * 1024 * 1024);
// all lines in the file
System.out.println(" parse...");
List<String[]> lines = new ArrayList<String[]>(400000);
for (File file : csvFiles){
FileInputStream inputStream = new FileInputStream(file);
Charset cs = Charset.forName(encoding);
CharsetDecoder decoder = cs.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
InputStreamReader streamReader = new InputStreamReader(inputStream, decoder);
BufferedReader reader = new BufferedReader(streamReader);
String line = null;
while ((line = reader.readLine()) != null) {
String[] entry = CSVUtil.parse(line);
if(entry.length < 13) {
System.out.println("Entry in CSV is not valid: " + line);
continue;
}
String[] formatted = formatEntry(entry);
lines.add(formatted);
// NFKC normalize dictionary entry
if (normalizeEntries) {
if (normalizer.isNormalized(entry[0])){
continue;
}
String[] normalizedEntry = new String[entry.length];
for (int i = 0; i < entry.length; i++) {
normalizedEntry[i] = normalizer.normalize(entry[i]);
}
formatted = formatEntry(normalizedEntry);
lines.add(formatted);
}
}
}
System.out.println(" sort...");
// sort by term: we sorted the files already and use a stable sort.
Collections.sort(lines, new Comparator<String[]>() {
public int compare(String[] left, String[] right) {
return left[0].compareTo(right[0]);
}
});
System.out.println(" encode...");
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton(true);
Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true);
IntsRef scratch = new IntsRef();
long ord = -1; // first ord will be 0
String lastValue = null;
// build tokeninfo dictionary
for (String[] entry : lines) {
int next = dictionary.put(entry);
if(next == offset){
System.out.println("Failed to process line: " + Arrays.toString(entry));
continue;
}
String token = entry[0];
if (!token.equals(lastValue)) {
// new word to add to fst
ord++;
lastValue = token;
scratch.grow(token.length());
scratch.length = token.length();
for (int i = 0; i < token.length(); i++) {
scratch.ints[i] = (int) token.charAt(i);
}
fstBuilder.add(scratch, ord);
}
dictionary.addMapping((int)ord, offset);
offset = next;
}
final FST<Long> fst = fstBuilder.finish().pack(2, 100000);
System.out.print(" " + fst.getNodeCount() + " nodes, " + fst.getArcCount() + " arcs, " + fst.sizeInBytes() + " bytes... ");
dictionary.setFST(fst);
System.out.println(" done");
return dictionary;
}
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java
protected final String getBaseFileName(String baseDir) throws IOException {
return baseDir + File.separator + implClazz.getName().replace('.', File.separatorChar);
}
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java
public void write(String baseDir) throws IOException {
final String baseName = getBaseFileName(baseDir);
writeDictionary(baseName + BinaryDictionary.DICT_FILENAME_SUFFIX);
writeTargetMap(baseName + BinaryDictionary.TARGETMAP_FILENAME_SUFFIX);
writePosDict(baseName + BinaryDictionary.POSDICT_FILENAME_SUFFIX);
}
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java
protected void writeTargetMap(String filename) throws IOException {
new File(filename).getParentFile().mkdirs();
OutputStream os = new FileOutputStream(filename);
try {
os = new BufferedOutputStream(os);
final DataOutput out = new OutputStreamDataOutput(os);
CodecUtil.writeHeader(out, BinaryDictionary.TARGETMAP_HEADER, BinaryDictionary.VERSION);
final int numSourceIds = lastSourceId + 1;
out.writeVInt(targetMapEndOffset); // <-- size of main array
out.writeVInt(numSourceIds + 1); // <-- size of offset array (+ 1 more entry)
int prev = 0, sourceId = 0;
for (int ofs = 0; ofs < targetMapEndOffset; ofs++) {
final int val = targetMap[ofs], delta = val - prev;
assert delta >= 0;
if (ofs == targetMapOffsets[sourceId]) {
out.writeVInt((delta << 1) | 0x01);
sourceId++;
} else {
out.writeVInt((delta << 1));
}
prev += delta;
}
assert sourceId == numSourceIds : "sourceId:"+sourceId+" != numSourceIds:"+numSourceIds;
} finally {
os.close();
}
}
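The loop above delta-encodes targetMap and folds the targetMapOffsets boundaries into the low bit of each VInt. A minimal decode sketch follows (an assumption about how a reader would undo this encoding, not code from the Lucene sources; "in" is an assumed DataInput positioned just after the TARGETMAP header):
  int mapSize = in.readVInt();             // size of the main array
  int offsetsSize = in.readVInt();         // numSourceIds + 1
  int[] targetMap = new int[mapSize];
  int[] targetMapOffsets = new int[offsetsSize];
  int accum = 0, sourceId = 0;
  for (int ofs = 0; ofs < mapSize; ofs++) {
    int token = in.readVInt();
    if ((token & 0x01) != 0) {
      targetMapOffsets[sourceId++] = ofs;  // low bit marks the first entry of a new source id
    }
    accum += token >>> 1;                  // undo the delta encoding
    targetMap[ofs] = accum;
  }
  targetMapOffsets[sourceId] = mapSize;    // fill the extra slot (the "+ 1" above) as an end sentinel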
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java
protected void writePosDict(String filename) throws IOException {
new File(filename).getParentFile().mkdirs();
OutputStream os = new FileOutputStream(filename);
try {
os = new BufferedOutputStream(os);
final DataOutput out = new OutputStreamDataOutput(os);
CodecUtil.writeHeader(out, BinaryDictionary.POSDICT_HEADER, BinaryDictionary.VERSION);
out.writeVInt(posDict.size());
for (String s : posDict) {
if (s == null) {
out.writeByte((byte)0);
out.writeByte((byte)0);
out.writeByte((byte)0);
} else {
String data[] = CSVUtil.parse(s);
assert data.length == 3 : "malformed pos/inflection: " + s;
out.writeString(data[0]);
out.writeString(data[1]);
out.writeString(data[2]);
}
}
} finally {
os.close();
}
}
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java
protected void writeDictionary(String filename) throws IOException {
new File(filename).getParentFile().mkdirs();
final FileOutputStream os = new FileOutputStream(filename);
try {
final DataOutput out = new OutputStreamDataOutput(os);
CodecUtil.writeHeader(out, BinaryDictionary.DICT_HEADER, BinaryDictionary.VERSION);
out.writeVInt(buffer.position());
final WritableByteChannel channel = Channels.newChannel(os);
// Write Buffer
buffer.flip(); // set position to 0, set limit to current position
channel.write(buffer);
assert buffer.remaining() == 0L;
} finally {
os.close();
}
}
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/DictionaryBuilder.java
public static void build(DictionaryFormat format,
String inputDirname,
String outputDirname,
String encoding,
boolean normalizeEntry) throws IOException {
System.out.println("building tokeninfo dict...");
TokenInfoDictionaryBuilder tokenInfoBuilder = new TokenInfoDictionaryBuilder(format, encoding, normalizeEntry);
TokenInfoDictionaryWriter tokenInfoDictionary = tokenInfoBuilder.build(inputDirname);
tokenInfoDictionary.write(outputDirname);
tokenInfoDictionary = null;
tokenInfoBuilder = null;
System.out.println("done");
System.out.print("building unknown word dict...");
UnknownDictionaryBuilder unkBuilder = new UnknownDictionaryBuilder(encoding);
UnknownDictionaryWriter unkDictionary = unkBuilder.build(inputDirname);
unkDictionary.write(outputDirname);
unkDictionary = null;
unkBuilder = null;
System.out.println("done");
System.out.print("building connection costs...");
ConnectionCostsWriter connectionCosts
= ConnectionCostsBuilder.build(inputDirname + File.separator + "matrix.def");
connectionCosts.write(outputDirname);
System.out.println("done");
}
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/DictionaryBuilder.java
public static void main(String[] args) throws IOException, ClassNotFoundException {
DictionaryFormat format;
if (args[0].equalsIgnoreCase("ipadic")) {
format = DictionaryFormat.IPADIC;
} else if (args[0].equalsIgnoreCase("unidic")) {
format = DictionaryFormat.UNIDIC;
} else {
System.err.println("Illegal format " + args[0] + " using unidic instead");
format = DictionaryFormat.IPADIC;
}
String inputDirname = args[1];
String outputDirname = args[2];
String inputEncoding = args[3];
boolean normalizeEntries = Boolean.parseBoolean(args[4]);
System.out.println("dictionary builder");
System.out.println("");
System.out.println("dictionary format: " + format);
System.out.println("input directory: " + inputDirname);
System.out.println("output directory: " + outputDirname);
System.out.println("input encoding: " + inputEncoding);
System.out.println("normalize entries: " + normalizeEntries);
System.out.println("");
DictionaryBuilder.build(format, inputDirname, outputDirname, inputEncoding, normalizeEntries);
}
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryWriter.java
Override
public void write(String baseDir) throws IOException {
super.write(baseDir);
writeFST(getBaseFileName(baseDir) + TokenInfoDictionary.FST_FILENAME_SUFFIX);
}
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryWriter.java
protected void writeFST(String filename) throws IOException {
File f = new File(filename);
f.getParentFile().mkdirs();
fst.save(f);
}
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/CharacterDefinitionWriter.java
public void write(String baseDir) throws IOException {
String filename = baseDir + File.separator +
CharacterDefinition.class.getName().replace('.', File.separatorChar) + CharacterDefinition.FILENAME_SUFFIX;
new File(filename).getParentFile().mkdirs();
OutputStream os = new FileOutputStream(filename);
try {
os = new BufferedOutputStream(os);
final DataOutput out = new OutputStreamDataOutput(os);
CodecUtil.writeHeader(out, CharacterDefinition.HEADER, CharacterDefinition.VERSION);
out.writeBytes(characterCategoryMap, 0, characterCategoryMap.length);
for (int i = 0; i < CharacterDefinition.CLASS_COUNT; i++) {
final byte b = (byte) (
(invokeMap[i] ? 0x01 : 0x00) |
(groupMap[i] ? 0x02 : 0x00)
);
out.writeByte(b);
}
} finally {
os.close();
}
}
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ConnectionCostsWriter.java
public void write(String baseDir) throws IOException {
String filename = baseDir + File.separator +
ConnectionCosts.class.getName().replace('.', File.separatorChar) + ConnectionCosts.FILENAME_SUFFIX;
new File(filename).getParentFile().mkdirs();
OutputStream os = new FileOutputStream(filename);
try {
os = new BufferedOutputStream(os);
final DataOutput out = new OutputStreamDataOutput(os);
CodecUtil.writeHeader(out, ConnectionCosts.HEADER, ConnectionCosts.VERSION);
out.writeVInt(forwardSize);
out.writeVInt(backwardSize);
int last = 0;
assert costs.length == backwardSize;
for (short[] a : costs) {
assert a.length == forwardSize;
for (int i = 0; i < a.length; i++) {
int delta = (int)a[i] - last;
out.writeVInt((delta >> 31) ^ (delta << 1));
last = a[i];
}
}
} finally {
os.close();
}
}
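The expression (delta >> 31) ^ (delta << 1) above is zig-zag encoding, so that small negative deltas also become small VInts. A minimal sketch of the transform and its inverse (the decode half is an assumption about how a reader of this file would undo it, not code copied from ConnectionCosts):
  // zig-zag: -1 -> 1, 1 -> 2, -2 -> 3, 2 -> 4, ...
  static int zigZagEncode(int delta) {
    return (delta >> 31) ^ (delta << 1);
  }
  // inverse mapping, applied to each VInt while re-accumulating "last"
  static int zigZagDecode(int encoded) {
    return (encoded >>> 1) ^ -(encoded & 1);
  }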
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java
public static ConnectionCostsWriter build(String filename) throws IOException {
FileInputStream inputStream = new FileInputStream(filename);
Charset cs = Charset.forName("US-ASCII");
CharsetDecoder decoder = cs.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
InputStreamReader streamReader = new InputStreamReader(inputStream, decoder);
LineNumberReader lineReader = new LineNumberReader(streamReader);
String line = lineReader.readLine();
String[] dimensions = line.split("\\s+");
assert dimensions.length == 2;
int forwardSize = Integer.parseInt(dimensions[0]);
int backwardSize = Integer.parseInt(dimensions[1]);
assert forwardSize > 0 && backwardSize > 0;
ConnectionCostsWriter costs = new ConnectionCostsWriter(forwardSize, backwardSize);
while ((line = lineReader.readLine()) != null) {
String[] fields = line.split("\\s+");
assert fields.length == 3;
int forwardId = Integer.parseInt(fields[0]);
int backwardId = Integer.parseInt(fields[1]);
int cost = Integer.parseInt(fields[2]);
costs.add(forwardId, backwardId, cost);
}
return costs;
}
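From the parsing above, matrix.def is a plain whitespace-separated text file: the first line holds the forward and backward dimensions, and every following line is a (forwardId, backwardId, cost) triple. A hypothetical two-by-two file (the cost values are made up for illustration) would look like:
  2 2
  0 0 -434
  0 1 1210
  1 0 320
  1 1 -105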
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryWriter.java
Override
public void write(String baseDir) throws IOException {
super.write(baseDir);
characterDefinition.write(baseDir);
}
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryBuilder.java
public UnknownDictionaryWriter build(String dirname) throws IOException {
UnknownDictionaryWriter unkDictionary = readDictionaryFile(dirname + File.separator + "unk.def"); //Should be only one file
readCharacterDefinition(dirname + File.separator + "char.def", unkDictionary);
return unkDictionary;
}
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryBuilder.java
public UnknownDictionaryWriter readDictionaryFile(String filename)
throws IOException {
return readDictionaryFile(filename, encoding);
}
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryBuilder.java
public UnknownDictionaryWriter readDictionaryFile(String filename, String encoding)
throws IOException {
UnknownDictionaryWriter dictionary = new UnknownDictionaryWriter(5 * 1024 * 1024);
FileInputStream inputStream = new FileInputStream(filename);
Charset cs = Charset.forName(encoding);
CharsetDecoder decoder = cs.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
InputStreamReader streamReader = new InputStreamReader(inputStream, decoder);
LineNumberReader lineReader = new LineNumberReader(streamReader);
dictionary.put(CSVUtil.parse(NGRAM_DICTIONARY_ENTRY));
List<String[]> lines = new ArrayList<String[]>();
String line = null;
while ((line = lineReader.readLine()) != null) {
// note: unk.def has only 10 fields; it simplifies the writer to just append empty reading and pronunciation,
// even though the unknown dictionary returns a hardcoded null here.
final String[] parsed = CSVUtil.parse(line + ",*,*"); // Probably we don't need to validate entry
lines.add(parsed);
}
Collections.sort(lines, new Comparator<String[]>() {
public int compare(String[] left, String[] right) {
int leftId = CharacterDefinition.lookupCharacterClass(left[0]);
int rightId = CharacterDefinition.lookupCharacterClass(right[0]);
return leftId - rightId;
}
});
for (String[] entry : lines) {
dictionary.put(entry);
}
return dictionary;
}
// in lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryBuilder.java
public void readCharacterDefinition(String filename, UnknownDictionaryWriter dictionary) throws IOException {
FileInputStream inputStream = new FileInputStream(filename);
InputStreamReader streamReader = new InputStreamReader(inputStream, encoding);
LineNumberReader lineReader = new LineNumberReader(streamReader);
String line = null;
while ((line = lineReader.readLine()) != null) {
line = line.replaceAll("^\\s", "");
line = line.replaceAll("\\s*#.*", "");
line = line.replaceAll("\\s+", " ");
// Skip empty line or comment line
if(line.length() == 0) {
continue;
}
if(line.startsWith("0x")) { // Category mapping
String[] values = line.split(" ", 2); // Split only first space
if(!values[0].contains("..")) {
int cp = Integer.decode(values[0]).intValue();
dictionary.putCharacterCategory(cp, values[1]);
} else {
String[] codePoints = values[0].split("\\.\\.");
int cpFrom = Integer.decode(codePoints[0]).intValue();
int cpTo = Integer.decode(codePoints[1]).intValue();
for(int i = cpFrom; i <= cpTo; i++){
dictionary.putCharacterCategory(i, values[1]);
}
}
} else { // Invoke definition
String[] values = line.split(" "); // Consecutive space is merged above
String characterClassName = values[0];
int invoke = Integer.parseInt(values[1]);
int group = Integer.parseInt(values[2]);
int length = Integer.parseInt(values[3]);
dictionary.putInvokeDefinition(characterClassName, invoke, group, length);
}
}
}
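The parser above accepts two kinds of lines in char.def (after stripping comments and collapsing whitespace): category mappings that start with "0x" and map a code point or a ".."-range to a character class, and invoke definitions of the form "CLASS invoke group length". A hypothetical fragment in that format (the concrete values are illustrative, not taken from the real char.def):
  # code point (or range) -> character class
  0x0041..0x005A ALPHA
  0x3007 KANJI
  # class invoke group length
  ALPHA 1 1 0
  KANJI 0 0 2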
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java
public int[][] lookup(char[] chars, int off, int len) throws IOException {
// TODO: can we avoid this treemap/toIndexArray?
TreeMap<Integer, int[]> result = new TreeMap<Integer, int[]>(); // index, [length, length...]
boolean found = false; // true if we found any results
final FST.BytesReader fstReader = fst.getBytesReader(0);
FST.Arc<Long> arc = new FST.Arc<Long>();
int end = off + len;
for (int startOffset = off; startOffset < end; startOffset++) {
arc = fst.getFirstArc(arc);
int output = 0;
int remaining = end - startOffset;
for (int i = 0; i < remaining; i++) {
int ch = chars[startOffset+i];
if (fst.findTargetArc(ch, arc, arc, i == 0, fstReader) == null) {
break; // continue to next position
}
output += arc.output.intValue();
if (arc.isFinal()) {
final int finalOutput = output + arc.nextFinalOutput.intValue();
result.put(startOffset-off, segmentations[finalOutput]);
found = true;
}
}
}
return found ? toIndexArray(result) : EMPTY_RESULT;
}
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
protected final InputStream getResource(String suffix) throws IOException {
return getClassResource(getClass(), suffix);
}
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java
public static final InputStream getClassResource(Class<?> clazz, String suffix) throws IOException {
final InputStream is = clazz.getResourceAsStream(clazz.getSimpleName() + suffix);
if (is == null)
throw new FileNotFoundException("Not in classpath: " + clazz.getName().replace('.','/') + suffix);
return is;
}
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoFST.java
public FST.Arc<Long> findTargetArc(int ch, FST.Arc<Long> follow, FST.Arc<Long> arc, boolean useCache, FST.BytesReader fstReader) throws IOException {
if (useCache && ch >= 0x3040 && ch <= cacheCeiling) {
assert ch != FST.END_LABEL;
final Arc<Long> result = rootCache[ch - 0x3040];
if (result == null) {
return null;
} else {
arc.copyFrom(result);
return arc;
}
} else {
return fst.findTargetArc(ch, follow, arc, fstReader);
}
}
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
Override
public void reset(Reader input) throws IOException {
super.reset(input);
buffer.reset(input);
}
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
Override
public void reset() throws IOException {
super.reset();
resetState();
}
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
private int computeSecondBestThreshold(int pos, int length) throws IOException {
// TODO: maybe we do something else here, instead of just
// using the penalty...? EG we can be more aggressive on
// when to also test for 2nd best path
return computePenalty(pos, length);
}
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
private int computePenalty(int pos, int length) throws IOException {
if (length > SEARCH_MODE_KANJI_LENGTH) {
boolean allKanji = true;
// check if node consists of only kanji
final int endPos = pos + length;
for (int pos2 = pos; pos2 < endPos; pos2++) {
if (!characterDefinition.isKanji((char) buffer.get(pos2))) {
allKanji = false;
break;
}
}
if (allKanji) { // Process only Kanji keywords
return (length - SEARCH_MODE_KANJI_LENGTH) * SEARCH_MODE_KANJI_PENALTY;
} else if (length > SEARCH_MODE_OTHER_LENGTH) {
return (length - SEARCH_MODE_OTHER_LENGTH) * SEARCH_MODE_OTHER_PENALTY;
}
}
return 0;
}
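computePenalty only penalizes long candidates in search mode: an all-kanji candidate is penalized once it exceeds SEARCH_MODE_KANJI_LENGTH characters, any other candidate once it exceeds SEARCH_MODE_OTHER_LENGTH, and the penalty grows linearly with the excess. As a worked example, assuming hypothetical values SEARCH_MODE_KANJI_LENGTH = 2 and SEARCH_MODE_KANJI_PENALTY = 3000 (the real constants are defined elsewhere in JapaneseTokenizer and are not shown in this listing), an all-kanji candidate spanning 5 characters would add (5 - 2) * 3000 = 9000 to its path cost.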
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
private void add(Dictionary dict, Position fromPosData, int endPos, int wordID, Type type, boolean addPenalty) throws IOException {
final int wordCost = dict.getWordCost(wordID);
final int leftID = dict.getLeftId(wordID);
int leastCost = Integer.MAX_VALUE;
int leastIDX = -1;
assert fromPosData.count > 0;
for(int idx=0;idx<fromPosData.count;idx++) {
// Cost is path cost so far, plus word cost (added at
// end of loop), plus bigram cost:
final int cost = fromPosData.costs[idx] + costs.get(fromPosData.lastRightID[idx], leftID);
if (VERBOSE) {
System.out.println(" fromIDX=" + idx + ": cost=" + cost + " (prevCost=" + fromPosData.costs[idx] + " wordCost=" + wordCost + " bgCost=" + costs.get(fromPosData.lastRightID[idx], leftID) + " leftID=" + leftID);
}
if (cost < leastCost) {
leastCost = cost;
leastIDX = idx;
if (VERBOSE) {
System.out.println(" **");
}
}
}
leastCost += wordCost;
if (VERBOSE) {
System.out.println(" + cost=" + leastCost + " wordID=" + wordID + " leftID=" + leftID + " leastIDX=" + leastIDX + " toPos=" + endPos + " toPos.idx=" + positions.get(endPos).count);
}
if ((addPenalty || (!outputCompounds && searchMode)) && type != Type.USER) {
final int penalty = computePenalty(fromPosData.pos, endPos - fromPosData.pos);
if (VERBOSE) {
if (penalty > 0) {
System.out.println(" + penalty=" + penalty + " cost=" + (leastCost+penalty));
}
}
leastCost += penalty;
}
//positions.get(endPos).add(leastCost, dict.getRightId(wordID), fromPosData.pos, leastIDX, wordID, type);
assert leftID == dict.getRightId(wordID);
positions.get(endPos).add(leastCost, leftID, fromPosData.pos, leastIDX, wordID, type);
}
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
Override
public boolean incrementToken() throws IOException {
// parse() is able to return w/o producing any new
// tokens, when the tokens it had produced were entirely
// punctuation. So we loop here until we get a real
// token or we end:
while (pending.size() == 0) {
if (end) {
return false;
}
// Push Viterbi forward some more:
parse();
}
final Token token = pending.remove(pending.size()-1);
int position = token.getPosition();
int length = token.getLength();
clearAttributes();
assert length > 0;
//System.out.println("off=" + token.getOffset() + " len=" + length + " vs " + token.getSurfaceForm().length);
termAtt.copyBuffer(token.getSurfaceForm(), token.getOffset(), length);
offsetAtt.setOffset(correctOffset(position), correctOffset(position+length));
basicFormAtt.setToken(token);
posAtt.setToken(token);
readingAtt.setToken(token);
inflectionAtt.setToken(token);
if (token.getPosition() == lastTokenPos) {
posIncAtt.setPositionIncrement(0);
posLengthAtt.setPositionLength(token.getPositionLength());
} else {
assert token.getPosition() > lastTokenPos;
posIncAtt.setPositionIncrement(1);
posLengthAtt.setPositionLength(1);
}
if (VERBOSE) {
System.out.println(Thread.currentThread().getName() + ": incToken: return token=" + token);
}
lastTokenPos = token.getPosition();
return true;
}
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
private void parse() throws IOException {
if (VERBOSE) {
System.out.println("\nPARSE");
}
// Advances over each position (character):
while (true) {
if (buffer.get(pos) == -1) {
// End
break;
}
final Position posData = positions.get(pos);
final boolean isFrontier = positions.getNextPos() == pos+1;
if (posData.count == 0) {
// No arcs arrive here; move to next position:
if (VERBOSE) {
System.out.println(" no arcs in; skip pos=" + pos);
}
pos++;
continue;
}
if (pos > lastBackTracePos && posData.count == 1 && isFrontier) {
// if (pos > lastBackTracePos && posData.count == 1 && isFrontier) {
// We are at a "frontier", and only one node is
// alive, so whatever the eventual best path is must
// come through this node. So we can safely commit
// to the prefix of the best path at this point:
backtrace(posData, 0);
// Re-base cost so we don't risk int overflow:
posData.costs[0] = 0;
if (pending.size() != 0) {
return;
} else {
// This means the backtrace only produced
// punctuation tokens, so we must keep parsing.
}
}
if (pos - lastBackTracePos >= MAX_BACKTRACE_GAP) {
// Safety: if we've buffered too much, force a
// backtrace now. We find the least-cost partial
// path, across all paths, backtrace from it, and
// then prune all others. Note that this, in
// general, can produce the wrong result, if the
// total best path did not in fact back trace
// through this partial best path. But it's the
// best we can do... (short of not having a
// safety!).
// First pass: find least cost partial path so far,
// including ending at future positions:
int leastIDX = -1;
int leastCost = Integer.MAX_VALUE;
Position leastPosData = null;
for(int pos2=pos;pos2<positions.getNextPos();pos2++) {
final Position posData2 = positions.get(pos2);
for(int idx=0;idx<posData2.count;idx++) {
//System.out.println(" idx=" + idx + " cost=" + cost);
final int cost = posData2.costs[idx];
if (cost < leastCost) {
leastCost = cost;
leastIDX = idx;
leastPosData = posData2;
}
}
}
// We will always have at least one live path:
assert leastIDX != -1;
// Second pass: prune all but the best path:
for(int pos2=pos;pos2<positions.getNextPos();pos2++) {
final Position posData2 = positions.get(pos2);
if (posData2 != leastPosData) {
posData2.reset();
} else {
if (leastIDX != 0) {
posData2.costs[0] = posData2.costs[leastIDX];
posData2.lastRightID[0] = posData2.lastRightID[leastIDX];
posData2.backPos[0] = posData2.backPos[leastIDX];
posData2.backIndex[0] = posData2.backIndex[leastIDX];
posData2.backID[0] = posData2.backID[leastIDX];
posData2.backType[0] = posData2.backType[leastIDX];
}
posData2.count = 1;
}
}
backtrace(leastPosData, 0);
// Re-base cost so we don't risk int overflow:
Arrays.fill(leastPosData.costs, 0, leastPosData.count, 0);
if (pos != leastPosData.pos) {
// We jumped into a future position:
assert pos < leastPosData.pos;
pos = leastPosData.pos;
}
if (pending.size() != 0) {
return;
} else {
// This means the backtrace only produced
// punctuation tokens, so we must keep parsing.
continue;
}
}
if (VERBOSE) {
System.out.println("\n extend @ pos=" + pos + " char=" + (char) buffer.get(pos));
}
if (VERBOSE) {
System.out.println(" " + posData.count + " arcs in");
}
boolean anyMatches = false;
// First try user dict:
if (userFST != null) {
userFST.getFirstArc(arc);
int output = 0;
for(int posAhead=posData.pos;;posAhead++) {
final int ch = buffer.get(posAhead);
if (ch == -1) {
break;
}
if (userFST.findTargetArc(ch, arc, arc, posAhead == posData.pos, userFSTReader) == null) {
break;
}
output += arc.output.intValue();
if (arc.isFinal()) {
if (VERBOSE) {
System.out.println(" USER word " + new String(buffer.get(pos, posAhead - pos + 1)) + " toPos=" + (posAhead + 1));
}
add(userDictionary, posData, posAhead+1, output + arc.nextFinalOutput.intValue(), Type.USER, false);
anyMatches = true;
}
}
}
// TODO: we can be more aggressive about user
// matches? if we are "under" a user match then don't
// extend KNOWN/UNKNOWN paths?
if (!anyMatches) {
// Next, try known dictionary matches
fst.getFirstArc(arc);
int output = 0;
for(int posAhead=posData.pos;;posAhead++) {
final int ch = buffer.get(posAhead);
if (ch == -1) {
break;
}
//System.out.println(" match " + (char) ch + " posAhead=" + posAhead);
if (fst.findTargetArc(ch, arc, arc, posAhead == posData.pos, fstReader) == null) {
break;
}
output += arc.output.intValue();
// Optimization: for known words that are too-long
// (compound), we should pre-compute the 2nd
// best segmentation and store it in the
// dictionary instead of recomputing it each time a
// match is found.
if (arc.isFinal()) {
dictionary.lookupWordIds(output + arc.nextFinalOutput.intValue(), wordIdRef);
if (VERBOSE) {
System.out.println(" KNOWN word " + new String(buffer.get(pos, posAhead - pos + 1)) + " toPos=" + (posAhead + 1) + " " + wordIdRef.length + " wordIDs");
}
for (int ofs = 0; ofs < wordIdRef.length; ofs++) {
add(dictionary, posData, posAhead+1, wordIdRef.ints[wordIdRef.offset + ofs], Type.KNOWN, false);
anyMatches = true;
}
}
}
}
// In normal mode, unknown words are not processed greedily.
if (!searchMode && unknownWordEndIndex > posData.pos) {
pos++;
continue;
}
final char firstCharacter = (char) buffer.get(pos);
if (!anyMatches || characterDefinition.isInvoke(firstCharacter)) {
// Find unknown match:
final int characterId = characterDefinition.getCharacterClass(firstCharacter);
final boolean isPunct = isPunctuation(firstCharacter);
// NOTE: copied from UnknownDictionary.lookup:
int unknownWordLength;
if (!characterDefinition.isGroup(firstCharacter)) {
unknownWordLength = 1;
} else {
// Extract the unknown word. Characters with the same character class are considered part of the unknown word.
unknownWordLength = 1;
for (int posAhead=pos+1;unknownWordLength<MAX_UNKNOWN_WORD_LENGTH;posAhead++) {
final int ch = buffer.get(posAhead);
if (ch == -1) {
break;
}
if (characterId == characterDefinition.getCharacterClass((char) ch) &&
isPunctuation((char) ch) == isPunct) {
unknownWordLength++;
} else {
break;
}
}
}
unkDictionary.lookupWordIds(characterId, wordIdRef); // characters in input text are supposed to be the same
if (VERBOSE) {
System.out.println(" UNKNOWN word len=" + unknownWordLength + " " + wordIdRef.length + " wordIDs");
}
for (int ofs = 0; ofs < wordIdRef.length; ofs++) {
add(unkDictionary, posData, posData.pos + unknownWordLength, wordIdRef.ints[wordIdRef.offset + ofs], Type.UNKNOWN, false);
}
unknownWordEndIndex = posData.pos + unknownWordLength;
}
pos++;
}
end = true;
if (pos > 0) {
final Position endPosData = positions.get(pos);
int leastCost = Integer.MAX_VALUE;
int leastIDX = -1;
if (VERBOSE) {
System.out.println(" end: " + endPosData.count + " nodes");
}
for(int idx=0;idx<endPosData.count;idx++) {
// Add EOS cost:
final int cost = endPosData.costs[idx] + costs.get(endPosData.lastRightID[idx], 0);
//System.out.println(" idx=" + idx + " cost=" + cost + " (pathCost=" + endPosData.costs[idx] + " bgCost=" + costs.get(endPosData.lastRightID[idx], 0) + ") backPos=" + endPosData.backPos[idx]);
if (cost < leastCost) {
leastCost = cost;
leastIDX = idx;
}
}
backtrace(endPosData, leastIDX);
} else {
// No characters in the input string; return no tokens!
}
}
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
private void pruneAndRescore(int startPos, int endPos, int bestStartIDX) throws IOException {
if (VERBOSE) {
System.out.println(" pruneAndRescore startPos=" + startPos + " endPos=" + endPos + " bestStartIDX=" + bestStartIDX);
}
// First pass: walk backwards, building up the forward
// arcs and pruning inadmissible arcs:
for(int pos=endPos; pos > startPos; pos--) {
final Position posData = positions.get(pos);
if (VERBOSE) {
System.out.println(" back pos=" + pos);
}
for(int arcIDX=0;arcIDX<posData.count;arcIDX++) {
final int backPos = posData.backPos[arcIDX];
if (backPos >= startPos) {
// Keep this arc:
//System.out.println(" keep backPos=" + backPos);
positions.get(backPos).addForward(pos,
arcIDX,
posData.backID[arcIDX],
posData.backType[arcIDX]);
} else {
if (VERBOSE) {
System.out.println(" prune");
}
}
}
if (pos != startPos) {
posData.count = 0;
}
}
// Second pass: walk forward, re-scoring:
for(int pos=startPos; pos < endPos; pos++) {
final Position posData = positions.get(pos);
if (VERBOSE) {
System.out.println(" forward pos=" + pos + " count=" + posData.forwardCount);
}
if (posData.count == 0) {
// No arcs arrive here...
if (VERBOSE) {
System.out.println(" skip");
}
posData.forwardCount = 0;
continue;
}
if (pos == startPos) {
// On the initial position, only consider the best
// path so we "force congruence": the
// sub-segmentation is "in context" of what the best
// path (compound token) had matched:
final int rightID;
if (startPos == 0) {
rightID = 0;
} else {
rightID = getDict(posData.backType[bestStartIDX]).getRightId(posData.backID[bestStartIDX]);
}
final int pathCost = posData.costs[bestStartIDX];
for(int forwardArcIDX=0;forwardArcIDX<posData.forwardCount;forwardArcIDX++) {
final Type forwardType = posData.forwardType[forwardArcIDX];
final Dictionary dict2 = getDict(forwardType);
final int wordID = posData.forwardID[forwardArcIDX];
final int toPos = posData.forwardPos[forwardArcIDX];
final int newCost = pathCost + dict2.getWordCost(wordID) +
costs.get(rightID, dict2.getLeftId(wordID)) +
computePenalty(pos, toPos-pos);
if (VERBOSE) {
System.out.println(" + " + forwardType + " word " + new String(buffer.get(pos, toPos-pos)) + " toPos=" + toPos + " cost=" + newCost + " penalty=" + computePenalty(pos, toPos-pos) + " toPos.idx=" + positions.get(toPos).count);
}
positions.get(toPos).add(newCost,
dict2.getRightId(wordID),
pos,
bestStartIDX,
wordID,
forwardType);
}
} else {
// On non-initial positions, we maximize score
// across all arriving lastRightIDs:
for(int forwardArcIDX=0;forwardArcIDX<posData.forwardCount;forwardArcIDX++) {
final Type forwardType = posData.forwardType[forwardArcIDX];
final int toPos = posData.forwardPos[forwardArcIDX];
if (VERBOSE) {
System.out.println(" + " + forwardType + " word " + new String(buffer.get(pos, toPos-pos)) + " toPos=" + toPos);
}
add(getDict(forwardType),
posData,
toPos,
posData.forwardID[forwardArcIDX],
forwardType,
true);
}
}
posData.forwardCount = 0;
}
}
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
private void backtrace(final Position endPosData, final int fromIDX) throws IOException {
final int endPos = endPosData.pos;
if (VERBOSE) {
System.out.println("\n backtrace: endPos=" + endPos + " pos=" + pos + "; " + (pos - lastBackTracePos) + " characters; last=" + lastBackTracePos + " cost=" + endPosData.costs[fromIDX]);
}
final char[] fragment = buffer.get(lastBackTracePos, endPos-lastBackTracePos);
if (dotOut != null) {
dotOut.onBacktrace(this, positions, lastBackTracePos, endPosData, fromIDX, fragment, end);
}
int pos = endPos;
int bestIDX = fromIDX;
Token altToken = null;
// We trace backwards, so this will be the leftWordID of
// the token after the one we are now on:
int lastLeftWordID = -1;
int backCount = 0;
// TODO: sort of silly to make Token instances here; the
// back trace has all info needed to generate the
// token. So, we could just directly set the attrs,
// from the backtrace, in incrementToken w/o ever
// creating Token; we'd have to defer calling freeBefore
// until after the backtrace was fully "consumed" by
// incrementToken.
while (pos > lastBackTracePos) {
//System.out.println("BT: back pos=" + pos + " bestIDX=" + bestIDX);
final Position posData = positions.get(pos);
assert bestIDX < posData.count;
int backPos = posData.backPos[bestIDX];
assert backPos >= lastBackTracePos: "backPos=" + backPos + " vs lastBackTracePos=" + lastBackTracePos;
int length = pos - backPos;
Type backType = posData.backType[bestIDX];
int backID = posData.backID[bestIDX];
int nextBestIDX = posData.backIndex[bestIDX];
if (outputCompounds && searchMode && altToken == null && backType != Type.USER) {
// In searchMode, if best path had picked a too-long
// token, we use the "penalty" to compute the allowed
// max cost of an alternate back-trace. If we find an
// alternate back trace with cost below that
// threshold, we pursue it instead (but also output
// the long token).
//System.out.println(" 2nd best backPos=" + backPos + " pos=" + pos);
final int penalty = computeSecondBestThreshold(backPos, pos-backPos);
if (penalty > 0) {
if (VERBOSE) {
System.out.println(" compound=" + new String(buffer.get(backPos, pos-backPos)) + " backPos=" + backPos + " pos=" + pos + " penalty=" + penalty + " cost=" + posData.costs[bestIDX] + " bestIDX=" + bestIDX + " lastLeftID=" + lastLeftWordID);
}
// Use the penalty to set maxCost on the 2nd best
// segmentation:
int maxCost = posData.costs[bestIDX] + penalty;
if (lastLeftWordID != -1) {
maxCost += costs.get(getDict(backType).getRightId(backID), lastLeftWordID);
}
// Now, prune all too-long tokens from the graph:
pruneAndRescore(backPos, pos,
posData.backIndex[bestIDX]);
// Finally, find 2nd best back-trace and resume
// backtrace there:
int leastCost = Integer.MAX_VALUE;
int leastIDX = -1;
for(int idx=0;idx<posData.count;idx++) {
int cost = posData.costs[idx];
//System.out.println(" idx=" + idx + " prevCost=" + cost);
if (lastLeftWordID != -1) {
cost += costs.get(getDict(posData.backType[idx]).getRightId(posData.backID[idx]),
lastLeftWordID);
//System.out.println(" += bgCost=" + costs.get(getDict(posData.backType[idx]).getRightId(posData.backID[idx]),
//lastLeftWordID) + " -> " + cost);
}
//System.out.println("penalty " + posData.backPos[idx] + " to " + pos);
//cost += computePenalty(posData.backPos[idx], pos - posData.backPos[idx]);
if (cost < leastCost) {
//System.out.println(" ** ");
leastCost = cost;
leastIDX = idx;
}
}
//System.out.println(" leastIDX=" + leastIDX);
if (VERBOSE) {
System.out.println(" afterPrune: " + posData.count + " arcs arriving; leastCost=" + leastCost + " vs threshold=" + maxCost + " lastLeftWordID=" + lastLeftWordID);
}
if (leastIDX != -1 && leastCost <= maxCost && posData.backPos[leastIDX] != backPos) {
// We should have pruned the altToken from the graph:
assert posData.backPos[leastIDX] != backPos;
// Save the current compound token, to output when
// this alternate path joins back:
altToken = new Token(backID,
fragment,
backPos - lastBackTracePos,
length,
backType,
backPos,
getDict(backType));
// Redirect our backtrace to 2nd best:
bestIDX = leastIDX;
nextBestIDX = posData.backIndex[bestIDX];
backPos = posData.backPos[bestIDX];
length = pos - backPos;
backType = posData.backType[bestIDX];
backID = posData.backID[bestIDX];
backCount = 0;
//System.out.println(" do alt token!");
} else {
// I think in theory it's possible there is no
// 2nd best path, which is fine; in this case we
// only output the compound token:
//System.out.println(" no alt token! bestIDX=" + bestIDX);
}
}
}
final int offset = backPos - lastBackTracePos;
assert offset >= 0;
if (altToken != null && altToken.getPosition() >= backPos) {
// We've backtraced to the position where the
// compound token starts; add it now:
// The pruning we did when we created the altToken
// ensures that the back trace will align back with
// the start of the altToken:
assert altToken.getPosition() == backPos: altToken.getPosition() + " vs " + backPos;
// NOTE: not quite right: the compound token may
// have had all punctuation back traced so far, but
// then the decompounded token at this position is
// not punctuation. In this case backCount is 0,
// but we should maybe add the altToken anyway...?
if (backCount > 0) {
backCount++;
altToken.setPositionLength(backCount);
if (VERBOSE) {
System.out.println(" add altToken=" + altToken);
}
pending.add(altToken);
} else {
// This means alt token was all punct tokens:
if (VERBOSE) {
System.out.println(" discard all-punctuation altToken=" + altToken);
}
assert discardPunctuation;
}
altToken = null;
}
final Dictionary dict = getDict(backType);
if (backType == Type.USER) {
// Expand the phraseID we recorded into the actual
// segmentation:
final int[] wordIDAndLength = userDictionary.lookupSegmentation(backID);
int wordID = wordIDAndLength[0];
int current = 0;
for(int j=1; j < wordIDAndLength.length; j++) {
final int len = wordIDAndLength[j];
//System.out.println(" add user: len=" + len);
pending.add(new Token(wordID+j-1,
fragment,
current + offset,
len,
Type.USER,
current + backPos,
dict));
if (VERBOSE) {
System.out.println(" add USER token=" + pending.get(pending.size()-1));
}
current += len;
}
// Reverse the tokens we just added, because when we
// serve them up from incrementToken we serve in
// reverse:
Collections.reverse(pending.subList(pending.size() - (wordIDAndLength.length - 1),
pending.size()));
backCount += wordIDAndLength.length-1;
} else {
if (extendedMode && backType == Type.UNKNOWN) {
// In EXTENDED mode we convert unknown word into
// unigrams:
int unigramTokenCount = 0;
for(int i=length-1;i>=0;i--) {
int charLen = 1;
if (i > 0 && Character.isLowSurrogate(fragment[offset+i])) {
i--;
charLen = 2;
}
//System.out.println(" extended tok offset="
//+ (offset + i));
if (!discardPunctuation || !isPunctuation(fragment[offset+i])) {
pending.add(new Token(CharacterDefinition.NGRAM,
fragment,
offset + i,
charLen,
Type.UNKNOWN,
backPos + i,
unkDictionary));
unigramTokenCount++;
}
}
backCount += unigramTokenCount;
} else if (!discardPunctuation || length == 0 || !isPunctuation(fragment[offset])) {
pending.add(new Token(backID,
fragment,
offset,
length,
backType,
backPos,
dict));
if (VERBOSE) {
System.out.println(" add token=" + pending.get(pending.size()-1));
}
backCount++;
} else {
if (VERBOSE) {
System.out.println(" skip punctuation token=" + new String(fragment, offset, length));
}
}
}
lastLeftWordID = dict.getLeftId(backID);
pos = backPos;
bestIDX = nextBestIDX;
}
lastBackTracePos = endPos;
if (VERBOSE) {
System.out.println(" freeBefore pos=" + endPos);
}
// Notify the circular buffers that we are done with
// these positions:
buffer.freeBefore(endPos);
positions.freeBefore(endPos);
}
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseKatakanaStemFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
termAttr.setLength(stem(termAttr.buffer(), termAttr.length()));
}
return true;
} else {
return false;
}
}
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseBaseFormFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAtt.isKeyword()) {
String baseForm = basicFormAtt.getBaseForm();
if (baseForm != null) {
termAtt.setEmpty().append(baseForm);
}
}
return true;
} else {
return false;
}
}
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
Override
protected boolean accept() throws IOException {
final String pos = posAtt.getPartOfSpeech();
return pos == null || !stopTags.contains(pos);
}
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseReadingFormFilter.java
Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
String reading = readingAttr.getReading();
if (reading != null) {
if (useRomaji) {
ToStringUtil.getRomanization(termAttr.setEmpty(), reading);
} else {
termAttr.setEmpty().append(reading);
}
}
return true;
} else {
return false;
}
}
// in lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ToStringUtil.java
public static void getRomanization(Appendable builder, CharSequence s) throws IOException {
final int len = s.length();
for (int i = 0; i < len; i++) {
// maximum lookahead: 3
char ch = s.charAt(i);
char ch2 = (i < len - 1) ? s.charAt(i + 1) : 0;
char ch3 = (i < len - 2) ? s.charAt(i + 2) : 0;
main: switch (ch) {
case 'ッ':
switch (ch2) {
case 'カ':
case 'キ':
case 'ク':
case 'ケ':
case 'コ':
builder.append('k');
break main;
case 'サ':
case 'シ':
case 'ス':
case 'セ':
case 'ソ':
builder.append('s');
break main;
case 'タ':
case 'チ':
case 'ツ':
case 'テ':
case 'ト':
builder.append('t');
break main;
case 'パ':
case 'ピ':
case 'プ':
case 'ペ':
case 'ポ':
builder.append('p');
break main;
}
break;
case 'ア':
builder.append('a');
break;
case 'イ':
if (ch2 == 'ィ') {
builder.append("yi");
i++;
} else if (ch2 == 'ェ') {
builder.append("ye");
i++;
} else {
builder.append('i');
}
break;
case 'ウ':
switch(ch2) {
case 'ァ':
builder.append("wa");
i++;
break;
case 'ィ':
builder.append("wi");
i++;
break;
case 'ゥ':
builder.append("wu");
i++;
break;
case 'ェ':
builder.append("we");
i++;
break;
case 'ォ':
builder.append("wo");
i++;
break;
case 'ュ':
builder.append("wyu");
i++;
break;
default:
builder.append('u');
break;
}
break;
case 'エ':
builder.append('e');
break;
case 'オ':
if (ch2 == 'ウ') {
builder.append('ō');
i++;
} else {
builder.append('o');
}
break;
case 'カ':
builder.append("ka");
break;
case 'キ':
if (ch2 == 'ョ' && ch3 == 'ウ') {
builder.append("kyō");
i += 2;
} else if (ch2 == 'ュ' && ch3 == 'ウ') {
builder.append("kyū");
i += 2;
} else if (ch2 == 'ャ') {
builder.append("kya");
i++;
} else if (ch2 == 'ョ') {
builder.append("kyo");
i++;
} else if (ch2 == 'ュ') {
builder.append("kyu");
i++;
} else if (ch2 == 'ェ') {
builder.append("kye");
i++;
} else {
builder.append("ki");
}
break;
case 'ク':
switch(ch2) {
case 'ァ':
builder.append("kwa");
i++;
break;
case 'ィ':
builder.append("kwi");
i++;
break;
case 'ェ':
builder.append("kwe");
i++;
break;
case 'ォ':
builder.append("kwo");
i++;
break;
case 'ヮ':
builder.append("kwa");
i++;
break;
default:
builder.append("ku");
break;
}
break;
case 'ケ':
builder.append("ke");
break;
case 'コ':
if (ch2 == 'ウ') {
builder.append("kō");
i++;
} else {
builder.append("ko");
}
break;
case 'サ':
builder.append("sa");
break;
case 'シ':
if (ch2 == 'ョ' && ch3 == 'ウ') {
builder.append("shō");
i += 2;
} else if (ch2 == 'ュ' && ch3 == 'ウ') {
builder.append("shū");
i += 2;
} else if (ch2 == 'ャ') {
builder.append("sha");
i++;
} else if (ch2 == 'ョ') {
builder.append("sho");
i++;
} else if (ch2 == 'ュ') {
builder.append("shu");
i++;
} else if (ch2 == 'ェ') {
builder.append("she");
i++;
} else {
builder.append("shi");
}
break;
case 'ス':
if (ch2 == 'ィ') {
builder.append("si");
i++;
} else {
builder.append("su");
}
break;
case 'セ':
builder.append("se");
break;
case 'ソ':
if (ch2 == 'ウ') {
builder.append("sō");
i++;
} else {
builder.append("so");
}
break;
case 'タ':
builder.append("ta");
break;
case 'チ':
if (ch2 == 'ョ' && ch3 == 'ウ') {
builder.append("chō");
i += 2;
} else if (ch2 == 'ュ' && ch3 == 'ウ') {
builder.append("chū");
i += 2;
} else if (ch2 == 'ャ') {
builder.append("cha");
i++;
} else if (ch2 == 'ョ') {
builder.append("cho");
i++;
} else if (ch2 == 'ュ') {
builder.append("chu");
i++;
} else if (ch2 == 'ェ') {
builder.append("che");
i++;
} else {
builder.append("chi");
}
break;
case 'ツ':
if (ch2 == 'ァ') {
builder.append("tsa");
i++;
} else if (ch2 == 'ィ') {
builder.append("tsi");
i++;
} else if (ch2 == 'ェ') {
builder.append("tse");
i++;
} else if (ch2 == 'ォ') {
builder.append("tso");
i++;
} else if (ch2 == 'ュ') {
builder.append("tsyu");
i++;
} else {
builder.append("tsu");
}
break;
case 'テ':
if (ch2 == 'ィ') {
builder.append("ti");
i++;
} else if (ch2 == 'ゥ') {
builder.append("tu");
i++;
} else if (ch2 == 'ュ') {
builder.append("tyu");
i++;
} else {
builder.append("te");
}
break;
case 'ト':
if (ch2 == 'ウ') {
builder.append("tō");
i++;
} else if (ch2 == 'ゥ') {
builder.append("tu");
i++;
} else {
builder.append("to");
}
break;
case 'ナ':
builder.append("na");
break;
case 'ニ':
if (ch2 == 'ョ' && ch3 == 'ウ') {
builder.append("nyō");
i += 2;
} else if (ch2 == 'ュ' && ch3 == 'ウ') {
builder.append("nyū");
i += 2;
} else if (ch2 == 'ャ') {
builder.append("nya");
i++;
} else if (ch2 == 'ョ') {
builder.append("nyo");
i++;
} else if (ch2 == 'ュ') {
builder.append("nyu");
i++;
} else if (ch2 == 'ェ') {
builder.append("nye");
i++;
} else {
builder.append("ni");
}
break;
case 'ヌ':
builder.append("nu");
break;
case 'ネ':
builder.append("ne");
break;
case 'ノ':
if (ch2 == 'ウ') {
builder.append("nō");
i++;
} else {
builder.append("no");
}
break;
case 'ハ':
builder.append("ha");
break;
case 'ヒ':
if (ch2 == 'ョ' && ch3 == 'ウ') {
builder.append("hyō");
i += 2;
} else if (ch2 == 'ュ' && ch3 == 'ウ') {
builder.append("hyū");
i += 2;
} else if (ch2 == 'ャ') {
builder.append("hya");
i++;
} else if (ch2 == 'ョ') {
builder.append("hyo");
i++;
} else if (ch2 == 'ュ') {
builder.append("hyu");
i++;
} else if (ch2 == 'ェ') {
builder.append("hye");
i++;
} else {
builder.append("hi");
}
break;
case 'フ':
if (ch2 == 'ャ') {
builder.append("fya");
i++;
} else if (ch2 == 'ュ') {
builder.append("fyu");
i++;
} else if (ch2 == 'ィ' && ch3 == 'ェ') {
builder.append("fye");
i+=2;
} else if (ch2 == 'ョ') {
builder.append("fyo");
i++;
} else if (ch2 == 'ァ') {
builder.append("fa");
i++;
} else if (ch2 == 'ィ') {
builder.append("fi");
i++;
} else if (ch2 == 'ェ') {
builder.append("fe");
i++;
} else if (ch2 == 'ォ') {
builder.append("fo");
i++;
} else {
builder.append("fu");
}
break;
case 'ヘ':
builder.append("he");
break;
case 'ホ':
if (ch2 == 'ウ') {
builder.append("hō");
i++;
} else if (ch2 == 'ゥ') {
builder.append("hu");
i++;
} else {
builder.append("ho");
}
break;
case 'マ':
builder.append("ma");
break;
case 'ミ':
if (ch2 == 'ョ' && ch3 == 'ウ') {
builder.append("myō");
i += 2;
} else if (ch2 == 'ュ' && ch3 == 'ウ') {
builder.append("myū");
i += 2;
} else if (ch2 == 'ャ') {
builder.append("mya");
i++;
} else if (ch2 == 'ョ') {
builder.append("myo");
i++;
} else if (ch2 == 'ュ') {
builder.append("myu");
i++;
} else if (ch2 == 'ェ') {
builder.append("mye");
i++;
} else {
builder.append("mi");
}
break;
case 'ム':
builder.append("mu");
break;
case 'メ':
builder.append("me");
break;
case 'モ':
if (ch2 == 'ウ') {
builder.append("mō");
i++;
} else {
builder.append("mo");
}
break;
case 'ヤ':
builder.append("ya");
break;
case 'ユ':
builder.append("yu");
break;
case 'ヨ':
if (ch2 == 'ウ') {
builder.append("yō");
i++;
} else {
builder.append("yo");
}
break;
case 'ラ':
if (ch2 == '゜') {
builder.append("la");
i++;
} else {
builder.append("ra");
}
break;
case 'リ':
if (ch2 == 'ョ' && ch3 == 'ウ') {
builder.append("ryō");
i += 2;
} else if (ch2 == 'ュ' && ch3 == 'ウ') {
builder.append("ryū");
i += 2;
} else if (ch2 == 'ャ') {
builder.append("rya");
i++;
} else if (ch2 == 'ョ') {
builder.append("ryo");
i++;
} else if (ch2 == 'ュ') {
builder.append("ryu");
i++;
} else if (ch2 == 'ェ') {
builder.append("rye");
i++;
} else if (ch2 == '゜') {
builder.append("li");
i++;
} else {
builder.append("ri");
}
break;
case 'ル':
if (ch2 == '゜') {
builder.append("lu");
i++;
} else {
builder.append("ru");
}
break;
case 'レ':
if (ch2 == '゜') {
builder.append("le");
i++;
} else {
builder.append("re");
}
break;
case 'ロ':
if (ch2 == 'ウ') {
builder.append("rō");
i++;
} else if (ch2 == '゜') {
builder.append("lo");
i++;
} else {
builder.append("ro");
}
break;
case 'ワ':
builder.append("wa");
break;
case 'ヰ':
builder.append("i");
break;
case 'ヱ':
builder.append("e");
break;
case 'ヲ':
builder.append("o");
break;
case 'ン':
switch (ch2) {
case 'バ':
case 'ビ':
case 'ブ':
case 'ベ':
case 'ボ':
case 'パ':
case 'ピ':
case 'プ':
case 'ペ':
case 'ポ':
case 'マ':
case 'ミ':
case 'ム':
case 'メ':
case 'モ':
builder.append('m');
break main;
case 'ヤ':
case 'ユ':
case 'ヨ':
case 'ア':
case 'イ':
case 'ウ':
case 'エ':
case 'オ':
builder.append("n'");
break main;
default:
builder.append("n");
break main;
}
case 'ガ':
builder.append("ga");
break;
case 'ギ':
if (ch2 == 'ョ' && ch3 == 'ウ') {
builder.append("gyō");
i += 2;
} else if (ch2 == 'ュ' && ch3 == 'ウ') {
builder.append("gyū");
i += 2;
} else if (ch2 == 'ャ') {
builder.append("gya");
i++;
} else if (ch2 == 'ョ') {
builder.append("gyo");
i++;
} else if (ch2 == 'ュ') {
builder.append("gyu");
i++;
} else if (ch2 == 'ェ') {
builder.append("gye");
i++;
} else {
builder.append("gi");
}
break;
case 'グ':
switch(ch2) {
case 'ァ':
builder.append("gwa");
i++;
break;
case 'ィ':
builder.append("gwi");
i++;
break;
case 'ェ':
builder.append("gwe");
i++;
break;
case 'ォ':
builder.append("gwo");
i++;
break;
case 'ヮ':
builder.append("gwa");
i++;
break;
default:
builder.append("gu");
break;
}
break;
case 'ゲ':
builder.append("ge");
break;
case 'ゴ':
if (ch2 == 'ウ') {
builder.append("gō");
i++;
} else {
builder.append("go");
}
break;
case 'ザ':
builder.append("za");
break;
case 'ジ':
if (ch2 == 'ョ' && ch3 == 'ウ') {
builder.append("jō");
i += 2;
} else if (ch2 == 'ュ' && ch3 == 'ウ') {
builder.append("jū");
i += 2;
} else if (ch2 == 'ャ') {
builder.append("ja");
i++;
} else if (ch2 == 'ョ') {
builder.append("jo");
i++;
} else if (ch2 == 'ュ') {
builder.append("ju");
i++;
} else if (ch2 == 'ェ') {
builder.append("je");
i++;
} else {
builder.append("ji");
}
break;
case 'ズ':
if (ch2 == 'ィ') {
builder.append("zi");
i++;
} else {
builder.append("zu");
}
break;
case 'ゼ':
builder.append("ze");
break;
case 'ゾ':
if (ch2 == 'ウ') {
builder.append("zō");
i++;
} else {
builder.append("zo");
}
break;
case 'ダ':
builder.append("da");
break;
case 'ヂ':
// TODO: investigate all this
if (ch2 == 'ョ' && ch3 == 'ウ') {
builder.append("jō");
i += 2;
} else if (ch2 == 'ュ' && ch3 == 'ウ') {
builder.append("jū");
i += 2;
} else if (ch2 == 'ャ') {
builder.append("ja");
i++;
} else if (ch2 == 'ョ') {
builder.append("jo");
i++;
} else if (ch2 == 'ュ') {
builder.append("ju");
i++;
} else if (ch2 == 'ェ') {
builder.append("je");
i++;
} else {
builder.append("ji");
}
break;
case 'ヅ':
builder.append("zu");
break;
case 'デ':
if (ch2 == 'ィ') {
builder.append("di");
i++;
} else if (ch2 == 'ュ') {
builder.append("dyu");
i++;
} else {
builder.append("de");
}
break;
case 'ド':
if (ch2 == 'ウ') {
builder.append("dō");
i++;
} else if (ch2 == 'ゥ') {
builder.append("du");
i++;
} else {
builder.append("do");
}
break;
case 'バ':
builder.append("ba");
break;
case 'ビ':
if (ch2 == 'ョ' && ch3 == 'ウ') {
builder.append("byō");
i += 2;
} else if (ch2 == 'ュ' && ch3 == 'ウ') {
builder.append("byū");
i += 2;
} else if (ch2 == 'ャ') {
builder.append("bya");
i++;
} else if (ch2 == 'ョ') {
builder.append("byo");
i++;
} else if (ch2 == 'ュ') {
builder.append("byu");
i++;
} else if (ch2 == 'ェ') {
builder.append("bye");
i++;
} else {
builder.append("bi");
}
break;
case 'ブ':
builder.append("bu");
break;
case 'ベ':
builder.append("be");
break;
case 'ボ':
if (ch2 == 'ウ') {
builder.append("bō");
i++;
} else {
builder.append("bo");
}
break;
case 'パ':
builder.append("pa");
break;
case 'ピ':
if (ch2 == 'ョ' && ch3 == 'ウ') {
builder.append("pyō");
i += 2;
} else if (ch2 == 'ュ' && ch3 == 'ウ') {
builder.append("pyū");
i += 2;
} else if (ch2 == 'ャ') {
builder.append("pya");
i++;
} else if (ch2 == 'ョ') {
builder.append("pyo");
i++;
} else if (ch2 == 'ュ') {
builder.append("pyu");
i++;
} else if (ch2 == 'ェ') {
builder.append("pye");
i++;
} else {
builder.append("pi");
}
break;
case 'プ':
builder.append("pu");
break;
case 'ペ':
builder.append("pe");
break;
case 'ポ':
if (ch2 == 'ウ') {
builder.append("pō");
i++;
} else {
builder.append("po");
}
break;
case 'ヷ':
builder.append("va");
break;
case 'ヸ':
builder.append("vi");
break;
case 'ヹ':
builder.append("ve");
break;
case 'ヺ':
builder.append("vo");
break;
case 'ヴ':
if (ch2 == 'ィ' && ch3 == 'ェ') {
builder.append("vye");
i+= 2;
} else {
builder.append('v');
}
break;
case 'ァ':
builder.append('a');
break;
case 'ィ':
builder.append('i');
break;
case 'ゥ':
builder.append('u');
break;
case 'ェ':
builder.append('e');
break;
case 'ォ':
builder.append('o');
break;
case 'ヮ':
builder.append("wa");
break;
case 'ャ':
builder.append("ya");
break;
case 'ュ':
builder.append("yu");
break;
case 'ョ':
builder.append("yo");
break;
case 'ー':
break;
default:
builder.append(ch);
}
}
}
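Because getRomanization writes into any Appendable and is declared to throw IOException, a plain StringBuilder works as the sink. A small usage sketch, with the katakana input chosen only as an example:

// usage sketch (not from the Lucene sources)
import java.io.IOException;
import org.apache.lucene.analysis.ja.util.ToStringUtil;

public class RomanizeExample {
  public static void main(String[] args) throws IOException {
    StringBuilder sb = new StringBuilder();      // StringBuilder implements Appendable
    ToStringUtil.getRomanization(sb, "キョウト"); // example reading; produces "kyōto"
    System.out.println(sb);
  }
}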
// in lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/DuplicateFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
if (processingMode == ProcessingMode.PM_FAST_INVALIDATION) {
return fastBits(context.reader(), acceptDocs);
} else {
return correctBits(context.reader(), acceptDocs);
}
}
// in lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/DuplicateFilter.java
private FixedBitSet correctBits(AtomicReader reader, Bits acceptDocs) throws IOException {
FixedBitSet bits = new FixedBitSet(reader.maxDoc()); //assume all are INvalid
Terms terms = reader.fields().terms(fieldName);
if (terms == null) {
return bits;
}
TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
while (true) {
BytesRef currTerm = termsEnum.next();
if (currTerm == null) {
break;
} else {
docs = termsEnum.docs(acceptDocs, docs, false);
int doc = docs.nextDoc();
if (doc != DocIdSetIterator.NO_MORE_DOCS) {
if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE) {
bits.set(doc);
} else {
int lastDoc = doc;
while (true) {
lastDoc = doc;
doc = docs.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
}
bits.set(lastDoc);
}
}
}
}
return bits;
}
// in lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/DuplicateFilter.java
private FixedBitSet fastBits(AtomicReader reader, Bits acceptDocs) throws IOException {
FixedBitSet bits = new FixedBitSet(reader.maxDoc());
bits.set(0, reader.maxDoc()); //assume all are valid
Terms terms = reader.fields().terms(fieldName);
if (terms == null) {
return bits;
}
TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
while (true) {
BytesRef currTerm = termsEnum.next();
if (currTerm == null) {
break;
} else {
if (termsEnum.docFreq() > 1) {
// unset potential duplicates
docs = termsEnum.docs(acceptDocs, docs, false);
int doc = docs.nextDoc();
if (doc != DocIdSetIterator.NO_MORE_DOCS) {
if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE) {
doc = docs.nextDoc();
}
}
int lastDoc = -1;
while (true) {
lastDoc = doc;
bits.clear(lastDoc);
doc = docs.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
}
if (keepMode == KeepMode.KM_USE_LAST_OCCURRENCE) {
// restore the last bit
bits.set(lastDoc);
}
}
}
}
return bits;
}
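Both bit-set builders above are driven from DuplicateFilter.getDocIdSet(); which one runs depends on the configured ProcessingMode. A hedged usage sketch follows, assuming a single-valued, indexed "url" key field and the one-argument DuplicateFilter constructor:

// usage sketch (not from the Lucene sources) - field name "url" is an assumption
import java.io.IOException;
import org.apache.lucene.sandbox.queries.DuplicateFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TopDocs;

public class DedupedSearch {
  static TopDocs searchKeepingOnePerUrl(IndexSearcher searcher) throws IOException {
    Filter dedupe = new DuplicateFilter("url"); // keeps one document per distinct "url" term
    return searcher.search(new MatchAllDocsQuery(), dedupe, 10);
  }
}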
// in lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedTermRangeQuery.java
Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
if (lowerTerm != null && upperTerm != null && collator.compare(lowerTerm, upperTerm) > 0) {
return TermsEnum.EMPTY;
}
TermsEnum tenum = terms.iterator(null);
if (lowerTerm == null && upperTerm == null) {
return tenum;
}
return new SlowCollatedTermRangeTermsEnum(tenum,
lowerTerm, upperTerm, includeLower, includeUpper, collator);
}
// in lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowFuzzyQuery.java
Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
if (!termLongEnough) { // can only match if it's exact
return new SingleTermsEnum(terms.iterator(null), term.bytes());
}
return new SlowFuzzyTermsEnum(terms, atts, getTerm(), minimumSimilarity, prefixLength);
}
// in lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java
private void addTerms(IndexReader reader,FieldVals f) throws IOException
{
if(f.queryString==null) return;
TokenStream ts=analyzer.tokenStream(f.fieldName, new StringReader(f.queryString));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
int corpusNumDocs=reader.numDocs();
HashSet<String> processedTerms=new HashSet<String>();
ts.reset();
while (ts.incrementToken())
{
String term = termAtt.toString();
if(!processedTerms.contains(term))
{
processedTerms.add(term);
ScoreTermQueue variantsQ=new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term
float minScore=0;
Term startTerm=new Term(f.fieldName, term);
AttributeSource atts = new AttributeSource();
MaxNonCompetitiveBoostAttribute maxBoostAtt =
atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
SlowFuzzyTermsEnum fe = new SlowFuzzyTermsEnum(MultiFields.getTerms(reader, startTerm.field()), atts, startTerm, f.minSimilarity, f.prefixLength);
//store the df so all variants use same idf
int df = reader.docFreq(startTerm);
int numVariants=0;
int totalVariantDocFreqs=0;
BytesRef possibleMatch;
BoostAttribute boostAtt =
fe.attributes().addAttribute(BoostAttribute.class);
while ((possibleMatch = fe.next()) != null) {
numVariants++;
totalVariantDocFreqs+=fe.docFreq();
float score=boostAtt.getBoost();
if (variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore){
ScoreTerm st=new ScoreTerm(new Term(startTerm.field(), BytesRef.deepCopyOf(possibleMatch)),score,startTerm);
variantsQ.insertWithOverflow(st);
minScore = variantsQ.top().score; // maintain minScore
}
maxBoostAtt.setMaxNonCompetitiveBoost(variantsQ.size() >= MAX_VARIANTS_PER_TERM ? minScore : Float.NEGATIVE_INFINITY);
}
if(numVariants>0)
{
int avgDf=totalVariantDocFreqs/numVariants;
if(df==0)//no direct match we can use as df for all variants
{
df=avgDf; //use avg df of all variants
}
// take the top variants (scored by edit distance) and reset the score
// to include an IDF factor then add to the global queue for ranking
// overall top query terms
int size = variantsQ.size();
for(int i = 0; i < size; i++)
{
ScoreTerm st = variantsQ.pop();
st.score=(st.score*st.score)*sim.idf(df,corpusNumDocs);
q.insertWithOverflow(st);
}
}
}
}
ts.end();
ts.close();
}
// in lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException
{
if(rewrittenQuery!=null)
{
return rewrittenQuery;
}
//load up the list of possible terms
for (Iterator<FieldVals> iter = fieldVals.iterator(); iter.hasNext();)
{
FieldVals f = iter.next();
addTerms(reader,f);
}
//clear the list of fields
fieldVals.clear();
BooleanQuery bq=new BooleanQuery();
//create BooleanQueries to hold the variants for each token/field pair and ensure it
// has no coord factor
//Step 1: sort the termqueries by term/field
HashMap<Term,ArrayList<ScoreTerm>> variantQueries=new HashMap<Term,ArrayList<ScoreTerm>>();
int size = q.size();
for(int i = 0; i < size; i++)
{
ScoreTerm st = q.pop();
ArrayList<ScoreTerm> l= variantQueries.get(st.fuzziedSourceTerm);
if(l==null)
{
l=new ArrayList<ScoreTerm>();
variantQueries.put(st.fuzziedSourceTerm,l);
}
l.add(st);
}
//Step 2: Organize the sorted termqueries into zero-coord scoring boolean queries
for (Iterator<ArrayList<ScoreTerm>> iter = variantQueries.values().iterator(); iter.hasNext();)
{
ArrayList<ScoreTerm> variants = iter.next();
if(variants.size()==1)
{
//optimize where only one selected variant
ScoreTerm st= variants.get(0);
Query tq = ignoreTF ? new ConstantScoreQuery(new TermQuery(st.term)) : new TermQuery(st.term, 1);
tq.setBoost(st.score); // set the boost to a mix of IDF and score
bq.add(tq, BooleanClause.Occur.SHOULD);
}
else
{
BooleanQuery termVariants=new BooleanQuery(true); //disable coord and IDF for these term variants
for (Iterator<ScoreTerm> iterator2 = variants.iterator(); iterator2
.hasNext();)
{
ScoreTerm st = iterator2.next();
// found a match
Query tq = ignoreTF ? new ConstantScoreQuery(new TermQuery(st.term)) : new TermQuery(st.term, 1);
tq.setBoost(st.score); // set the boost using the ScoreTerm's score
termVariants.add(tq, BooleanClause.Occur.SHOULD); // add to query
}
bq.add(termVariants, BooleanClause.Occur.SHOULD); // add to query
}
}
//TODO possible alternative step 3 - organize above booleans into a new layer of field-based
// booleans with a minimum-should-match of NumFields-1?
bq.setBoost(getBoost());
this.rewrittenQuery=bq;
return bq;
}
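rewrite() above expands every field/term pair registered via addTerms into a ranked set of fuzzy variants and folds them into one BooleanQuery. A sketch of the calling side; the constructor and addTerms arguments (max terms, minimum similarity, prefix length) are stated as assumptions about this version of the class:

// usage sketch (not from the Lucene sources) - constructor/addTerms arguments are assumptions
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;
import org.apache.lucene.search.Query;

public class FuzzyLikeThisExample {
  static Query buildFuzzyQuery(IndexReader reader, Analyzer analyzer) throws IOException {
    FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(32, analyzer); // keep at most 32 ranked variant terms
    flt.addTerms("recieve payment", "body", 0.5f, 2);              // misspelling on purpose; prefix length 2
    return flt.rewrite(reader);                                    // expands into the BooleanQuery built above
  }
}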
// in lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/regex/RegexQuery.java
Override
protected FilteredTermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
return new RegexTermsEnum(terms.iterator(null), term, regexImpl);
}
// in lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowCollatedStringComparator.java
Override
public FieldComparator<String> setNextReader(AtomicReaderContext context) throws IOException {
currentDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field);
return this;
}
// in lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/SlowFuzzyTermsEnum.java
Override
protected void maxEditDistanceChanged(BytesRef lastTerm, int maxEdits, boolean init)
throws IOException {
TermsEnum newEnum = getAutomatonEnum(maxEdits, lastTerm);
if (newEnum != null) {
setEnum(newEnum);
} else if (init) {
setEnum(new LinearFuzzyTermsEnum());
}
}
// in lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
public float search(Query query) {
if (query == null)
throw new IllegalArgumentException("query must not be null");
IndexSearcher searcher = createSearcher();
try {
final float[] scores = new float[1]; // inits to 0.0f (no match)
searcher.search(query, new Collector() {
private Scorer scorer;
@Override
public void collect(int doc) throws IOException {
scores[0] = scorer.score();
}
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
@Override
public boolean acceptsDocsOutOfOrder() {
return true;
}
@Override
public void setNextReader(AtomicReaderContext context) { }
});
float score = scores[0];
return score;
} catch (IOException e) { // can never happen (RAMDirectory)
throw new RuntimeException(e);
} finally {
// searcher.close();
/*
* Note that it is harmless and important for good performance to
* NOT close the index reader!!! This avoids all sorts of
* unnecessary baggage and locking in the Lucene IndexReader
* superclass, all of which is completely unnecessary for this main
* memory index data structure without thread-safety claims.
*
* Wishing IndexReader would be an interface...
*
* Actually with the new tight createSearcher() API auto-closing is now
* made impossible, hence searcher.close() would be harmless and also
* would not degrade performance...
*/
}
}
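search() above scores a single in-memory document against a query and returns 0.0f when nothing matches. A minimal sketch of the surrounding workflow; the field name and text are the assumed parts:

// usage sketch (not from the Lucene sources)
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.TermQuery;

public class MemoryIndexExample {
  static boolean matches(Analyzer analyzer) {
    MemoryIndex index = new MemoryIndex();
    index.addField("content", "katakana readings and base forms", analyzer); // one transient document
    float score = index.search(new TermQuery(new Term("content", "katakana")));
    return score > 0.0f; // 0.0f means the query did not match
  }
}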
// in lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
Override
public void collect(int doc) throws IOException {
scores[0] = scorer.score();
}
// in lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
// in lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
Override
public Terms terms(final String field) {
int i = Arrays.binarySearch(sortedFields, field, termComparator);
if (i < 0) {
return null;
} else {
final Info info = getInfo(i);
info.sortTerms();
return new Terms() {
@Override
public TermsEnum iterator(TermsEnum reuse) {
return new MemoryTermsEnum(info);
}
@Override
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public long size() {
return info.sortedTerms.length;
}
@Override
public long getSumTotalTermFreq() {
return info.getSumTotalTermFreq();
}
@Override
public long getSumDocFreq() throws IOException {
// each term has df=1
return info.sortedTerms.length;
}
@Override
public int getDocCount() throws IOException {
return info.sortedTerms.length > 0 ? 1 : 0;
}
};
}
}
// in lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
Override
public long getSumDocFreq() throws IOException {
// each term has df=1
return info.sortedTerms.length;
}
// in lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
Override
public int getDocCount() throws IOException {
return info.sortedTerms.length > 0 ? 1 : 0;
}
// in lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
Override
public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null;
this.seekExact(((OrdTermState)state).ord);
}
// in lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
Override
public TermState termState() throws IOException {
OrdTermState ts = new OrdTermState();
ts.ord = termUpto;
return ts;
}
// in lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
Override
public int freq() throws IOException {
return positions.size();
}
// in lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
Override
public int freq() throws IOException {
return positions.size() / stride;
}
// in lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
Override
public DocValues docValues(String field) throws IOException {
return null;
}
// in lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
Override
public DocValues normValues(String field) throws IOException {
DocValues norms = cachedNormValues;
Similarity sim = getSimilarity();
if (!field.equals(cachedFieldName) || sim != cachedSimilarity) { // not cached?
Info info = getInfo(field);
int numTokens = info != null ? info.numTokens : 0;
int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
float boost = info != null ? info.getBoost() : 1.0f;
FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
Norm norm = new Norm();
sim.computeNorm(invertState, norm);
SingleValueSource singleByteSource = new SingleValueSource(norm);
norms = new MemoryIndexNormDocValues(singleByteSource);
// cache it for future reuse
cachedNormValues = norms;
cachedFieldName = field;
cachedSimilarity = sim;
if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + norm + ":" + numTokens);
}
return norms;
}
// in lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java
Override
public Source load() throws IOException {
return source;
}
// in lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndexNormDocValues.java
Override
public Source getDirectSource() throws IOException {
return source;
}
// in lucene/queries/src/java/org/apache/lucene/queries/ChainedFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
int[] index = new int[1]; // use array as reference to modifiable int;
index[0] = 0; // an object attribute would not be thread safe.
if (logic != -1) {
return BitsFilteredDocIdSet.wrap(getDocIdSet(context, logic, index), acceptDocs);
} else if (logicArray != null) {
return BitsFilteredDocIdSet.wrap(getDocIdSet(context, logicArray, index), acceptDocs);
}
return BitsFilteredDocIdSet.wrap(getDocIdSet(context, DEFAULT, index), acceptDocs);
}
// in lucene/queries/src/java/org/apache/lucene/queries/ChainedFilter.java
private DocIdSetIterator getDISI(Filter filter, AtomicReaderContext context)
throws IOException {
// we dont pass acceptDocs, we will filter at the end using an additional filter
DocIdSet docIdSet = filter.getDocIdSet(context, null);
if (docIdSet == null) {
return DocIdSet.EMPTY_DOCIDSET.iterator();
} else {
DocIdSetIterator iter = docIdSet.iterator();
if (iter == null) {
return DocIdSet.EMPTY_DOCIDSET.iterator();
} else {
return iter;
}
}
}
// in lucene/queries/src/java/org/apache/lucene/queries/ChainedFilter.java
private OpenBitSetDISI initialResult(AtomicReaderContext context, int logic, int[] index)
throws IOException {
AtomicReader reader = context.reader();
OpenBitSetDISI result;
/**
* First AND operation takes place against a completely false
* bitset and will always return zero results.
*/
if (logic == AND) {
result = new OpenBitSetDISI(getDISI(chain[index[0]], context), reader.maxDoc());
++index[0];
} else if (logic == ANDNOT) {
result = new OpenBitSetDISI(getDISI(chain[index[0]], context), reader.maxDoc());
result.flip(0, reader.maxDoc()); // NOTE: may set bits for deleted docs.
++index[0];
} else {
result = new OpenBitSetDISI(reader.maxDoc());
}
return result;
}
// in lucene/queries/src/java/org/apache/lucene/queries/ChainedFilter.java
private DocIdSet getDocIdSet(AtomicReaderContext context, int logic, int[] index)
throws IOException {
OpenBitSetDISI result = initialResult(context, logic, index);
for (; index[0] < chain.length; index[0]++) {
// we dont pass acceptDocs, we will filter at the end using an additional filter
doChain(result, logic, chain[index[0]].getDocIdSet(context, null));
}
return result;
}
// in lucene/queries/src/java/org/apache/lucene/queries/ChainedFilter.java
private DocIdSet getDocIdSet(AtomicReaderContext context, int[] logic, int[] index)
throws IOException {
if (logic.length != chain.length) {
throw new IllegalArgumentException("Invalid number of elements in logic array");
}
OpenBitSetDISI result = initialResult(context, logic[0], index);
for (; index[0] < chain.length; index[0]++) {
// we dont pass acceptDocs, we will filter at the end using an additional filter
doChain(result, logic[index[0]], chain[index[0]].getDocIdSet(context, null));
}
return result;
}
// in lucene/queries/src/java/org/apache/lucene/queries/ChainedFilter.java
private void doChain(OpenBitSetDISI result, int logic, DocIdSet dis)
throws IOException {
if (dis instanceof OpenBitSet) {
// optimized case for OpenBitSets
switch (logic) {
case OR:
result.or((OpenBitSet) dis);
break;
case AND:
result.and((OpenBitSet) dis);
break;
case ANDNOT:
result.andNot((OpenBitSet) dis);
break;
case XOR:
result.xor((OpenBitSet) dis);
break;
default:
doChain(result, DEFAULT, dis);
break;
}
} else {
DocIdSetIterator disi;
if (dis == null) {
disi = DocIdSet.EMPTY_DOCIDSET.iterator();
} else {
disi = dis.iterator();
if (disi == null) {
disi = DocIdSet.EMPTY_DOCIDSET.iterator();
}
}
switch (logic) {
case OR:
result.inPlaceOr(disi);
break;
case AND:
result.inPlaceAnd(disi);
break;
case ANDNOT:
result.inPlaceNot(disi);
break;
case XOR:
result.inPlaceXor(disi);
break;
default:
doChain(result, DEFAULT, dis);
break;
}
}
}
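doChain() above applies one of the OR/AND/ANDNOT/XOR operations per element of the chain, either with a single logic constant or with a per-filter logic array. A construction sketch of both variants; the two member filters are placeholders:

// usage sketch (not from the Lucene sources) - recentFilter/inStockFilter are placeholders
import org.apache.lucene.queries.ChainedFilter;
import org.apache.lucene.search.Filter;

public class ChainedFilterExample {
  static ChainedFilter sameLogic(Filter recentFilter, Filter inStockFilter) {
    // every filter in the chain is AND-ed into the result
    return new ChainedFilter(new Filter[] { recentFilter, inStockFilter }, ChainedFilter.AND);
  }
  static ChainedFilter perFilterLogic(Filter recentFilter, Filter inStockFilter) {
    // AND the first filter, then subtract the second (ANDNOT)
    return new ChainedFilter(new Filter[] { recentFilter, inStockFilter },
        new int[] { ChainedFilter.AND, ChainedFilter.ANDNOT });
  }
}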
// in lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
public Query like(int docNum) throws IOException {
if (fieldNames == null) {
// gather list of valid fields from lucene
Collection<String> fields = MultiFields.getIndexedFields(ir);
fieldNames = fields.toArray(new String[fields.size()]);
}
return createQuery(retrieveTerms(docNum));
}
// in lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
public Query like(Reader r, String fieldName) throws IOException {
return createQuery(retrieveTerms(r, fieldName));
}
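The two like() overloads above build the query either from a stored document (by docID, using term vectors where available) or from arbitrary text supplied through a Reader. A sketch of typical setup; the field name, thresholds and seed document are assumptions:

// usage sketch (not from the Lucene sources) - field name and thresholds are assumptions
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.Query;

public class MoreLikeThisExample {
  static Query similarTo(IndexReader reader, Analyzer analyzer, int seedDocId) throws IOException {
    MoreLikeThis mlt = new MoreLikeThis(reader);
    mlt.setAnalyzer(analyzer);                  // required for the Reader-based path
    mlt.setFieldNames(new String[] { "body" });
    mlt.setMinTermFreq(2);                      // ignore terms that are rare in the seed doc
    mlt.setMinDocFreq(5);                       // ignore terms that are rare in the index
    return mlt.like(seedDocId);
  }
}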
// in lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
private PriorityQueue<Object[]> createQueue(Map<String, Int> words) throws IOException {
// have collected all words in doc and their freqs
int numDocs = ir.numDocs();
FreqQ res = new FreqQ(words.size()); // will order words by score
for (String word : words.keySet()) { // for every word
int tf = words.get(word).x; // term freq in the source doc
if (minTermFreq > 0 && tf < minTermFreq) {
continue; // filter out words that don't occur enough times in the source
}
// go through all the fields and find the largest document frequency
String topField = fieldNames[0];
int docFreq = 0;
for (String fieldName : fieldNames) {
int freq = ir.docFreq(new Term(fieldName, word));
topField = (freq > docFreq) ? fieldName : topField;
docFreq = (freq > docFreq) ? freq : docFreq;
}
if (minDocFreq > 0 && docFreq < minDocFreq) {
continue; // filter out words that don't occur in enough docs
}
if (docFreq > maxDocFreq) {
continue; // filter out words that occur in too many docs
}
if (docFreq == 0) {
continue; // index update problem?
}
float idf = similarity.idf(docFreq, numDocs);
float score = tf * idf;
// only really need 1st 3 entries, other ones are for troubleshooting
res.insertWithOverflow(new Object[]{word, // the word
topField, // the top field
score, // overall score
idf, // idf
docFreq, // freq in all docs
tf
});
}
return res;
}
// in lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
public PriorityQueue<Object[]> retrieveTerms(int docNum) throws IOException {
Map<String, Int> termFreqMap = new HashMap<String, Int>();
for (String fieldName : fieldNames) {
final Fields vectors = ir.getTermVectors(docNum);
final Terms vector;
if (vectors != null) {
vector = vectors.terms(fieldName);
} else {
vector = null;
}
// field does not store term vector info
if (vector == null) {
Document d = ir.document(docNum);
IndexableField fields[] = d.getFields(fieldName);
for (int j = 0; j < fields.length; j++) {
final String stringValue = fields[j].stringValue();
if (stringValue != null) {
addTermFrequencies(new StringReader(stringValue), termFreqMap, fieldName);
}
}
} else {
addTermFrequencies(termFreqMap, vector);
}
}
return createQueue(termFreqMap);
}
// in lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
final TermsEnum termsEnum = vector.iterator(null);
final CharsRef spare = new CharsRef();
BytesRef text;
while((text = termsEnum.next()) != null) {
UnicodeUtil.UTF8toUTF16(text, spare);
final String term = spare.toString();
if (isNoiseWord(term)) {
continue;
}
final int freq = (int) termsEnum.totalTermFreq();
// increment frequency
Int cnt = termFreqMap.get(term);
if (cnt == null) {
cnt = new Int();
termFreqMap.put(term, cnt);
cnt.x = freq;
} else {
cnt.x += freq;
}
}
}
// in lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
private void addTermFrequencies(Reader r, Map<String, Int> termFreqMap, String fieldName)
throws IOException {
if (analyzer == null) {
throw new UnsupportedOperationException("To use MoreLikeThis without " +
"term vectors, you must provide an Analyzer");
}
TokenStream ts = analyzer.tokenStream(fieldName, r);
int tokenCount = 0;
// for every token
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
while (ts.incrementToken()) {
String word = termAtt.toString();
tokenCount++;
if (tokenCount > maxNumTokensParsed) {
break;
}
if (isNoiseWord(word)) {
continue;
}
// increment frequency
Int cnt = termFreqMap.get(word);
if (cnt == null) {
termFreqMap.put(word, new Int());
} else {
cnt.x++;
}
}
ts.end();
ts.close();
}
// in lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
public PriorityQueue<Object[]> retrieveTerms(Reader r, String fieldName) throws IOException {
Map<String, Int> words = new HashMap<String, Int>();
addTermFrequencies(r, words, fieldName);
return createQueue(words);
}
// in lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
public String[] retrieveInterestingTerms(int docNum) throws IOException {
ArrayList<Object> al = new ArrayList<Object>(maxQueryTerms);
PriorityQueue<Object[]> pq = retrieveTerms(docNum);
Object cur;
int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
// we just want to return the top words
while (((cur = pq.pop()) != null) && lim-- > 0) {
Object[] ar = (Object[]) cur;
al.add(ar[0]); // the 1st entry is the interesting word
}
String[] res = new String[al.size()];
return al.toArray(res);
}
// in lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
public String[] retrieveInterestingTerms(Reader r, String fieldName) throws IOException {
ArrayList<Object> al = new ArrayList<Object>(maxQueryTerms);
PriorityQueue<Object[]> pq = retrieveTerms(r, fieldName);
Object cur;
int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
// we just want to return the top words
while (((cur = pq.pop()) != null) && lim-- > 0) {
Object[] ar = (Object[]) cur;
al.add(ar[0]); // the 1st entry is the interesting word
}
String[] res = new String[al.size()];
return al.toArray(res);
}
// in lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThisQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
MoreLikeThis mlt = new MoreLikeThis(reader);
mlt.setFieldNames(moreLikeFields);
mlt.setAnalyzer(analyzer);
mlt.setMinTermFreq(minTermFrequency);
if (minDocFreq >= 0) {
mlt.setMinDocFreq(minDocFreq);
}
mlt.setMaxQueryTerms(maxQueryTerms);
mlt.setStopWords(stopWords);
BooleanQuery bq = (BooleanQuery) mlt.like(new StringReader(likeText), fieldName);
BooleanClause[] clauses = bq.getClauses();
//make at least half the terms match
bq.setMinimumNumberShouldMatch((int) (clauses.length * percentTermsToMatch));
return bq;
}
// in lucene/queries/src/java/org/apache/lucene/queries/TermsFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
AtomicReader reader = context.reader();
FixedBitSet result = new FixedBitSet(reader.maxDoc());
Fields fields = reader.fields();
if (fields == null) {
return result;
}
BytesRef br = new BytesRef();
String lastField = null;
Terms termsC = null;
TermsEnum termsEnum = null;
DocsEnum docs = null;
for (Term term : terms) {
if (!term.field().equals(lastField)) {
termsC = fields.terms(term.field());
if (termsC == null) {
return result;
}
termsEnum = termsC.iterator(null);
lastField = term.field();
}
if (terms != null) { // TODO this check doesn't make sense, decide which variable its supposed to be for
br.copyBytes(term.bytes());
if (termsEnum.seekCeil(br) == TermsEnum.SeekStatus.FOUND) {
docs = termsEnum.docs(acceptDocs, docs, false);
while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
result.set(docs.docID());
}
}
}
}
return result;
}
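getDocIdSet() above walks the filter's terms field by field and ORs the postings of every term it finds into one FixedBitSet. A sketch of building such a filter, assuming the mutable addTerm(Term) API of this version of TermsFilter:

// usage sketch (not from the Lucene sources) - assumes the mutable addTerm(Term) API
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsFilter;

public class TermsFilterExample {
  static TermsFilter idsFilter() {
    TermsFilter filter = new TermsFilter();
    filter.addTerm(new Term("id", "doc-3")); // documents matching any added term pass the filter
    filter.addTerm(new Term("id", "doc-7"));
    return filter;
  }
}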
// in lucene/queries/src/java/org/apache/lucene/queries/BooleanFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
FixedBitSet res = null;
final AtomicReader reader = context.reader();
boolean hasShouldClauses = false;
for (final FilterClause fc : clauses) {
if (fc.getOccur() == Occur.SHOULD) {
hasShouldClauses = true;
final DocIdSetIterator disi = getDISI(fc.getFilter(), context);
if (disi == null) continue;
if (res == null) {
res = new FixedBitSet(reader.maxDoc());
}
res.or(disi);
}
}
if (hasShouldClauses && res == null)
return DocIdSet.EMPTY_DOCIDSET;
for (final FilterClause fc : clauses) {
if (fc.getOccur() == Occur.MUST_NOT) {
if (res == null) {
assert !hasShouldClauses;
res = new FixedBitSet(reader.maxDoc());
res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
}
final DocIdSetIterator disi = getDISI(fc.getFilter(), context);
if (disi != null) {
res.andNot(disi);
}
}
}
for (final FilterClause fc : clauses) {
if (fc.getOccur() == Occur.MUST) {
final DocIdSetIterator disi = getDISI(fc.getFilter(), context);
if (disi == null) {
return DocIdSet.EMPTY_DOCIDSET; // no documents can match
}
if (res == null) {
res = new FixedBitSet(reader.maxDoc());
res.or(disi);
} else {
res.and(disi);
}
}
}
return res != null ? BitsFilteredDocIdSet.wrap(res, acceptDocs) : DocIdSet.EMPTY_DOCIDSET;
}
// in lucene/queries/src/java/org/apache/lucene/queries/BooleanFilter.java
private static DocIdSetIterator getDISI(Filter filter, AtomicReaderContext context)
throws IOException {
// we dont pass acceptDocs, we will filter at the end using an additional filter
final DocIdSet set = filter.getDocIdSet(context, null);
return (set == null || set == DocIdSet.EMPTY_DOCIDSET) ? null : set.iterator();
}
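BooleanFilter.getDocIdSet() above evaluates SHOULD clauses first, then removes MUST_NOT matches, then intersects MUST clauses. A construction sketch; the member filters are placeholders and the FilterClause(filter, occur) argument order is an assumption:

// usage sketch (not from the Lucene sources) - member filters are placeholders
import org.apache.lucene.queries.BooleanFilter;
import org.apache.lucene.queries.FilterClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.Filter;

public class BooleanFilterExample {
  static BooleanFilter build(Filter inStock, Filter onSale, Filter discontinued) {
    BooleanFilter bf = new BooleanFilter();
    bf.add(new FilterClause(inStock, Occur.MUST));          // must match
    bf.add(new FilterClause(onSale, Occur.SHOULD));         // optional
    bf.add(new FilterClause(discontinued, Occur.MUST_NOT)); // excluded
    return bf;
  }
}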
// in lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
CustomScoreQuery clone = null;
final Query sq = subQuery.rewrite(reader);
if (sq != subQuery) {
clone = clone();
clone.subQuery = sq;
}
for(int i = 0; i < scoringQueries.length; i++) {
final Query v = scoringQueries[i].rewrite(reader);
if (v != scoringQueries[i]) {
if (clone == null) clone = clone();
clone.scoringQueries[i] = v;
}
}
return (clone == null) ? this : clone;
}
// in lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java
protected CustomScoreProvider getCustomScoreProvider(AtomicReaderContext context) throws IOException {
return new CustomScoreProvider(context);
}
// in lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java
Override
public float getValueForNormalization() throws IOException {
float sum = subQueryWeight.getValueForNormalization();
for(int i = 0; i < valSrcWeights.length; i++) {
if (qStrict) {
valSrcWeights[i].getValueForNormalization(); // do not include ValueSource part in the query normalization
} else {
sum += valSrcWeights[i].getValueForNormalization();
}
}
sum *= getBoost() * getBoost(); // boost each sub-weight
return sum ;
}
// in lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
// Pass true for "scoresDocsInOrder", because we
// require in-order scoring, even if caller does not,
// since we call advance on the valSrcScorers. Pass
// false for "topScorer" because we will not invoke
// score(Collector) on these scorers:
Scorer subQueryScorer = subQueryWeight.scorer(context, true, false, acceptDocs);
if (subQueryScorer == null) {
return null;
}
Scorer[] valSrcScorers = new Scorer[valSrcWeights.length];
for(int i = 0; i < valSrcScorers.length; i++) {
valSrcScorers[i] = valSrcWeights[i].scorer(context, true, topScorer, acceptDocs);
}
return new CustomScorer(CustomScoreQuery.this.getCustomScoreProvider(context), this, getBoost(), subQueryScorer, valSrcScorers);
}
// in lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java
Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
Explanation explain = doExplain(context, doc);
return explain == null ? new Explanation(0.0f, "no matching docs") : explain;
}
// in lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java
private Explanation doExplain(AtomicReaderContext info, int doc) throws IOException {
Explanation subQueryExpl = subQueryWeight.explain(info, doc);
if (!subQueryExpl.isMatch()) {
return subQueryExpl;
}
// match
Explanation[] valSrcExpls = new Explanation[valSrcWeights.length];
for(int i = 0; i < valSrcWeights.length; i++) {
valSrcExpls[i] = valSrcWeights[i].explain(info, doc);
}
Explanation customExp = CustomScoreQuery.this.getCustomScoreProvider(info).customExplain(doc,subQueryExpl,valSrcExpls);
float sc = getBoost() * customExp.getValue();
Explanation res = new ComplexExplanation(
true, sc, CustomScoreQuery.this.toString() + ", product of:");
res.addDetail(customExp);
res.addDetail(new Explanation(getBoost(), "queryBoost")); // actually using the q boost as q weight (== weight value)
return res;
}
// in lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java
Override
public int nextDoc() throws IOException {
int doc = subQueryScorer.nextDoc();
if (doc != NO_MORE_DOCS) {
for (int i = 0; i < valSrcScorers.length; i++) {
valSrcScorers[i].advance(doc);
}
}
return doc;
}
// in lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java
Override
public float score() throws IOException {
for (int i = 0; i < valSrcScorers.length; i++) {
vScores[i] = valSrcScorers[i].score();
}
return qWeight * provider.customScore(subQueryScorer.docID(), subQueryScorer.score(), vScores);
}
// in lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java
Override
public int advance(int target) throws IOException {
int doc = subQueryScorer.advance(target);
if (doc != NO_MORE_DOCS) {
for (int i = 0; i < valSrcScorers.length; i++) {
valSrcScorers[i].advance(doc);
}
}
return doc;
}
// in lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java
Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new CustomWeight(searcher);
}
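CustomWeight/CustomScorer above combine the score of the wrapped subquery with the values produced by one or more value-source queries through a CustomScoreProvider (the default provider multiplies them). A sketch of the usual construction; the "popularity" field is an assumption:

// usage sketch (not from the Lucene sources) - "popularity" is an assumed numeric field
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class CustomScoreExample {
  static Query textTimesPopularity() {
    Query text = new TermQuery(new Term("body", "lucene"));
    FunctionQuery popularity = new FunctionQuery(new LongFieldSource("popularity"));
    return new CustomScoreQuery(text, popularity); // default provider multiplies the two scores
  }
}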
// in lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
return this;
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java
Override
public float getValueForNormalization() throws IOException {
queryWeight = getBoost();
return queryWeight * queryWeight;
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
return new AllScorer(context, acceptDocs, this, queryWeight);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java
Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
return ((AllScorer)scorer(context, true, true, context.reader().getLiveDocs())).explain(doc);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java
Override
public int nextDoc() throws IOException {
for(;;) {
++doc;
if (doc>=maxDoc) {
return doc=NO_MORE_DOCS;
}
if (acceptDocs != null && !acceptDocs.get(doc)) continue;
return doc;
}
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java
Override
public int advance(int target) throws IOException {
// this will work even if target==NO_MORE_DOCS
doc=target-1;
return nextDoc();
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java
Override
public float score() throws IOException {
float score = qWeight * vals.floatVal(doc);
// Current Lucene priority queues can't handle NaN and -Infinity, so
// map to -Float.MAX_VALUE. This conditional handles both -infinity
// and NaN since comparisons with NaN are always false.
return score>Float.NEGATIVE_INFINITY ? score : -Float.MAX_VALUE;
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java
public Explanation explain(int doc) throws IOException {
float sc = qWeight * vals.floatVal(doc);
Explanation result = new ComplexExplanation
(true, sc, "FunctionQuery(" + func + "), product of:");
result.addDetail(vals.explain(doc));
result.addDetail(new Explanation(getBoost(), "boost"));
result.addDetail(new Explanation(weight.queryNorm,"queryNorm"));
return result;
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java
Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new FunctionQuery.FunctionWeight(searcher);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
Query newQ = q.rewrite(reader);
if (newQ == q) return this;
BoostedQuery bq = (BoostedQuery)this.clone();
bq.q = newQ;
return bq;
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java
Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new BoostedQuery.BoostedWeight(searcher);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java
Override
public float getValueForNormalization() throws IOException {
float sum = qWeight.getValueForNormalization();
sum *= getBoost() * getBoost();
return sum ;
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
// we are gonna advance() the subscorer
Scorer subQueryScorer = qWeight.scorer(context, true, false, acceptDocs);
if(subQueryScorer == null) {
return null;
}
return new BoostedQuery.CustomScorer(context, this, getBoost(), subQueryScorer, boostVal);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java
Override
public Explanation explain(AtomicReaderContext readerContext, int doc) throws IOException {
Explanation subQueryExpl = qWeight.explain(readerContext,doc);
if (!subQueryExpl.isMatch()) {
return subQueryExpl;
}
FunctionValues vals = boostVal.getValues(fcontext, readerContext);
float sc = subQueryExpl.getValue() * vals.floatVal(doc);
Explanation res = new ComplexExplanation(
true, sc, BoostedQuery.this.toString() + ", product of:");
res.addDetail(subQueryExpl);
res.addDetail(vals.explain(doc));
return res;
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java
Override
public int advance(int target) throws IOException {
return scorer.advance(target);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java
Override
public int nextDoc() throws IOException {
return scorer.nextDoc();
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java
Override
public float score() throws IOException {
float score = qWeight * scorer.score() * vals.floatVal(scorer.docID());
// Current Lucene priority queues can't handle NaN and -Infinity, so
// map to -Float.MAX_VALUE. This conditional handles both -infinity
// and NaN since comparisons with NaN are always false.
return score>Float.NEGATIVE_INFINITY ? score : -Float.MAX_VALUE;
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java
public Explanation explain(int doc) throws IOException {
Explanation subQueryExpl = weight.qWeight.explain(readerContext ,doc);
if (!subQueryExpl.isMatch()) {
return subQueryExpl;
}
float sc = subQueryExpl.getValue() * vals.floatVal(doc);
Explanation res = new ComplexExplanation(
true, sc, BoostedQuery.this.toString() + ", product of:");
res.addDetail(subQueryExpl);
res.addDetail(vals.explain(doc));
return res;
}
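BoostedQuery differs from CustomScoreQuery in that it always multiplies the subquery score by a single ValueSource, as the score() method above shows. A short construction sketch, again assuming a numeric "popularity" field:

// usage sketch (not from the Lucene sources) - "popularity" is an assumed numeric field
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.BoostedQuery;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class BoostedQueryExample {
  static Query boostedByPopularity() {
    Query base = new TermQuery(new Term("body", "lucene"));
    return new BoostedQuery(base, new LongFieldSource("popularity"));
  }
}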
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/NumDocsValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
// Searcher has no numdocs so we must use the reader instead
return new ConstIntDocValues(ReaderUtil.getTopLevelContext(readerContext).reader().numDocs(), this);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/LongFieldSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final long[] arr = cache.getLongs(readerContext.reader(), field, parser, true);
final Bits valid = cache.getDocsWithField(readerContext.reader(), field);
return new LongDocValues(this) {
@Override
public long longVal(int doc) {
return arr[doc];
}
@Override
public boolean exists(int doc) {
return valid.get(doc);
}
@Override
public Object objectVal(int doc) {
return valid.get(doc) ? longToObject(arr[doc]) : null;
}
@Override
public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
long lower,upper;
// instead of using separate comparison functions, adjust the endpoints.
if (lowerVal==null) {
lower = Long.MIN_VALUE;
} else {
lower = externalToLong(lowerVal);
if (!includeLower && lower < Long.MAX_VALUE) lower++;
}
if (upperVal==null) {
upper = Long.MAX_VALUE;
} else {
upper = externalToLong(upperVal);
if (!includeUpper && upper > Long.MIN_VALUE) upper--;
}
final long ll = lower;
final long uu = upper;
return new ValueSourceScorer(reader, this) {
@Override
public boolean matchesValue(int doc) {
long val = arr[doc];
// only check for deleted if it's the default value
// if (val==0 && reader.isDeleted(doc)) return false;
return val >= ll && val <= uu;
}
};
}
@Override
public ValueFiller getValueFiller() {
return new ValueFiller() {
private final long[] longArr = arr;
private final MutableValueLong mval = newMutableValueLong();
@Override
public MutableValue getValue() {
return mval;
}
@Override
public void fillValue(int doc) {
mval.value = longArr[doc];
mval.exists = valid.get(doc);
}
};
}
};
}
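The getRangeScorer implementation above avoids per-document branching on includeLower/includeUpper by folding exclusive bounds into a closed interval once, up front. The standalone sketch below restates that idiom outside of the ValueSourceScorer machinery:

// standalone sketch of the endpoint-adjustment idiom (not from the Lucene sources)
public class RangeEndpointExample {
  static boolean inRange(long val, Long lowerVal, Long upperVal,
                         boolean includeLower, boolean includeUpper) {
    long lower = (lowerVal == null) ? Long.MIN_VALUE : lowerVal;
    if (lowerVal != null && !includeLower && lower < Long.MAX_VALUE) lower++; // (x, ...] becomes [x+1, ...]
    long upper = (upperVal == null) ? Long.MAX_VALUE : upperVal;
    if (upperVal != null && !includeUpper && upper > Long.MIN_VALUE) upper--; // [..., y) becomes [..., y-1]
    return val >= lower && val <= upper; // single closed-interval test per value
  }
}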
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SimpleBoolFunction.java
Override
public BoolDocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues vals = source.getValues(context, readerContext);
return new BoolDocValues(this) {
@Override
public boolean boolVal(int doc) {
return func(doc, vals);
}
@Override
public String toString(int doc) {
return name() + '(' + vals.toString(doc) + ')';
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SimpleBoolFunction.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
source.createWeight(context, searcher);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/LinearFloatFunction.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues vals = source.getValues(context, readerContext);
return new FloatDocValues(this) {
@Override
public float floatVal(int doc) {
return vals.floatVal(doc) * slope + intercept;
}
@Override
public String toString(int doc) {
return slope + "*float(" + vals.toString(doc) + ")+" + intercept;
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/LinearFloatFunction.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
source.createWeight(context, searcher);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SumTotalTermFreqValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
return (FunctionValues)context.get(this);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SumTotalTermFreqValueSource.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
long sumTotalTermFreq = 0;
for (AtomicReaderContext readerContext : searcher.getTopReaderContext().leaves()) {
Fields fields = readerContext.reader().fields();
if (fields == null) continue;
Terms terms = fields.terms(indexedField);
if (terms == null) continue;
long v = terms.getSumTotalTermFreq();
if (v == -1) {
sumTotalTermFreq = -1;
break;
} else {
sumTotalTermFreq += v;
}
}
final long ttf = sumTotalTermFreq;
context.put(this, new LongDocValues(this) {
@Override
public long longVal(int doc) {
return ttf;
}
});
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ShortFieldSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final short[] arr = cache.getShorts(readerContext.reader(), field, parser, false);
return new FunctionValues() {
@Override
public byte byteVal(int doc) {
return (byte) arr[doc];
}
@Override
public short shortVal(int doc) {
return arr[doc];
}
@Override
public float floatVal(int doc) {
return (float) arr[doc];
}
@Override
public int intVal(int doc) {
return (int) arr[doc];
}
@Override
public long longVal(int doc) {
return (long) arr[doc];
}
@Override
public double doubleVal(int doc) {
return (double) arr[doc];
}
@Override
public String strVal(int doc) {
return Short.toString(arr[doc]);
}
@Override
public String toString(int doc) {
return description() + '=' + shortVal(doc);
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
Fields fields = readerContext.reader().fields();
final Terms terms = fields.terms(indexedField);
IndexSearcher searcher = (IndexSearcher)context.get("searcher");
final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(), indexedField);
if (similarity == null) {
throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as DefaultSimilarity)");
}
return new FloatDocValues(this) {
DocsEnum docs ;
int atDoc;
int lastDocRequested = -1;
{ reset(); }
public void reset() throws IOException {
// no one should call us for deleted docs?
boolean omitTF = false;
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(indexedBytes, false)) {
docs = termsEnum.docs(null, null, true);
if (docs == null) { // omitTF
omitTF = true;
docs = termsEnum.docs(null, null, false);
}
} else {
docs = null;
}
} else {
docs = null;
}
if (docs == null) {
docs = new DocsEnum() {
@Override
public int freq() {
return 0;
}
@Override
public int docID() {
return DocIdSetIterator.NO_MORE_DOCS;
}
@Override
public int nextDoc() throws IOException {
return DocIdSetIterator.NO_MORE_DOCS;
}
@Override
public int advance(int target) throws IOException {
return DocIdSetIterator.NO_MORE_DOCS;
}
};
} else if (omitTF) {
// the docsenum won't support freq(), so return 1
final DocsEnum delegate = docs;
docs = new DocsEnum() {
@Override
public int freq() {
return 1;
}
@Override
public int docID() {
return delegate.docID();
}
@Override
public int nextDoc() throws IOException {
return delegate.nextDoc();
}
@Override
public int advance(int target) throws IOException {
return delegate.advance(target);
}
};
}
atDoc = -1;
}
@Override
public float floatVal(int doc) {
try {
if (doc < lastDocRequested) {
// out-of-order access.... reset
reset();
}
lastDocRequested = doc;
if (atDoc < doc) {
atDoc = docs.advance(doc);
}
if (atDoc > doc) {
// term doesn't match this document... either because we hit the
// end, or because the next doc is after this doc.
return similarity.tf(0);
}
// a match!
return similarity.tf(docs.freq());
} catch (IOException e) {
throw new RuntimeException("caught exception in function "+description()+" : doc="+doc, e);
}
}
};
}
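The returned FloatDocValues walks a DocsEnum forward, so it is cheapest when documents are requested in increasing order; a backwards request forces reset() and a fresh scan. A hedged usage sketch (values and maxDoc are illustrative names supplied by the caller):
// Illustrative usage, assuming `values` is the FloatDocValues returned above and
// `maxDoc` is the segment's document count: forward iteration never triggers reset().
float total = 0f;
for (int docId = 0; docId < maxDoc; docId++) {
  total += values.floatVal(docId); // tf(freq) for matching docs, tf(0) otherwise
}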
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java
public void reset() throws IOException {
// no one should call us for deleted docs?
boolean omitTF = false;
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(indexedBytes, false)) {
docs = termsEnum.docs(null, null, true);
if (docs == null) { // omitTF
omitTF = true;
docs = termsEnum.docs(null, null, false);
}
} else {
docs = null;
}
} else {
docs = null;
}
if (docs == null) {
docs = new DocsEnum() {
@Override
public int freq() {
return 0;
}
@Override
public int docID() {
return DocIdSetIterator.NO_MORE_DOCS;
}
@Override
public int nextDoc() throws IOException {
return DocIdSetIterator.NO_MORE_DOCS;
}
@Override
public int advance(int target) throws IOException {
return DocIdSetIterator.NO_MORE_DOCS;
}
};
} else if (omitTF) {
// the docsenum won't support freq(), so return 1
final DocsEnum delegate = docs;
docs = new DocsEnum() {
@Override
public int freq() {
return 1;
}
@Override
public int docID() {
return delegate.docID();
}
@Override
public int nextDoc() throws IOException {
return delegate.nextDoc();
}
@Override
public int advance(int target) throws IOException {
return delegate.advance(target);
}
};
}
atDoc = -1;
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java
Override
public int nextDoc() throws IOException {
return DocIdSetIterator.NO_MORE_DOCS;
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java
Override
public int advance(int target) throws IOException {
return DocIdSetIterator.NO_MORE_DOCS;
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java
Override
public int nextDoc() throws IOException {
return delegate.nextDoc();
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java
Override
public int advance(int target) throws IOException {
return delegate.advance(target);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ByteFieldSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final byte[] arr = cache.getBytes(readerContext.reader(), field, parser, false);
return new FunctionValues() {
@Override
public byte byteVal(int doc) {
return arr[doc];
}
@Override
public short shortVal(int doc) {
return (short) arr[doc];
}
@Override
public float floatVal(int doc) {
return (float) arr[doc];
}
@Override
public int intVal(int doc) {
return (int) arr[doc];
}
@Override
public long longVal(int doc) {
return (long) arr[doc];
}
@Override
public double doubleVal(int doc) {
return (double) arr[doc];
}
@Override
public String strVal(int doc) {
return Byte.toString(arr[doc]);
}
@Override
public String toString(int doc) {
return description() + '=' + byteVal(doc);
}
@Override
public Object objectVal(int doc) {
return arr[doc]; // TODO: valid?
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ScaleFloatFunction.java
private ScaleInfo createScaleInfo(Map context, AtomicReaderContext readerContext) throws IOException {
final AtomicReaderContext[] leaves = ReaderUtil.getTopLevelContext(readerContext).leaves();
float minVal = Float.POSITIVE_INFINITY;
float maxVal = Float.NEGATIVE_INFINITY;
for (AtomicReaderContext leaf : leaves) {
int maxDoc = leaf.reader().maxDoc();
FunctionValues vals = source.getValues(context, leaf);
for (int i=0; i<maxDoc; i++) {
float val = vals.floatVal(i);
if ((Float.floatToRawIntBits(val) & (0xff<<23)) == 0xff<<23) {
// if the exponent in the float is all ones, then this is +Inf, -Inf or NaN
// which don't make sense to factor into the scale function
continue;
}
if (val < minVal) {
minVal = val;
}
if (val > maxVal) {
maxVal = val;
}
}
}
if (minVal == Float.POSITIVE_INFINITY) {
// must have been an empty index
minVal = maxVal = 0;
}
ScaleInfo scaleInfo = new ScaleInfo();
scaleInfo.minVal = minVal;
scaleInfo.maxVal = maxVal;
context.put(this.source, scaleInfo);
return scaleInfo;
}
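The bit test above skips +Inf, -Inf and NaN by checking whether the float's eight exponent bits are all ones, which is equivalent to the more familiar library checks. A small equivalence sketch (illustrative only, not part of ScaleFloatFunction.java):
// Illustrative equivalence: a float is non-finite exactly when its exponent bits are all set.
static boolean isNonFinite(float val) {
  boolean byBits = (Float.floatToRawIntBits(val) & (0xff << 23)) == (0xff << 23);
  boolean byLibrary = Float.isNaN(val) || Float.isInfinite(val);
  assert byBits == byLibrary;
  return byBits;
}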
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ScaleFloatFunction.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
ScaleInfo scaleInfo = (ScaleInfo)context.get(source);
if (scaleInfo == null) {
scaleInfo = createScaleInfo(context, readerContext);
}
final float scale = (scaleInfo.maxVal-scaleInfo.minVal==0) ? 0 : (max-min)/(scaleInfo.maxVal-scaleInfo.minVal);
final float minSource = scaleInfo.minVal;
final float maxSource = scaleInfo.maxVal;
final FunctionValues vals = source.getValues(context, readerContext);
return new FloatDocValues(this) {
@Override
public float floatVal(int doc) {
return (vals.floatVal(doc) - minSource) * scale + min;
}
@Override
public String toString(int doc) {
return "scale(" + vals.toString(doc) + ",toMin=" + min + ",toMax=" + max
+ ",fromMin=" + minSource
+ ",fromMax=" + maxSource
+ ")";
}
};
}
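getValues() then applies a plain linear map from the observed [minVal,maxVal] onto the requested [min,max]; when the source range is degenerate the scale collapses to 0 and every value lands on min. A standalone sketch of the same arithmetic (illustrative names, not from the Lucene sources):
// Illustrative rescaling helper mirroring ScaleFloatFunction's formula:
// e.g. fromMin=10, fromMax=20, toMin=0, toMax=1 maps 15 to 0.5.
static float rescale(float val, float fromMin, float fromMax, float toMin, float toMax) {
  float scale = (fromMax - fromMin == 0) ? 0 : (toMax - toMin) / (fromMax - fromMin);
  return (val - fromMin) * scale + toMin;
}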
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ScaleFloatFunction.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
source.createWeight(context, searcher);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/OrdFieldSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final int off = readerContext.docBase;
final IndexReader topReader = ReaderUtil.getTopLevelContext(readerContext).reader();
final AtomicReader r = topReader instanceof CompositeReader
? new SlowCompositeReaderWrapper((CompositeReader)topReader)
: (AtomicReader) topReader;
final FieldCache.DocTermsIndex sindex = FieldCache.DEFAULT.getTermsIndex(r, field);
return new IntDocValues(this) {
protected String toTerm(String readableValue) {
return readableValue;
}
@Override
public int intVal(int doc) {
return sindex.getOrd(doc+off);
}
@Override
public int ordVal(int doc) {
return sindex.getOrd(doc+off);
}
@Override
public int numOrd() {
return sindex.numOrd();
}
@Override
public boolean exists(int doc) {
return sindex.getOrd(doc+off) != 0;
}
@Override
public ValueFiller getValueFiller() {
return new ValueFiller() {
private final MutableValueInt mval = new MutableValueInt();
@Override
public MutableValue getValue() {
return mval;
}
@Override
public void fillValue(int doc) {
mval.value = sindex.getOrd(doc);
mval.exists = mval.value!=0;
}
};
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MultiBoolFunction.java
Override
public BoolDocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues[] vals = new FunctionValues[sources.size()];
int i=0;
for (ValueSource source : sources) {
vals[i++] = source.getValues(context, readerContext);
}
return new BoolDocValues(this) {
@Override
public boolean boolVal(int doc) {
return func(doc, vals);
}
@Override
public String toString(int doc) {
StringBuilder sb = new StringBuilder(name());
sb.append('(');
boolean first = true;
for (FunctionValues dv : vals) {
if (first) {
first = false;
} else {
sb.append(',');
}
sb.append(dv.toString(doc));
}
return sb.toString();
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MultiBoolFunction.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
for (ValueSource source : sources) {
source.createWeight(context, searcher);
}
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/LiteralValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
return new StrDocValues(this) {
@Override
public String strVal(int doc) {
return string;
}
@Override
public boolean bytesVal(int doc, BytesRef target) {
target.copyBytes(bytesRef);
return true;
}
@Override
public String toString(int doc) {
return string;
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/NumericIndexDocValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final Source source = readerContext.reader().docValues(field)
.getSource();
Type type = source.getType();
switch (type) {
case FLOAT_32:
case FLOAT_64:
// TODO (chrism) Change to use FloatDocValues and IntDocValues
return new FunctionValues() {
@Override
public String toString(int doc) {
return "float: [" + floatVal(doc) + "]";
}
@Override
public float floatVal(int doc) {
return (float) source.getFloat(doc);
}
};
case VAR_INTS:
return new FunctionValues() {
@Override
public String toString(int doc) {
return "float: [" + floatVal(doc) + "]";
}
@Override
public float floatVal(int doc) {
return (float) source.getInt(doc);
}
};
default:
throw new IOException("Type: " + type + "is not numeric");
}
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DocFreqValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
IndexSearcher searcher = (IndexSearcher)context.get("searcher");
int docfreq = searcher.getIndexReader().docFreq(new Term(indexedField, indexedBytes));
return new ConstIntDocValues(docfreq, this);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DocFreqValueSource.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
context.put("searcher",searcher);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IfFunction.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues ifVals = ifSource.getValues(context, readerContext);
final FunctionValues trueVals = trueSource.getValues(context, readerContext);
final FunctionValues falseVals = falseSource.getValues(context, readerContext);
return new FunctionValues() {
@Override
public byte byteVal(int doc) {
return ifVals.boolVal(doc) ? trueVals.byteVal(doc) : falseVals.byteVal(doc);
}
@Override
public short shortVal(int doc) {
return ifVals.boolVal(doc) ? trueVals.shortVal(doc) : falseVals.shortVal(doc);
}
@Override
public float floatVal(int doc) {
return ifVals.boolVal(doc) ? trueVals.floatVal(doc) : falseVals.floatVal(doc);
}
@Override
public int intVal(int doc) {
return ifVals.boolVal(doc) ? trueVals.intVal(doc) : falseVals.intVal(doc);
}
@Override
public long longVal(int doc) {
return ifVals.boolVal(doc) ? trueVals.longVal(doc) : falseVals.longVal(doc);
}
@Override
public double doubleVal(int doc) {
return ifVals.boolVal(doc) ? trueVals.doubleVal(doc) : falseVals.doubleVal(doc);
}
@Override
public String strVal(int doc) {
return ifVals.boolVal(doc) ? trueVals.strVal(doc) : falseVals.strVal(doc);
}
@Override
public boolean boolVal(int doc) {
return ifVals.boolVal(doc) ? trueVals.boolVal(doc) : falseVals.boolVal(doc);
}
@Override
public boolean bytesVal(int doc, BytesRef target) {
return ifVals.boolVal(doc) ? trueVals.bytesVal(doc, target) : falseVals.bytesVal(doc, target);
}
@Override
public Object objectVal(int doc) {
return ifVals.boolVal(doc) ? trueVals.objectVal(doc) : falseVals.objectVal(doc);
}
@Override
public boolean exists(int doc) {
return true; // TODO: flow through to any sub-sources?
}
@Override
public ValueFiller getValueFiller() {
// TODO: we need types of trueSource / falseSource to handle this
// for now, use float.
return super.getValueFiller();
}
@Override
public String toString(int doc) {
return "if(" + ifVals.toString(doc) + ',' + trueVals.toString(doc) + ',' + falseVals.toString(doc) + ')';
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IfFunction.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
ifSource.createWeight(context, searcher);
trueSource.createWeight(context, searcher);
falseSource.createWeight(context, searcher);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MultiFloatFunction.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues[] valsArr = new FunctionValues[sources.length];
for (int i=0; i<sources.length; i++) {
valsArr[i] = sources[i].getValues(context, readerContext);
}
return new FloatDocValues(this) {
@Override
public float floatVal(int doc) {
return func(doc, valsArr);
}
@Override
public String toString(int doc) {
StringBuilder sb = new StringBuilder();
sb.append(name()).append('(');
boolean firstTime=true;
for (FunctionValues vals : valsArr) {
if (firstTime) {
firstTime=false;
} else {
sb.append(',');
}
sb.append(vals.toString(doc));
}
sb.append(')');
return sb.toString();
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MultiFloatFunction.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
for (ValueSource source : sources)
source.createWeight(context, searcher);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DualFloatFunction.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues aVals = a.getValues(context, readerContext);
final FunctionValues bVals = b.getValues(context, readerContext);
return new FloatDocValues(this) {
@Override
public float floatVal(int doc) {
return func(doc, aVals, bVals);
}
@Override
public String toString(int doc) {
return name() + '(' + aVals.toString(doc) + ',' + bVals.toString(doc) + ')';
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DualFloatFunction.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
a.createWeight(context,searcher);
b.createWeight(context,searcher);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IntFieldSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final int[] arr = cache.getInts(readerContext.reader(), field, parser, true);
final Bits valid = cache.getDocsWithField(readerContext.reader(), field);
return new IntDocValues(this) {
final MutableValueInt val = new MutableValueInt();
@Override
public float floatVal(int doc) {
return (float)arr[doc];
}
@Override
public int intVal(int doc) {
return arr[doc];
}
@Override
public long longVal(int doc) {
return (long)arr[doc];
}
@Override
public double doubleVal(int doc) {
return (double)arr[doc];
}
@Override
public String strVal(int doc) {
return Integer.toString(arr[doc]);
}
@Override
public Object objectVal(int doc) {
return valid.get(doc) ? arr[doc] : null;
}
@Override
public boolean exists(int doc) {
return valid.get(doc);
}
@Override
public String toString(int doc) {
return description() + '=' + intVal(doc);
}
@Override
public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
int lower,upper;
// instead of using separate comparison functions, adjust the endpoints.
if (lowerVal==null) {
lower = Integer.MIN_VALUE;
} else {
lower = Integer.parseInt(lowerVal);
if (!includeLower && lower < Integer.MAX_VALUE) lower++;
}
if (upperVal==null) {
upper = Integer.MAX_VALUE;
} else {
upper = Integer.parseInt(upperVal);
if (!includeUpper && upper > Integer.MIN_VALUE) upper--;
}
final int ll = lower;
final int uu = upper;
return new ValueSourceScorer(reader, this) {
@Override
public boolean matchesValue(int doc) {
int val = arr[doc];
// only check for deleted if it's the default value
// if (val==0 && reader.isDeleted(doc)) return false;
return val >= ll && val <= uu;
}
};
}
@Override
public ValueFiller getValueFiller() {
return new ValueFiller() {
private final int[] intArr = arr;
private final MutableValueInt mval = new MutableValueInt();
@Override
public MutableValue getValue() {
return mval;
}
@Override
public void fillValue(int doc) {
mval.value = intArr[doc];
mval.exists = valid.get(doc);
}
};
}
};
}
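getRangeScorer() avoids separate comparison branches by folding exclusive bounds into the integer endpoints themselves: an exclusive lower bound becomes lower+1 and an exclusive upper bound becomes upper-1, with overflow guarded. A compact sketch of that normalization (illustrative, not part of IntFieldSource.java):
// Illustrative endpoint normalization: afterwards a simple val >= lower && val <= upper
// test implements any combination of inclusive/exclusive integer bounds.
static int[] normalizeIntRange(Integer lowerVal, Integer upperVal,
                               boolean includeLower, boolean includeUpper) {
  int lower = (lowerVal == null) ? Integer.MIN_VALUE : lowerVal;
  if (lowerVal != null && !includeLower && lower < Integer.MAX_VALUE) lower++;
  int upper = (upperVal == null) ? Integer.MAX_VALUE : upperVal;
  if (upperVal != null && !includeUpper && upper > Integer.MIN_VALUE) upper--;
  return new int[] { lower, upper };
}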
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/BytesRefFieldSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
return new DocTermsIndexDocValues(this, readerContext, field) {
@Override
protected String toTerm(String readableValue) {
return readableValue;
}
@Override
public Object objectVal(int doc) {
return strVal(doc);
}
@Override
public String toString(int doc) {
return description() + '=' + strVal(doc);
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MultiFunction.java
public static FunctionValues[] valsArr(List<ValueSource> sources, Map fcontext, AtomicReaderContext readerContext) throws IOException {
final FunctionValues[] valsArr = new FunctionValues[sources.size()];
int i=0;
for (ValueSource source : sources) {
valsArr[i++] = source.getValues(fcontext, readerContext);
}
return valsArr;
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MultiFunction.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
for (ValueSource source : sources)
source.createWeight(context, searcher);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/VectorValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
int size = sources.size();
// special-case x,y and lat,lon since it's so common
if (size==2) {
final FunctionValues x = sources.get(0).getValues(context, readerContext);
final FunctionValues y = sources.get(1).getValues(context, readerContext);
return new FunctionValues() {
@Override
public void byteVal(int doc, byte[] vals) {
vals[0] = x.byteVal(doc);
vals[1] = y.byteVal(doc);
}
@Override
public void shortVal(int doc, short[] vals) {
vals[0] = x.shortVal(doc);
vals[1] = y.shortVal(doc);
}
@Override
public void intVal(int doc, int[] vals) {
vals[0] = x.intVal(doc);
vals[1] = y.intVal(doc);
}
@Override
public void longVal(int doc, long[] vals) {
vals[0] = x.longVal(doc);
vals[1] = y.longVal(doc);
}
@Override
public void floatVal(int doc, float[] vals) {
vals[0] = x.floatVal(doc);
vals[1] = y.floatVal(doc);
}
@Override
public void doubleVal(int doc, double[] vals) {
vals[0] = x.doubleVal(doc);
vals[1] = y.doubleVal(doc);
}
@Override
public void strVal(int doc, String[] vals) {
vals[0] = x.strVal(doc);
vals[1] = y.strVal(doc);
}
@Override
public String toString(int doc) {
return name() + "(" + x.toString(doc) + "," + y.toString(doc) + ")";
}
};
}
final FunctionValues[] valsArr = new FunctionValues[size];
for (int i = 0; i < size; i++) {
valsArr[i] = sources.get(i).getValues(context, readerContext);
}
return new FunctionValues() {
@Override
public void byteVal(int doc, byte[] vals) {
for (int i = 0; i < valsArr.length; i++) {
vals[i] = valsArr[i].byteVal(doc);
}
}
@Override
public void shortVal(int doc, short[] vals) {
for (int i = 0; i < valsArr.length; i++) {
vals[i] = valsArr[i].shortVal(doc);
}
}
@Override
public void floatVal(int doc, float[] vals) {
for (int i = 0; i < valsArr.length; i++) {
vals[i] = valsArr[i].floatVal(doc);
}
}
@Override
public void intVal(int doc, int[] vals) {
for (int i = 0; i < valsArr.length; i++) {
vals[i] = valsArr[i].intVal(doc);
}
}
@Override
public void longVal(int doc, long[] vals) {
for (int i = 0; i < valsArr.length; i++) {
vals[i] = valsArr[i].longVal(doc);
}
}
@Override
public void doubleVal(int doc, double[] vals) {
for (int i = 0; i < valsArr.length; i++) {
vals[i] = valsArr[i].doubleVal(doc);
}
}
@Override
public void strVal(int doc, String[] vals) {
for (int i = 0; i < valsArr.length; i++) {
vals[i] = valsArr[i].strVal(doc);
}
}
@Override
public String toString(int doc) {
StringBuilder sb = new StringBuilder();
sb.append(name()).append('(');
boolean firstTime = true;
for (FunctionValues vals : valsArr) {
if (firstTime) {
firstTime = false;
} else {
sb.append(',');
}
sb.append(vals.toString(doc));
}
sb.append(')');
return sb.toString();
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/VectorValueSource.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
for (ValueSource source : sources)
source.createWeight(context, searcher);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/JoinDocFreqValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException
{
final DocTerms terms = cache.getTerms(readerContext.reader(), field, PackedInts.FAST);
final IndexReader top = ReaderUtil.getTopLevelContext(readerContext).reader();
return new IntDocValues(this) {
BytesRef ref = new BytesRef();
@Override
public int intVal(int doc)
{
try {
terms.getTerm(doc, ref);
int v = top.docFreq( qfield, ref );
//System.out.println( NAME+"["+field+"="+ref.utf8ToString()+"=("+qfield+":"+v+")]" );
return v;
}
catch (IOException e) {
throw new RuntimeException("caught exception in function "+description()+" : doc="+doc, e);
}
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DoubleConstValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
return new DoubleDocValues(this) {
@Override
public float floatVal(int doc) {
return fv;
}
@Override
public int intVal(int doc) {
return (int) lv;
}
@Override
public long longVal(int doc) {
return lv;
}
@Override
public double doubleVal(int doc) {
return constant;
}
@Override
public String strVal(int doc) {
return Double.toString(constant);
}
@Override
public Object objectVal(int doc) {
return constant;
}
@Override
public String toString(int doc) {
return description();
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SingleFunction.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
source.createWeight(context, searcher);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/QueryValueSource.java
Override
public FunctionValues getValues(Map fcontext, AtomicReaderContext readerContext) throws IOException {
return new QueryDocValues(this, readerContext, fcontext);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/QueryValueSource.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
Weight w = searcher.createNormalizedWeight(q);
context.put(this, w);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/NormValueSource.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
context.put("searcher",searcher);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/NormValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
IndexSearcher searcher = (IndexSearcher)context.get("searcher");
final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(), field);
if (similarity == null) {
throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as DefaultSimilarity)");
}
DocValues dv = readerContext.reader().normValues(field);
if (dv == null) {
return new ConstDoubleDocValues(0.0, this);
}
final byte[] norms = (byte[]) dv.getSource().getArray();
return new FloatDocValues(this) {
@Override
public float floatVal(int doc) {
return similarity.decodeNormValue(norms[doc]);
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/FloatFieldSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final float[] arr = cache.getFloats(readerContext.reader(), field, parser, true);
final Bits valid = cache.getDocsWithField(readerContext.reader(), field);
return new FloatDocValues(this) {
@Override
public float floatVal(int doc) {
return arr[doc];
}
@Override
public Object objectVal(int doc) {
return valid.get(doc) ? arr[doc] : null;
}
@Override
public boolean exists(int doc) {
return valid.get(doc);
}
@Override
public ValueFiller getValueFiller() {
return new ValueFiller() {
private final float[] floatArr = arr;
private final MutableValueFloat mval = new MutableValueFloat();
@Override
public MutableValue getValue() {
return mval;
}
@Override
public void fillValue(int doc) {
mval.value = floatArr[doc];
mval.exists = valid.get(doc);
}
};
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ReciprocalFloatFunction.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues vals = source.getValues(context, readerContext);
return new FloatDocValues(this) {
@Override
public float floatVal(int doc) {
return a/(m*vals.floatVal(doc) + b);
}
@Override
public String toString(int doc) {
return Float.toString(a) + "/("
+ m + "*float(" + vals.toString(doc) + ')'
+ '+' + b + ')';
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ReciprocalFloatFunction.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
source.createWeight(context, searcher);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
Fields fields = readerContext.reader().fields();
final Terms terms = fields.terms(indexedField);
return new IntDocValues(this) {
DocsEnum docs;
int atDoc;
int lastDocRequested = -1;
{ reset(); }
public void reset() throws IOException {
// no one should call us for deleted docs?
boolean omitTF = false;
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(indexedBytes, false)) {
docs = termsEnum.docs(null, null, true);
if (docs == null) { // omit tf
omitTF = true;
docs = termsEnum.docs(null, null, false);
}
} else {
docs = null;
}
} else {
docs = null;
}
if (docs == null) {
docs = new DocsEnum() {
@Override
public int freq() {
return 0;
}
@Override
public int docID() {
return DocIdSetIterator.NO_MORE_DOCS;
}
@Override
public int nextDoc() throws IOException {
return DocIdSetIterator.NO_MORE_DOCS;
}
@Override
public int advance(int target) throws IOException {
return DocIdSetIterator.NO_MORE_DOCS;
}
};
} else if (omitTF) {
// the docsenum won't support freq(), so return 1
final DocsEnum delegate = docs;
docs = new DocsEnum() {
@Override
public int freq() {
return 1;
}
@Override
public int docID() {
return delegate.docID();
}
@Override
public int nextDoc() throws IOException {
return delegate.nextDoc();
}
@Override
public int advance(int target) throws IOException {
return delegate.advance(target);
}
};
}
atDoc = -1;
}
@Override
public int intVal(int doc) {
try {
if (doc < lastDocRequested) {
// out-of-order access.... reset
reset();
}
lastDocRequested = doc;
if (atDoc < doc) {
atDoc = docs.advance(doc);
}
if (atDoc > doc) {
// term doesn't match this document... either because we hit the
// end, or because the next doc is after this doc.
return 0;
}
// a match!
return docs.freq();
} catch (IOException e) {
throw new RuntimeException("caught exception in function "+description()+" : doc="+doc, e);
}
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java
public void reset() throws IOException {
// no one should call us for deleted docs?
boolean omitTF = false;
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(indexedBytes, false)) {
docs = termsEnum.docs(null, null, true);
if (docs == null) { // omit tf
omitTF = true;
docs = termsEnum.docs(null, null, false);
}
} else {
docs = null;
}
} else {
docs = null;
}
if (docs == null) {
docs = new DocsEnum() {
@Override
public int freq() {
return 0;
}
@Override
public int docID() {
return DocIdSetIterator.NO_MORE_DOCS;
}
@Override
public int nextDoc() throws IOException {
return DocIdSetIterator.NO_MORE_DOCS;
}
@Override
public int advance(int target) throws IOException {
return DocIdSetIterator.NO_MORE_DOCS;
}
};
} else if (omitTF) {
// the docsenum won't support freq(), so return 1
final DocsEnum delegate = docs;
docs = new DocsEnum() {
@Override
public int freq() {
return 1;
}
@Override
public int docID() {
return delegate.docID();
}
@Override
public int nextDoc() throws IOException {
return delegate.nextDoc();
}
@Override
public int advance(int target) throws IOException {
return delegate.advance(target);
}
};
}
atDoc = -1;
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java
Override
public int nextDoc() throws IOException {
return DocIdSetIterator.NO_MORE_DOCS;
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java
Override
public int advance(int target) throws IOException {
return DocIdSetIterator.NO_MORE_DOCS;
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java
Override
public int nextDoc() throws IOException {
return delegate.nextDoc();
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java
Override
public int advance(int target) throws IOException {
return delegate.advance(target);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SimpleFloatFunction.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues vals = source.getValues(context, readerContext);
return new FloatDocValues(this) {
@Override
public float floatVal(int doc) {
return func(doc, vals);
}
@Override
public String toString(int doc) {
return name() + '(' + vals.toString(doc) + ')';
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TotalTermFreqValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
return (FunctionValues)context.get(this);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TotalTermFreqValueSource.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
long totalTermFreq = 0;
for (AtomicReaderContext readerContext : searcher.getTopReaderContext().leaves()) {
long val = readerContext.reader().totalTermFreq(indexedField, indexedBytes);
if (val == -1) {
totalTermFreq = -1;
break;
} else {
totalTermFreq += val;
}
}
final long ttf = totalTermFreq;
context.put(this, new LongDocValues(this) {
@Override
public long longVal(int doc) {
return ttf;
}
});
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IDFValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
IndexSearcher searcher = (IndexSearcher)context.get("searcher");
TFIDFSimilarity sim = asTFIDF(searcher.getSimilarity(), field);
if (sim == null) {
throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as DefaultSimilarity)");
}
int docfreq = searcher.getIndexReader().docFreq(new Term(indexedField, indexedBytes));
float idf = sim.idf(docfreq, searcher.getIndexReader().maxDoc());
return new ConstDoubleDocValues(idf, this);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ReverseOrdFieldSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final IndexReader topReader = ReaderUtil.getTopLevelContext(readerContext).reader();
final AtomicReader r = topReader instanceof CompositeReader
? new SlowCompositeReaderWrapper((CompositeReader)topReader)
: (AtomicReader) topReader;
final int off = readerContext.docBase;
final FieldCache.DocTermsIndex sindex = FieldCache.DEFAULT.getTermsIndex(r, field);
final int end = sindex.numOrd();
return new IntDocValues(this) {
@Override
public int intVal(int doc) {
return (end - sindex.getOrd(doc+off));
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/RangeMapFloatFunction.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final FunctionValues vals = source.getValues(context, readerContext);
return new FloatDocValues(this) {
@Override
public float floatVal(int doc) {
float val = vals.floatVal(doc);
return (val>=min && val<=max) ? target : (defaultVal == null ? val : defaultVal);
}
@Override
public String toString(int doc) {
return "map(" + vals.toString(doc) + ",min=" + min + ",max=" + max + ",target=" + target + ")";
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/RangeMapFloatFunction.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
source.createWeight(context, searcher);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DoubleFieldSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final double[] arr = cache.getDoubles(readerContext.reader(), field, parser, true);
final Bits valid = cache.getDocsWithField(readerContext.reader(), field);
return new DoubleDocValues(this) {
@Override
public double doubleVal(int doc) {
return arr[doc];
}
@Override
public boolean exists(int doc) {
return valid.get(doc);
}
@Override
public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
double lower,upper;
if (lowerVal==null) {
lower = Double.NEGATIVE_INFINITY;
} else {
lower = Double.parseDouble(lowerVal);
}
if (upperVal==null) {
upper = Double.POSITIVE_INFINITY;
} else {
upper = Double.parseDouble(upperVal);
}
final double l = lower;
final double u = upper;
if (includeLower && includeUpper) {
return new ValueSourceScorer(reader, this) {
@Override
public boolean matchesValue(int doc) {
double docVal = doubleVal(doc);
return docVal >= l && docVal <= u;
}
};
}
else if (includeLower && !includeUpper) {
return new ValueSourceScorer(reader, this) {
@Override
public boolean matchesValue(int doc) {
double docVal = doubleVal(doc);
return docVal >= l && docVal < u;
}
};
}
else if (!includeLower && includeUpper) {
return new ValueSourceScorer(reader, this) {
@Override
public boolean matchesValue(int doc) {
double docVal = doubleVal(doc);
return docVal > l && docVal <= u;
}
};
}
else {
return new ValueSourceScorer(reader, this) {
@Override
public boolean matchesValue(int doc) {
double docVal = doubleVal(doc);
return docVal > l && docVal < u;
}
};
}
}
@Override
public ValueFiller getValueFiller() {
return new ValueFiller() {
private final double[] doubleArr = arr;
private final MutableValueDouble mval = new MutableValueDouble();
@Override
public MutableValue getValue() {
return mval;
}
@Override
public void fillValue(int doc) {
mval.value = doubleArr[doc];
mval.exists = valid.get(doc);
}
};
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MaxDocValueSource.java
Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
context.put("searcher",searcher);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MaxDocValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
IndexSearcher searcher = (IndexSearcher)context.get("searcher");
return new ConstIntDocValues(searcher.getIndexReader().maxDoc(), this);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ConstValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
return new FloatDocValues(this) {
@Override
public float floatVal(int doc) {
return constant;
}
@Override
public int intVal(int doc) {
return (int)constant;
}
@Override
public long longVal(int doc) {
return (long)constant;
}
@Override
public double doubleVal(int doc) {
return dv;
}
@Override
public String toString(int doc) {
return description();
}
@Override
public Object objectVal(int doc) {
return constant;
}
@Override
public boolean boolVal(int doc) {
return constant != 0.0f;
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DefFunction.java
Override
public FunctionValues getValues(Map fcontext, AtomicReaderContext readerContext) throws IOException {
return new Values(valsArr(sources, fcontext, readerContext)) {
final int upto = valsArr.length - 1;
private FunctionValues get(int doc) {
for (int i=0; i<upto; i++) {
FunctionValues vals = valsArr[i];
if (vals.exists(doc)) {
return vals;
}
}
return valsArr[upto];
}
@Override
public byte byteVal(int doc) {
return get(doc).byteVal(doc);
}
@Override
public short shortVal(int doc) {
return get(doc).shortVal(doc);
}
@Override
public float floatVal(int doc) {
return get(doc).floatVal(doc);
}
@Override
public int intVal(int doc) {
return get(doc).intVal(doc);
}
@Override
public long longVal(int doc) {
return get(doc).longVal(doc);
}
@Override
public double doubleVal(int doc) {
return get(doc).doubleVal(doc);
}
@Override
public String strVal(int doc) {
return get(doc).strVal(doc);
}
@Override
public boolean boolVal(int doc) {
return get(doc).boolVal(doc);
}
@Override
public boolean bytesVal(int doc, BytesRef target) {
return get(doc).bytesVal(doc, target);
}
@Override
public Object objectVal(int doc) {
return get(doc).objectVal(doc);
}
@Override
public boolean exists(int doc) {
// return true if any source exists?
for (FunctionValues vals : valsArr) {
if (vals.exists(doc)) {
return true;
}
}
return false;
}
@Override
public ValueFiller getValueFiller() {
// TODO: need ValueSource.type() to determine correct type
return super.getValueFiller();
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java
Override
public int nextDoc() throws IOException {
for (;;) {
doc++;
if (doc >= maxDoc) return doc = NO_MORE_DOCS;
if (matches(doc)) return doc;
}
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java
Override
public int advance(int target) throws IOException {
// also works fine when target==NO_MORE_DOCS
doc = target - 1;
return nextDoc();
}
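advance() here is just nextDoc() restarted one position before the target, which is adequate for a scorer that can test any document by value but has no skip structure of its own. A minimal sketch of that pattern (illustrative only, not part of ValueSourceScorer.java):
// Illustrative skeleton of the advance-by-scanning pattern: a matches(doc) predicate
// plus a cursor is enough to implement both nextDoc() and advance().
abstract class ScanningIterator {
  final int maxDoc;
  int doc = -1;
  ScanningIterator(int maxDoc) { this.maxDoc = maxDoc; }
  abstract boolean matches(int doc);
  int nextDoc() {
    while (++doc < maxDoc) {
      if (matches(doc)) return doc;
    }
    return Integer.MAX_VALUE; // stand-in for DocIdSetIterator.NO_MORE_DOCS
  }
  int advance(int target) {
    doc = target - 1; // resume scanning just before the target; also fine for the sentinel
    return nextDoc();
  }
}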
// in lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java
Override
public float score() throws IOException {
return values.floatVal(doc);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/ValueSource.java
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/ValueSource.java
public SortField getSortField(boolean reverse) throws IOException {
return new ValueSourceSortField(reverse);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/ValueSource.java
Override
public SortField rewrite(IndexSearcher searcher) throws IOException {
Map context = newContext(searcher);
createWeight(context, searcher);
return new SortField(getField(), new ValueSourceComparatorSource(context), getReverse());
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/ValueSource.java
Override
public FieldComparator<Double> newComparator(String fieldname, int numHits,
int sortPos, boolean reversed) throws IOException {
return new ValueSourceComparator(context, numHits);
}
// in lucene/queries/src/java/org/apache/lucene/queries/function/ValueSource.java
Override
public FieldComparator setNextReader(AtomicReaderContext context) throws IOException {
docVals = getValues(fcontext, context);
return this;
}
// in lucene/queries/src/java/org/apache/lucene/queries/BoostingQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
BooleanQuery result = new BooleanQuery() {
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new BooleanWeight(searcher, false) {
@Override
public float coord(int overlap, int max) {
switch (overlap) {
case 1: // matched only one clause
return 1.0f; // use the score as-is
case 2: // matched both clauses
return boost; // multiply by boost
default:
return 0.0f;
}
}
};
}
};
result.add(match, BooleanClause.Occur.MUST);
result.add(context, BooleanClause.Occur.SHOULD);
return result;
}
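Because coord() returns 1.0 when only the match clause matches and boost when both clauses match, documents that also hit the context query have their score multiplied by boost while everything else scores normally. A hedged usage sketch, assuming a boost below 1 to demote rather than exclude (matchQuery, contextQuery and searcher are caller-supplied):
// Illustrative usage of BoostingQuery: demote, but do not filter out, documents that
// also match contextQuery.
Query demoted = new BoostingQuery(matchQuery, contextQuery, 0.2f);
TopDocs hits = searcher.search(demoted, 10);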
// in lucene/queries/src/java/org/apache/lucene/queries/BoostingQuery.java
Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new BooleanWeight(searcher, false) {
@Override
public float coord(int overlap, int max) {
switch (overlap) {
case 1: // matched only one clause
return 1.0f; // use the score as-is
case 2: // matched both clauses
return boost; // multiply by boost
default:
return 0.0f;
}
}
};
}
// in lucene/queries/src/java/org/apache/lucene/queries/CustomScoreProvider.java
public float customScore(int doc, float subQueryScore, float valSrcScores[]) throws IOException {
if (valSrcScores.length == 1) {
return customScore(doc, subQueryScore, valSrcScores[0]);
}
if (valSrcScores.length == 0) {
return customScore(doc, subQueryScore, 1);
}
float score = subQueryScore;
for(int i = 0; i < valSrcScores.length; i++) {
score *= valSrcScores[i];
}
return score;
}
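The default combination is simply the product of the sub-query score and every value-source score, with the zero- and one-source cases special-cased above. A tiny arithmetic sketch of that rule (illustrative, not part of CustomScoreProvider.java):
// Illustrative restatement of the default customScore combination:
// subQueryScore = 2.0, valSrcScores = {0.5, 3.0}  =>  2.0 * 0.5 * 3.0 = 3.0
static float defaultCombine(float subQueryScore, float... valSrcScores) {
  float score = subQueryScore;
  for (float v : valSrcScores) {
    score *= v;
  }
  return score;
}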
// in lucene/queries/src/java/org/apache/lucene/queries/CustomScoreProvider.java
public float customScore(int doc, float subQueryScore, float valSrcScore) throws IOException {
return subQueryScore * valSrcScore;
}
// in lucene/queries/src/java/org/apache/lucene/queries/CustomScoreProvider.java
public Explanation customExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpls[]) throws IOException {
if (valSrcExpls.length == 1) {
return customExplain(doc, subQueryExpl, valSrcExpls[0]);
}
if (valSrcExpls.length == 0) {
return subQueryExpl;
}
float valSrcScore = 1;
for (int i = 0; i < valSrcExpls.length; i++) {
valSrcScore *= valSrcExpls[i].getValue();
}
Explanation exp = new Explanation( valSrcScore * subQueryExpl.getValue(), "custom score: product of:");
exp.addDetail(subQueryExpl);
for (int i = 0; i < valSrcExpls.length; i++) {
exp.addDetail(valSrcExpls[i]);
}
return exp;
}
// in lucene/queries/src/java/org/apache/lucene/queries/CustomScoreProvider.java
public Explanation customExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpl) throws IOException {
float valSrcScore = 1;
if (valSrcExpl != null) {
valSrcScore *= valSrcExpl.getValue();
}
Explanation exp = new Explanation( valSrcScore * subQueryExpl.getValue(), "custom score: product of:");
exp.addDetail(subQueryExpl);
exp.addDetail(valSrcExpl);
return exp;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java
public static void main(String[] args) throws IOException {
if (args.length<1) {
System.err.println("Usage: java QualityQueriesFinder <index-dir>");
System.exit(1);
}
QualityQueriesFinder qqf = new QualityQueriesFinder(FSDirectory.open(new File(args[0])));
String q[] = qqf.bestQueries("body",20);
for (int i=0; i<q.length; i++) {
System.out.println(newline+formatQueryAsTrecTopic(i,q[i],null,null));
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java
private String [] bestQueries(String field,int numQueries) throws IOException {
String words[] = bestTerms("body",4*numQueries);
int n = words.length;
int m = n/4;
String res[] = new String[m];
for (int i=0; i<res.length; i++) {
res[i] = words[i] + " " + words[m+i]+ " " + words[n-1-m-i] + " " + words[n-1-i];
//System.out.println("query["+i+"]: "+res[i]);
}
return res;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/QualityQueriesFinder.java
private String [] bestTerms(String field,int numTerms) throws IOException {
PriorityQueue<TermDf> pq = new TermsDfQueue(numTerms);
IndexReader ir = DirectoryReader.open(dir);
try {
int threshold = ir.maxDoc() / 10; // ignore words too common.
Terms terms = MultiFields.getTerms(ir, field);
if (terms != null) {
TermsEnum termsEnum = terms.iterator(null);
while (termsEnum.next() != null) {
int df = termsEnum.docFreq();
if (df<threshold) {
String ttxt = termsEnum.term().utf8ToString();
pq.insertWithOverflow(new TermDf(ttxt,df));
}
}
}
} finally {
ir.close();
}
String res[] = new String[pq.size()];
int i = 0;
while (pq.size()>0) {
TermDf tdf = pq.pop();
res[i++] = tdf.word;
System.out.println(i+". word: "+tdf.df+" "+tdf.word);
}
return res;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java
public void report(QualityQuery qq, TopDocs td, String docNameField, IndexSearcher searcher) throws IOException {
if (logger==null) {
return;
}
ScoreDoc sd[] = td.scoreDocs;
String sep = " \t ";
DocNameExtractor xt = new DocNameExtractor(docNameField);
for (int i=0; i<sd.length; i++) {
String docName = xt.docName(searcher,sd[i].doc);
logger.println(
qq.getQueryID() + sep +
"Q0" + sep +
format(docName,20) + sep +
format(""+i,7) + sep +
nf.format(sd[i].score) + sep +
name
);
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java
public String docName(IndexSearcher searcher, int docid) throws IOException {
final List<String> name = new ArrayList<String>();
searcher.getIndexReader().document(docid, new StoredFieldVisitor() {
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
name.add(value);
}
@Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
if (!name.isEmpty()) {
return Status.STOP;
} else if (fieldInfo.name.equals(docNameField)) {
return Status.YES;
} else {
return Status.NO;
}
}
});
if (name.size() != 0) {
return name.get(0);
} else {
return null;
}
}
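docName() uses a StoredFieldVisitor so that only the single name field is loaded and visiting stops as soon as it is found. Usage is a one-liner; a hedged sketch where the "docname" field, searcher and docid are caller-supplied:
// Illustrative usage of DocNameExtractor: fetch just one stored field for a hit
// without loading the rest of the document.
DocNameExtractor extractor = new DocNameExtractor("docname");
String docName = extractor.docName(searcher, docid); // null if the field is absent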
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java
Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
name.add(value);
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java
Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
if (!name.isEmpty()) {
return Status.STOP;
} else if (fieldInfo.name.equals(docNameField)) {
return Status.YES;
} else {
return Status.NO;
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/Trec1MQReader.java
public QualityQuery[] readQueries(BufferedReader reader) throws IOException {
ArrayList<QualityQuery> res = new ArrayList<QualityQuery>();
String line;
try {
while (null!=(line=reader.readLine())) {
line = line.trim();
if (line.startsWith("#")) {
continue;
}
// id
int k = line.indexOf(":");
String id = line.substring(0,k).trim();
// qtext
String qtext = line.substring(k+1).trim();
// we got a topic!
HashMap<String,String> fields = new HashMap<String,String>();
fields.put(name,qtext);
//System.out.println("id: "+id+" qtext: "+qtext+" line: "+line);
QualityQuery topic = new QualityQuery(id,fields);
res.add(topic);
}
} finally {
reader.close();
}
// sort result array (by ID)
QualityQuery qq[] = res.toArray(new QualityQuery[0]);
Arrays.sort(qq);
return qq;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java
public QualityQuery[] readQueries(BufferedReader reader) throws IOException {
ArrayList<QualityQuery> res = new ArrayList<QualityQuery>();
StringBuilder sb;
try {
while (null!=(sb=read(reader,"<top>",null,false,false))) {
HashMap<String,String> fields = new HashMap<String,String>();
// id
sb = read(reader,"<num>",null,true,false);
int k = sb.indexOf(":");
String id = sb.substring(k+1).trim();
// title
sb = read(reader,"<title>",null,true,false);
k = sb.indexOf(">");
String title = sb.substring(k+1).trim();
// description
read(reader,"<desc>",null,false,false);
sb.setLength(0);
String line = null;
while ((line = reader.readLine()) != null) {
if (line.startsWith("<narr>"))
break;
if (sb.length() > 0) sb.append(' ');
sb.append(line);
}
String description = sb.toString().trim();
// narrative
sb.setLength(0);
while ((line = reader.readLine()) != null) {
if (line.startsWith("</top>"))
break;
if (sb.length() > 0) sb.append(' ');
sb.append(line);
}
String narrative = sb.toString().trim();
// we got a topic!
fields.put("title",title);
fields.put("description",description);
fields.put("narrative", narrative);
QualityQuery topic = new QualityQuery(id,fields);
res.add(topic);
}
} finally {
reader.close();
}
// sort result array (by ID)
QualityQuery qq[] = res.toArray(new QualityQuery[0]);
Arrays.sort(qq);
return qq;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java
private StringBuilder read (BufferedReader reader, String prefix, StringBuilder sb, boolean collectMatchLine, boolean collectAll) throws IOException {
sb = (sb==null ? new StringBuilder() : sb);
String sep = "";
while (true) {
String line = reader.readLine();
if (line==null) {
return null;
}
if (line.startsWith(prefix)) {
if (collectMatchLine) {
sb.append(sep+line);
sep = newline;
}
break;
}
if (collectAll) {
sb.append(sep+line);
sep = newline;
}
}
//System.out.println("read: "+sb);
return sb;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java
private QualityStats analyzeQueryResults(QualityQuery qq, Query q, TopDocs td, Judge judge, PrintWriter logger, long searchTime) throws IOException {
QualityStats stts = new QualityStats(judge.maxRecall(qq),searchTime);
ScoreDoc sd[] = td.scoreDocs;
long t1 = System.currentTimeMillis(); // for the first doc name we also measure construction of the doc name extractor, just in case.
DocNameExtractor xt = new DocNameExtractor(docNameField);
for (int i=0; i<sd.length; i++) {
String docName = xt.docName(searcher,sd[i].doc);
long docNameExtractTime = System.currentTimeMillis() - t1;
t1 = System.currentTimeMillis();
boolean isRelevant = judge.isRelevant(docName,qq);
stts.addResult(i+1,isRelevant, docNameExtractTime);
}
if (logger!=null) {
logger.println(qq.getQueryID()+" - "+q);
stts.log(qq.getQueryID()+" Stats:",1,logger," ");
}
return stts;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java
private InputStream inputStream(InputStream in) throws IOException {
try {
return csfType==null ? in : new CompressorStreamFactory().createCompressorInputStream(csfType, in);
} catch (CompressorException e) {
IOException ioe = new IOException(e.getMessage());
ioe.initCause(e);
throw ioe;
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java
private OutputStream outputStream(OutputStream os) throws IOException {
try {
return csfType==null ? os : new CompressorStreamFactory().createCompressorOutputStream(csfType, os);
} catch (CompressorException e) {
IOException ioe = new IOException(e.getMessage());
ioe.initCause(e);
throw ioe;
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java
public static InputStream inputStream(File file) throws IOException {
// First, create a FileInputStream, as this will be required by all types.
// Wrap with BufferedInputStream for better performance
InputStream in = new BufferedInputStream(new FileInputStream(file), BUFFER_SIZE);
return fileType(file).inputStream(in);
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java
public static OutputStream outputStream(File file) throws IOException {
// First, create a FileOutputStream, as this will be required by all types.
// Wrap with BufferedOutputStream for better performance
OutputStream os = new BufferedOutputStream(new FileOutputStream(file), BUFFER_SIZE);
return fileType(file).outputStream(os);
}
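Both helpers defer compression handling to Commons Compress: the file extension determines csfType, and CompressorStreamFactory wraps the buffered stream accordingly. A minimal sketch using that same factory directly (illustrative, not part of StreamUtils.java; assumes commons-compress and java.io on the classpath):
// Illustrative gzip reader built with the same CompressorStreamFactory API used above.
static InputStream openGzip(File file) throws Exception {
  InputStream in = new BufferedInputStream(new FileInputStream(file));
  return new CompressorStreamFactory()
      .createCompressorInputStream(CompressorStreamFactory.GZIP, in);
}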
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java
private void ensureOpen() throws IOException {
if (sb == null) {
throw new IOException("Stream has already been closed");
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java
Override
public void mark(int readAheadLimit) throws IOException {
if (readAheadLimit < 0) {
throw new IllegalArgumentException("Read-ahead limit cannot be negative: " + readAheadLimit);
}
synchronized (lock) {
ensureOpen();
mark = next;
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java
Override
public int read() throws IOException {
synchronized (lock) {
ensureOpen();
return next >= length ? -1 : sb.charAt(next++);
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java
Override
public int read(char cbuf[], int off, int len) throws IOException {
synchronized (lock) {
ensureOpen();
// Validate parameters
if (off < 0 || off > cbuf.length || len < 0 || off + len > cbuf.length) {
throw new IndexOutOfBoundsException("off=" + off + " len=" + len + " cbuf.length=" + cbuf.length);
}
if (len == 0) {
return 0;
}
if (next >= length) {
return -1;
}
int n = Math.min(length - next, len);
sb.getChars(next, next + n, cbuf, off);
next += n;
return n;
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java
Override
public boolean ready() throws IOException {
synchronized (lock) {
ensureOpen();
return true;
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java
Override
public void reset() throws IOException {
synchronized (lock) {
ensureOpen();
next = mark;
length = sb.length();
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java
Override
public long skip(long ns) throws IOException {
synchronized (lock) {
ensureOpen();
if (next >= length) {
return 0;
}
// Bound skip by beginning and end of the source
long n = Math.min(length - next, ns);
n = Math.max(-next, n);
next += n;
return n;
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/FileUtils.java
public static boolean fullyDelete(File dir) throws IOException {
if (dir == null || !dir.exists()) return false;
File contents[] = dir.listFiles();
if (contents != null) {
for (int i = 0; i < contents.length; i++) {
if (contents[i].isFile()) {
if (!contents[i].delete()) {
return false;
}
} else {
if (!fullyDelete(contents[i])) {
return false;
}
}
}
}
return dir.delete();
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
Override
public void close() throws IOException {
IOUtils.close(indexWriter, indexReader, directory,
taxonomyWriter, taxonomyReader, taxonomyDir,
docMaker, facetSource, contentSource);
// close all perf objects that are closeable.
ArrayList<Closeable> perfObjectsToClose = new ArrayList<Closeable>();
for (Object obj : perfObjects.values()) {
if (obj instanceof Closeable) {
perfObjectsToClose.add((Closeable) obj);
}
}
IOUtils.close(perfObjectsToClose);
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
private Directory createDirectory(boolean eraseIndex, String dirName,
String dirParam) throws IOException {
if ("FSDirectory".equals(config.get(dirParam,"RAMDirectory"))) {
File workDir = new File(config.get("work.dir","work"));
File indexDir = new File(workDir,dirName);
if (eraseIndex && indexDir.exists()) {
FileUtils.fullyDelete(indexDir);
}
indexDir.mkdirs();
return FSDirectory.open(indexDir);
}
return new RAMDirectory();
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
public synchronized void setTaxonomyReader(TaxonomyReader taxoReader) throws IOException {
if (taxoReader == this.taxonomyReader) {
return;
}
if (taxonomyReader != null) {
taxonomyReader.decRef();
}
if (taxoReader != null) {
taxoReader.incRef();
}
this.taxonomyReader = taxoReader;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
public synchronized void setIndexReader(DirectoryReader indexReader) throws IOException {
if (indexReader == this.indexReader) {
return;
}
if (this.indexReader != null) {
// Release current IR
this.indexReader.decRef();
}
this.indexReader = indexReader;
if (indexReader != null) {
// Hold reference to new IR
indexReader.incRef();
indexSearcher = new IndexSearcher(indexReader);
} else {
indexSearcher = null;
}
}
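// Sketch (not from the source) of the reference-counting handoff used by the reader-opening
// tasks further below: setIndexReader() incRef()s the reader it keeps, so the opener drops
// its own reference right away and the run data remains the sole owner.
static void openAndRegisterReader(PerfRunData runData) throws IOException {
  DirectoryReader r = DirectoryReader.open(runData.getDirectory());
  runData.setIndexReader(r); // run data takes its own reference via incRef()
  r.decRef();                // release the opener's reference; refCount stays >= 1
}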
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
public void resetInputs() throws IOException {
contentSource.resetInputs();
docMaker.resetInputs();
facetSource.resetInputs();
for (final QueryMaker queryMaker : readTaskQueryMaker.values()) {
queryMaker.resetInputs();
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
private void read(StringBuilder buf, String lineStart,
boolean collectMatchLine, boolean collectAll) throws IOException, NoMoreDataException {
String sep = "";
while (true) {
String line = reader.readLine();
if (line == null) {
openNextFile();
continue;
}
rawDocSize += line.length();
if (lineStart!=null && line.startsWith(lineStart)) {
if (collectMatchLine) {
buf.append(sep).append(line);
sep = NEW_LINE;
}
return;
}
if (collectAll) {
buf.append(sep).append(line);
sep = NEW_LINE;
}
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
void openNextFile() throws NoMoreDataException, IOException {
close();
currPathType = null;
while (true) {
if (nextFile >= inputFiles.size()) {
// exhausted files, start a new round, unless forever set to false.
if (!forever) {
throw new NoMoreDataException();
}
nextFile = 0;
iteration++;
}
File f = inputFiles.get(nextFile++);
if (verbose) {
System.out.println("opening: " + f + " length: " + f.length());
}
try {
InputStream inputStream = StreamUtils.inputStream(f); // support either gzip, bzip2, or regular text file, by extension
reader = new BufferedReader(new InputStreamReader(inputStream, encoding), StreamUtils.BUFFER_SIZE);
currPathType = TrecDocParser.pathType(f);
return;
} catch (Exception e) {
if (verbose) {
System.out.println("Skipping 'bad' file " + f.getAbsolutePath()+" due to "+e.getMessage());
continue;
}
throw new NoMoreDataException();
}
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
Override
public void close() throws IOException {
if (reader == null) {
return;
}
try {
reader.close();
} catch (IOException e) {
if (verbose) {
System.out.println("failed to close reader !");
e.printStackTrace(System.out);
}
}
reader = null;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
Override
public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
String name = null;
StringBuilder docBuf = getDocBuffer();
ParsePathType parsedPathType;
// protect reading from the TREC files by multiple threads. The rest of the
// method, i.e., parsing the content and returning the DocData can run unprotected.
synchronized (lock) {
if (reader == null) {
openNextFile();
}
// 1. skip until doc start - required for all TREC formats
docBuf.setLength(0);
read(docBuf, DOC, false, false);
// save parsedPathType for passing to trecDocParser after the sync block, in
// case another thread opens another file in between.
parsedPathType = currPathType;
// 2. name - required for all TREC formats
docBuf.setLength(0);
read(docBuf, DOCNO, true, false);
name = docBuf.substring(DOCNO.length(), docBuf.indexOf(TERMINATING_DOCNO,
DOCNO.length())).trim();
if (!excludeDocnameIteration) {
name = name + "_" + iteration;
}
// 3. read all until end of doc
docBuf.setLength(0);
read(docBuf, TERMINATING_DOC, false, true);
}
// count char length of text to be parsed (may be larger than the resulting plain doc body text).
addBytes(docBuf.length());
// This code segment relies on HtmlParser being thread safe. When we get
// here, everything else is already private to that thread, so we're safe.
try {
docData = trecDocParser.parse(docData, name, this, docBuf, parsedPathType);
addItem();
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
}
return docData;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
Override
public void resetInputs() throws IOException {
synchronized (lock) {
super.resetInputs();
close();
nextFile = 0;
iteration = 0;
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecParserByPath.java
Override
public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
return pathType2parser.get(pathType).parse(docData, name, trecSrc, docBuf, pathType);
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java
public void resetInputs() throws IOException {
bytesCount = 0;
itemCount = 0;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecGov2Parser.java
Override
public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
// Set up a (per-thread) reused Reader over the read content, reset it to re-read from docBuf
Reader r = trecSrc.getTrecDocReader(docBuf);
// skip some of the text, optionally set date
Date date = null;
int h1 = docBuf.indexOf(DOCHDR);
if (h1>=0) {
int h2 = docBuf.indexOf(TERMINATING_DOCHDR,h1);
String dateStr = extract(docBuf, DATE, DATE_END, h2, null);
if (dateStr != null) {
date = trecSrc.parseDate(dateStr);
}
r.mark(h2+TERMINATING_DOCHDR_LENGTH);
}
r.reset();
HTMLParser htmlParser = trecSrc.getHtmlParser();
return htmlParser.parse(docData, name, date, null, r, null);
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecLATimesParser.java
Override
public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
int mark = 0; // that much is skipped
// date...
Date date = null;
String dateStr = extract(docBuf, DATE, DATE_END, -1, null);
if (dateStr != null) {
int d2a = dateStr.indexOf(DATE_NOISE);
if (d2a > 0) {
dateStr = dateStr.substring(0,d2a+3); // we need the "day" part
}
dateStr = stripTags(dateStr,0).toString();
date = trecSrc.parseDate(dateStr.trim());
}
// title... first try with SUBJECT, then with HEADLINE
String title = extract(docBuf, SUBJECT, SUBJECT_END, -1, null);
if (title==null) {
title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null);
}
if (title!=null) {
title = stripTags(title,0).toString().trim();
}
docData.clear();
docData.setName(name);
docData.setDate(date);
docData.setTitle(title);
docData.setBody(stripTags(docBuf, mark).toString());
return docData;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java
Override
public void close() throws IOException {
if (reader != null) {
reader.close();
reader = null;
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java
Override
public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
final String line;
final int myID;
synchronized(this) {
line = reader.readLine();
if (line == null) {
if (!forever) {
throw new NoMoreDataException();
}
// Reset the file
openFile();
return getNextDocData(docData);
}
if (docDataLineReader == null) { // first line ever, one time initialization,
docDataLineReader = createDocDataLineReader(line);
if (skipHeaderLine) {
return getNextDocData(docData);
}
}
// increment IDs only once...
myID = readCount++;
}
// The date String was written in the format of DateTools.dateToString.
docData.clear();
docData.setID(myID);
docDataLineReader.parseLine(docData, line);
return docData;
}
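// A minimal sketch of a line parser in the spirit of the docDataLineReader used above.
// The tab delimiter and the title/date/body field order are illustrative assumptions,
// not taken from the source; the date string is in the DateTools.dateToString format
// mentioned in the comment above.
static void parseTabSeparatedLine(DocData docData, String line) {
  String[] parts = line.split("\t", 3);
  docData.setTitle(parts.length > 0 ? parts[0] : "");
  docData.setDate(parts.length > 1 ? parts[1] : null);
  docData.setBody(parts.length > 2 ? parts[2] : "");
}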
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LineDocSource.java
Override
public void resetInputs() throws IOException {
super.resetInputs();
openFile();
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/SimpleCharStream.java
protected void FillBuff() throws java.io.IOException
{
if (maxNextCharInd == available)
{
if (available == bufsize)
{
if (tokenBegin > 2048)
{
bufpos = maxNextCharInd = 0;
available = tokenBegin;
}
else if (tokenBegin < 0)
bufpos = maxNextCharInd = 0;
else
ExpandBuff(false);
}
else if (available > tokenBegin)
available = bufsize;
else if ((tokenBegin - available) < 2048)
ExpandBuff(true);
else
available = tokenBegin;
}
int i;
try {
if ((i = inputStream.read(buffer, maxNextCharInd,
available - maxNextCharInd)) == -1)
{
inputStream.close();
throw new java.io.IOException();
}
else
maxNextCharInd += i;
return;
}
catch(java.io.IOException e) {
--bufpos;
backup(0);
if (tokenBegin == -1)
tokenBegin = bufpos;
throw e;
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/SimpleCharStream.java
public char BeginToken() throws java.io.IOException
{
tokenBegin = -1;
char c = readChar();
tokenBegin = bufpos;
return c;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/SimpleCharStream.java
public char readChar() throws java.io.IOException
{
if (inBuf > 0)
{
--inBuf;
if (++bufpos == bufsize)
bufpos = 0;
return buffer[bufpos];
}
if (++bufpos >= maxNextCharInd)
FillBuff();
char c = buffer[bufpos];
UpdateLineColumn(c);
return c;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
public boolean full() throws IOException{
return this.available() >= PipedInputStream.PIPE_SIZE;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
public String getTitle() throws IOException, InterruptedException {
if (pipeIn == null)
getReader(); // spawn parsing thread
while (true) {
synchronized(this) {
if (titleComplete || pipeInStream.full())
break;
wait(10);
}
}
return title.toString().trim();
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
public Properties getMetaTags() throws IOException,
InterruptedException {
if (pipeIn == null)
getReader(); // spawn parsing thread
while (true) {
synchronized(this) {
if (titleComplete || pipeInStream.full())
break;
wait(10);
}
}
return metaTags;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
public String getSummary() throws IOException, InterruptedException {
if (pipeIn == null)
getReader(); // spawn parsing thread
while (true) {
synchronized(this) {
if (summary.length() >= SUMMARY_LENGTH || pipeInStream.full())
break;
wait(10);
}
}
if (summary.length() > SUMMARY_LENGTH)
summary.setLength(SUMMARY_LENGTH);
String sum = summary.toString().trim();
String tit = getTitle();
if (sum.equals(""))
return tit;
else
return sum;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
public Reader getReader() throws IOException {
if (pipeIn == null) {
pipeInStream = new MyPipedInputStream();
pipeOutStream = new PipedOutputStream(pipeInStream);
pipeIn = new InputStreamReader(pipeInStream, "UTF-16BE");
pipeOut = new OutputStreamWriter(pipeOutStream, "UTF-16BE");
Thread thread = new ParserThread(this);
thread.start(); // start parsing
}
return pipeIn;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
void addText(String text) throws IOException {
if (inStyle)
return;
if (inTitle)
title.append(text);
else {
addToSummary(text);
if (!titleComplete && !(title.length() == 0)) { // finished title
synchronized(this) {
titleComplete = true; // tell waiting threads
notifyAll();
}
}
}
length += text.length();
pipeOut.write(text);
afterSpace = false;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
void addSpace() throws IOException {
if (!afterSpace) {
if (inTitle)
title.append(" ");
else
addToSummary(" ");
String space = afterTag ? eol : " ";
length += space.length();
pipeOut.write(space);
afterSpace = true;
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
final public void HTMLDocument() throws ParseException, IOException {
Token t;
label_1:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ScriptStart:
case TagName:
case DeclName:
case Comment1:
case Comment2:
case Word:
case Entity:
case Space:
case Punct:
;
break;
default:
jj_la1[0] = jj_gen;
break label_1;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TagName:
Tag();
afterTag = true;
break;
case DeclName:
t = Decl();
afterTag = true;
break;
case Comment1:
case Comment2:
CommentTag();
afterTag = true;
break;
case ScriptStart:
ScriptTag();
afterTag = true;
break;
case Word:
t = jj_consume_token(Word);
addText(t.image); afterTag = false;
break;
case Entity:
t = jj_consume_token(Entity);
addText(Entities.decode(t.image)); afterTag = false;
break;
case Punct:
t = jj_consume_token(Punct);
addText(t.image); afterTag = false;
break;
case Space:
jj_consume_token(Space);
addSpace(); afterTag = false;
break;
default:
jj_la1[1] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
}
jj_consume_token(0);
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
final public void Tag() throws ParseException, IOException {
Token t1, t2;
boolean inImg = false;
t1 = jj_consume_token(TagName);
String tagName = t1.image.toLowerCase(Locale.ENGLISH);
if(Tags.WS_ELEMS.contains(tagName) ) {
addSpace();
}
inTitle = tagName.equalsIgnoreCase("<title"); // keep track if in <TITLE>
inMetaTag = tagName.equalsIgnoreCase("<META"); // keep track if in <META>
inStyle = tagName.equalsIgnoreCase("<STYLE"); // keep track if in <STYLE>
inImg = tagName.equalsIgnoreCase("<img"); // keep track if in <IMG>
label_2:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ArgName:
;
break;
default:
jj_la1[2] = jj_gen;
break label_2;
}
t1 = jj_consume_token(ArgName);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ArgEquals:
jj_consume_token(ArgEquals);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ArgValue:
case ArgQuote1:
case ArgQuote2:
t2 = ArgValue();
if (inImg && t1.image.equalsIgnoreCase("alt") && t2 != null)
addText("[" + t2.image + "]");
if(inMetaTag &&
( t1.image.equalsIgnoreCase("name") ||
t1.image.equalsIgnoreCase("HTTP-EQUIV")
)
&& t2 != null)
{
currentMetaTag=t2.image.toLowerCase(Locale.ENGLISH);
if(currentMetaTag != null && currentMetaContent != null) {
addMetaTag();
}
}
if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
null)
{
currentMetaContent=t2.image.toLowerCase(Locale.ENGLISH);
if(currentMetaTag != null && currentMetaContent != null) {
addMetaTag();
}
}
break;
default:
jj_la1[3] = jj_gen;
;
}
break;
default:
jj_la1[4] = jj_gen;
;
}
}
jj_consume_token(TagEnd);
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java
Override
public void close() throws IOException {
synchronized (EnwikiContentSource.this) {
if (is != null) {
is.close();
is = null;
}
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java
Override
public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
String[] tuple = parser.next();
docData.clear();
docData.setName(tuple[ID]);
docData.setBody(tuple[BODY]);
docData.setDate(tuple[DATE]);
docData.setTitle(tuple[TITLE]);
return docData;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java
Override
public void resetInputs() throws IOException {
super.resetInputs();
is = StreamUtils.inputStream(file);
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java
Override
public void close() throws IOException {
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java
Override
public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
docData.clear();
// store the current counter to avoid synchronization later on
long curCounter;
synchronized (this) {
curCounter = counter;
if (counter == Long.MAX_VALUE){
counter = Long.MIN_VALUE;//loop around
} else {
++counter;
}
}
docData.setBody(rnbf.format(curCounter));
docData.setName("doc_" + String.valueOf(curCounter));
docData.setTitle("title_" + String.valueOf(curCounter));
docData.setDate(new Date());
return docData;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java
Override
public void resetInputs() throws IOException {
counter = Long.MIN_VALUE + 10;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java
Override
public CategoryContainer getNextFacets(CategoryContainer facets) throws NoMoreDataException, IOException {
if (facets == null) {
facets = new CategoryContainer();
} else {
facets.clear();
}
int numFacets = 1 + random.nextInt(maxDocFacets-1); // at least one facet to each doc
for (int i=0; i<numFacets; i++) {
CategoryPath cp = new CategoryPath();
int depth = 1 + random.nextInt(maxFacetDepth-1); // depth 0 is not useful
for (int k=0; k<depth; k++) {
cp.add(Integer.toString(random.nextInt(maxValue)));
addItem();
}
facets.addCategory(cp);
addBytes(cp.toString().length()); // very rough approximation
}
return facets;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java
Override
public void close() throws IOException {
// nothing to do here
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFR94Parser.java
Override
public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
int mark = 0; // that much is skipped
// optionally skip some of the text, set date (no title?)
Date date = null;
int h1 = docBuf.indexOf(TEXT);
if (h1>=0) {
int h2 = docBuf.indexOf(TEXT_END,h1);
mark = h1+TEXT_LENGTH;
// date...
String dateStr = extract(docBuf, DATE, DATE_END, h2, DATE_NOISE_PREFIXES);
if (dateStr != null) {
dateStr = stripTags(dateStr,0).toString();
date = trecSrc.parseDate(dateStr.trim());
}
}
docData.clear();
docData.setName(name);
docData.setDate(date);
docData.setBody(stripTags(docBuf, mark).toString());
return docData;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java
Override
public void resetInputs() throws IOException {
printStatistics("facets");
// re-initiate since properties by round may have changed.
setConfig(getConfig());
super.resetInputs();
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java
public DocData parse(DocData docData, String name, Date date, String title, Reader reader, DateFormat dateFormat) throws IOException, InterruptedException {
org.apache.lucene.benchmark.byTask.feeds.demohtml.HTMLParser p = new org.apache.lucene.benchmark.byTask.feeds.demohtml.HTMLParser(reader);
// title
if (title==null) {
title = p.getTitle();
}
// properties
Properties props = p.getMetaTags();
// body
Reader r = p.getReader();
char c[] = new char[1024];
StringBuilder bodyBuf = new StringBuilder();
int n;
while ((n = r.read(c)) >= 0) {
if (n>0) {
bodyBuf.append(c,0,n);
}
}
r.close();
if (date == null && props.getProperty("date")!=null) {
try {
date = dateFormat.parse(props.getProperty("date").trim());
} catch (ParseException e) {
// do not fail test just because a date could not be parsed
System.out.println("ignoring date parse exception (assigning 'now') for: "+props.getProperty("date"));
date = new Date(); // now
}
}
docData.clear();
docData.setName(name);
docData.setBody(bodyBuf.toString());
docData.setTitle(title);
docData.setProps(props);
docData.setDate(date);
return docData;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
Override
public void close() throws IOException {
source.close();
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
public synchronized void resetInputs() throws IOException {
source.printStatistics("docs");
// re-initiate since properties by round may have changed.
setConfig(config, source);
source.resetInputs();
numDocsCreated.set(0);
resetLeftovers();
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SingleDocSource.java
Override
public void close() throws IOException {}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/SingleDocSource.java
Override
public synchronized void resetInputs() throws IOException {
super.resetInputs();
docID = 0;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java
Override
public void close() throws IOException {
// TODO implement?
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java
Override
public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
File f = null;
String name = null;
synchronized (this) {
if (nextFile >= inputFiles.size()) {
// exhausted files, start a new round, unless forever set to false.
if (!forever) {
throw new NoMoreDataException();
}
nextFile = 0;
iteration++;
}
f = inputFiles.get(nextFile++);
name = f.getCanonicalPath() + "_" + iteration;
}
BufferedReader reader = new BufferedReader(new FileReader(f));
try {
// First line is the date, 3rd is the title, rest is body
String dateStr = reader.readLine();
reader.readLine();// skip an empty line
String title = reader.readLine();
reader.readLine();// skip an empty line
StringBuilder bodyBuf = new StringBuilder(1024);
String line = null;
while ((line = reader.readLine()) != null) {
bodyBuf.append(line).append(' ');
}
reader.close();
addBytes(f.length());
Date date = parseDate(dateStr.trim());
docData.clear();
docData.setName(name);
docData.setBody(bodyBuf.toString());
docData.setTitle(title);
docData.setDate(date);
return docData;
} finally {
reader.close();
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java
Override
public synchronized void resetInputs() throws IOException {
super.resetInputs();
nextFile = 0;
iteration = 0;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java
Override
public void close() throws IOException {
inputFiles = null;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java
Override
public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
File f = null;
String name = null;
synchronized (this) {
if (!inputFiles.hasNext()) {
// exhausted files, start a new round, unless forever set to false.
if (!forever) {
throw new NoMoreDataException();
}
inputFiles = new Iterator(dataDir);
iteration++;
}
f = inputFiles.next();
// System.err.println(f);
name = f.getCanonicalPath()+"_"+iteration;
}
BufferedReader reader = new BufferedReader(new FileReader(f));
String line = null;
//First line is the date, 3rd is the title, rest is body
String dateStr = reader.readLine();
reader.readLine();//skip an empty line
String title = reader.readLine();
reader.readLine();//skip an empty line
StringBuilder bodyBuf = new StringBuilder(1024);
while ((line = reader.readLine()) != null) {
bodyBuf.append(line).append(' ');
}
reader.close();
addBytes(f.length());
Date date = parseDate(dateStr);
docData.clear();
docData.setName(name);
docData.setBody(bodyBuf.toString());
docData.setTitle(title);
docData.setDate(date);
return docData;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java
Override
public synchronized void resetInputs() throws IOException {
super.resetInputs();
inputFiles = new Iterator(dataDir);
iteration = 0;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFBISParser.java
Override
public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
int mark = 0; // that much is skipped
// optionally skip some of the text, set date, title
Date date = null;
String title = null;
int h1 = docBuf.indexOf(HEADER);
if (h1>=0) {
int h2 = docBuf.indexOf(HEADER_END,h1);
mark = h2+HEADER_END_LENGTH;
// date...
String dateStr = extract(docBuf, DATE1, DATE1_END, h2, null);
if (dateStr != null) {
date = trecSrc.parseDate(dateStr);
}
// title...
title = extract(docBuf, TI, TI_END, h2, null);
}
docData.clear();
docData.setName(name);
docData.setDate(date);
docData.setTitle(title);
docData.setBody(stripTags(docBuf, mark).toString());
return docData;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecFTParser.java
Override
public DocData parse(DocData docData, String name, TrecContentSource trecSrc,
StringBuilder docBuf, ParsePathType pathType) throws IOException, InterruptedException {
int mark = 0; // that much is skipped
// date...
Date date = null;
String dateStr = extract(docBuf, DATE, DATE_END, -1, null);
if (dateStr != null) {
date = trecSrc.parseDate(dateStr);
}
// title...
String title = extract(docBuf, HEADLINE, HEADLINE_END, -1, null);
docData.clear();
docData.setName(name);
docData.setDate(date);
docData.setTitle(title);
docData.setBody(stripTags(docBuf, mark).toString());
return docData;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java
Override
public int doLogic() throws IOException {
TaxonomyReader taxoReader = getRunData().getTaxonomyReader();
getRunData().setTaxonomyReader(null);
if (taxoReader.getRefCount() != 1) {
System.out.println("WARNING: CloseTaxonomyReader: reference count is currently " + taxoReader.getRefCount());
}
taxoReader.close();
return 1;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java
Override
public int doLogic() throws IOException {
PerfRunData runData = getRunData();
DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(runData.getTaxonomyDir());
runData.setTaxonomyReader(taxoReader);
// We transfer reference to the run data
taxoReader.decRef();
return 1;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
Override
public int doLogic() throws IOException {
PerfRunData runData = getRunData();
Config config = runData.getConfig();
runData.setIndexWriter(configureWriter(config, runData, OpenMode.CREATE, null));
return 1;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
public static IndexWriter configureWriter(Config config, PerfRunData runData, OpenMode mode, IndexCommit commit) throws CorruptIndexException, LockObtainFailedException, IOException {
IndexWriterConfig iwc = createWriterConfig(config, runData, mode, commit);
String infoStreamVal = config.get("writer.info.stream", null);
if (infoStreamVal != null) {
if (infoStreamVal.equals("SystemOut")) {
iwc.setInfoStream(System.out);
} else if (infoStreamVal.equals("SystemErr")) {
iwc.setInfoStream(System.err);
} else {
File f = new File(infoStreamVal).getAbsoluteFile();
iwc.setInfoStream(new PrintStream(new BufferedOutputStream(new FileOutputStream(f))));
}
}
IndexWriter writer = new IndexWriter(runData.getDirectory(), iwc);
return writer;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseIndexTask.java
Override
public int doLogic() throws IOException {
IndexWriter iw = getRunData().getIndexWriter();
if (iw != null) {
// If infoStream was set to output to a file, close it.
InfoStream infoStream = iw.getConfig().getInfoStream();
if (infoStream != null) {
infoStream.close();
}
iw.close(doWait);
getRunData().setIndexWriter(null);
}
return 1;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java
Override
public int doLogic() throws IOException {
PerfRunData runData = getRunData();
runData.setTaxonomyWriter(new DirectoryTaxonomyWriter(runData.getTaxonomyDir()));
return 1;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java
Override
public int doLogic() throws IOException {
Directory dir = getRunData().getDirectory();
DirectoryReader r = null;
if (commitUserData != null) {
r = DirectoryReader.open(OpenReaderTask.findIndexCommit(dir, commitUserData));
} else {
r = DirectoryReader.open(dir);
}
getRunData().setIndexReader(r);
// We transfer reference to the run data
r.decRef();
return 1;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenReaderTask.java
public static IndexCommit findIndexCommit(Directory dir, String userData) throws IOException {
Collection<IndexCommit> commits = DirectoryReader.listCommits(dir);
for (final IndexCommit ic : commits) {
Map<String,String> map = ic.getUserData();
String ud = null;
if (map != null) {
ud = map.get(USER_DATA);
}
if (ud != null && ud.equals(userData)) {
return ic;
}
}
throw new IOException("index does not contain commit with userData: " + userData);
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/RollbackIndexTask.java
Override
public int doLogic() throws IOException {
IndexWriter iw = getRunData().getIndexWriter();
if (iw != null) {
// If infoStream was set to output to a file, close it.
InfoStream infoStream = iw.getConfig().getInfoStream();
if (infoStream != null) {
infoStream.close();
}
iw.rollback();
getRunData().setIndexWriter(null);
}
return 1;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java
Override
public int doLogic() throws IOException {
IOUtils.close(getRunData().getTaxonomyWriter());
getRunData().setTaxonomyWriter(null);
return 1;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java
Override
protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
if (fieldsToLoad == null) {
return ir.document(id);
} else {
DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(fieldsToLoad);
ir.document(id, visitor);
return visitor.getDocument();
}
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenIndexTask.java
Override
public int doLogic() throws IOException {
PerfRunData runData = getRunData();
Config config = runData.getConfig();
final IndexCommit ic;
if (commitUserData != null) {
ic = OpenReaderTask.findIndexCommit(runData.getDirectory(), commitUserData);
} else {
ic = null;
}
final IndexWriter writer = CreateIndexTask.configureWriter(config, runData, OpenMode.APPEND, ic);
runData.setIndexWriter(writer);
return 1;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java
protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
return ir.document(id);
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewAnalyzerTask.java
Override
public int doLogic() throws IOException {
String className = null;
try {
if (current >= analyzerClassNames.size()) {
current = 0;
}
className = analyzerClassNames.get(current++);
Analyzer analyzer = null;
if (null == className || 0 == className.length()) {
className = "org.apache.lucene.analysis.standard.StandardAnalyzer";
}
if (-1 == className.indexOf(".")) {
try {
// If no package, first attempt to instantiate a core analyzer
String coreClassName = "org.apache.lucene.analysis.core." + className;
analyzer = createAnalyzer(coreClassName);
className = coreClassName;
} catch (ClassNotFoundException e) {
// If not a core analyzer, try the base analysis package
className = "org.apache.lucene.analysis." + className;
analyzer = createAnalyzer(className);
}
} else {
if (className.startsWith("standard.")) {
className = "org.apache.lucene.analysis." + className;
}
analyzer = createAnalyzer(className);
}
getRunData().setAnalyzer(analyzer);
System.out.println("Changed Analyzer to: " + className);
} catch (Exception e) {
throw new RuntimeException("Error creating Analyzer: " + className, e);
}
return 1;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseReaderTask.java
Override
public int doLogic() throws IOException {
IndexReader reader = getRunData().getIndexReader();
getRunData().setIndexReader(null);
if (reader.getRefCount() != 1) {
System.out.println("WARNING: CloseReader: reference count is currently " + reader.getRefCount());
}
reader.decRef();
return 1;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReopenReaderTask.java
Override
public int doLogic() throws IOException {
DirectoryReader r = getRunData().getIndexReader();
DirectoryReader nr = DirectoryReader.openIfChanged(r);
if (nr != null) {
getRunData().setIndexReader(nr);
nr.decRef();
}
r.decRef();
return 1;
}
// in lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java
Override
public int doLogic() throws IOException {
PerfRunData runData = getRunData();
runData.setTaxonomyWriter(new DirectoryTaxonomyWriter(runData.getTaxonomyDir(), OpenMode.CREATE));
return 1;
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java
public void appendTo(Appendable out, char delimiter) throws IOException {
if (ncomponents == 0) {
return; // just append nothing...
}
for (int i = 0; i < ends[0]; i++) {
out.append(chars[i]);
}
for (int j = 1; j < ncomponents; j++) {
out.append(delimiter);
for (int i = ends[j - 1]; i < ends[j]; i++) {
out.append(chars[i]);
}
}
}
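// Usage sketch (not from the source): appendTo() writes the components separated by the
// given delimiter, so a path built from "a/b/c" is rendered back as "a/b/c".
static String renderPath() throws IOException {
  CategoryPath cp = new CategoryPath();
  cp.add("a/b/c", '/');              // same add(String, char) call used elsewhere in this file
  StringBuilder sb = new StringBuilder();
  cp.appendTo(sb, '/');              // sb now contains "a/b/c"
  return sb.toString();
}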
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java
public void appendTo(Appendable out, char delimiter, int prefixLen)
throws IOException {
if (prefixLen < 0 || prefixLen > ncomponents) {
prefixLen = ncomponents;
}
if (prefixLen == 0) {
return; // just append nothing...
}
for (int i = 0; i < ends[0]; i++) {
out.append(chars[i]);
}
for (int j = 1; j < prefixLen; j++) {
out.append(delimiter);
for (int i = ends[j - 1]; i < ends[j]; i++) {
out.append(chars[i]);
}
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java
public void appendTo(Appendable out, char delimiter, int start, int end)
throws IOException {
if (start < 0) {
start = 0;
}
if (end < 0 || end > ncomponents) {
end = ncomponents;
}
if (end <= start) {
return; // just append nothing...
}
for (int i = (start == 0 ? 0 : ends[start - 1]); i < ends[start]; i++) {
out.append(chars[i]);
}
for (int j = start + 1; j < end; j++) {
out.append(delimiter);
for (int i = ends[j - 1]; i < ends[j]; i++) {
out.append(chars[i]);
}
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java
public void serializeAppendTo(Appendable out) throws IOException {
// Note that we use the fact that ncomponents and ends[] are shorts,
// so we can write them as chars:
out.append((char) ncomponents);
if (ncomponents == 0) {
return;
}
for (int i = 0; i < ncomponents; i++) {
out.append((char) ends[i]);
}
int usedchars = ends[ncomponents - 1];
for (int i = 0; i < usedchars; i++) {
out.append(chars[i]);
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java
public void serializeAppendTo(int prefixLen, Appendable out)
throws IOException {
if (prefixLen < 0 || prefixLen > ncomponents) {
prefixLen = ncomponents;
}
// Note that we use the fact that ncomponents and ends[] are shorts,
// so we can write them as chars:
out.append((char) prefixLen);
if (prefixLen == 0) {
return;
}
for (int i = 0; i < prefixLen; i++) {
out.append((char) ends[i]);
}
int usedchars = ends[prefixLen - 1];
for (int i = 0; i < usedchars; i++) {
out.append(chars[i]);
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java
public void serializeToStreamWriter(OutputStreamWriter osw)
throws IOException {
osw.write(this.ncomponents);
if (this.ncomponents <= 0) {
return;
}
for (int j = 0; j < this.ncomponents; j++) {
osw.write(this.ends[j]);
}
osw.write(this.chars, 0, this.ends[this.ncomponents - 1]);
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java
public void deserializeFromStreamReader(InputStreamReader isr)
throws IOException {
this.ncomponents = (short) isr.read();
if (this.ncomponents <= 0) {
return;
}
if (this.ends == null || this.ends.length < this.ncomponents) {
this.ends = new short[this.ncomponents];
}
for (int j = 0; j < this.ncomponents; j++) {
this.ends[j] = (short) isr.read();
}
if (this.chars == null
|| this.ends[this.ncomponents - 1] > chars.length) {
this.chars = new char[this.ends[this.ncomponents - 1]];
}
isr.read(this.chars, 0, this.ends[this.ncomponents - 1]);
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java
private void writeObject(java.io.ObjectOutputStream out)
throws IOException {
OutputStreamWriter osw = new OutputStreamWriter(out, "UTF-8");
this.serializeToStreamWriter(osw);
osw.flush();
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/CategoryPath.java
private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException {
InputStreamReader isr = new InputStreamReader(in, "UTF-8");
this.deserializeFromStreamReader(isr);
}
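// Round-trip sketch (not from the source) for the stream (de)serialization pair above: the
// path is written through an OutputStreamWriter and read back through an InputStreamReader,
// exactly as writeObject()/readObject() do with "UTF-8".
static CategoryPath copyViaStreams(CategoryPath src) throws IOException {
  ByteArrayOutputStream bytes = new ByteArrayOutputStream();
  OutputStreamWriter osw = new OutputStreamWriter(bytes, "UTF-8");
  src.serializeToStreamWriter(osw);
  osw.flush();
  CategoryPath copy = new CategoryPath();
  copy.deserializeFromStreamReader(
      new InputStreamReader(new ByteArrayInputStream(bytes.toByteArray()), "UTF-8"));
  return copy;
}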
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
private static Map<String, String> readCommitData(Directory dir) throws IOException {
SegmentInfos infos = new SegmentInfos();
infos.read(dir);
return infos.getUserData();
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
public static void unlock(Directory directory) throws IOException {
IndexWriter.unlock(directory);
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
protected IndexWriter openIndexWriter(Directory directory, IndexWriterConfig config)
throws IOException {
return new IndexWriter(directory, config);
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
private synchronized void openInternalReader() throws IOException {
// verify that the taxo-writer hasn't been closed on us. the method is
// synchronized since it may be called from a non sync'ed block, and it
// needs to protect against close() happening concurrently.
ensureOpen();
assert reader == null : "a reader is already open !";
reader = DirectoryReader.open(indexWriter, false);
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
Override
public synchronized void close() throws CorruptIndexException, IOException {
if (indexWriter != null) {
indexWriter.commit(combinedCommitData(null));
doClose();
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
private void doClose() throws CorruptIndexException, IOException {
indexWriter.close();
indexWriter = null;
closeResources();
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
protected synchronized void closeResources() throws IOException {
if (reader != null) {
reader.close();
reader = null;
}
if (cache != null) {
cache.close();
cache = null;
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
protected int findCategory(CategoryPath categoryPath) throws IOException {
// If we can find the category in our cache, we can return the
// response directly from it:
int res = cache.get(categoryPath);
if (res >= 0) {
return res;
}
// If we know that the cache is complete, i.e., contains every category
// which exists, we can return -1 immediately. However, if the cache is
// not complete, we need to check the disk.
if (cacheIsComplete) {
return -1;
}
cacheMisses++;
// After a few cache misses, it makes sense to read all the categories
// from disk and into the cache. The reason not to do this on the first
// cache miss (or even when opening the writer) is that it will
// significantly slow down the case when a taxonomy is opened just to
// add one category. The idea of only spending a long time on reading
// after enough time was spent on cache misses is known as an "online
// algorithm".
if (perhapsFillCache()) {
return cache.get(categoryPath);
}
// We need to get an answer from the on-disk index. If a reader
// is not yet open, do it now:
if (reader == null) {
openInternalReader();
}
int base = 0;
int doc = -1;
for (AtomicReader r : reader.getSequentialSubReaders()) {
DocsEnum docs = r.termDocsEnum(null, Consts.FULL,
new BytesRef(categoryPath.toString(delimiter)), false);
if (docs != null) {
doc = docs.nextDoc() + base;
break;
}
base += r.maxDoc(); // we don't have deletions, so it's ok to call maxDoc
}
// Note: we do NOT add to the cache the fact that the category
// does not exist. The reason is that our only use for this
// method is just before we actually add this category. If
// in the future this usage changes, we should consider caching
// the fact that the category is not in the taxonomy.
if (doc > 0) {
addToCache(categoryPath, doc);
}
return doc;
}
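// A simplified, standalone illustration (not from the source) of the "online algorithm"
// described in the comments above: tolerate a few cache misses before paying the one-time
// cost of bulk-loading every category, so opening a taxonomy just to add one category stays
// cheap. The names cache, misses, MISSES_UNTIL_FILL and bulkLoadFromDisk() are hypothetical
// stand-ins for the fields and the perhapsFillCache() call shown in this class.
Integer cachedLookup(String key) throws IOException {
  Integer ordinal = cache.get(key);
  if (ordinal != null) {
    return ordinal;                       // cache hit
  }
  if (++misses >= MISSES_UNTIL_FILL && bulkLoadFromDisk()) {
    return cache.get(key);                // cache is now complete; null here means "not present"
  }
  return null;                            // fall back to a disk lookup, as findCategory() does
}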
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
private int findCategory(CategoryPath categoryPath, int prefixLen)
throws IOException {
int res = cache.get(categoryPath, prefixLen);
if (res >= 0) {
return res;
}
if (cacheIsComplete) {
return -1;
}
cacheMisses++;
if (perhapsFillCache()) {
return cache.get(categoryPath, prefixLen);
}
if (reader == null) {
openInternalReader();
}
int base = 0;
int doc = -1;
for (AtomicReader r : reader.getSequentialSubReaders()) {
DocsEnum docs = r.termDocsEnum(null, Consts.FULL,
new BytesRef(categoryPath.toString(delimiter, prefixLen)), false);
if (docs != null) {
doc = docs.nextDoc() + base;
break;
}
base += r.maxDoc(); // we don't have deletions, so it's ok to call maxDoc
}
if (doc > 0) {
addToCache(categoryPath, prefixLen, doc);
}
return doc;
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
Override
public int addCategory(CategoryPath categoryPath) throws IOException {
ensureOpen();
// If the category is already in the cache and/or the taxonomy, we
// should return its existing ordinal
int res = findCategory(categoryPath);
if (res < 0) {
// the category is neither in the cache nor in the index - following code
// cannot be executed in parallel.
synchronized (this) {
res = findCategory(categoryPath);
if (res < 0) {
// This is a new category, and we need to insert it into the index
// (and the cache). Actually, we might also need to add some of
// the category's ancestors before we can add the category itself
// (while keeping the invariant that a parent is always added to
// the taxonomy before its child). internalAddCategory() does all
// this recursively
res = internalAddCategory(categoryPath, categoryPath.length());
}
}
}
return res;
}
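// Usage sketch (not from the source): addCategory() returns the ordinal of the given path,
// creating the category and any missing ancestors first, so repeated calls with an existing
// path simply return its existing ordinal. The "Authors" prefix is only an example.
static int addAuthorCategory(DirectoryTaxonomyWriter taxoWriter, String author) throws IOException {
  CategoryPath cp = new CategoryPath();
  cp.add("Authors/" + author, '/');      // two components: "Authors" and the author name
  return taxoWriter.addCategory(cp);     // adds "Authors" too if it is not yet in the taxonomy
}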
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
private int internalAddCategory(CategoryPath categoryPath, int length)
throws IOException {
// Find our parent's ordinal (recursively adding the parent category
// to the taxonomy if it's not already there). Then add the parent
// ordinal as payloads (rather than a stored field; payloads can be
// more efficiently read into memory in bulk by LuceneTaxonomyReader)
int parent;
if (length > 1) {
parent = findCategory(categoryPath, length - 1);
if (parent < 0) {
parent = internalAddCategory(categoryPath, length - 1);
}
} else if (length == 1) {
parent = TaxonomyReader.ROOT_ORDINAL;
} else {
parent = TaxonomyReader.INVALID_ORDINAL;
}
int id = addCategoryDocument(categoryPath, length, parent);
return id;
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
private int addCategoryDocument(CategoryPath categoryPath, int length,
int parent) throws IOException {
// Before Lucene 2.9, position increments >=0 were supported, so we
// added 1 to parent to allow the parent -1 (the parent of the root).
// Unfortunately, starting with Lucene 2.9, after LUCENE-1542, this is
// no longer enough, since 0 is not encoded consistently either (see
// comment in SinglePositionTokenStream). But because we must be
// backward-compatible with existing indexes, we can't just fix what
// we write here (e.g., to write parent+2), and need to do a workaround
// in the reader (which knows that anyway only category 0 has a parent
// -1).
parentStream.set(parent + 1);
Document d = new Document();
d.add(parentStreamField);
fullPathField.setStringValue(categoryPath.toString(delimiter, length));
d.add(fullPathField);
// Note that we do not pass an Analyzer here because the fields that are
// added to the Document are untokenized or contain their own TokenStream.
// Therefore the IndexWriter's Analyzer has no effect.
indexWriter.addDocument(d);
int id = nextID++;
addToCache(categoryPath, length, id);
// also add to the parent array
getParentArray().add(id, parent);
return id;
}
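// Worked example of the parent encoding described above: the writer emits a position
// increment of parent + 1, and the positions read back are one less than that increment,
// so the reader recovers the original parent ordinal (see ParentArray.refresh below):
//   parent =  5  ->  increment written = 6  ->  position read back = 5
//   parent =  0  ->  increment written = 1  ->  position read back = 0
//   parent = -1  ->  increment written = 0, which is not encoded reliably since LUCENE-1542,
//                    so the reader hard-codes ordinal 0's parent to -1 instead.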
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
Override
public boolean incrementToken() throws IOException {
if (returned) {
return false;
}
returned = true;
return true;
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
private void addToCache(CategoryPath categoryPath, int id) throws IOException {
if (cache.put(categoryPath, id)) {
// If cache.put() returned true, it means the cache was limited in
// size, became full, so parts of it had to be cleared.
// Unfortunately we don't know which part was cleared - it is
// possible that a relatively-new category that hasn't yet been
// committed to disk (and therefore isn't yet visible in our
// "reader") was deleted from the cache, and therefore we must
// now refresh the reader.
// Because this is a slow operation, cache implementations are
// expected not to delete entries one-by-one but rather in bulk
// (LruTaxonomyWriterCache removes the 2/3rd oldest entries).
refreshInternalReader();
cacheIsComplete = false;
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
private void addToCache(CategoryPath categoryPath, int prefixLen, int id)
throws IOException {
if (cache.put(categoryPath, prefixLen, id)) {
refreshInternalReader();
cacheIsComplete = false;
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
private synchronized void refreshInternalReader() throws IOException {
if (reader != null) {
DirectoryReader r2 = DirectoryReader.openIfChanged(reader);
if (r2 != null) {
reader.close();
reader = r2;
}
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
Override
public synchronized void commit() throws CorruptIndexException, IOException {
ensureOpen();
indexWriter.commit(combinedCommitData(null));
refreshInternalReader();
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
Override
public synchronized void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
ensureOpen();
indexWriter.commit(combinedCommitData(commitUserData));
refreshInternalReader();
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
Override
public synchronized void prepareCommit() throws CorruptIndexException, IOException {
ensureOpen();
indexWriter.prepareCommit(combinedCommitData(null));
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
Override
public synchronized void prepareCommit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
ensureOpen();
indexWriter.prepareCommit(combinedCommitData(commitUserData));
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
private boolean perhapsFillCache() throws IOException {
// Note: we assume that we're only called when cacheIsComplete==false.
// TODO (Facet): parametrize this criterion:
if (cacheMisses < cacheMissesUntilFill) {
return false;
}
// If the cache was already filled (or we decided not to fill it because
// there was no room), there is no sense in trying it again.
if (alreadyCalledFillCache) {
return false;
}
alreadyCalledFillCache = true;
// TODO (Facet): we should probably completely clear the cache before starting
// to read it?
if (reader == null) {
openInternalReader();
}
if (!cache.hasRoom(reader.numDocs())) {
return false;
}
CategoryPath cp = new CategoryPath();
TermsEnum termsEnum = null;
DocsEnum docsEnum = null;
int base = 0;
for (AtomicReader r : reader.getSequentialSubReaders()) {
Terms terms = r.terms(Consts.FULL);
if (terms != null) { // cannot really happen, but be on the safe side
termsEnum = terms.iterator(termsEnum);
while (termsEnum.next() != null) {
BytesRef t = termsEnum.term();
// Since we guarantee uniqueness of categories, each term has exactly
// one document. Also, since we do not allow removing categories (and
// hence documents), there are no deletions in the index. Therefore, it
// is sufficient to call next(), and then doc(), exactly once with no
// 'validation' checks.
cp.clear();
cp.add(t.utf8ToString(), delimiter);
docsEnum = termsEnum.docs(null, docsEnum, false);
cache.put(cp, docsEnum.nextDoc() + base);
}
}
base += r.maxDoc(); // we don't have any deletions, so we're ok
}
/*Terms terms = MultiFields.getTerms(reader, Consts.FULL);
// The check is done here to avoid checking it on every iteration of the
// below loop. A null term will be returned if there are no terms in the
// lexicon, or after the Consts.FULL term. However while the loop is
// executed we're safe, because we only iterate as long as there are next()
// terms.
if (terms != null) {
TermsEnum termsEnum = terms.iterator(null);
Bits liveDocs = MultiFields.getLiveDocs(reader);
DocsEnum docsEnum = null;
while (termsEnum.next() != null) {
BytesRef t = termsEnum.term();
// Since we guarantee uniqueness of categories, each term has exactly
// one document. Also, since we do not allow removing categories (and
// hence documents), there are no deletions in the index. Therefore, it
// is sufficient to call next(), and then doc(), exactly once with no
// 'validation' checks.
docsEnum = termsEnum.docs(liveDocs, docsEnum, false);
docsEnum.nextDoc();
cp.clear();
cp.add(t.utf8ToString(), delimiter);
cache.put(cp, docsEnum.docID());
}
}*/
cacheIsComplete = true;
// No sense to keep the reader open - we will not need to read from it
// if everything is in the cache.
reader.close();
reader = null;
return true;
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
private synchronized ParentArray getParentArray() throws IOException {
if (parentArray==null) {
if (reader == null) {
openInternalReader();
}
parentArray = new ParentArray();
parentArray.refresh(reader);
}
return parentArray;
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
Override
public int getParent(int ordinal) throws IOException {
ensureOpen();
// Note: the following if() just enforces that a user can never ask
// for the parent of a nonexistent category - even if the parent array
// was allocated bigger than it really needs to be.
if (ordinal >= getSize()) {
throw new ArrayIndexOutOfBoundsException();
}
return getParentArray().getArray()[ordinal];
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
public void addTaxonomy(Directory taxoDir, OrdinalMap map) throws IOException {
ensureOpen();
DirectoryReader r = DirectoryReader.open(taxoDir);
try {
final int size = r.numDocs();
final OrdinalMap ordinalMap = map;
ordinalMap.setSize(size);
CategoryPath cp = new CategoryPath();
int base = 0;
TermsEnum te = null;
DocsEnum docs = null;
for (AtomicReader ar : r.getSequentialSubReaders()) {
Terms terms = ar.terms(Consts.FULL);
te = terms.iterator(te);
while (te.next() != null) {
String value = te.term().utf8ToString();
cp.clear();
cp.add(value, Consts.DEFAULT_DELIMITER);
int ordinal = findCategory(cp);
if (ordinal < 0) {
// NOTE: call addCategory so that it works well in a multi-threaded
// environment, in case e.g. a thread just added the category, after
// the findCategory() call above failed to find it.
ordinal = addCategory(cp);
}
docs = te.docs(null, docs, false);
ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
}
base += ar.maxDoc(); // no deletions, so we're ok
}
ordinalMap.addDone();
} finally {
r.close();
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
Override
public void addMapping(int origOrdinal, int newOrdinal) throws IOException {
out.writeInt(origOrdinal);
out.writeInt(newOrdinal);
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
Override
public void setSize(int taxonomySize) throws IOException {
out.writeInt(taxonomySize);
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
Override
public void addDone() throws IOException {
if (out!=null) {
out.close();
out = null;
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
Override
public int[] getMap() throws IOException {
if (map!=null) {
return map;
}
addDone(); // in case this wasn't previously called
DataInputStream in = new DataInputStream(new BufferedInputStream(
new FileInputStream(tmpfile)));
map = new int[in.readInt()];
// NOTE: The current code assumes here that the map is complete,
// i.e., every ordinal gets one and exactly one value. Otherwise,
// we may run into an EOF here, or vice versa, not read everything.
for (int i=0; i<map.length; i++) {
int origordinal = in.readInt();
int newordinal = in.readInt();
map[origordinal] = newordinal;
}
in.close();
// Delete the temporary file, which is no longer needed.
if (!tmpfile.delete()) {
tmpfile.deleteOnExit();
}
return map;
}
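// Usage sketch (not from the source): addTaxonomy() fills the supplied OrdinalMap with
// "ordinal in the added taxonomy -> ordinal in this taxonomy" pairs via the setSize(),
// addMapping() and addDone() calls shown above; getMap() then exposes the complete mapping
// for rewriting facet ordinals of the merged documents.
static int[] mergeTaxonomy(DirectoryTaxonomyWriter destWriter, Directory srcTaxoDir,
                           OrdinalMap map) throws IOException {
  destWriter.addTaxonomy(srcTaxoDir, map);  // drives the OrdinalMap callbacks shown above
  return map.getMap();                      // result[srcOrdinal] == ordinal in destWriter
}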
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
Override
public synchronized void rollback() throws IOException {
ensureOpen();
indexWriter.rollback();
doClose();
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java
Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
fullPath = value;
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/Consts.java
Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
return fullPath == null ? Status.YES : Status.STOP;
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParentArray.java
void refresh(IndexReader indexReader) throws IOException {
// Note that it is not necessary for us to obtain the read lock.
// The reason is that we are only called from refresh() (precluding
// another concurrent writer) or from the constructor (when no method
// could be running).
// The write lock is also not held during the following code, meaning
// that reads *can* happen while this code is running. The "volatile"
// property of the prefetchParentOrdinal and prefetchDepth array
// references ensures the correct visibility property of the assignment
// but other than that, we do *not* guarantee that a reader will not
// use an old version of one of these arrays (or both) while a refresh
// is going on. But we find this acceptable - until a refresh has
// finished, the reader should not expect to see new information
// (and the old information is the same in the old and new versions).
int first;
int num = indexReader.maxDoc();
if (prefetchParentOrdinal==null) {
prefetchParentOrdinal = new int[num];
// Starting Lucene 2.9, following the change LUCENE-1542, we can
// no longer reliably read the parent "-1" (see comment in
// LuceneTaxonomyWriter.SinglePositionTokenStream). We have no way
// to fix this in indexing without breaking backward-compatibility
// with existing indexes, so what we'll do instead is just
// hard-code the parent of ordinal 0 to be -1, and assume (as is
// indeed the case) that no other parent can be -1.
if (num>0) {
prefetchParentOrdinal[0] = TaxonomyReader.INVALID_ORDINAL;
}
first = 1;
} else {
first = prefetchParentOrdinal.length;
if (first==num) {
return; // nothing to do - no category was added
}
// In Java 6, we could just do Arrays.copyOf()...
int[] newarray = new int[num];
System.arraycopy(prefetchParentOrdinal, 0, newarray, 0,
prefetchParentOrdinal.length);
prefetchParentOrdinal = newarray;
}
// Read the new part of the parents array from the positions:
// TODO (Facet): avoid Multi*?
Bits liveDocs = MultiFields.getLiveDocs(indexReader);
DocsAndPositionsEnum positions = MultiFields.getTermPositionsEnum(indexReader, liveDocs,
Consts.FIELD_PAYLOADS, new BytesRef(Consts.PAYLOAD_PARENT),
false);
if ((positions == null || positions.advance(first) == DocIdSetIterator.NO_MORE_DOCS) && first < num) {
throw new CorruptIndexException("Missing parent data for category " + first);
}
for (int i=first; i<num; i++) {
// Note that we know positions.doc() >= i (this is an
// invariant kept throughout this loop)
if (positions.docID()==i) {
if (positions.freq() == 0) { // shouldn't happen
throw new CorruptIndexException(
"Missing parent data for category "+i);
}
// TODO (Facet): keep a local (non-volatile) copy of the prefetchParentOrdinal
// reference, because access to volatile reference is slower (?).
// Note: The positions we get here are one less than the position
// increment we added originally, so we get here the right numbers:
prefetchParentOrdinal[i] = positions.nextPosition();
if (positions.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
if ( i+1 < num ) {
throw new CorruptIndexException(
"Missing parent data for category "+(i+1));
}
break;
}
} else { // this shouldn't happen
throw new CorruptIndexException(
"Missing parent data for category "+i);
}
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParentArray.java
void add(int ordinal, int parentOrdinal) throws IOException {
if (ordinal >= prefetchParentOrdinal.length) {
// grow the array, if necessary.
// In Java 6, we could just do Arrays.copyOf()...
int[] newarray = new int[ordinal*2+1];
System.arraycopy(prefetchParentOrdinal, 0, newarray, 0,
prefetchParentOrdinal.length);
prefetchParentOrdinal = newarray;
}
prefetchParentOrdinal[ordinal] = parentOrdinal;
}
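// Hedged aside: the grow-and-copy done above (new int[] + System.arraycopy) predates
// Java 6; on Java 6+ the same operation is a one-liner, as in this illustrative helper.
import java.util.Arrays;

class GrowSketch {
  static int[] growTo(int[] src, int newLength) {
    // Allocates the larger array and copies src into its prefix, exactly like the
    // manual System.arraycopy pattern used in refresh() and add() above.
    return Arrays.copyOf(src, newLength);
  }
}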
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
protected DirectoryReader openIndexReader(Directory directory) throws CorruptIndexException, IOException {
return DirectoryReader.open(directory);
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
public int getOrdinal(CategoryPath categoryPath) throws IOException {
ensureOpen();
if (categoryPath.length()==0) {
return ROOT_ORDINAL;
}
String path = categoryPath.toString(delimiter);
// First try to find the answer in the LRU cache:
synchronized(ordinalCache) {
Integer res = ordinalCache.get(path);
if (res!=null) {
return res.intValue();
}
}
// If we're still here, we have a cache miss. We need to fetch the
// value from disk, and then also put it in the cache:
int ret = TaxonomyReader.INVALID_ORDINAL;
try {
indexReaderLock.readLock().lock();
// TODO (Facet): avoid Multi*?
Bits liveDocs = MultiFields.getLiveDocs(indexReader);
DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, liveDocs, Consts.FULL, new BytesRef(path), false);
if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
ret = docs.docID();
}
} finally {
indexReaderLock.readLock().unlock();
}
// Put the new value in the cache. Note that it is possible that while
// we were doing the above fetching (without the cache locked), some
// other thread already added the same category to the cache. We do
// not care about this possibility, as LRUCache replaces previous values
// of the same keys (it doesn't store duplicates).
synchronized(ordinalCache) {
// GB: new Integer(int); creates a new object each and every time.
// Integer.valueOf(int) might not (See JavaDoc).
ordinalCache.put(path, Integer.valueOf(ret));
}
return ret;
}
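// A minimal sketch (illustrative, not from the source tree) of the cache discipline used by
// getOrdinal() and getLabel() above: probe the cache under its monitor, do the expensive
// lookup outside the monitor, then publish the result under the monitor again, accepting
// that two threads may race to insert the same key. The LRU map below is a hypothetical
// stand-in for the reader's ordinalCache; names are illustrative only.
import java.util.LinkedHashMap;
import java.util.Map;

class LruLookupSketch {
  private final Map<String, Integer> cache =
      new LinkedHashMap<String, Integer>(16, 0.75f, true) {
        @Override
        protected boolean removeEldestEntry(Map.Entry<String, Integer> eldest) {
          return size() > 1000;            // illustrative capacity bound
        }
      };

  int lookup(String key) {
    synchronized (cache) {                 // fast path: cache hit
      Integer cached = cache.get(key);
      if (cached != null) {
        return cached.intValue();
      }
    }
    int value = expensiveLookup(key);      // slow path runs unlocked
    synchronized (cache) {                 // publish; a duplicate put is harmless
      cache.put(key, Integer.valueOf(value));
    }
    return value;
  }

  private int expensiveLookup(String key) {
    return key.length();                   // placeholder for the disk lookup
  }
}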
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
public CategoryPath getPath(int ordinal) throws CorruptIndexException, IOException {
ensureOpen();
// TODO (Facet): Currently, the LRU cache we use (getCategoryCache) holds
// strings with delimiters, not CategoryPath objects, so even if
// we have a cache hit, we need to process the string and build a new
// CategoryPath object every time. What is preventing us from putting
// the actual CategoryPath object in the cache is the fact that these
// objects are mutable. So we should create an immutable (read-only)
// interface that CategoryPath implements, and this method should
// return this interface, not the writable CategoryPath.
String label = getLabel(ordinal);
if (label==null) {
return null;
}
return new CategoryPath(label, delimiter);
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
public boolean getPath(int ordinal, CategoryPath result) throws CorruptIndexException, IOException {
ensureOpen();
String label = getLabel(ordinal);
if (label==null) {
return false;
}
result.clear();
result.add(label, delimiter);
return true;
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
private String getLabel(int catID) throws CorruptIndexException, IOException {
ensureOpen();
// First try to find the answer in the LRU cache. It is very
// unfortunate that we need to allocate an Integer object here -
// it would have been better if we used a hash table specifically
// designed for int keys...
// GB: new Integer(int); creates a new object each and every time.
// Integer.valueOf(int) might not (See JavaDoc).
Integer catIDInteger = Integer.valueOf(catID);
synchronized(categoryCache) {
String res = categoryCache.get(catIDInteger);
if (res!=null) {
return res;
}
}
// If we're still here, we have a cache miss. We need to fetch the
// value from disk, and then also put it in the cache:
String ret;
try {
indexReaderLock.readLock().lock();
// The taxonomy API dictates that if we get an invalid category
// ID, we should return null. If we don't check this here, we
// can get some sort of exception from the document() call below.
// NOTE: Currently, we *do not* cache this return value; There
// isn't much point to do so, because checking the validity of
// the docid doesn't require disk access - just comparing with
// the number indexReader.maxDoc().
if (catID<0 || catID>=indexReader.maxDoc()) {
return null;
}
final LoadFullPathOnly loader = new LoadFullPathOnly();
indexReader.document(catID, loader);
ret = loader.getFullPath();
} finally {
indexReaderLock.readLock().unlock();
}
// Put the new value in the cache. Note that it is possible that while
// we were doing the above fetching (without the cache locked), some
// other thread already added the same category to the cache. We do
// not care about this possibility, as LRUCache replaces previous
// values of the same keys (it doesn't store duplicates).
synchronized (categoryCache) {
categoryCache.put(catIDInteger, ret);
}
return ret;
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
public synchronized boolean refresh() throws IOException, InconsistentTaxonomyException {
ensureOpen();
/*
* Since refresh() can be a lengthy operation, it is very important that we
* avoid locking out all readers for its duration. This is why we don't hold
* the indexReaderLock write lock for the entire duration of this method. In
* fact, it is enough to hold it only during a single assignment! Other
* comments in this method will explain this.
*/
// note that the lengthy operation indexReader.reopen() does not
// modify the reader, so we can do it without holding a lock. We can
// safely read indexReader without holding the write lock, because
// no other thread can be writing at this time (this method is the
// only possible writer, and it is "synchronized" to avoid this case).
DirectoryReader r2 = DirectoryReader.openIfChanged(indexReader);
if (r2 == null) {
return false; // no changes, nothing to do
}
// validate that a refresh is valid at this point, i.e. that the taxonomy
// was not recreated since this reader was last opened or refreshed.
String t1 = indexReader.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
String t2 = r2.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
if (t1==null) {
if (t2!=null) {
r2.close();
throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2);
}
} else if (!t1.equals(t2)) {
r2.close();
throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2+" != "+t1);
}
IndexReader oldreader = indexReader;
// We can close the old searcher, but need to synchronize this
// so that we don't close it while another routine is reading from it.
indexReaderLock.writeLock().lock();
indexReader = r2;
indexReaderLock.writeLock().unlock();
// We can close the old reader, but need to be certain that we
// don't close it while another method is reading from it.
// Luckily, we can be certain of that even without putting the
// oldreader.close() in the locked section. The reason is that
// after lock() succeeded above, we know that all existing readers
// had finished (this is what a read-write lock ensures). New
// readers, starting after the unlock() we just did, already got
// the new indexReader we set above. So nobody can be possibly
// using the old indexReader, and we can close it:
oldreader.close();
// We prefetch some of the arrays to make requests much faster.
// Let's refresh these prefetched arrays; this refresh is made more
// efficient by assuming that it is enough to read the values for new
// categories (old categories could not have been changed or deleted).
// Note that this is done without the write lock being held,
// which means that it is possible that during a refresh(), a
// reader will have some methods (like getOrdinal and getCategory)
// return fresh information, while getParent()
// (only to be prefetched now) still return older information.
// We consider this to be acceptable. The important thing,
// however, is that refreshPrefetchArrays() itself writes to
// the arrays in a correct manner (see discussion there)
parentArray.refresh(indexReader);
// Remove any INVALID_ORDINAL values from the ordinal cache,
// because it is possible those are now answered by the new data!
Iterator<Entry<String, Integer>> i = ordinalCache.entrySet().iterator();
while (i.hasNext()) {
Entry<String, Integer> e = i.next();
if (e.getValue().intValue() == INVALID_ORDINAL) {
i.remove();
}
}
return true;
}
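// A minimal sketch (assumed names, not from the source tree) of the swap pattern refresh()
// relies on: reopen outside any lock, hold the write lock only for the reference swap, then
// close the old value once new readers are guaranteed to see the fresh one.
import java.util.concurrent.locks.ReentrantReadWriteLock;

class SwapUnderWriteLockSketch<T> {
  private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
  private volatile T current;

  SwapUnderWriteLockSketch(T initial) {
    this.current = initial;
  }

  T swap(T fresh) {
    T old;
    lock.writeLock().lock();
    try {
      old = current;
      current = fresh;   // the only statement that needs exclusive access
    } finally {
      lock.writeLock().unlock();
    }
    return old;          // caller may now close/release the old value safely
  }
}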
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
public void close() throws IOException {
if (!closed) {
synchronized (this) {
if (!closed) {
decRef();
closed = true;
}
}
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
private void doClose() throws IOException {
indexReader.close();
closed = true;
parentArray = null;
childrenArrays = null;
categoryCache.clear();
ordinalCache.clear();
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
public Map<String, String> getCommitUserData() throws IOException {
ensureOpen();
return indexReader.getIndexCommit().getUserData();
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
public void decRef() throws IOException {
ensureOpen();
final int rc = refCount.decrementAndGet();
if (rc == 0) {
boolean success = false;
try {
doClose();
success = true;
} finally {
if (!success) {
// Put reference back on failure
refCount.incrementAndGet();
}
}
} else if (rc < 0) {
throw new IllegalStateException("too many decRef calls: refCount is " + rc + " after decrement");
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CompactLabelToOrdinal.java
static CompactLabelToOrdinal open(File file, float loadFactor,
int numHashArrays) throws IOException {
/**
* Part of the file is the labelRepository, which needs to be rehashed
* and label offsets re-added to the object. I am unsure as to why we
* can't just store these off in the file as well, but in keeping with
* the spirit of the original code, I did it this way. (ssuppe)
*/
CompactLabelToOrdinal l2o = new CompactLabelToOrdinal();
l2o.loadFactor = loadFactor;
l2o.hashArrays = new HashArray[numHashArrays];
DataInputStream dis = null;
try {
dis = new DataInputStream(new BufferedInputStream(
new FileInputStream(file)));
// TaxoReader needs to load the "counter" or occupancy (L2O) to know
// the next unique facet. we used to load the delimiter too, but
// never used it.
l2o.counter = dis.readInt();
l2o.capacity = determineCapacity((int) Math.pow(2,
l2o.hashArrays.length), l2o.counter);
l2o.init();
// now read the chars
l2o.labelRepository = CharBlockArray.open(dis);
l2o.collisionMap = new CollisionMap(l2o.labelRepository);
// Calculate hash on the fly based on how CategoryPath hashes
// itself. Maybe in the future we can call some static methods
// in CategoryPath so that this doesn't break again? I don't like
// having code in two different places...
int cid = 0;
// Skip the initial offset, it's the CategoryPath(0,0), which isn't
// a hashed value.
int offset = 1;
int lastStartOffset = offset;
// This loop really relies on a well-formed input (assumes pretty blindly
// that array offsets will work). Since the initial file is machine
// generated, I think this should be OK.
while (offset < l2o.labelRepository.length()) {
// First component is numcomponents, so we initialize the hash
// to this
int ncomponents = l2o.labelRepository.charAt(offset++);
int hash = ncomponents;
// If ncomponents is 0, then we are done?
if (ncomponents != 0) {
// usedchars is always the last member of the 'ends' array
// in serialization. Rather than rebuild the entire array,
// assign usedchars to the last value we read in. This will
// be slightly more memory efficient.
int usedchars = 0;
for (int i = 0; i < ncomponents; i++) {
usedchars = l2o.labelRepository.charAt(offset++);
hash = hash * 31 + usedchars;
}
// Hash the usedchars for this label
for (int i = 0; i < usedchars; i++) {
hash = hash * 31 + l2o.labelRepository.charAt(offset++);
}
}
// Now that we've hashed the components of the label, do the
// final part of the hash algorithm.
hash = hash ^ ((hash >>> 20) ^ (hash >>> 12));
hash = hash ^ (hash >>> 7) ^ (hash >>> 4);
// Add the label, and let's keep going
l2o.addLabelOffset(hash, cid, lastStartOffset);
cid++;
lastStartOffset = offset;
}
} catch (ClassNotFoundException cnfe) {
throw new IOException("Invalid file format. Cannot deserialize.");
} finally {
if (dis != null) {
dis.close();
}
}
l2o.threshold = (int) (l2o.loadFactor * l2o.capacity);
return l2o;
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CompactLabelToOrdinal.java
void flush(File file) throws IOException {
FileOutputStream fos = new FileOutputStream(file);
try {
BufferedOutputStream os = new BufferedOutputStream(fos);
DataOutputStream dos = new DataOutputStream(os);
dos.writeInt(this.counter);
// write the labelRepository
this.labelRepository.flush(dos);
// Closes the data output stream
dos.close();
} finally {
fos.close();
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CharBlockArray.java
void flush(OutputStream out) throws IOException {
ObjectOutputStream oos = null;
try {
oos = new ObjectOutputStream(out);
oos.writeObject(this);
oos.flush();
} finally {
if (oos != null) {
oos.close();
}
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/taxonomy/writercache/cl2o/CharBlockArray.java
public static CharBlockArray open(InputStream in) throws IOException, ClassNotFoundException {
ObjectInputStream ois = null;
try {
ois = new ObjectInputStream(in);
CharBlockArray a = (CharBlockArray) ois.readObject();
return a;
} finally {
if (ois != null) {
ois.close();
}
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/sampling/RepeatableSampler.java
@Override
protected SampleResult createSample(ScoredDocIDs docids, int actualSize,
int sampleSetSize) throws IOException {
int[] sampleSet = null;
try {
sampleSet = repeatableSample(docids, actualSize,
sampleSetSize);
} catch (IOException e) {
if (logger.isLoggable(Level.WARNING)) {
logger.log(Level.WARNING, "sampling failed: "+e.getMessage()+" - falling back to no sampling!", e);
}
return new SampleResult(docids, 1d);
}
ScoredDocIDs sampled = ScoredDocIdsUtils.createScoredDocIDsSubset(docids,
sampleSet);
if (logger.isLoggable(Level.FINEST)) {
logger.finest("******************** " + sampled.size());
}
return new SampleResult(sampled, sampled.size()/(double)docids.size());
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/sampling/RepeatableSampler.java
private static int[] repeatableSample(ScoredDocIDs collection,
int collectionSize, int sampleSize)
throws IOException {
return repeatableSample(collection, collectionSize,
sampleSize, Algorithm.HASHING, Sorted.NO);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/sampling/RepeatableSampler.java
private static int[] repeatableSample(ScoredDocIDs collection,
int collectionSize, int sampleSize,
Algorithm algorithm, Sorted sorted)
throws IOException {
if (collection == null) {
throw new IOException("docIdSet is null");
}
if (sampleSize < 1) {
throw new IOException("sampleSize < 1 (" + sampleSize + ")");
}
if (collectionSize < sampleSize) {
throw new IOException("collectionSize (" + collectionSize + ") less than sampleSize (" + sampleSize + ")");
}
int[] sample = new int[sampleSize];
long[] times = new long[4];
if (algorithm == Algorithm.TRAVERSAL) {
sample1(collection, collectionSize, sample, times);
} else if (algorithm == Algorithm.HASHING) {
sample2(collection, collectionSize, sample, times);
} else {
throw new IllegalArgumentException("Invalid algorithm selection");
}
if (sorted == Sorted.YES) {
Arrays.sort(sample);
}
if (returnTimings) {
times[3] = System.currentTimeMillis();
if (logger.isLoggable(Level.FINEST)) {
logger.finest("Times: " + (times[1] - times[0]) + "ms, "
+ (times[2] - times[1]) + "ms, " + (times[3] - times[2])+"ms");
}
}
return sample;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/sampling/RepeatableSampler.java
private static void sample1(ScoredDocIDs collection, int collectionSize, int[] sample, long[] times)
throws IOException {
ScoredDocIDsIterator it = collection.iterator();
if (returnTimings) {
times[0] = System.currentTimeMillis();
}
int sampleSize = sample.length;
int prime = findGoodStepSize(collectionSize, sampleSize);
int mod = prime % collectionSize;
if (returnTimings) {
times[1] = System.currentTimeMillis();
}
int sampleCount = 0;
int index = 0;
for (; sampleCount < sampleSize;) {
if (index + mod < collectionSize) {
for (int i = 0; i < mod; i++, index++) {
it.next();
}
} else {
index = index + mod - collectionSize;
it = collection.iterator();
for (int i = 0; i < index; i++) {
it.next();
}
}
sample[sampleCount++] = it.getDocID();
}
if (returnTimings) {
times[2] = System.currentTimeMillis();
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/sampling/RepeatableSampler.java
private static void sample2(ScoredDocIDs collection, int collectionSize, int[] sample, long[] times)
throws IOException {
if (returnTimings) {
times[0] = System.currentTimeMillis();
}
int sampleSize = sample.length;
IntPriorityQueue pq = new IntPriorityQueue(sampleSize);
/*
* Convert every value in the collection to a hashed "weight" value, and insert
* into a bounded PQ (retains only sampleSize highest weights).
*/
ScoredDocIDsIterator it = collection.iterator();
while (it.next()) {
pq.insertWithReuse((int)(it.getDocID() * PHI_32) & 0x7FFFFFFF);
}
if (returnTimings) {
times[1] = System.currentTimeMillis();
}
/*
* Extract heap, convert weights back to original values, and return as integers.
*/
Object[] heap = pq.getHeap();
for (int si = 0; si < sampleSize; si++) {
sample[si] = (int)(((IntPriorityQueue.MI)(heap[si+1])).value * PHI_32I) & 0x7FFFFFFF;
}
if (returnTimings) {
times[2] = System.currentTimeMillis();
}
}
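// A hedged sketch (not the class's own code) of the idea behind sample2(): map each doc id to
// a deterministic pseudo-random weight and keep the sampleSize largest weights. Unlike the
// original, which recovers ids by multiplying the weight with a modular inverse (PHI_32I),
// this variant simply carries the id next to its weight; the golden-ratio constant below is
// the usual illustrative Fibonacci-hashing choice, not necessarily the value of PHI_32.
import java.util.Comparator;
import java.util.PriorityQueue;

class RepeatableSampleSketch {
  private static final long GOLDEN = 0x9E3779B9L; // illustrative constant, assumption

  static int[] sample(int[] docIds, int sampleSize) {
    // min-heap ordered by weight; evicting the minimum keeps the sampleSize heaviest entries
    PriorityQueue<long[]> heap = new PriorityQueue<long[]>(sampleSize, new Comparator<long[]>() {
      public int compare(long[] a, long[] b) {
        return a[0] < b[0] ? -1 : (a[0] > b[0] ? 1 : 0);
      }
    });
    for (int doc : docIds) {
      long weight = (doc * GOLDEN) & 0x7FFFFFFFL;   // deterministic "random" weight
      if (heap.size() < sampleSize) {
        heap.add(new long[] { weight, doc });
      } else if (heap.peek()[0] < weight) {
        heap.poll();                                // drop the lightest entry
        heap.add(new long[] { weight, doc });
      }
    }
    int[] out = new int[heap.size()];
    for (int i = 0; i < out.length; i++) {
      out[i] = (int) heap.poll()[1];                // ids come out lightest-first
    }
    return out;
  }
}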
// in lucene/facet/src/java/org/apache/lucene/facet/search/sampling/RandomSampler.java
@Override
protected SampleResult createSample(ScoredDocIDs docids, int actualSize, int sampleSetSize) throws IOException {
final int[] sample = new int[sampleSetSize];
final int maxStep = (actualSize * 2 ) / sampleSetSize; //floor
int remaining = actualSize;
ScoredDocIDsIterator it = docids.iterator();
int i = 0;
// select sample docs with random skipStep, make sure to leave sufficient #docs for selection after last skip
while (i<sample.length && remaining>(sampleSetSize-maxStep-i)) {
int skipStep = 1 + random.nextInt(maxStep);
// Skip over 'skipStep' documents
for (int j=0; j<skipStep; j++) {
it.next();
-- remaining;
}
sample[i++] = it.getDocID();
}
// Add leftover documents to the sample set
while (i<sample.length) {
it.next();
sample[i++] = it.getDocID();
}
ScoredDocIDs sampleRes = ScoredDocIdsUtils.createScoredDocIDsSubset(docids, sample);
SampleResult res = new SampleResult(sampleRes, sampleSetSize/(double)actualSize);
return res;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java
public SampleResult getSampleSet(ScoredDocIDs docids) throws IOException {
if (!shouldSample(docids)) {
return new SampleResult(docids, 1d);
}
int actualSize = docids.size();
int sampleSetSize = (int) (actualSize * samplingParams.getSampleRatio());
sampleSetSize = Math.max(sampleSetSize, samplingParams.getMinSampleSize());
sampleSetSize = Math.min(sampleSetSize, samplingParams.getMaxSampleSize());
return createSample(docids, actualSize, sampleSetSize);
}
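// Tiny worked example of the clamping above (values are illustrative): with actualSize =
// 1,000,000 and sampleRatio = 0.01 the raw size is 10,000; with minSampleSize = 100 and
// maxSampleSize = 5,000 it is clamped down to 5,000. Helper name is hypothetical.
class SampleSizeClampSketch {
  static int clampedSampleSize(int actualSize, double ratio, int min, int max) {
    int size = (int) (actualSize * ratio);
    size = Math.max(size, min);   // never sample fewer than min docs
    return Math.min(size, max);   // and never more than max docs
  }
}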
// in lucene/facet/src/java/org/apache/lucene/facet/search/sampling/Sampler.java
@Override
public Aggregator createAggregator(boolean useComplements,
FacetArrays arrays, IndexReader indexReader,
TaxonomyReader taxonomy) throws IOException {
return orig.createAggregator(useComplements, arrays, indexReader,
taxonomy);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/sampling/TakmiSampleFixer.java
public void fixResult(ScoredDocIDs origDocIds, FacetResult fres)
throws IOException {
FacetResultNode topRes = fres.getFacetResultNode();
fixResultNode(topRes, origDocIds);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/sampling/TakmiSampleFixer.java
private void fixResultNode(FacetResultNode facetResNode, ScoredDocIDs docIds)
throws IOException {
recount(facetResNode, docIds);
for (FacetResultNode frn : facetResNode.getSubResults()) {
fixResultNode(frn, docIds);
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/sampling/TakmiSampleFixer.java
private void recount(FacetResultNode fresNode, ScoredDocIDs docIds)
throws IOException {
// TODO (Facet): change from void to return the new, smaller docSet, and use
// that for the children, as this will make their intersection ops faster.
// can do this only when the new set is "sufficiently" smaller.
/* We need the category's path name in order to do its recounting.
* If it is missing, because the option to label only part of the
* facet results was exercised, we need to calculate them anyway, so
* in essence sampling with recounting spends some extra cycles for
* labeling results for which labels are not required. */
CategoryPath catPath = fresNode.getLabel(taxonomyReader); // force labeling
Term drillDownTerm = DrillDown.term(searchParams, catPath);
// TODO (Facet): avoid Multi*?
Bits liveDocs = MultiFields.getLiveDocs(indexReader);
int updatedCount = countIntersection(MultiFields.getTermDocsEnum(indexReader, liveDocs,
drillDownTerm.field(), drillDownTerm.bytes(),
false),
docIds.iterator());
fresNode.setValue(updatedCount);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/sampling/TakmiSampleFixer.java
private static int countIntersection(DocsEnum p1, ScoredDocIDsIterator p2)
throws IOException {
// The documentation of both TermDocs and DocIdSetIterator claims
// that we must do next() before doc(). So we do, and if one of the
// lists is empty, obviously return 0;
if (p1 == null || p1.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
return 0;
}
if (!p2.next()) {
return 0;
}
int d1 = p1.docID();
int d2 = p2.getDocID();
int count = 0;
for (;;) {
if (d1 == d2) {
++count;
if (p1.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
break; // end of list 1, nothing more in intersection
}
d1 = p1.docID();
if (!advance(p2, d1)) {
break; // end of list 2, nothing more in intersection
}
d2 = p2.getDocID();
} else if (d1 < d2) {
if (p1.advance(d2) == DocIdSetIterator.NO_MORE_DOCS) {
break; // end of list 1, nothing more in intersection
}
d1 = p1.docID();
} else /* d1>d2 */ {
if (!advance(p2, d1)) {
break; // end of list 2, nothing more in intersection
}
d2 = p2.getDocID();
}
}
return count;
}
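// A hedged sketch of the same merge-style intersection countIntersection() performs, written
// over two sorted int arrays instead of DocsEnum/ScoredDocIDsIterator (illustrative only):
class IntersectionSketch {
  static int countIntersection(int[] a, int[] b) {
    int i = 0, j = 0, count = 0;
    while (i < a.length && j < b.length) {
      if (a[i] == b[j]) {        // common doc: count it and advance both sides
        count++; i++; j++;
      } else if (a[i] < b[j]) {  // advance whichever side is behind
        i++;
      } else {
        j++;
      }
    }
    return count;
  }
}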
// in lucene/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java
@Override
public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
// first let delegee accumulate without labeling at all (though
// currently it doesn't matter because we have to label all returned anyhow)
boolean origAllowLabeling = isAllowLabeling();
setAllowLabeling(false);
// Replacing the original searchParams with the over-sampled
FacetSearchParams original = searchParams;
searchParams = sampler.overSampledSearchParams(original);
List<FacetResult> sampleRes = super.accumulate(docids);
setAllowLabeling(origAllowLabeling);
List<FacetResult> fixedRes = new ArrayList<FacetResult>();
for (FacetResult fres : sampleRes) {
// for sure fres is not null because this is guaranteed by the delegee.
FacetResultsHandler frh = fres.getFacetRequest().createFacetResultsHandler(
taxonomyReader);
// fix the result of current request
sampler.getSampleFixer(indexReader, taxonomyReader, searchParams)
.fixResult(docids, fres);
fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any arranging it needs to
// Using the sampler to trim the extra (over-sampled) results
fres = sampler.trimResult(fres);
// final labeling if allowed (because labeling is a costly operation)
if (isAllowLabeling()) {
frh.labelResult(fres);
}
fixedRes.add(fres); // add to final results
}
searchParams = original; // Back to original params
return fixedRes;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/sampling/SamplingAccumulator.java
@Override
protected ScoredDocIDs actualDocsToAccumulate(ScoredDocIDs docids) throws IOException {
SampleResult sampleRes = sampler.getSampleSet(docids);
samplingRatio = sampleRes.actualSampleRatio;
return sampleRes.docids;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java
public List<FacetResult> getFacetResults() throws IOException {
synchronized (resultsGuard) { // over protection
if (results == null) {
// lazy creation but just once
results = facetsAccumulator.accumulate(scoreDocIdCollector.getScoredDocIDs());
scoreDocIdCollector = null;
}
return results;
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java
@Override
public void collect(int doc) throws IOException {
scoreDocIdCollector.collect(doc);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
scoreDocIdCollector.setNextReader(context);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java
@Override
public void setScorer(Scorer scorer) throws IOException {
scoreDocIdCollector.setScorer(scorer);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java
static TotalFacetCounts loadFromFile(File inputFile, TaxonomyReader taxonomy,
FacetIndexingParams facetIndexingParams) throws IOException {
DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(inputFile)));
try {
int[][] counts = new int[dis.readInt()][];
for (int i=0; i<counts.length; i++) {
int size = dis.readInt();
if (size<0) {
counts[i] = null;
} else {
counts[i] = new int[size];
for (int j=0; j<size; j++) {
counts[i][j] = dis.readInt();
}
}
}
return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Loaded);
} finally {
dis.close();
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java
static void storeToFile(File outputFile, TotalFacetCounts tfc) throws IOException {
DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outputFile)));
try {
dos.writeInt(tfc.totalCounts.length);
for (int[] counts : tfc.totalCounts) {
if (counts == null) {
dos.writeInt(-1);
} else {
dos.writeInt(counts.length);
for (int i : counts) {
dos.writeInt(i);
}
}
}
} finally {
dos.close();
}
}
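// On-disk layout implied by loadFromFile()/storeToFile() above (DataOutputStream ints,
// big-endian): [numPartitions] then, per partition, either [-1] for a null counts row
// or [len] followed by len count values.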
// in lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java
static TotalFacetCounts compute(final IndexReader indexReader,
final TaxonomyReader taxonomy, final FacetIndexingParams facetIndexingParams,
final CategoryListCache clCache) throws IOException {
int partitionSize = PartitionsUtils.partitionSize(facetIndexingParams, taxonomy);
final int[][] counts = new int[(int) Math.ceil(taxonomy.getSize() /(float) partitionSize)][partitionSize];
FacetSearchParams newSearchParams = new FacetSearchParams(facetIndexingParams);
//createAllListsSearchParams(facetIndexingParams, this.totalCounts);
FacetsAccumulator fe = new StandardFacetsAccumulator(newSearchParams, indexReader, taxonomy) {
@Override
protected HashMap<CategoryListIterator, Aggregator> getCategoryListMap(
FacetArrays facetArrays, int partition) throws IOException {
Aggregator aggregator = new CountingAggregator(counts[partition]);
HashMap<CategoryListIterator, Aggregator> map = new HashMap<CategoryListIterator, Aggregator>();
for (CategoryListParams clp: facetIndexingParams.getAllCategoryListParams()) {
final CategoryListIterator cli = clIteraor(clCache, clp, indexReader, partition);
map.put(cli, aggregator);
}
return map;
}
};
fe.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT);
fe.accumulate(ScoredDocIdsUtils.createAllDocsScoredDocIDs(indexReader));
return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Computed);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java
@Override
protected HashMap<CategoryListIterator, Aggregator> getCategoryListMap(
FacetArrays facetArrays, int partition) throws IOException {
Aggregator aggregator = new CountingAggregator(counts[partition]);
HashMap<CategoryListIterator, Aggregator> map = new HashMap<CategoryListIterator, Aggregator>();
for (CategoryListParams clp: facetIndexingParams.getAllCategoryListParams()) {
final CategoryListIterator cli = clIteraor(clCache, clp, indexReader, partition);
map.put(cli, aggregator);
}
return map;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCounts.java
static CategoryListIterator clIteraor(CategoryListCache clCache, CategoryListParams clp,
IndexReader indexReader, int partition) throws IOException {
if (clCache != null) {
CategoryListData cld = clCache.get(clp);
if (cld != null) {
return cld.iterator(partition);
}
}
return clp.createCategoryListIterator(indexReader, partition);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/AdaptiveFacetsAccumulator.java
@Override
public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
FacetsAccumulator delegee = appropriateFacetCountingAccumulator(docids);
if (delegee == this) {
return super.accumulate(docids);
}
return delegee.accumulate(docids);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java
public TotalFacetCounts getTotalCounts(IndexReader indexReader, TaxonomyReader taxonomy,
FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException {
// create the key
TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
// it is important that this call is not synchronized, so that available TFC
// would not wait for one that needs to be computed.
TotalFacetCounts tfc = cache.get(key);
if (tfc != null) {
markRecentlyUsed(key);
return tfc;
}
return computeAndCache(key, clCache);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java
private synchronized TotalFacetCounts computeAndCache(TFCKey key, CategoryListCache clCache) throws IOException {
TotalFacetCounts tfc = cache.get(key);
if (tfc == null) {
tfc = TotalFacetCounts.compute(key.indexReader, key.taxonomy, key.facetIndexingParams, clCache);
lruKeys.add(key);
cache.put(key,tfc);
trimCache();
}
return tfc;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java
public synchronized void load(File inputFile, IndexReader indexReader, TaxonomyReader taxonomy,
FacetIndexingParams facetIndexingParams) throws IOException {
if (!inputFile.isFile() || !inputFile.exists() || !inputFile.canRead()) {
throw new IllegalArgumentException("Exepecting an existing readable file: "+inputFile);
}
TFCKey key = new TFCKey(indexReader, taxonomy, facetIndexingParams);
TotalFacetCounts tfc = TotalFacetCounts.loadFromFile(inputFile, taxonomy, facetIndexingParams);
cache.put(key,tfc);
trimCache();
markRecentlyUsed(key);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/TotalFacetCountsCache.java
public void store(File outputFile, IndexReader indexReader, TaxonomyReader taxonomy,
FacetIndexingParams facetIndexingParams, CategoryListCache clCache) throws IOException {
File parentFile = outputFile.getParentFile();
if (
( outputFile.exists() && (!outputFile.isFile() || !outputFile.canWrite())) ||
(!outputFile.exists() && (!parentFile.isDirectory() || !parentFile.canWrite()))
) {
throw new IllegalArgumentException("Exepecting a writable file: "+outputFile);
}
TotalFacetCounts tfc = getTotalCounts(indexReader, taxonomy, facetIndexingParams, clCache);
TotalFacetCounts.storeToFile(outputFile, tfc);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java
public boolean init() throws IOException {
hasMore = tp != null && tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
return hasMore;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java
public boolean setdoc(int docId) throws IOException {
if (!hasMore) {
return false;
}
if (tp.docID() > docId) {
return false;
}
// making sure we have the requested document
if (tp.docID() < docId) {
// Skipping to requested document
if (tp.advance(docId) == DocIdSetIterator.NO_MORE_DOCS) {
this.hasMore = false;
return false;
}
// If document not found (skipped too far)
if (tp.docID() != docId) {
return false;
}
}
// Prepare for payload extraction
tp.nextPosition();
// TODO: fix bug in SepCodec and then remove this check (the null check should be enough)
if (!tp.hasPayload()) {
return false;
}
BytesRef br = tp.getPayload();
if (br == null || br.length == 0) {
return false;
}
this.payloadLength = br.length;
if (this.payloadLength > this.buffer.length) {
// Growing if necessary.
this.buffer = new byte[this.payloadLength * 2 + 1];
}
// Loading the payload
System.arraycopy(br.bytes, br.offset, this.buffer, 0, payloadLength);
return true;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIntDecodingIterator.java
public boolean init() throws IOException {
return pi.init();
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIntDecodingIterator.java
public long nextCategory() throws IOException {
return decoder.decode();
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIntDecodingIterator.java
public boolean skipTo(int docId) throws IOException {
if (!pi.setdoc(docId)) {
return false;
}
// Initializing the decoding mechanism with the new payload data
ubais.reInit(pi.getBuffer(), 0, pi.getPayloadLength());
decoder.reInit(ubais);
return true;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java
@Override
public IntermediateFacetResult fetchPartitionResult(FacetArrays arrays, int offset) throws IOException {
// get the root of the result tree to be returned, and the depth of that result tree
// (depth means number of node levels excluding the root).
int rootNode = this.taxonomyReader.getOrdinal(this.facetRequest.getCategoryPath());
if (rootNode == TaxonomyReader.INVALID_ORDINAL) {
return null;
}
int K = Math.min(facetRequest.getNumResults(),taxonomyReader.getSize()); // number of best results in each node
// this will grow into the returned IntermediateFacetResult
IntToObjectMap<AACO> AACOsOfOnePartition = new IntToObjectMap<AACO>();
int partitionSize = arrays.getArraysLength(); // all partitions, except, possibly, the last,
// have the same length. Hence modulo is OK.
int depth = facetRequest.getDepth();
if (depth == 0) {
// Need to only have root node.
IntermediateFacetResultWithHash tempFRWH = new IntermediateFacetResultWithHash(
facetRequest, AACOsOfOnePartition);
if (isSelfPartition(rootNode, arrays, offset)) {
tempFRWH.isRootNodeIncluded = true;
tempFRWH.rootNodeValue = this.facetRequest.getValueOf(arrays, rootNode % partitionSize);
}
return tempFRWH;
}
if (depth > Short.MAX_VALUE - 3) {
depth = Short.MAX_VALUE -3;
}
int endOffset = offset + partitionSize; // one past the largest ordinal in the partition
ChildrenArrays childrenArray = taxonomyReader.getChildrenArrays();
int[] youngestChild = childrenArray.getYoungestChildArray();
int[] olderSibling = childrenArray.getOlderSiblingArray();
int totalNumOfDescendantsConsidered = 0; // total number of facets with value != 0,
// in the tree. These include those selected as top K in each node, and all the others that
// were not. Not including rootNode
// the following priority queue will be used again and again for each node recursed into
// to select its best K children among its children encountered in the given partition
PriorityQueue<AggregatedCategory> pq =
new AggregatedCategoryHeap(K, this.getSuitableACComparator());
// reusables will feed the priority queue in each use
AggregatedCategory [] reusables = new AggregatedCategory[2+K];
for (int i = 0; i < reusables.length; i++) {
reusables[i] = new AggregatedCategory(1,0);
}
/*
* The returned map is built by a recursive visit of potential tree nodes. Nodes
* determined to be excluded from the FacetResult are not recursively explored as others,
* they are only recursed in order to count the number of their descendants.
* Also, nodes for which neither they nor any of their descendants can be mapped to facets
* encountered in this partition are likewise explored no further. These are facets whose ordinal
* numbers are greater than the ordinals of the given partition. (recall that the Taxonomy
* maintains that a parent ordinal is smaller than any of its descendants' ordinals).
* So, when scanning over all children of a potential tree node n: (1) all children with ordinal number
* greater than those in the given partition are skipped over, (2) among the children of n residing
* in this partition, the best K children are selected (using pq) for usual further recursion
* and the rest (those rejected out from the pq) are only recursed for counting total number
* of descendants, and (3) all the children of ordinal numbers smaller than the given partition
* are further explored in the usual way, since these may lead to descendants residing in this partition.
*
* ordinalStack drives the recursive descent.
* Top of stack holds the current node which we recurse from.
* ordinalStack[0] holds the root of the facetRequest, and
* it is always maintained that parent(ordinalStack[i]) = ordinalStack[i-1].
* localDepth points to the current top of ordinalStack.
* Only the top of ordinalStack can be TaxonomyReader.INVALID_ORDINAL, and this happens if and only if
* the element below it explored all its relevant children.
*/
int[] ordinalStack = new int[depth+2]; // for 0 and for invalid on top
ordinalStack[0] = rootNode;
int localDepth = 0;
/*
* bestSignlingsStack[i] maintains the best K children of ordinalStack[i-1], namely,
* the best K siblings of ordinalStack[i], best K among those residing in the given partition.
* Note that the residents of ordinalStack need not belong
* to the current partition, only the residents of bestSignlingsStack.
* When exploring the children of ordinalStack[i-1] that reside in the current partition
* (after the top K of them have been determined and stored into bestSignlingsStack[i]),
* siblingExplored[i] points into bestSignlingsStack[i], to the child now explored, hence
* residing in ordinalStack[i], and firstToTheLeftOfPartition[i] holds the largest ordinal of
* a sibling smaller than the ordinals in the partition.
* When siblingExplored[i] == max int, the top K siblings of ordinalStack[i] among those siblings
* that reside in this partition have not been determined yet.
* if siblingExplored[i] < 0, the node in ordinalStack[i] is to the left of partition
* (i.e. of a smaller ordinal than the current partition)
* (step (3) above is executed for the children of ordinalStack[i-1])
*/
int[][] bestSignlingsStack = new int[depth+2][];
int[] siblingExplored = new int[depth+2];
int[] firstToTheLeftOfPartition = new int [depth+2];
int tosOrdinal; // top of stack element, the ordinal at the top of stack
/*
* to start the loop, complete the datastructures for root node:
* push its youngest child to ordinalStack; make a note in siblingExplored[] that the children
* of rootNode, which reside in the current partition have not been read yet to select the top
* K of them. Also, mark rootNode as if, relative to its parent, it belongs to the children
* of ordinal numbers smaller than those of the current partition (this eases the end condition --
* we can continue to the older sibling of rootNode once localDepth goes down, without first
* verifying how far down it went)
*/
ordinalStack[++localDepth] = youngestChild[rootNode];
siblingExplored[localDepth] = Integer.MAX_VALUE; // we have not verified position wrt current partition
siblingExplored[0] = -1; // as if rootNode resides to the left of current position
/*
* now the whole recursion: loop as long as stack is not empty of elements descendants of
* facetRequest's root.
*/
while (localDepth > 0) {
tosOrdinal = ordinalStack[localDepth];
if (tosOrdinal == TaxonomyReader.INVALID_ORDINAL) {
// the brotherhood that has been occupying the top of stack is all exhausted.
// Hence, element below tos, namely, father of tos, has all its children,
// and itself, all explored.
localDepth--;
// replace this father, now on top of stack, by this father's sibling:
// this parent's ordinal can not be greater than current partition, as otherwise
// its child, now just removed, would not have been pushed on it.
// so the father is either inside the partition, or smaller ordinal
if (siblingExplored[localDepth] < 0 ) {
ordinalStack[localDepth] = olderSibling[ordinalStack[localDepth]];
continue;
}
// At this point, siblingExplored[localDepth] is between 0 and the number of best siblings;
// it cannot be Integer.MAX_VALUE.
siblingExplored[localDepth]--;
if (siblingExplored[localDepth] == -1 ) {
//siblings residing in the partition have been all processed, we now move
// to those of ordinal numbers smaller than the partition
ordinalStack[localDepth] = firstToTheLeftOfPartition[localDepth];
} else {
// still explore siblings residing in the partition
// just move to the next one
ordinalStack[localDepth] = bestSignlingsStack[localDepth][siblingExplored[localDepth]];
}
continue;
} // endof tosOrdinal is invalid, and hence removed, and its parent was replaced by this
// parent's sibling
// now try to push a kid, but first look at tos whether it 'deserves' its kids explored:
// it is not to the right of current partition, and we know whether to only count or to
// select best K siblings.
if (siblingExplored[localDepth] == Integer.MAX_VALUE) {
//tosOrdinal was not examined yet for its position relative to current partition
// and the best K of current partition, among its siblings, have not been determined yet
while (tosOrdinal >= endOffset) {
tosOrdinal = olderSibling[tosOrdinal];
}
// now it is inside. Run it and all its siblings inside the partition through a heap
// and in doing so, count them, find best K, and sum into residue
double residue = 0f; // the sum of all the siblings from this partition that do not make
// it to top K
pq.clear();
//reusables are consumed as from a stack. The stack starts full and returns full.
int tosReuslables = reusables.length -1;
while (tosOrdinal >= offset) { // while tosOrdinal belongs to the given partition; here, too, we use the fact
// that TaxonomyReader.INVALID_ORDINAL == -1 < offset
double value = facetRequest.getValueOf(arrays, tosOrdinal % partitionSize);
if (value != 0) { // the value of yc is not 0, it is to be considered.
totalNumOfDescendantsConsidered++;
// consume one reusable, and push to the priority queue
AggregatedCategory ac = reusables[tosReuslables--];
ac.ordinal = tosOrdinal;
ac.value = value;
ac = pq.insertWithOverflow(ac);
if (null != ac) {
residue += ac.value;
// TODO (Facet): could it be that we need to do something
// else, not add, depending on the aggregator?
/* when a facet is excluded from top K, because already in this partition it has
* K better siblings, it is only recursed for count only.
*/
// update totalNumOfDescendants by the now excluded node and all its descendants
totalNumOfDescendantsConsidered--; // reduce the 1 earned when the excluded node entered the heap
// and now return it and all its descendants. These will never make it to FacetResult
totalNumOfDescendantsConsidered += countOnly (ac.ordinal, youngestChild,
olderSibling, arrays, partitionSize, offset, endOffset, localDepth, depth);
reusables[++tosReuslables] = ac;
}
}
tosOrdinal = olderSibling[tosOrdinal];
}
// now pq has best K children of ordinals that belong to the given partition.
// Populate a new AACO with them.
// tosOrdinal is now first sibling smaller than partition, make a note of that
firstToTheLeftOfPartition[localDepth] = tosOrdinal;
int aaci = pq.size();
int[] ords = new int[aaci];
double [] vals = new double [aaci];
while (aaci > 0) {
AggregatedCategory ac = pq.pop();
ords[--aaci] = ac.ordinal;
vals[aaci] = ac.value;
reusables[++tosReuslables] = ac;
}
// if more than 0 ordinals, add this AACO to the map to be returned,
// and add ords to sibling stack, and make a note in siblingExplored that these are to
// be visited now
if (ords.length > 0) {
AACOsOfOnePartition.put(ordinalStack[localDepth-1], new AACO(ords,vals,residue));
bestSignlingsStack[localDepth] = ords;
siblingExplored[localDepth] = ords.length-1;
ordinalStack[localDepth] = ords[ords.length-1];
} else {
// no ordinals siblings of tosOrdinal in current partition, move to the left of it
// tosOrdinal is already there (to the left of partition).
// make a note of it in siblingExplored
ordinalStack[localDepth] = tosOrdinal;
siblingExplored[localDepth] = -1;
}
continue;
} // endof we did not check the position of a valid ordinal wrt partition
// now tosOrdinal is a valid ordinal, inside partition or to the left of it, we need
// to push its kids on top of it, if not too deep.
// Make a note that we did not check them yet
if (localDepth >= depth) {
// localDepth == depth; current tos exhausted its possible children, mark this by pushing INVALID_ORDINAL
ordinalStack[++localDepth] = TaxonomyReader.INVALID_ORDINAL;
continue;
}
ordinalStack[++localDepth] = youngestChild[tosOrdinal];
siblingExplored[localDepth] = Integer.MAX_VALUE;
} // endof loop while stack is not empty
// now generate a TempFacetResult from AACOsOfOnePartition, and consider self.
IntermediateFacetResultWithHash tempFRWH = new IntermediateFacetResultWithHash(
facetRequest, AACOsOfOnePartition);
if (isSelfPartition(rootNode, arrays, offset)) {
tempFRWH.isRootNodeIncluded = true;
tempFRWH.rootNodeValue = this.facetRequest.getValueOf(arrays, rootNode % partitionSize);
}
tempFRWH.totalNumOfFacetsConsidered = totalNumOfDescendantsConsidered;
return tempFRWH;
}
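// A hedged, much-simplified sketch of the per-node selection done above: push every non-zero
// candidate value into a size-bounded heap, keep the K largest, and sum the overflow into a
// residue (the original uses Lucene's insertWithOverflow on reusable objects). Names and the
// plain-double representation are illustrative only; topKOut must have room for k values.
import java.util.PriorityQueue;

class TopKWithResidueSketch {
  static double selectTopK(double[] values, int k, double[] topKOut) {
    PriorityQueue<Double> heap = new PriorityQueue<Double>(k); // min-heap of the current top K
    double residue = 0;
    for (double v : values) {
      if (v == 0) {
        continue;                       // zero-valued facets are skipped, as above
      }
      if (heap.size() < k) {
        heap.add(v);
      } else if (heap.peek() < v) {
        residue += heap.poll();         // evicted value joins the residue
        heap.add(v);
      } else {
        residue += v;                   // never made it into the top K
      }
    }
    for (int i = heap.size() - 1; i >= 0; i--) {
      topKOut[i] = heap.poll();         // poll yields smallest first, so fill from the end
    }
    return residue;
  }
}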
// in lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java
private void recursivelyLabel(FacetResultNode node, int numToLabel) throws IOException {
if (node == null) {
return;
}
node.getLabel(this.taxonomyReader); // attach a label -- category path -- to the node
if (null == node.getSubResults()) {
return; // if node has no children -- done
}
// otherwise, label the first numToLabel of these children, and recursively -- their children.
int numLabeled = 0;
for (FacetResultNode frn : node.getSubResults()) {
// go over the children of node from first to last, no more than numToLabel of them
recursivelyLabel(frn, numToLabel);
if (++numLabeled >= numToLabel) {
return;
}
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/TopKInEachNodeHandler.java
@Override
public FacetResult renderFacetResult(IntermediateFacetResult tmpResult) throws IOException {
IntermediateFacetResultWithHash tmp = (IntermediateFacetResultWithHash) tmpResult;
int ordinal = this.taxonomyReader.getOrdinal(this.facetRequest.getCategoryPath());
if ((tmp == null) || (ordinal == TaxonomyReader.INVALID_ORDINAL)) {
return null;
}
double value = Double.NaN;
if (tmp.isRootNodeIncluded) {
value = tmp.rootNodeValue;
}
MutableFacetResultNode root = generateNode (ordinal, value, tmp.mapToAACOs);
return new FacetResult (tmp.facetRequest, root, tmp.totalNumOfFacetsConsidered);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/SamplingWrapper.java
@Override
public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
// first let delegee accumulate without labeling at all (though
// currently it doesn't matter because we have to label all returned anyhow)
boolean origAllowLabeling = isAllowLabeling();
setAllowLabeling(false);
// Replacing the original searchParams with the over-sampled (and without statistics-compute)
FacetSearchParams original = delegee.searchParams;
delegee.searchParams = sampler.overSampledSearchParams(original);
SampleResult sampleSet = sampler.getSampleSet(docids);
List<FacetResult> sampleRes = delegee.accumulate(sampleSet.docids);
setAllowLabeling(origAllowLabeling);
List<FacetResult> fixedRes = new ArrayList<FacetResult>();
for (FacetResult fres : sampleRes) {
// for sure fres is not null because this is guaranteed by the delegee.
FacetResultsHandler frh = fres.getFacetRequest().createFacetResultsHandler(taxonomyReader);
// fix the result of current request
sampler.getSampleFixer(indexReader, taxonomyReader, searchParams)
.fixResult(docids, fres);
fres = frh.rearrangeFacetResult(fres); // let delegee's handler do any arranging it needs to
// Using the sampler to trim the extra (over-sampled) results
fres = sampler.trimResult(fres);
// final labeling if allowed (because labeling is a costly operation)
if (isAllowLabeling()) {
frh.labelResult(fres);
}
fixedRes.add(fres); // add to final results
}
delegee.searchParams = original; // Back to original params
return fixedRes;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/results/MutableFacetResultNode.java
public final CategoryPath getLabel(TaxonomyReader taxonomyReader)
throws IOException {
if (label == null) {
label = taxonomyReader.getPath(ordinal);
}
return label;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListCache.java
public void loadAndRegister(CategoryListParams clp,
IndexReader reader, TaxonomyReader taxo, FacetIndexingParams iparams) throws IOException {
CategoryListData clData = new CategoryListData(reader, taxo, iparams, clp);
register(clp,clData);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java
public CategoryListIterator iterator(int partition) throws IOException {
return new RAMCategoryListIterator(partition, docPartitionCategories);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java
public boolean init() throws IOException {
return dpc!=null && dpc.length>part;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java
public long nextCategory() throws IOException {
if (nextCategoryIndex >= dpc[currDoc][part].length) {
return 1L+Integer.MAX_VALUE;
}
return dpc[currDoc][part][nextCategoryIndex++];
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/cache/CategoryListData.java
public boolean skipTo(int docId) throws IOException {
final boolean res = dpc.length>docId && dpc[docId]!=null && dpc[docId][part]!=null;
if (res) {
currDoc = docId;
nextCategoryIndex = 0;
}
return res;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java
@Override
public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
// synchronize to prevent calling two accumulate()'s at the same time.
// We decided not to synchronize the method because that might mislead
// users to feel encouraged to call this method simultaneously.
synchronized (accumulateGuard) {
// only now we can compute this
isUsingComplements = shouldComplement(docids);
if (isUsingComplements) {
try {
totalFacetCounts = TotalFacetCountsCache.getSingleton()
.getTotalCounts(indexReader, taxonomyReader,
searchParams.getFacetIndexingParams(), searchParams.getClCache());
if (totalFacetCounts != null) {
docids = ScoredDocIdsUtils.getComplementSet(docids, indexReader);
} else {
isUsingComplements = false;
}
} catch (UnsupportedOperationException e) {
// TODO (Facet): this exception is thrown from TotalCountsKey if the
// IndexReader used does not support getVersion(). We should re-think
// this: is this tiny detail worth disabling total counts completely
// for such readers? Currently, it's not supported by Parallel and
// MultiReader, which might be problematic for several applications.
// We could, for example, base our "isCurrent" logic on something else
// than the reader's version. Need to think more deeply about it.
if (logger.isLoggable(Level.FINEST)) {
logger.log(Level.FINEST, "IndexReader used does not support completents: ", e);
}
isUsingComplements = false;
} catch (IOException e) {
if (logger.isLoggable(Level.FINEST)) {
logger.log(Level.FINEST, "Failed to load/calculate total counts (complement counting disabled): ", e);
}
// silently fail if for some reason failed to load/save from/to dir
isUsingComplements = false;
} catch (Exception e) {
// give up: this should not happen!
IOException ioEx = new IOException(
"PANIC: Got unexpected exception while trying to get/calculate total counts: "
+e.getMessage());
ioEx.initCause(e);
throw ioEx;
}
}
docids = actualDocsToAccumulate(docids);
FacetArrays facetArrays = new FacetArrays(intArrayAllocator, floatArrayAllocator);
HashMap<FacetRequest, IntermediateFacetResult> fr2tmpRes = new HashMap<FacetRequest, IntermediateFacetResult>();
try {
for (int part = 0; part < maxPartitions; part++) {
// fill arrays from category lists
fillArraysForPartition(docids, facetArrays, part);
int offset = part * partitionSize;
// For each partition we go over all requests and handle each, where the
// request maintains the merged result. In this implementation merges happen
// after each partition, but other implementations could merge only at the end.
for (FacetRequest fr : searchParams.getFacetRequests()) {
FacetResultsHandler frHndlr = fr.createFacetResultsHandler(taxonomyReader);
IntermediateFacetResult res4fr = frHndlr.fetchPartitionResult(facetArrays, offset);
IntermediateFacetResult oldRes = fr2tmpRes.get(fr);
if (oldRes != null) {
res4fr = frHndlr.mergeResults(oldRes, res4fr);
}
fr2tmpRes.put(fr, res4fr);
}
}
} finally {
facetArrays.free();
}
// gather results from all requests into a list for returning them
List<FacetResult> res = new ArrayList<FacetResult>();
for (FacetRequest fr : searchParams.getFacetRequests()) {
FacetResultsHandler frHndlr = fr.createFacetResultsHandler(taxonomyReader);
IntermediateFacetResult tmpResult = fr2tmpRes.get(fr);
if (tmpResult == null) {
continue; // do not add a null to the list.
}
FacetResult facetRes = frHndlr.renderFacetResult(tmpResult);
// final labeling if allowed (because labeling is a costly operation)
if (isAllowLabeling()) {
frHndlr.labelResult(facetRes);
}
res.add(facetRes);
}
return res;
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java
protected ScoredDocIDs actualDocsToAccumulate(ScoredDocIDs docids) throws IOException {
return docids;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java
private final void fillArraysForPartition(ScoredDocIDs docids,
FacetArrays facetArrays, int partition) throws IOException {
if (isUsingComplements) {
initArraysByTotalCounts(facetArrays, partition, docids.size());
} else {
facetArrays.free(); // to get a cleared array for this partition
}
HashMap<CategoryListIterator, Aggregator> categoryLists = getCategoryListMap(
facetArrays, partition);
for (Entry<CategoryListIterator, Aggregator> entry : categoryLists.entrySet()) {
CategoryListIterator categoryList = entry.getKey();
if (!categoryList.init()) {
continue;
}
Aggregator categorator = entry.getValue();
ScoredDocIDsIterator iterator = docids.iterator();
while (iterator.next()) {
int docID = iterator.getDocID();
if (!categoryList.skipTo(docID)) {
continue;
}
categorator.setNextDoc(docID, iterator.getScore());
long ordinal;
while ((ordinal = categoryList.nextCategory()) <= Integer.MAX_VALUE) {
categorator.aggregate((int) ordinal);
}
}
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java
protected HashMap<CategoryListIterator, Aggregator> getCategoryListMap(FacetArrays facetArrays,
int partition) throws IOException {
HashMap<CategoryListIterator, Aggregator> categoryLists = new HashMap<CategoryListIterator, Aggregator>();
for (FacetRequest facetRequest : searchParams.getFacetRequests()) {
Aggregator categoryAggregator = facetRequest.createAggregator(
isUsingComplements, facetArrays, indexReader, taxonomyReader);
CategoryListIterator cli =
facetRequest.createCategoryListIterator(indexReader, taxonomyReader, searchParams, partition);
// get the aggregator
Aggregator old = categoryLists.put(cli, categoryAggregator);
if (old != null && !old.equals(categoryAggregator)) {
// TODO (Facet): create a more meaningful RE class, and throw it.
throw new RuntimeException(
"Overriding existing category list with different aggregator. THAT'S A NO NO!");
}
// if the aggregator is the same we're covered
}
return categoryLists;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java
Override
public void collect(int doc) throws IOException {
docIds.fastSet(docBase + doc);
++numDocIds;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java
Override
public ScoredDocIDsIterator scoredDocIdsIterator() throws IOException {
return new ScoredDocIDsIterator() {
private DocIdSetIterator docIdsIter = docIds.iterator();
private int nextDoc;
public int getDocID() { return nextDoc; }
public float getScore() { return defaultScore; }
public boolean next() {
try {
nextDoc = docIdsIter.nextDoc();
return nextDoc != DocIdSetIterator.NO_MORE_DOCS;
} catch (IOException e) {
// This should not happen as we're iterating over an OpenBitSet. For
// completeness, terminate iteration
nextDoc = DocIdSetIterator.NO_MORE_DOCS;
return false;
}
}
};
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {}
// in lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java
Override
public void collect(int doc) throws IOException {
docIds.fastSet(docBase + doc);
float score = this.scorer.score();
if (numDocIds >= scores.length) {
float[] newScores = new float[ArrayUtil.oversize(numDocIds + 1, 4)];
System.arraycopy(scores, 0, newScores, 0, numDocIds);
scores = newScores;
}
scores[numDocIds] = score;
++numDocIds;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java
Override
public ScoredDocIDsIterator scoredDocIdsIterator() throws IOException {
return new ScoredDocIDsIterator() {
private DocIdSetIterator docIdsIter = docIds.iterator();
private int nextDoc;
private int scoresIdx = -1;
public int getDocID() { return nextDoc; }
public float getScore() { return scores[scoresIdx]; }
public boolean next() {
try {
nextDoc = docIdsIter.nextDoc();
if (nextDoc == DocIdSetIterator.NO_MORE_DOCS) {
return false;
}
++scoresIdx;
return true;
} catch (IOException e) {
// This should not happen as we're iterating over an OpenBitSet. For
// completeness, terminate iteration
nextDoc = DocIdSetIterator.NO_MORE_DOCS;
return false;
}
}
};
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java
public ScoredDocIDs getScoredDocIDs() {
return new ScoredDocIDs() {
public ScoredDocIDsIterator iterator() throws IOException {
return scoredDocIdsIterator();
}
public DocIdSet getDocIDs() {
return docIds;
}
public int size() {
return numDocIds;
}
};
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java
public ScoredDocIDsIterator iterator() throws IOException {
return scoredDocIdsIterator();
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/ScoredDocIdCollector.java
Override
public void setNextReader(AtomicReaderContext context) throws IOException {
this.docBase = context.docBase;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/association/AssociationIntSumAggregator.java
public void setNextDoc(int docid, float score) throws IOException {
associationsPayloadIterator.setNextDoc(docid);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/aggregator/association/AssociationFloatSumAggregator.java
public void setNextDoc(int docid, float score) throws IOException {
associationsPayloadIterator.setNextDoc(docid);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/params/association/AssociationFloatSumFacetRequest.java
Override
public Aggregator createAggregator(boolean useComplements,
FacetArrays arrays, IndexReader reader,
TaxonomyReader taxonomy) throws IOException {
assert !useComplements : "complements are not supported by this FacetRequest";
return new AssociationFloatSumAggregator(reader, arrays.getFloatArray());
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/params/association/AssociationIntSumFacetRequest.java
Override
public Aggregator createAggregator(boolean useComplements,
FacetArrays arrays, IndexReader reader,
TaxonomyReader taxonomy) throws IOException {
assert !useComplements : "complements are not supported by this FacetRequest";
return new AssociationIntSumAggregator(reader, arrays.getIntArray());
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/params/FacetRequest.java
public CategoryListIterator createCategoryListIterator(IndexReader reader,
TaxonomyReader taxo, FacetSearchParams sParams, int partition)
throws IOException {
CategoryListCache clCache = sParams.getClCache();
CategoryListParams clParams = sParams.getFacetIndexingParams().getCategoryListParams(categoryPath);
if (clCache!=null) {
CategoryListData clData = clCache.get(clParams);
if (clData!=null) {
return clData.iterator(partition);
}
}
return clParams.createCategoryListIterator(reader, partition);
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java
Override
public IntermediateFacetResult fetchPartitionResult(FacetArrays facetArrays, int offset)
throws IOException {
TopKFacetResult res = null;
int ordinal = taxonomyReader.getOrdinal(facetRequest.getCategoryPath());
if (ordinal != TaxonomyReader.INVALID_ORDINAL) {
double value = 0;
if (isSelfPartition(ordinal, facetArrays, offset)) {
int partitionSize = facetArrays.getArraysLength();
value = facetRequest.getValueOf(facetArrays, ordinal % partitionSize);
}
// TODO (Facet): should initial value of "residue" depend on aggregator if not sum?
MutableFacetResultNode parentResultNode =
new MutableFacetResultNode(ordinal, value);
Heap<FacetResultNode> heap = ResultSortUtils.createSuitableHeap(facetRequest);
int totalFacets = heapDescendants(ordinal, heap, parentResultNode, facetArrays, offset);
res = new TopKFacetResult(facetRequest, parentResultNode, totalFacets);
res.setHeap(heap);
}
return res;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java
Override
public IntermediateFacetResult mergeResults(IntermediateFacetResult... tmpResults) throws IOException {
int ordinal = taxonomyReader.getOrdinal(facetRequest.getCategoryPath());
MutableFacetResultNode resNode = new MutableFacetResultNode(ordinal, 0);
int totalFacets = 0;
Heap<FacetResultNode> heap = null;
// merge other results in queue
for (IntermediateFacetResult tmpFres : tmpResults) {
// cast should succeed
TopKFacetResult fres = (TopKFacetResult) tmpFres;
totalFacets += fres.getNumValidDescendants();
// set the value for the result node representing the facet request
resNode.increaseValue(fres.getFacetResultNode().getValue());
Heap<FacetResultNode> tmpHeap = fres.getHeap();
if (heap == null) {
heap = tmpHeap;
continue;
}
// bring sub results from heap of tmp res into result heap
for (int i = tmpHeap.size(); i > 0; i--) {
FacetResultNode a = heap.insertWithOverflow(tmpHeap.pop());
if (a != null) {
resNode.increaseResidue(a.getResidue());
}
}
}
TopKFacetResult res = new TopKFacetResult(facetRequest, resNode, totalFacets);
res.setHeap(heap);
return res;
}
// in lucene/facet/src/java/org/apache/lucene/facet/search/TopKFacetResultsHandler.java
Override
// label top K sub results
public void labelResult(FacetResult facetResult) throws IOException {
if (facetResult != null) { // any result to label?
FacetResultNode facetResultNode = facetResult.getFacetResultNode();
if (facetResultNode != null) { // any result to label?
facetResultNode.getLabel(taxonomyReader);
int num2label = facetRequest.getNumLabel();
for (FacetResultNode frn : facetResultNode.getSubResults()) {
if (--num2label < 0) {
break;
}
frn.getLabel(taxonomyReader);
}
}
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/enhancements/association/AssociationsPayloadIterator.java
public boolean setNextDoc(int docId) throws IOException {
ordinalToAssociationMap.clear();
boolean docContainsAssociations = false;
try {
docContainsAssociations = fetchAssociations(docId);
} catch (IOException e) {
IOException ioe = new IOException(
"An Error occured while reading a document's associations payload (docId="
+ docId + ")");
ioe.initCause(e);
throw ioe;
}
return docContainsAssociations;
}
// in lucene/facet/src/java/org/apache/lucene/facet/enhancements/association/AssociationsPayloadIterator.java
private boolean fetchAssociations(int docId) throws IOException {
// No associations at all? don't bother trying to seek the docID in the
// posting
if (!hasAssociations) {
return false;
}
// No associations for this document? Well, nothing to decode then;
// return false.
if (!associationPayloadIter.skipTo(docId)) {
return false;
}
// loop over all the values decoded from the payload in pairs.
for (;;) {
// Get the ordinal
long ordinal = associationPayloadIter.nextCategory();
// if no ordinal - it's the end of data, break the loop
if (ordinal > Integer.MAX_VALUE) {
break;
}
// get the associated value
long association = associationPayloadIter.nextCategory();
// If we're at this step - it means we have an ordinal, do we have
// an association for it?
if (association > Integer.MAX_VALUE) {
// No association!!! A Broken Pair!! PANIC!
throw new IOException(
"ERROR! Associations should come in pairs of (ordinal, value), yet this payload has an odd number of values! (docId="
+ docId + ")");
}
// Populate the map with the given ordinal and association pair
ordinalToAssociationMap.put((int) ordinal, (int) association);
}
return true;
}
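// --- illustrative example (not from the source tree) ---
// fetchAssociations() above reads the payload as a flat sequence of
// (ordinal, association) pairs. Assuming the payload iterator yields the values
// 5, 100, 9, 250 before running out, the resulting map would be
//   ordinalToAssociationMap = { 5 -> 100, 9 -> 250 }
// An odd number of values (e.g. 5, 100, 9) would trigger the "broken pair"
// IOException, since ordinal 9 would have no matching association value.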
// in lucene/facet/src/java/org/apache/lucene/facet/enhancements/association/AssociationListTokenizer.java
Override
protected void handleStartOfInput() throws IOException {
payloadStream = null;
}
// in lucene/facet/src/java/org/apache/lucene/facet/enhancements/association/AssociationListTokenizer.java
Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (categoryAttribute != null) {
AssociationProperty associationProperty = AssociationEnhancement
.getAssociationProperty(categoryAttribute);
if (associationProperty != null
&& associationProperty.hasBeenSet()) {
OrdinalProperty ordinalProperty = (OrdinalProperty) categoryAttribute
.getProperty(OrdinalProperty.class);
if (ordinalProperty == null) {
throw new IOException(
"Error: Association without ordinal");
}
if (payloadStream == null) {
payloadStream = new CategoryListPayloadStream(
new SimpleIntEncoder());
}
payloadStream.appendIntToStream(ordinalProperty
.getOrdinal());
payloadStream.appendIntToStream(associationProperty
.getAssociation());
}
}
return true;
}
if (payloadStream != null) {
termAttribute.setEmpty().append(categoryListTermText);
payload.setData(payloadStream.convertStreamToByteArray());
payloadAttribute.setPayload(payload);
payloadStream = null;
return true;
}
return false;
}
// in lucene/facet/src/java/org/apache/lucene/facet/enhancements/EnhancementsDocumentBuilder.java
Override
protected CategoryTokenizer getCategoryTokenizer(TokenStream categoryStream)
throws IOException {
return new EnhancementsCategoryTokenizer(categoryStream,
(EnhancementsIndexingParams) indexingParams);
}
// in lucene/facet/src/java/org/apache/lucene/facet/enhancements/EnhancementsPayloadIterator.java
Override
public boolean setdoc(int docId) throws IOException {
if (!super.setdoc(docId)) {
return false;
}
// read header - number of enhancements and their lengths
Position position = new Position();
nEnhancements = Vint8.decode(buffer, position);
for (int i = 0; i < nEnhancements; i++) {
enhancementLength[i] = Vint8.decode(buffer, position);
}
// set enhancements start points
enhancementStart[0] = position.pos;
for (int i = 1; i < nEnhancements; i++) {
enhancementStart[i] = enhancementStart[i - 1] + enhancementLength[i - 1];
}
return true;
}
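// --- illustrative example (not from the source tree) ---
// setdoc() above decodes a small header: the number of enhancements, then the length
// of each, after which the start offsets are the running prefix sums of those lengths.
// For example, if the header decodes nEnhancements=2 with lengths {3, 5} and the header
// ends at buffer position p, then
//   enhancementStart[0] = p
//   enhancementStart[1] = p + 3
// and the second enhancement's data occupies buffer[p+3 .. p+8).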
// in lucene/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java
Override
public ReaderPayloadProcessor getReaderProcessor(AtomicReader reader) throws IOException {
if (reader instanceof SegmentReader) {
if (workDir == ((SegmentReader) reader).directory()) {
return dirProcessor;
}
}
return null;
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java
Override
public PayloadProcessor getProcessor(String field, BytesRef bytes) throws IOException {
// TODO (Facet): don't create terms
CategoryListParams params = termMap.get(new Term(field, bytes));
if (params == null) {
return null;
}
return new FacetsPayloadProcessor(params, ordinalMap);
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java
Override
public void processPayload(BytesRef payload) throws IOException {
InputStream is = new ByteArrayInputStream(payload.bytes, payload.offset, payload.length);
decoder.reInit(is);
os.reset();
encoder.reInit(os);
long ordinal;
while ((ordinal = decoder.decode()) != IntDecoder.EOS) {
int newOrdinal = ordinalMap[(int)ordinal];
encoder.encode(newOrdinal);
}
encoder.close();
// TODO (Facet): avoid copy?
byte out[] = os.toByteArray();
payload.bytes = out;
payload.offset = 0;
payload.length = out.length;
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/CategoryContainer.java
private void writeObject(ObjectOutputStream out) throws IOException {
out.defaultWriteObject();
// write the number of categories
out.writeInt(size());
// write the category attributes
for (CategoryAttribute ca : this) {
serializeCategoryAttribute(out, ca);
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/CategoryContainer.java
protected void serializeCategoryAttribute(ObjectOutputStream out,
CategoryAttribute ca) throws IOException {
out.writeObject(ca.getCategoryPath());
Set<Class<? extends CategoryProperty>> propertyClasses = ca.getPropertyClasses();
if (propertyClasses != null) {
out.writeInt(propertyClasses.size());
for (Class<? extends CategoryProperty> clazz : propertyClasses) {
out.writeObject(ca.getProperty(clazz));
}
} else {
out.writeInt(0);
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/CategoryContainer.java
private void readObject(ObjectInputStream in) throws IOException,
ClassNotFoundException {
in.defaultReadObject();
map = new HashMap<CategoryPath, CategoryAttribute>();
int size = in.readInt();
for (int i = 0; i < size; i++) {
deserializeCategoryAttribute(in);
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/CategoryContainer.java
protected void deserializeCategoryAttribute(ObjectInputStream in)
throws IOException, ClassNotFoundException {
CategoryPath cp = (CategoryPath) in.readObject();
int nProperties = in.readInt();
if (nProperties == 0) {
addCategory(cp);
} else {
for (int j = 0; j < nProperties; j++) {
CategoryProperty property = (CategoryProperty) in.readObject();
addCategory(cp, property);
}
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/streaming/CategoryAttributesStream.java
Override
public final boolean incrementToken() throws IOException {
if (iterator == null) {
if (iterable == null) {
return false;
}
iterator = iterable.iterator();
}
if (iterator.hasNext()) {
categoryAttribute.set(iterator.next());
return true;
}
return false;
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/streaming/CategoryTokenizer.java
Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (categoryAttribute != null && categoryAttribute.getCategoryPath() != null) {
CategoryPath categoryPath = categoryAttribute.getCategoryPath();
char[] termBuffer = termAttribute.resizeBuffer(categoryPath.charsNeededForFullPath());
int nChars = indexingParams.drillDownTermText(categoryPath, termBuffer);
termAttribute.setLength(nChars);
setPayload();
}
return true;
}
return false;
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/streaming/CategoryListTokenizer.java
protected void handleStartOfInput() throws IOException {
// In this class, we do nothing.
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/streaming/CategoryListTokenizer.java
protected void handleEndOfInput() throws IOException {
// In this class, we do nothing.
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/streaming/CategoryListTokenizer.java
Override
public void reset() throws IOException {
super.reset();
handleStartOfInput();
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/streaming/CategoryParentsStream.java
Override
public final boolean incrementToken() throws IOException {
if (this.categoryAttribute.getCategoryPath() != null) {
// try adding the parent of the current category to the stream
clearCategoryProperties();
boolean added = false;
// set the parent's ordinal; if it is illegal, set -1
int ordinal = this.ordinalProperty.getOrdinal();
if (ordinal != -1) {
ordinal = this.taxonomyWriter.getParent(ordinal);
if (this.ordinalPolicy.shouldAdd(ordinal)) {
this.ordinalProperty.setOrdinal(ordinal);
try {
this.categoryAttribute.addProperty(ordinalProperty);
} catch (UnsupportedOperationException e) {
throw new IOException(e.getLocalizedMessage());
}
added = true;
} else {
this.ordinalProperty.setOrdinal(-1);
}
}
// set the parent's category path; if it is illegal, set null
CategoryPath cp = this.categoryAttribute.getCategoryPath();
if (cp != null) {
cp.trim(1);
// if ordinal added, must also have category paths
if (added || this.pathPolicy.shouldAdd(cp)) {
this.categoryAttribute.setCategoryPath(cp);
added = true;
} else {
this.categoryAttribute.clear();
}
}
if (added) {
// a legal parent exists
return true;
}
}
// no more parents - get new category
if (input.incrementToken()) {
int ordinal = taxonomyWriter.addCategory(this.categoryAttribute.getCategoryPath());
this.ordinalProperty.setOrdinal(ordinal);
try {
this.categoryAttribute.addProperty(this.ordinalProperty);
} catch (UnsupportedOperationException e) {
throw new IOException(e.getLocalizedMessage());
}
return true;
}
return false;
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/streaming/CountingListTokenizer.java
Override
protected void handleStartOfInput() throws IOException {
payloadStreamsByName.clear();
payloadStreamIterator = null;
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/streaming/CountingListTokenizer.java
Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (this.categoryAttribute != null) {
OrdinalProperty ordinalProperty = (OrdinalProperty) this.categoryAttribute
.getProperty(OrdinalProperty.class);
if (ordinalProperty != null && legalCategory()) {
CategoryPath categoryPath = this.categoryAttribute
.getCategoryPath();
int ordinal = ordinalProperty.getOrdinal();
CategoryListPayloadStream payloadStream = getPayloadStream(
categoryPath, ordinal);
int partitionSize = indexingParams.getPartitionSize();
payloadStream.appendIntToStream(ordinal % partitionSize);
}
}
return true;
}
if (this.payloadStreamIterator == null) {
this.handleEndOfInput();
this.payloadStreamIterator = this.payloadStreamsByName.entrySet()
.iterator();
}
if (this.payloadStreamIterator.hasNext()) {
Entry<String, CategoryListPayloadStream> entry = this.payloadStreamIterator
.next();
String countingListName = entry.getKey();
int length = countingListName.length();
this.termAttribute.resizeBuffer(length);
countingListName.getChars(0, length, termAttribute.buffer(), 0);
this.termAttribute.setLength(length);
CategoryListPayloadStream payloadStream = entry.getValue();
payload.setData(payloadStream.convertStreamToByteArray());
this.payloadAttribute.setPayload(payload);
return true;
}
return false;
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/streaming/CountingListTokenizer.java
protected CategoryListPayloadStream getPayloadStream(
CategoryPath categoryPath, int ordinal) throws IOException {
CategoryListParams clParams = this.indexingParams.getCategoryListParams(categoryPath);
String name = PartitionsUtils.partitionNameByOrdinal(indexingParams, clParams, ordinal);
CategoryListPayloadStream fps = payloadStreamsByName.get(name);
if (fps == null) {
IntEncoder encoder = clParams.createEncoder();
fps = new CategoryListPayloadStream(encoder);
payloadStreamsByName.put(name, fps);
}
return fps;
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/CategoryDocumentBuilder.java
public CategoryDocumentBuilder setCategoryPaths(
Iterable<CategoryPath> categoryPaths) throws IOException {
if (categoryPaths == null) {
fieldList.clear();
return this;
}
return setCategories(new CategoryAttributesIterable(categoryPaths));
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/CategoryDocumentBuilder.java
public CategoryDocumentBuilder setCategories(
Iterable<CategoryAttribute> categories) throws IOException {
fieldList.clear();
if (categories == null) {
return this;
}
// build a mapping from field name to its list of facets, as different facets
// could be added to different category lists on different fields
fillCategoriesMap(categories);
// creates a different stream for each different field
for (Entry<String, List<CategoryAttribute>> e : categoriesMap
.entrySet()) {
// create a category attributes stream for the array of facets
CategoryAttributesStream categoryAttributesStream = new CategoryAttributesStream(
e.getValue());
// Set a suitable {@link TokenStream} using
// CategoryParentsStream, followed by CategoryListTokenizer and
// CategoryTokenizer composition (the ordering of the last two is
// not mandatory).
CategoryParentsStream parentsStream = (CategoryParentsStream) getParentsStream(categoryAttributesStream);
CategoryListTokenizer categoryListTokenizer = getCategoryListTokenizer(parentsStream);
CategoryTokenizer stream = getCategoryTokenizer(categoryListTokenizer);
// Finally creating a suitable field with stream and adding it to a
// master field-list, used during the build process (see
// super.build())
FieldType ft = new FieldType(TextField.TYPE_UNSTORED);
ft.setOmitNorms(true);
fieldList.add(new Field(e.getKey(), stream, ft));
}
return this;
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/CategoryDocumentBuilder.java
protected void fillCategoriesMap(Iterable<CategoryAttribute> categories)
throws IOException {
categoriesMap.clear();
// for-each category
for (CategoryAttribute category : categories) {
// extracting the field-name to which this category belongs
String fieldName = indexingParams.getCategoryListParams(
category.getCategoryPath()).getTerm().field();
// getting the list of categories which belong to that field
List<CategoryAttribute> list = categoriesMap.get(fieldName);
// if no such list exists
if (list == null) {
// adding a new one to the map
list = new ArrayList<CategoryAttribute>();
categoriesMap.put(fieldName, list);
}
// adding the new category to the list
list.add(category.clone());
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/CategoryDocumentBuilder.java
protected CategoryTokenizer getCategoryTokenizer(TokenStream categoryStream)
throws IOException {
return new CategoryTokenizer(categoryStream, indexingParams);
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/CategoryListPayloadStream.java
public void appendIntToStream(int intValue) throws IOException {
encoder.encode(intValue);
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/CategoryListPayloadStream.java
public void reset() throws IOException {
encoder.close();
baos.reset();
encoder.reInit(baos);
}
// in lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java
public CategoryListIterator createCategoryListIterator(IndexReader reader,
int partition) throws IOException {
String categoryListTermStr = PartitionsUtils.partitionName(this, partition);
Term payloadTerm = new Term(term.field(), categoryListTermStr);
return new PayloadIntDecodingIterator(reader, payloadTerm,
createEncoder().createMatchingDecoder());
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/MultiCategoryListIterator.java
public boolean init() throws IOException {
for (CategoryListIterator cli : iterators) {
if (cli.init()) {
validIterators.add(cli);
}
}
return !validIterators.isEmpty();
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/MultiCategoryListIterator.java
public long nextCategory() throws IOException {
while (!perDocValidIterators.isEmpty()) {
long value = perDocValidIterators.get(0).nextCategory();
if (value <= Integer.MAX_VALUE) {
return value;
}
perDocValidIterators.remove(0);
}
return 0x100000000L;
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/MultiCategoryListIterator.java
public boolean skipTo(int docId) throws IOException {
perDocValidIterators.clear();
for (CategoryListIterator cli : validIterators) {
if (cli.skipTo(docId)) {
perDocValidIterators.add(cli);
}
}
return !perDocValidIterators.isEmpty();
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
public final static ScoredDocIDs getComplementSet(final ScoredDocIDs docids, final IndexReader reader)
throws IOException {
final int maxDoc = reader.maxDoc();
DocIdSet docIdSet = docids.getDocIDs();
final OpenBitSet complement;
if (docIdSet instanceof OpenBitSet) {
// That is the most common case, if ScoredDocIdsCollector was used.
complement = ((OpenBitSet) docIdSet).clone();
} else {
complement = new OpenBitSetDISI(docIdSet.iterator(), maxDoc);
}
complement.flip(0, maxDoc);
// Remove all Deletions from the complement set
clearDeleted(reader, complement);
return createScoredDocIds(complement, maxDoc);
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
private static void clearDeleted(final IndexReader reader,
final OpenBitSet set) throws IOException {
// If there are no deleted docs
if (!reader.hasDeletions()) {
return; // return immediately
}
Bits bits = MultiFields.getLiveDocs(reader);
DocIdSetIterator it = set.iterator();
int doc = DocIdSetIterator.NO_MORE_DOCS;
while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (!bits.get(doc)) {
set.fastClear(doc);
}
}
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
public static final ScoredDocIDs createScoredDocIDsSubset(final ScoredDocIDs allDocIds,
final int[] sampleSet) throws IOException {
// sort so that we can scan docs in order
final int[] docids = sampleSet;
Arrays.sort(docids);
final float[] scores = new float[docids.length];
// fetch scores and compute size
ScoredDocIDsIterator it = allDocIds.iterator();
int n = 0;
while (it.next() && n < docids.length) {
int doc = it.getDocID();
if (doc == docids[n]) {
scores[n] = it.getScore();
++n;
}
}
final int size = n;
return new ScoredDocIDs() {
public DocIdSet getDocIDs() {
return new DocIdSet() {
@Override
public boolean isCacheable() { return true; }
@Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
private int next = -1;
@Override
public int advance(int target) throws IOException {
while (next < size && docids[next++] < target) {
}
return next == size ? NO_MORE_DOCS : docids[next];
}
@Override
public int docID() {
return docids[next];
}
@Override
public int nextDoc() throws IOException {
if (++next >= size) {
return NO_MORE_DOCS;
}
return docids[next];
}
};
}
};
}
public ScoredDocIDsIterator iterator() throws IOException {
return new ScoredDocIDsIterator() {
int next = -1;
public boolean next() { return ++next < size; }
public float getScore() { return scores[next]; }
public int getDocID() { return docids[next]; }
};
}
public int size() { return size; }
};
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
public DocIdSet getDocIDs() {
return new DocIdSet() {
@Override
public boolean isCacheable() { return true; }
@Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
private int next = -1;
@Override
public int advance(int target) throws IOException {
while (next < size && docids[next++] < target) {
}
return next == size ? NO_MORE_DOCS : docids[next];
}
@Override
public int docID() {
return docids[next];
}
@Override
public int nextDoc() throws IOException {
if (++next >= size) {
return NO_MORE_DOCS;
}
return docids[next];
}
};
}
};
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
private int next = -1;
@Override
public int advance(int target) throws IOException {
while (next < size && docids[next++] < target) {
}
return next == size ? NO_MORE_DOCS : docids[next];
}
@Override
public int docID() {
return docids[next];
}
@Override
public int nextDoc() throws IOException {
if (++next >= size) {
return NO_MORE_DOCS;
}
return docids[next];
}
};
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
Override
public int advance(int target) throws IOException {
while (next < size && docids[next++] < target) {
}
return next == size ? NO_MORE_DOCS : docids[next];
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
Override
public int nextDoc() throws IOException {
if (++next >= size) {
return NO_MORE_DOCS;
}
return docids[next];
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
public ScoredDocIDsIterator iterator() throws IOException {
return new ScoredDocIDsIterator() {
int next = -1;
public boolean next() { return ++next < size; }
public float getScore() { return scores[next]; }
public int getDocID() { return docids[next]; }
};
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
public static final ScoredDocIDs createScoredDocIds(final DocIdSet docIdSet, final int maxDoc) {
return new ScoredDocIDs() {
private int size = -1;
public DocIdSet getDocIDs() { return docIdSet; }
public ScoredDocIDsIterator iterator() throws IOException {
final DocIdSetIterator docIterator = docIdSet.iterator();
return new ScoredDocIDsIterator() {
public boolean next() {
try {
return docIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public float getScore() { return DEFAULT_SCORE; }
public int getDocID() { return docIterator.docID(); }
};
}
public int size() {
// lazy size computation
if (size < 0) {
OpenBitSetDISI openBitSetDISI;
try {
openBitSetDISI = new OpenBitSetDISI(docIdSet.iterator(), maxDoc);
} catch (IOException e) {
throw new RuntimeException(e);
}
size = (int) openBitSetDISI.cardinality();
}
return size;
}
};
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
public ScoredDocIDsIterator iterator() throws IOException {
final DocIdSetIterator docIterator = docIdSet.iterator();
return new ScoredDocIDsIterator() {
public boolean next() {
try {
return docIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS;
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public float getScore() { return DEFAULT_SCORE; }
public int getDocID() { return docIterator.docID(); }
};
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
public DocIdSet getDocIDs() {
return new DocIdSet() {
@Override
public boolean isCacheable() {
return true;
}
@Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
private int next = -1;
@Override
public int advance(int target) throws IOException {
if (target <= next) {
target = next + 1;
}
return next = target >= maxDoc ? NO_MORE_DOCS
: target;
}
@Override
public int docID() {
return next;
}
@Override
public int nextDoc() throws IOException {
return ++next < maxDoc ? next : NO_MORE_DOCS;
}
};
}
};
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
private int next = -1;
@Override
public int advance(int target) throws IOException {
if (target <= next) {
target = next + 1;
}
return next = target >= maxDoc ? NO_MORE_DOCS
: target;
}
@Override
public int docID() {
return next;
}
@Override
public int nextDoc() throws IOException {
return ++next < maxDoc ? next : NO_MORE_DOCS;
}
};
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
Override
public int advance(int target) throws IOException {
if (target <= next) {
target = next + 1;
}
return next = target >= maxDoc ? NO_MORE_DOCS
: target;
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
Override
public int nextDoc() throws IOException {
return ++next < maxDoc ? next : NO_MORE_DOCS;
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
public DocIdSet getDocIDs() {
return new DocIdSet() {
@Override
public boolean isCacheable() {
return true;
}
@Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
final Bits liveDocs = MultiFields.getLiveDocs(reader);
private int next = -1;
@Override
public int advance(int target) throws IOException {
if (target > next) {
next = target - 1;
}
return nextDoc();
}
@Override
public int docID() {
return next;
}
@Override
public int nextDoc() throws IOException {
do {
++next;
} while (next < maxDoc && liveDocs != null && !liveDocs.get(next));
return next < maxDoc ? next : NO_MORE_DOCS;
}
};
}
};
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
Override
public DocIdSetIterator iterator() throws IOException {
return new DocIdSetIterator() {
final Bits liveDocs = MultiFields.getLiveDocs(reader);
private int next = -1;
@Override
public int advance(int target) throws IOException {
if (target > next) {
next = target - 1;
}
return nextDoc();
}
@Override
public int docID() {
return next;
}
@Override
public int nextDoc() throws IOException {
do {
++next;
} while (next < maxDoc && liveDocs != null && !liveDocs.get(next));
return next < maxDoc ? next : NO_MORE_DOCS;
}
};
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
Override
public int advance(int target) throws IOException {
if (target > next) {
next = target - 1;
}
return nextDoc();
}
// in lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java
Override
public int nextDoc() throws IOException {
do {
++next;
} while (next < maxDoc && liveDocs != null && !liveDocs.get(next));
return next < maxDoc ? next : NO_MORE_DOCS;
}
// in lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8IntEncoder.java
Override
public void encode(int value) throws IOException {
if ((value & ~0x7F) == 0) {
out.write(value);
} else if ((value & ~0x3FFF) == 0) {
out.write(0x80 | (value >> 7));
out.write(0x7F & value);
} else if ((value & ~0x1FFFFF) == 0) {
out.write(0x80 | (value >> 14));
out.write(0x80 | (value >> 7));
out.write(0x7F & value);
} else if ((value & ~0xFFFFFFF) == 0) {
out.write(0x80 | (value >> 21));
out.write(0x80 | (value >> 14));
out.write(0x80 | (value >> 7));
out.write(0x7F & value);
} else {
out.write(0x80 | (value >> 28));
out.write(0x80 | (value >> 21));
out.write(0x80 | (value >> 14));
out.write(0x80 | (value >> 7));
out.write(0x7F & value);
}
}
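// --- illustrative example (not from the source tree) ---
// The encoder above writes 7 payload bits per byte, setting the high bit on every
// byte except the last. For example, encoding the value 300:
//   300 = 0b100101100  ->  bytes 0x82 (0x80 | (300 >> 7)) and 0x2C (300 & 0x7F)
// VInt8IntDecoder (below) reverses this: it ORs in 7 bits per byte and stops at the
// first byte whose high bit is clear, reconstructing 300.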
// in lucene/facet/src/java/org/apache/lucene/util/encoding/IntEncoder.java
public void close() throws IOException {
if (out != null) {
out.close();
}
}
// in lucene/facet/src/java/org/apache/lucene/util/encoding/VInt8IntDecoder.java
Override
public long decode() throws IOException {
int value = 0;
while (true) {
int first = in.read();
if (first < 0) {
if (!legalEOS) {
throw new IOException("Unexpected End-Of-Stream");
}
return EOS;
}
value |= first & 0x7F;
if ((first & 0x80) == 0) {
legalEOS = true;
return value;
}
legalEOS = false;
value <<= 7;
}
}
// in lucene/facet/src/java/org/apache/lucene/util/encoding/SortingIntEncoder.java
Override
public void close() throws IOException {
if (index == 0) {
return;
}
Arrays.sort(set, 0, index);
for (int i = 0; i < index; i++) {
encoder.encode(set[i]);
}
encoder.close();
index = 0;
super.close();
}
// in lucene/facet/src/java/org/apache/lucene/util/encoding/SortingIntEncoder.java
Override
public void encode(int value) throws IOException {
if (index == set.length) {
int[] newSet = new int[(int) (set.length * grow)];
System.arraycopy(set, 0, newSet, 0, set.length);
set = newSet;
}
set[index++] = value;
}
// in lucene/facet/src/java/org/apache/lucene/util/encoding/UniqueValuesIntEncoder.java
Override
public void encode(int value) throws IOException {
if (prev != value) {
encoder.encode(value);
prev = value;
}
}
// in lucene/facet/src/java/org/apache/lucene/util/encoding/FourFlagsIntDecoder.java
Override
public long decode() throws IOException {
// If we've decoded 4 integers, read the next indicator.
if ((ordinal & 0x3) == 0) {
indicator = in.read();
if (indicator < 0) {
return EOS;
}
ordinal = 0;
}
byte decodeVal = decodeTable[indicator][ordinal++];
if (decodeVal == 0) {
// decode the value from the stream.
long decode = decoder.decode();
return decode == EOS ? EOS : decode + 4;
}
return decodeVal;
}
// in lucene/facet/src/java/org/apache/lucene/util/encoding/ChunksIntEncoder.java
protected void encodeChunk() throws IOException {
out.write(indicator);
for (int i = 0; i < encodeQueueSize; ++i) {
encoder.encode(encodeQueue[i]);
}
encodeQueueSize = 0;
ordinal = 0;
indicator = 0;
}
// in lucene/facet/src/java/org/apache/lucene/util/encoding/ChunksIntEncoder.java
Override
public void close() throws IOException {
if (ordinal != 0) {
encodeChunk();
}
encoder.close();
super.close();
}
// in lucene/facet/src/java/org/apache/lucene/util/encoding/NOnesIntDecoder.java
Override
public long decode() throws IOException {
// If we read '2', we should return n '1's.
if (onesCounter > 0) {
--onesCounter;
return 1;
}
long decode = super.decode();
if (decode == 1) {
return 1;
}
if (decode == 2) {
onesCounter = n - 1;
return 1;
}
if (decode == 3) {
return 2;
}
return decode == EOS ? EOS : decode - 1;
}
// in lucene/facet/src/java/org/apache/lucene/util/encoding/DGapIntDecoder.java
Override
public long decode() throws IOException {
long decode = decoder.decode();
if (decode == EOS) {
return EOS;
}
return prev += decode;
}
// in lucene/facet/src/java/org/apache/lucene/util/encoding/SimpleIntEncoder.java
Override
public void encode(int value) throws IOException {
out.write(value >>> 24);
out.write((value >> 16) & 0xFF);
out.write((value >> 8) & 0xFF);
out.write(value & 0xFF);
}
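// --- illustrative example (not from the source tree) ---
// SimpleIntEncoder writes each int as exactly 4 big-endian bytes; for example the
// value 0x01020304 becomes the byte sequence 01 02 03 04. SimpleIntDecoder (below)
// requires exactly 4 bytes per value, which is why it throws StreamCorruptedException
// on a truncated trailing value.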
// in lucene/facet/src/java/org/apache/lucene/util/encoding/SimpleIntDecoder.java
Override
public long decode() throws IOException {
// this decoder needs exactly 4 bytes to decode an int; anything less (but more than 0) is an error
int offset = 0;
while (offset < 4) {
int nRead = in.read(buffer, offset, 4 - offset);
if (nRead == -1) {
if (offset > 0) {
throw new StreamCorruptedException(
"Need 4 bytes for decoding an int, got only " + offset);
}
return EOS;
}
offset += nRead;
}
int v = buffer[3] & 0xff;
v |= (buffer[2] << 8) & 0xff00;
v |= (buffer[1] << 16) & 0xff0000;
v |= (buffer[0] << 24) & 0xff000000;
return v;
}
// in lucene/facet/src/java/org/apache/lucene/util/encoding/FourFlagsIntEncoder.java
Override
public void encode(int data) throws IOException {
if (data <= 3) {
indicator |= encodeTable[data][ordinal];
} else {
encodeQueue[encodeQueueSize++] = data - 4;
}
++ordinal;
// If 4 values were encoded thus far, 'flush' them including the indicator.
if ((ordinal & 0x3) == 0) {
encodeChunk();
}
}
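// --- illustrative sketch (not from the source tree) ---
// FourFlagsIntEncoder packs 4 values per chunk: values 1..3 are recorded only in the
// 2-bit-per-value indicator byte, while larger values are queued as (value - 4) for the
// wrapped encoder; FourFlagsIntDecoder (above) adds the 4 back. For example, encoding
// the chunk {1, 2, 7, 3}: the indicator marks its slots as 1, 2, "escaped", 3, and the
// wrapped encoder receives only the single value 3 (= 7 - 4). The exact indicator bit
// layout comes from encodeTable and is not spelled out here.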
// in lucene/facet/src/java/org/apache/lucene/util/encoding/NOnesIntEncoder.java
Override
public void close() throws IOException {
// We might have ones in our buffer; encode them as necessary.
while (onesCounter-- > 0) {
super.encode(1);
}
super.close();
}
// in lucene/facet/src/java/org/apache/lucene/util/encoding/NOnesIntEncoder.java
Override
public void encode(int value) throws IOException {
if (value == 1) {
// Increment the number of consecutive ones seen so far
if (++onesCounter == n) {
super.encode(2);
onesCounter = 0;
}
return;
}
// If it's not one - there might have been ones we had to encode prior to
// this value
while (onesCounter > 0) {
--onesCounter;
super.encode(1);
}
// encode value + 1 --> the translation.
super.encode(value + 1);
}
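// --- illustrative example (not from the source tree) ---
// NOnesIntEncoder exploits long runs of 1s: values other than 1 are shifted up by one
// (so a literal 2 is written as 3), isolated 1s are written as-is, and a run of exactly
// n consecutive 1s is written as the single value 2. With n = 4, the input sequence
// {1, 1, 1, 1, 5} is passed to the wrapped encoder as {2, 6}, and NOnesIntDecoder
// (above) expands the 2 back into four 1s and maps the 6 back to 5.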
// in lucene/facet/src/java/org/apache/lucene/util/encoding/EightFlagsIntDecoder.java
Override
public long decode() throws IOException {
// If we've decoded 8 integers, read the next indicator.
if ((ordinal & 0x7) == 0) {
indicator = in.read();
if (indicator < 0) {
return EOS;
}
ordinal = 0;
}
if (decodeTable[indicator][ordinal++] == 0) {
// decode the value from the stream.
long decode = decoder.decode();
return decode == EOS ? EOS : decode + 2;
}
return 1;
}
// in lucene/facet/src/java/org/apache/lucene/util/encoding/DGapIntEncoder.java
Override
public void encode(int value) throws IOException {
encoder.encode(value - prev);
prev = value;
}
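// --- illustrative example (not from the source tree) ---
// DGapIntEncoder assumes its input is sorted ascending (e.g. after SortingIntEncoder)
// and forwards only the deltas to the wrapped encoder: the ordinals {5, 9, 14} are
// passed on as the gaps {5, 4, 5}. DGapIntDecoder (above) keeps a running `prev` and
// adds each decoded gap back onto it to recover the original ordinals.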
// in lucene/facet/src/java/org/apache/lucene/util/encoding/IntEncoderFilter.java
Override
public void close() throws IOException {
// There is no need to call super.close(), since we don't pass the output
// stream to super.
encoder.close();
}
// in lucene/facet/src/java/org/apache/lucene/util/encoding/EightFlagsIntEncoder.java
Override
public void encode(int data) throws IOException {
if (data == 1) {
indicator |= encodeTable[ordinal];
} else {
encodeQueue[encodeQueueSize++] = data - 2;
}
++ordinal;
// If 8 values were encoded thus far, 'flush' them including the indicator.
if ((ordinal & 0x7) == 0) {
encodeChunk();
}
}
// in lucene/facet/src/java/org/apache/lucene/util/UnsafeByteArrayInputStream.java
Override
public int available() throws IOException {
return upperLimit - index;
}
// in lucene/facet/src/java/org/apache/lucene/util/UnsafeByteArrayInputStream.java
Override
public int read() throws IOException {
return index < upperLimit ? buffer[index++] & 0xff : -1;
}
// in lucene/facet/src/java/org/apache/lucene/util/UnsafeByteArrayInputStream.java
Override
public void reset() throws IOException {
index = markIndex;
}
// in lucene/facet/src/java/org/apache/lucene/util/UnsafeByteArrayOutputStream.java
Override
public void write(int value) throws IOException {
if (index >= buffer.length) {
grow(buffer.length << 1);
}
buffer[index++] = (byte) value;
}
// in lucene/facet/src/java/org/apache/lucene/util/UnsafeByteArrayOutputStream.java
Override
public void write(byte[] b, int off, int len) throws IOException {
// If there's not enough space for the data
int targetLength = index + len;
if (targetLength >= buffer.length) {
// Calculating the new required length of the array, keeping the array
// size a power of 2 if it was initialized like that.
int newlen = buffer.length;
while ((newlen <<= 1) < targetLength) {}
grow(newlen);
}
// Now that we have enough spare space, copy the rest of the data
System.arraycopy(b, off, buffer, index, len);
// Update the index to the next available position.
index += len;
}
// in lucene/facet/src/java/org/apache/lucene/util/Vint8.java
public static void encode(int number, OutputStream out) throws IOException {
if ((number & ~0x7F) == 0) {
out.write(number);
} else if ((number & ~0x3FFF) == 0) {
out.write(0x80 | (number >> 7));
out.write(0x7F & number);
} else if ((number & ~0x1FFFFF) == 0) {
out.write(0x80 | (number >> 14));
out.write(0x80 | (number >> 7));
out.write(0x7F & number);
} else if ((number & ~0xFFFFFFF) == 0) {
out.write(0x80 | (number >> 21));
out.write(0x80 | (number >> 14));
out.write(0x80 | (number >> 7));
out.write(0x7F & number);
} else {
out.write(0x80 | (number >> 28));
out.write(0x80 | (number >> 21));
out.write(0x80 | (number >> 14));
out.write(0x80 | (number >> 7));
out.write(0x7F & number);
}
}
// in lucene/facet/src/java/org/apache/lucene/util/Vint8.java
public static int decode(InputStream in) throws IOException {
int value = 0;
while (true) {
int first = in.read();
if (first < 0) {
throw new EOFException();
}
value |= first & 0x7F;
if ((first & 0x80) == 0) {
return value;
}
value <<= 7;
}
}
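// --- illustrative usage sketch (not from the source tree) ---
// A minimal round trip through the Vint8 helpers above, using only the
// encode(int, OutputStream) / decode(InputStream) overloads shown in this listing;
// the helper method name is hypothetical.
static int vint8RoundTrip(int value) throws java.io.IOException {
java.io.ByteArrayOutputStream out = new java.io.ByteArrayOutputStream();
Vint8.encode(value, out);                                  // e.g. 300 -> bytes 0x82, 0x2C
java.io.InputStream in = new java.io.ByteArrayInputStream(out.toByteArray());
return Vint8.decode(in);                                   // returns the original value
}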
// in lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java
public static void merge(Directory srcIndexDir, Directory srcTaxDir,
Directory destIndexDir, Directory destTaxDir) throws IOException {
IndexWriter destIndexWriter = new IndexWriter(destIndexDir,
new IndexWriterConfig(ExampleUtils.EXAMPLE_VER, null));
DirectoryTaxonomyWriter destTaxWriter = new DirectoryTaxonomyWriter(destTaxDir);
merge(srcIndexDir, srcTaxDir, new MemoryOrdinalMap(), destIndexWriter, destTaxWriter);
destTaxWriter.close();
destIndexWriter.close();
}
// in lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java
public static void merge(Directory srcIndexDir, Directory srcTaxDir,
IndexWriter destIndexWriter,
DirectoryTaxonomyWriter destTaxWriter) throws IOException {
merge(srcIndexDir, srcTaxDir, new MemoryOrdinalMap(), destIndexWriter, destTaxWriter);
}
// in lucene/facet/src/examples/org/apache/lucene/facet/example/merge/TaxonomyMergeUtils.java
public static void merge(Directory srcIndexDir, Directory srcTaxDir,
OrdinalMap map, IndexWriter destIndexWriter,
DirectoryTaxonomyWriter destTaxWriter) throws IOException {
// merge the taxonomies
destTaxWriter.addTaxonomy(srcTaxDir, map);
PayloadProcessorProvider payloadProcessor = new FacetsPayloadProcessorProvider(
srcIndexDir, map.getMap(), new DefaultFacetIndexingParams());
destIndexWriter.setPayloadProcessorProvider(payloadProcessor);
IndexReader reader = DirectoryReader.open(srcIndexDir);
try {
destIndexWriter.addIndexes(reader);
// commit changes to taxonomy and index respectively.
destTaxWriter.commit();
destIndexWriter.commit();
} finally {
reader.close();
}
}
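// --- illustrative usage sketch (not from the source tree) ---
// Calling the 4-argument merge() above copies one facet index + taxonomy pair into
// another; the directory paths here are hypothetical:
//   Directory srcIndexDir  = FSDirectory.open(new File("src-index"));
//   Directory srcTaxDir    = FSDirectory.open(new File("src-taxo"));
//   Directory destIndexDir = FSDirectory.open(new File("dest-index"));
//   Directory destTaxDir   = FSDirectory.open(new File("dest-taxo"));
//   TaxonomyMergeUtils.merge(srcIndexDir, srcTaxDir, destIndexDir, destTaxDir);
// This overload opens and closes the destination IndexWriter and
// DirectoryTaxonomyWriter itself, as shown in its body above.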
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractSecondPassGroupingCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
for (SearchGroupDocs<GROUP_VALUE_TYPE> group : groupMap.values()) {
group.collector.setScorer(scorer);
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractSecondPassGroupingCollector.java
Override
public void collect(int doc) throws IOException {
totalHitCount++;
SearchGroupDocs<GROUP_VALUE_TYPE> group = retrieveGroup(doc);
if (group != null) {
totalGroupedHitCount++;
group.collector.collect(doc);
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractSecondPassGroupingCollector.java
Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
//System.out.println("SP.setNextReader");
for (SearchGroupDocs<GROUP_VALUE_TYPE> group : groupMap.values()) {
group.collector.setNextReader(readerContext);
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
for (FieldComparator<?> comparator : comparators) {
comparator.setScorer(scorer);
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java
Override
public void collect(int doc) throws IOException {
//System.out.println("FP.collect doc=" + doc);
// If orderedGroups != null we already have collected N groups and
// can short circuit by comparing this document to the bottom group,
// without having to find what group this document belongs to.
// Even if this document belongs to a group in the top N, we'll know that
// we don't have to update that group.
// Downside: if the number of unique groups is very low, this is
// wasted effort as we will most likely be updating an existing group.
if (orderedGroups != null) {
for (int compIDX = 0;; compIDX++) {
final int c = reversed[compIDX] * comparators[compIDX].compareBottom(doc);
if (c < 0) {
// Definitely not competitive. So don't even bother to continue
return;
} else if (c > 0) {
// Definitely competitive.
break;
} else if (compIDX == compIDXEnd) {
// Here c=0. If we're at the last comparator, this doc is not
// competitive, since docs are visited in doc Id order, which means
// this doc cannot compete with any other document in the queue.
return;
}
}
}
// TODO: should we add option to mean "ignore docs that
// don't have the group field" (instead of stuffing them
// under null group)?
final GROUP_VALUE_TYPE groupValue = getDocGroupValue(doc);
final CollectedSearchGroup<GROUP_VALUE_TYPE> group = groupMap.get(groupValue);
if (group == null) {
// First time we are seeing this group, or, we've seen
// it before but it fell out of the top N and is now
// coming back
if (groupMap.size() < topNGroups) {
// Still in startup transient: we have not
// seen enough unique groups to start pruning them;
// just keep collecting them
// Add a new CollectedSearchGroup:
CollectedSearchGroup<GROUP_VALUE_TYPE> sg = new CollectedSearchGroup<GROUP_VALUE_TYPE>();
sg.groupValue = copyDocGroupValue(groupValue, null);
sg.comparatorSlot = groupMap.size();
sg.topDoc = docBase + doc;
for (FieldComparator<?> fc : comparators) {
fc.copy(sg.comparatorSlot, doc);
}
groupMap.put(sg.groupValue, sg);
if (groupMap.size() == topNGroups) {
// End of startup transient: we now have max
// number of groups; from here on we will drop
// bottom group when we insert new one:
buildSortedSet();
}
return;
}
// We already tested that the document is competitive, so replace
// the bottom group with this new group.
final CollectedSearchGroup<GROUP_VALUE_TYPE> bottomGroup = orderedGroups.pollLast();
assert orderedGroups.size() == topNGroups -1;
groupMap.remove(bottomGroup.groupValue);
// reuse the removed CollectedSearchGroup
bottomGroup.groupValue = copyDocGroupValue(groupValue, bottomGroup.groupValue);
bottomGroup.topDoc = docBase + doc;
for (FieldComparator<?> fc : comparators) {
fc.copy(bottomGroup.comparatorSlot, doc);
}
groupMap.put(bottomGroup.groupValue, bottomGroup);
orderedGroups.add(bottomGroup);
assert orderedGroups.size() == topNGroups;
final int lastComparatorSlot = orderedGroups.last().comparatorSlot;
for (FieldComparator<?> fc : comparators) {
fc.setBottom(lastComparatorSlot);
}
return;
}
// Update existing group:
for (int compIDX = 0;; compIDX++) {
final FieldComparator<?> fc = comparators[compIDX];
fc.copy(spareSlot, doc);
final int c = reversed[compIDX] * fc.compare(group.comparatorSlot, spareSlot);
if (c < 0) {
// Definitely not competitive.
return;
} else if (c > 0) {
// Definitely competitive; set remaining comparators:
for (int compIDX2=compIDX+1; compIDX2<comparators.length; compIDX2++) {
comparators[compIDX2].copy(spareSlot, doc);
}
break;
} else if (compIDX == compIDXEnd) {
// Here c=0. If we're at the last comparator, this doc is not
// competitive, since docs are visited in doc Id order, which means
// this doc cannot compete with any other document in the queue.
return;
}
}
// Remove before updating the group since lookup is done via comparators
// TODO: optimize this
final CollectedSearchGroup<GROUP_VALUE_TYPE> prevLast;
if (orderedGroups != null) {
prevLast = orderedGroups.last();
orderedGroups.remove(group);
assert orderedGroups.size() == topNGroups-1;
} else {
prevLast = null;
}
group.topDoc = docBase + doc;
// Swap slots
final int tmp = spareSlot;
spareSlot = group.comparatorSlot;
group.comparatorSlot = tmp;
// Re-add the changed group
if (orderedGroups != null) {
orderedGroups.add(group);
assert orderedGroups.size() == topNGroups;
final CollectedSearchGroup<?> newLast = orderedGroups.last();
// If we changed the value of the last group, or changed which group was last, then update bottom:
if (group == newLast || prevLast != newLast) {
for (FieldComparator<?> fc : comparators) {
fc.setBottom(newLast.comparatorSlot);
}
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractFirstPassGroupingCollector.java
Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
docBase = readerContext.docBase;
for (int i=0; i<comparators.length; i++) {
comparators[i] = comparators[i].setNextReader(readerContext);
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java
public static <T> Collection<SearchGroup<T>> merge(List<Collection<SearchGroup<T>>> topGroups, int offset, int topN, Sort groupSort)
throws IOException {
if (topGroups.size() == 0) {
return null;
} else {
return new GroupMerger<T>(groupSort).merge(topGroups, offset, topN);
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java
Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
super.setNextReader(readerContext);
final DocValues dv = readerContext.reader().docValues(groupField);
final DocValues.Source dvSource;
if (dv != null) {
dvSource = diskResident ? dv.getDirectSource() : dv.getSource();
} else {
dvSource = getDefaultSource(readerContext);
}
setDocValuesSources(dvSource, readerContext);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java
protected SearchGroupDocs<Long> retrieveGroup(int doc) throws IOException {
return groupMap.get(source.getInt(doc));
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java
protected SearchGroupDocs<Double> retrieveGroup(int doc) throws IOException {
return groupMap.get(source.getFloat(doc));
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java
protected SearchGroupDocs<BytesRef> retrieveGroup(int doc) throws IOException {
return groupMap.get(source.getBytes(doc, spare));
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java
protected SearchGroupDocs<BytesRef> retrieveGroup(int doc) throws IOException {
int slot = ordSet.find(source.ord(doc));
if (slot >= 0) {
return groupDocs[slot];
}
return null;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupsCollector.java
Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
final DocValues dv = readerContext.reader().docValues(groupField);
final DocValues.Source dvSource;
if (dv != null) {
dvSource = diskResident ? dv.getDirectSource() : dv.getSource();
} else {
dvSource = getDefaultSource(readerContext);
}
setDocValuesSources(dvSource, readerContext);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupsCollector.java
public void collect(int doc) throws IOException {
long value = source.getInt(doc);
if (!groups.contains(value)) {
groups.add(value);
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupsCollector.java
public void collect(int doc) throws IOException {
double value = source.getFloat(doc);
if (!groups.contains(value)) {
groups.add(value);
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupsCollector.java
public void collect(int doc) throws IOException {
BytesRef value = source.getBytes(doc, spare);
if (!groups.contains(value)) {
groups.add(BytesRef.deepCopyOf(value));
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupsCollector.java
public void collect(int doc) throws IOException {
int ord = source.ord(doc);
if (!ordSet.exists(ord)) {
ordSet.put(ord);
BytesRef value = source.getBytes(doc, new BytesRef());
groups.add(value);
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVFirstPassGroupingCollector.java
Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
super.setNextReader(readerContext);
final DocValues dv = readerContext.reader().docValues(groupField);
final DocValues.Source dvSource;
if (dv != null) {
dvSource = diskResident ? dv.getDirectSource() : dv.getSource();
} else {
dvSource = getDefaultSource(readerContext);
}
setDocValuesSources(dvSource);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupHeadsCollector.java
public int compare(int compIDX, int doc) throws IOException {
return comparators[compIDX].compareBottom(doc);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupHeadsCollector.java
public void updateDocHead(int doc) throws IOException {
for (FieldComparator<?> comparator : comparators) {
comparator.copy(0, doc);
comparator.setBottom(0);
}
this.doc = doc + readerContext.docBase;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupHeadsCollector.java
Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
this.readerContext = readerContext;
final DocValues dv = readerContext.reader().docValues(groupField);
final DocValues.Source dvSource;
if (dv != null) {
dvSource = diskResident ? dv.getDirectSource() : dv.getSource();
} else {
dvSource = getDefaultSource(readerContext);
}
setDocValuesSources(dvSource);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupHeadsCollector.java
protected void retrieveGroupHeadAndAddIfNotExist(int doc) throws IOException {
final Comparable<?> groupValue = getGroupValue(doc);
GroupHead groupHead = groups.get(groupValue);
if (groupHead == null) {
groupHead = new GroupHead(groupValue, sortWithinGroup, doc, readerContext, scorer);
groups.put(groupValue == null ? null : duplicate(groupValue), groupHead);
temporalResult.stop = true;
} else {
temporalResult.stop = false;
}
temporalResult.groupHead = groupHead;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupHeadsCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
super.setNextReader(context);
for (GroupHead groupHead : groups.values()) {
for (int i = 0; i < groupHead.comparators.length; i++) {
groupHead.comparators[i] = groupHead.comparators[i].setNextReader(context);
groupHead.readerContext = context;
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupHeadsCollector.java
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
for (GroupHead groupHead : groups.values()) {
groupHead.scorer = scorer;
for (FieldComparator<?> comparator : groupHead.comparators) {
comparator.setScorer(scorer);
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVDistinctValuesCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
groupFieldSource = retrieveSource(groupField, context);
countFieldSource = retrieveSource(countField, context);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVDistinctValuesCollector.java
private DocValues.Source retrieveSource(String fieldName, AtomicReaderContext context) throws IOException {
DocValues groupFieldDv = context.reader().docValues(fieldName);
if (groupFieldDv != null) {
return diskResident ? groupFieldDv.getDirectSource() : groupFieldDv.getSource();
} else {
return DocValues.getDefaultSource(valueType);
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVDistinctValuesCollector.java
public void collect(int doc) throws IOException {
GroupCount groupCount = groupMap.get(groupFieldSource.getFloat(doc));
if (groupCount != null) {
groupCount.uniqueValues.add(countFieldSource.getFloat(doc));
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVDistinctValuesCollector.java
public void collect(int doc) throws IOException {
GroupCount groupCount = groupMap.get(groupFieldSource.getInt(doc));
if (groupCount != null) {
groupCount.uniqueValues.add(countFieldSource.getInt(doc));
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVDistinctValuesCollector.java
public void collect(int doc) throws IOException {
GroupCount groupCount = groupMap.get(groupFieldSource.getBytes(doc, spare));
if (groupCount != null) {
BytesRef countValue = countFieldSource.getBytes(doc, spare);
if (!groupCount.uniqueValues.contains(countValue)) {
groupCount.uniqueValues.add(BytesRef.deepCopyOf(countValue));
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVDistinctValuesCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
groupFieldSource = retrieveSortedSource(groupField, context);
countFieldSource = retrieveSortedSource(countField, context);
ordSet.clear();
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVDistinctValuesCollector.java
private DocValues.SortedSource retrieveSortedSource(String field, AtomicReaderContext context) throws IOException {
DocValues countFieldDv = context.reader().docValues(field);
if (countFieldDv != null) {
return diskResident ? countFieldDv.getDirectSource().asSortedSource() : countFieldDv.getSource().asSortedSource();
} else {
return DocValues.getDefaultSortedSource(valueType, context.reader().maxDoc());
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVDistinctValuesCollector.java
public void collect(int doc) throws IOException {
int slot = ordSet.find(groupFieldSource.ord(doc));
if (slot < 0) {
return;
}
GroupCount gc = groupCounts[slot];
int countOrd = countFieldSource.ord(doc);
if (doesNotContainsOrd(countOrd, gc.ords)) {
gc.uniqueValues.add(countFieldSource.getByOrd(countOrd, new BytesRef()));
gc.ords = Arrays.copyOf(gc.ords, gc.ords.length + 1);
gc.ords[gc.ords.length - 1] = countOrd;
if (gc.ords.length > 1) {
Arrays.sort(gc.ords);
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVDistinctValuesCollector.java
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
super.setNextReader(context);
for (GroupCount group : groups) {
int groupOrd = groupFieldSource.getOrdByValue((BytesRef) group.groupValue, spare);
if (groupOrd < 0) {
continue;
}
groupCounts[ordSet.put(groupOrd)] = group;
group.ords = new int[group.uniqueValues.size()];
Arrays.fill(group.ords, -1);
int i = 0;
for (Comparable<?> value : group.uniqueValues) {
int countOrd = countFieldSource.getOrdByValue((BytesRef) value, spare);
if (countOrd >= 0) {
group.ords[i++] = countOrd;
}
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java
public void collect(int doc) throws IOException {
int facetOrd = facetFieldSource.ord(doc);
if (facetOrd < startFacetOrd || facetOrd >= endFacetOrd) {
return;
}
int groupOrd = groupFieldSource.ord(doc);
int segmentGroupedFacetsIndex = (groupOrd * facetFieldSource.getValueCount()) + facetOrd;
if (segmentGroupedFacetHits.exists(segmentGroupedFacetsIndex)) {
return;
}
segmentTotalCount++;
segmentFacetCounts[facetOrd]++;
segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
groupedFacetHits.add(
new GroupedFacetHit(
groupFieldSource.getByOrd(groupOrd, new BytesRef()),
facetFieldSource.getByOrd(facetOrd, new BytesRef())
)
);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
if (segmentFacetCounts != null) {
segmentResults.add(createSegmentResult());
}
groupFieldSource = getDocValuesSortedSource(groupField, groupDvType, groupDiskResident, context.reader());
facetFieldSource = getDocValuesSortedSource(facetField, facetFieldDvType, facetDiskResident, context.reader());
segmentFacetCounts = new int[facetFieldSource.getValueCount()];
segmentTotalCount = 0;
segmentGroupedFacetHits.clear();
for (GroupedFacetHit groupedFacetHit : groupedFacetHits) {
int facetOrd = facetFieldSource.getOrdByValue(groupedFacetHit.facetValue, facetSpare);
if (facetOrd < 0) {
continue;
}
int groupOrd = groupFieldSource.getOrdByValue(groupedFacetHit.groupValue, groupSpare);
if (groupOrd < 0) {
continue;
}
int segmentGroupedFacetsIndex = (groupOrd * facetFieldSource.getValueCount()) + facetOrd;
segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
}
if (facetPrefix != null) {
startFacetOrd = facetFieldSource.getOrdByValue(facetPrefix, facetSpare);
if (startFacetOrd < 0) {
// Points to the ord one higher than facetPrefix
startFacetOrd = -startFacetOrd - 1;
}
BytesRef facetEndPrefix = BytesRef.deepCopyOf(facetPrefix);
facetEndPrefix.append(UnicodeUtil.BIG_TERM);
endFacetOrd = facetFieldSource.getOrdByValue(facetEndPrefix, facetSpare);
endFacetOrd = -endFacetOrd - 1; // Points to the ord one higher than facetEndPrefix
} else {
startFacetOrd = 0;
endFacetOrd = facetFieldSource.getValueCount();
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java
protected SegmentResult createSegmentResult() throws IOException {
if (startFacetOrd == 0 && facetFieldSource.getByOrd(startFacetOrd, facetSpare).length == 0) {
int missing = segmentFacetCounts[0];
int total = segmentTotalCount - segmentFacetCounts[0];
return new SegmentResult(segmentFacetCounts, total, missing, facetFieldSource, endFacetOrd);
} else {
return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldSource, startFacetOrd, endFacetOrd);
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java
private DocValues.SortedSource getDocValuesSortedSource(String field, Type dvType, boolean diskResident, AtomicReader reader) throws IOException {
DocValues dv = reader.docValues(field);
DocValues.Source dvSource;
if (dv != null) {
dvSource = diskResident ? dv.getDirectSource() : dv.getSource();
} else {
dvSource = DocValues.getDefaultSortedSource(dvType, reader.maxDoc());
}
return dvSource.asSortedSource();
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java
protected void nextTerm() throws IOException {
mergeTerm = facetFieldSource.getByOrd(mergePos, spare);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractDistinctValuesCollector.java
public void setScorer(Scorer scorer) throws IOException {
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermSecondPassGroupingCollector.java
@Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
super.setNextReader(readerContext);
index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader(), groupField);
// Rebuild ordSet
ordSet.clear();
for (SearchGroupDocs<BytesRef> group : groupMap.values()) {
// System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
int ord = group.groupValue == null ? 0 : index.binarySearchLookup(group.groupValue, spareBytesRef);
if (ord >= 0) {
groupDocs[ordSet.put(ord)] = group;
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermSecondPassGroupingCollector.java
@Override
protected SearchGroupDocs<BytesRef> retrieveGroup(int doc) throws IOException {
int slot = ordSet.find(index.getOrd(doc));
if (slot >= 0) {
return groupDocs[slot];
}
return null;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupsCollector.java
public void collect(int doc) throws IOException {
int key = index.getOrd(doc);
if (!ordSet.exists(key)) {
ordSet.put(key);
BytesRef term = key == 0 ? null : index.lookup(key, new BytesRef());
groups.add(term);
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupsCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
index = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
// Clear ordSet and fill it with previously encountered groups that can occur in the current segment.
ordSet.clear();
for (BytesRef countedGroup : groups) {
int ord = index.binarySearchLookup(countedGroup, spareBytesRef);
if (ord >= 0) {
ordSet.put(ord);
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
public void collect(int doc) throws IOException {
int facetOrd = facetFieldTermsIndex.getOrd(doc);
if (facetOrd < startFacetOrd || facetOrd >= endFacetOrd) {
return;
}
int groupOrd = groupFieldTermsIndex.getOrd(doc);
int segmentGroupedFacetsIndex = (groupOrd * facetFieldTermsIndex.numOrd()) + facetOrd;
if (segmentGroupedFacetHits.exists(segmentGroupedFacetsIndex)) {
return;
}
segmentTotalCount++;
segmentFacetCounts[facetOrd]++;
segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
groupedFacetHits.add(
new GroupedFacetHit(
groupOrd == 0 ? null : groupFieldTermsIndex.lookup(groupOrd, new BytesRef()),
facetOrd == 0 ? null : facetFieldTermsIndex.lookup(facetOrd, new BytesRef())
)
);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
if (segmentFacetCounts != null) {
segmentResults.add(createSegmentResult());
}
groupFieldTermsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
facetFieldTermsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), facetField);
segmentFacetCounts = new int[facetFieldTermsIndex.numOrd()];
segmentTotalCount = 0;
segmentGroupedFacetHits.clear();
for (GroupedFacetHit groupedFacetHit : groupedFacetHits) {
int facetOrd = facetFieldTermsIndex.binarySearchLookup(groupedFacetHit.facetValue, spare);
if (facetOrd < 0) {
continue;
}
int groupOrd = groupFieldTermsIndex.binarySearchLookup(groupedFacetHit.groupValue, spare);
if (groupOrd < 0) {
continue;
}
int segmentGroupedFacetsIndex = (groupOrd * facetFieldTermsIndex.numOrd()) + facetOrd;
segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
}
if (facetPrefix != null) {
startFacetOrd = facetFieldTermsIndex.binarySearchLookup(facetPrefix, spare);
if (startFacetOrd < 0) {
// Points to the ord one higher than facetPrefix
startFacetOrd = -startFacetOrd - 1;
}
BytesRef facetEndPrefix = BytesRef.deepCopyOf(facetPrefix);
facetEndPrefix.append(UnicodeUtil.BIG_TERM);
endFacetOrd = facetFieldTermsIndex.binarySearchLookup(facetEndPrefix, spare);
endFacetOrd = -endFacetOrd - 1; // Points to the ord one higher than facetEndPrefix
} else {
startFacetOrd = 0;
endFacetOrd = facetFieldTermsIndex.numOrd();
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
protected SegmentResult createSegmentResult() throws IOException {
return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldTermsIndex.getTermsEnum(), startFacetOrd, endFacetOrd);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
protected void nextTerm() throws IOException {
mergeTerm = tenum.next();
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
public void collect(int doc) throws IOException {
int groupOrd = groupFieldTermsIndex.getOrd(doc);
if (facetOrdTermsEnum != null) {
reuse = facetFieldDocTermOrds.lookup(doc, reuse);
}
int chunk;
boolean first = true;
int[] buffer = new int[5];
do {
chunk = reuse != null ? reuse.read(buffer) : 0;
if (first && chunk == 0) {
chunk = 1;
buffer[0] = facetFieldDocTermOrds.numTerms(); // this facet ord is reserved for docs not containing facet field.
}
first = false;
for (int pos = 0; pos < chunk; pos++) {
int facetOrd = buffer[pos];
if (facetOrd < startFacetOrd || facetOrd >= endFacetOrd) {
continue;
}
int segmentGroupedFacetsIndex = (groupOrd * (facetFieldDocTermOrds.numTerms() + 1)) + facetOrd;
if (segmentGroupedFacetHits.exists(segmentGroupedFacetsIndex)) {
continue;
}
segmentTotalCount++;
segmentFacetCounts[facetOrd]++;
segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
groupedFacetHits.add(
new GroupedFacetHit(
groupOrd == 0 ? null : groupFieldTermsIndex.lookup(groupOrd, new BytesRef()),
facetOrd == facetFieldDocTermOrds.numTerms() ? null : BytesRef.deepCopyOf(facetFieldDocTermOrds.lookupTerm(facetOrdTermsEnum, facetOrd))
)
);
}
} while (chunk >= buffer.length);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
if (segmentFacetCounts != null) {
segmentResults.add(createSegmentResult());
}
reuse = null;
groupFieldTermsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
facetFieldDocTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), facetField);
facetOrdTermsEnum = facetFieldDocTermOrds.getOrdTermsEnum(context.reader());
// [facetFieldDocTermOrds.numTerms() + 1] for all possible facet values and docs not containing facet field
segmentFacetCounts = new int[facetFieldDocTermOrds.numTerms() + 1];
segmentTotalCount = 0;
segmentGroupedFacetHits.clear();
for (GroupedFacetHit groupedFacetHit : groupedFacetHits) {
int groupOrd = groupFieldTermsIndex.binarySearchLookup(groupedFacetHit.groupValue, spare);
if (groupOrd < 0) {
continue;
}
int facetOrd;
if (groupedFacetHit.facetValue != null) {
if (facetOrdTermsEnum == null || !facetOrdTermsEnum.seekExact(groupedFacetHit.facetValue, true)) {
continue;
}
facetOrd = (int) facetOrdTermsEnum.ord();
} else {
facetOrd = facetFieldDocTermOrds.numTerms();
}
// (facetFieldDocTermOrds.numTerms() + 1) for all possible facet values and docs not containing facet field
int segmentGroupedFacetsIndex = (groupOrd * (facetFieldDocTermOrds.numTerms() + 1)) + facetOrd;
segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
}
if (facetPrefix != null) {
TermsEnum.SeekStatus seekStatus;
if (facetOrdTermsEnum != null) {
seekStatus = facetOrdTermsEnum.seekCeil(facetPrefix, true);
} else {
seekStatus = TermsEnum.SeekStatus.END;
}
if (seekStatus != TermsEnum.SeekStatus.END) {
startFacetOrd = (int) facetOrdTermsEnum.ord();
} else {
startFacetOrd = 0;
endFacetOrd = 0;
return;
}
BytesRef facetEndPrefix = BytesRef.deepCopyOf(facetPrefix);
facetEndPrefix.append(UnicodeUtil.BIG_TERM);
seekStatus = facetOrdTermsEnum.seekCeil(facetEndPrefix, true);
if (seekStatus != TermsEnum.SeekStatus.END) {
endFacetOrd = (int) facetOrdTermsEnum.ord();
} else {
endFacetOrd = facetFieldDocTermOrds.numTerms(); // Don't include null...
}
} else {
startFacetOrd = 0;
endFacetOrd = facetFieldDocTermOrds.numTerms() + 1;
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
protected SegmentResult createSegmentResult() throws IOException {
return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldDocTermOrds.numTerms(), facetOrdTermsEnum, startFacetOrd, endFacetOrd);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
protected void nextTerm() throws IOException {
mergeTerm = tenum.next();
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermDistinctValuesCollector.java
public void collect(int doc) throws IOException {
int slot = ordSet.find(groupFieldTermIndex.getOrd(doc));
if (slot < 0) {
return;
}
GroupCount gc = groupCounts[slot];
int countOrd = countFieldTermIndex.getOrd(doc);
if (doesNotContainsOrd(countOrd, gc.ords)) {
if (countOrd == 0) {
gc.uniqueValues.add(null);
} else {
gc.uniqueValues.add(countFieldTermIndex.lookup(countOrd, new BytesRef()));
}
gc.ords = Arrays.copyOf(gc.ords, gc.ords.length + 1);
gc.ords[gc.ords.length - 1] = countOrd;
if (gc.ords.length > 1) {
Arrays.sort(gc.ords);
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermDistinctValuesCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
groupFieldTermIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
countFieldTermIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), countField);
ordSet.clear();
for (GroupCount group : groups) {
int groupOrd = group.groupValue == null ? 0 : groupFieldTermIndex.binarySearchLookup(group.groupValue, spare);
if (groupOrd < 0) {
continue;
}
groupCounts[ordSet.put(groupOrd)] = group;
group.ords = new int[group.uniqueValues.size()];
Arrays.fill(group.ords, -1);
int i = 0;
for (BytesRef value : group.uniqueValues) {
int countOrd = value == null ? 0 : countFieldTermIndex.binarySearchLookup(value, new BytesRef());
if (countOrd >= 0) {
group.ords[i++] = countOrd;
}
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermFirstPassGroupingCollector.java
@Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
super.setNextReader(readerContext);
index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader(), groupField);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public static AbstractAllGroupHeadsCollector<?> create(String groupField, Sort sortWithinGroup) throws IOException {
return create(groupField, sortWithinGroup, DEFAULT_INITIAL_SIZE);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public static AbstractAllGroupHeadsCollector<?> create(String groupField, Sort sortWithinGroup, int initialSize) throws IOException {
boolean sortAllScore = true;
boolean sortAllFieldValue = true;
for (SortField sortField : sortWithinGroup.getSort()) {
if (sortField.getType() == SortField.Type.SCORE) {
sortAllFieldValue = false;
} else if (needGeneralImpl(sortField)) {
return new GeneralAllGroupHeadsCollector(groupField, sortWithinGroup);
} else {
sortAllScore = false;
}
}
if (sortAllScore) {
return new ScoreAllGroupHeadsCollector(groupField, sortWithinGroup, initialSize);
} else if (sortAllFieldValue) {
return new OrdAllGroupHeadsCollector(groupField, sortWithinGroup, initialSize);
} else {
return new OrdScoreAllGroupHeadsCollector(groupField, sortWithinGroup, initialSize);
}
}
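The factory above picks a specialized implementation depending on whether the within-group sort uses only scores, only field values, or both. A hedged usage sketch, assuming the usual Lucene imports plus an existing searcher, query and a sortable "price" field:
Sort sortWithinGroup = new Sort(new SortField("price", SortField.Type.LONG));
AbstractAllGroupHeadsCollector<?> headsCollector =
    TermAllGroupHeadsCollector.create("category", sortWithinGroup);
searcher.search(query, headsCollector);
// one bit per document in the index; set bits mark the head (most relevant doc) of each group
Bits groupHeads = headsCollector.retrieveGroupHeads(searcher.getIndexReader().maxDoc());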
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
protected void retrieveGroupHeadAndAddIfNotExist(int doc) throws IOException {
final int ord = groupIndex.getOrd(doc);
final BytesRef groupValue = ord == 0 ? null : groupIndex.lookup(ord, scratchBytesRef);
GroupHead groupHead = groups.get(groupValue);
if (groupHead == null) {
groupHead = new GroupHead(groupValue, sortWithinGroup, doc);
groups.put(groupValue == null ? null : BytesRef.deepCopyOf(groupValue), groupHead);
temporalResult.stop = true;
} else {
temporalResult.stop = false;
}
temporalResult.groupHead = groupHead;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
this.readerContext = context;
groupIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
for (GroupHead groupHead : groups.values()) {
for (int i = 0; i < groupHead.comparators.length; i++) {
groupHead.comparators[i] = groupHead.comparators[i].setNextReader(context);
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
for (GroupHead groupHead : groups.values()) {
for (FieldComparator<?> comparator : groupHead.comparators) {
comparator.setScorer(scorer);
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public int compare(int compIDX, int doc) throws IOException {
return comparators[compIDX].compareBottom(doc);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public void updateDocHead(int doc) throws IOException {
for (FieldComparator<?> comparator : comparators) {
comparator.copy(0, doc);
comparator.setBottom(0);
}
this.doc = doc + readerContext.docBase;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
protected void retrieveGroupHeadAndAddIfNotExist(int doc) throws IOException {
int key = groupIndex.getOrd(doc);
GroupHead groupHead;
if (!ordSet.exists(key)) {
ordSet.put(key);
BytesRef term = key == 0 ? null : groupIndex.getTerm(doc, new BytesRef());
groupHead = new GroupHead(doc, term);
collectedGroups.add(groupHead);
segmentGroupHeads[key] = groupHead;
temporalResult.stop = true;
} else {
temporalResult.stop = false;
groupHead = segmentGroupHeads[key];
}
temporalResult.groupHead = groupHead;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
this.readerContext = context;
groupIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
for (int i = 0; i < fields.length; i++) {
if (fields[i].getType() == SortField.Type.SCORE) {
continue;
}
sortsIndex[i] = FieldCache.DEFAULT.getTermsIndex(context.reader(), fields[i].getField());
}
// Clear ordSet and fill it with previously encountered groups that can occur in the current segment.
ordSet.clear();
segmentGroupHeads = new GroupHead[groupIndex.numOrd()];
for (GroupHead collectedGroup : collectedGroups) {
int ord = groupIndex.binarySearchLookup(collectedGroup.groupValue, scratchBytesRef);
if (ord >= 0) {
ordSet.put(ord);
segmentGroupHeads[ord] = collectedGroup;
for (int i = 0; i < sortsIndex.length; i++) {
if (fields[i].getType() == SortField.Type.SCORE) {
continue;
}
collectedGroup.sortOrds[i] = sortsIndex[i].binarySearchLookup(collectedGroup.sortValues[i], scratchBytesRef);
}
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public int compare(int compIDX, int doc) throws IOException {
if (fields[compIDX].getType() == SortField.Type.SCORE) {
float score = scorer.score();
if (scores[compIDX] < score) {
return 1;
} else if (scores[compIDX] > score) {
return -1;
}
return 0;
} else {
if (sortOrds[compIDX] < 0) {
// The current segment doesn't contain the sort value we encountered before. Therefore the ord is negative.
return sortValues[compIDX].compareTo(sortsIndex[compIDX].getTerm(doc, scratchBytesRef));
} else {
return sortOrds[compIDX] - sortsIndex[compIDX].getOrd(doc);
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public void updateDocHead(int doc) throws IOException {
for (int i = 0; i < sortsIndex.length; i++) {
if (fields[i].getType() == SortField.Type.SCORE) {
scores[i] = scorer.score();
} else {
sortValues[i] = sortsIndex[i].getTerm(doc, sortValues[i]);
sortOrds[i] = sortsIndex[i].getOrd(doc);
}
}
this.doc = doc + readerContext.docBase;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public void setScorer(Scorer scorer) throws IOException {
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
protected void retrieveGroupHeadAndAddIfNotExist(int doc) throws IOException {
int key = groupIndex.getOrd(doc);
GroupHead groupHead;
if (!ordSet.exists(key)) {
ordSet.put(key);
BytesRef term = key == 0 ? null : groupIndex.getTerm(doc, new BytesRef());
groupHead = new GroupHead(doc, term);
collectedGroups.add(groupHead);
segmentGroupHeads[key] = groupHead;
temporalResult.stop = true;
} else {
temporalResult.stop = false;
groupHead = segmentGroupHeads[key];
}
temporalResult.groupHead = groupHead;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
this.readerContext = context;
groupIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
for (int i = 0; i < fields.length; i++) {
sortsIndex[i] = FieldCache.DEFAULT.getTermsIndex(context.reader(), fields[i].getField());
}
// Clear ordSet and fill it with previously encountered groups that can occur in the current segment.
ordSet.clear();
segmentGroupHeads = new GroupHead[groupIndex.numOrd()];
for (GroupHead collectedGroup : collectedGroups) {
int groupOrd = groupIndex.binarySearchLookup(collectedGroup.groupValue, scratchBytesRef);
if (groupOrd >= 0) {
ordSet.put(groupOrd);
segmentGroupHeads[groupOrd] = collectedGroup;
for (int i = 0; i < sortsIndex.length; i++) {
collectedGroup.sortOrds[i] = sortsIndex[i].binarySearchLookup(collectedGroup.sortValues[i], scratchBytesRef);
}
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public int compare(int compIDX, int doc) throws IOException {
if (sortOrds[compIDX] < 0) {
// The current segment doesn't contain the sort value we encountered before. Therefore the ord is negative.
return sortValues[compIDX].compareTo(sortsIndex[compIDX].getTerm(doc, scratchBytesRef));
} else {
return sortOrds[compIDX] - sortsIndex[compIDX].getOrd(doc);
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public void updateDocHead(int doc) throws IOException {
for (int i = 0; i < sortsIndex.length; i++) {
sortValues[i] = sortsIndex[i].getTerm(doc, sortValues[i]);
sortOrds[i] = sortsIndex[i].getOrd(doc);
}
this.doc = doc + readerContext.docBase;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
protected void retrieveGroupHeadAndAddIfNotExist(int doc) throws IOException {
int key = groupIndex.getOrd(doc);
GroupHead groupHead;
if (!ordSet.exists(key)) {
ordSet.put(key);
BytesRef term = key == 0 ? null : groupIndex.getTerm(doc, new BytesRef());
groupHead = new GroupHead(doc, term);
collectedGroups.add(groupHead);
segmentGroupHeads[key] = groupHead;
temporalResult.stop = true;
} else {
temporalResult.stop = false;
groupHead = segmentGroupHeads[key];
}
temporalResult.groupHead = groupHead;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
this.readerContext = context;
groupIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), groupField);
// Clear ordSet and fill it with previously encountered groups that can occur in the current segment.
ordSet.clear();
segmentGroupHeads = new GroupHead[groupIndex.numOrd()];
for (GroupHead collectedGroup : collectedGroups) {
int ord = groupIndex.binarySearchLookup(collectedGroup.groupValue, scratchBytesRef);
if (ord >= 0) {
ordSet.put(ord);
segmentGroupHeads[ord] = collectedGroup;
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public int compare(int compIDX, int doc) throws IOException {
float score = scorer.score();
if (scores[compIDX] < score) {
return 1;
} else if (scores[compIDX] > score) {
return -1;
}
return 0;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/term/TermAllGroupHeadsCollector.java
public void updateDocHead(int doc) throws IOException {
float score = scorer.score();
for (int i = 0; i < scores.length; i++) {
scores[i] = score;
}
this.doc = doc + readerContext.docBase;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java
public <T> TopGroups<T> search(IndexSearcher searcher, Query query, int groupOffset, int groupLimit) throws IOException {
return search(searcher, null, query, groupOffset, groupLimit);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java
protected TopGroups groupByFieldOrFunction(IndexSearcher searcher, Filter filter, Query query, int groupOffset, int groupLimit) throws IOException {
int topN = groupOffset + groupLimit;
final AbstractFirstPassGroupingCollector firstPassCollector;
final AbstractAllGroupsCollector allGroupsCollector;
final AbstractAllGroupHeadsCollector allGroupHeadsCollector;
if (groupFunction != null) {
firstPassCollector = new FunctionFirstPassGroupingCollector(groupFunction, valueSourceContext, groupSort, topN);
if (allGroups) {
allGroupsCollector = new FunctionAllGroupsCollector(groupFunction, valueSourceContext);
} else {
allGroupsCollector = null;
}
if (allGroupHeads) {
allGroupHeadsCollector = new FunctionAllGroupHeadsCollector(groupFunction, valueSourceContext, sortWithinGroup);
} else {
allGroupHeadsCollector = null;
}
} else if (docValuesType != null) {
firstPassCollector = DVFirstPassGroupingCollector.create(groupSort, topN, groupField, docValuesType, diskResidentDocValues);
if (allGroups) {
allGroupsCollector = DVAllGroupsCollector.create(groupField, docValuesType, diskResidentDocValues, initialSize);
} else {
allGroupsCollector = null;
}
if (allGroupHeads) {
allGroupHeadsCollector = DVAllGroupHeadsCollector.create(groupField, sortWithinGroup, docValuesType, diskResidentDocValues);
} else {
allGroupHeadsCollector = null;
}
} else {
firstPassCollector = new TermFirstPassGroupingCollector(groupField, groupSort, topN);
if (allGroups) {
allGroupsCollector = new TermAllGroupsCollector(groupField, initialSize);
} else {
allGroupsCollector = null;
}
if (allGroupHeads) {
allGroupHeadsCollector = TermAllGroupHeadsCollector.create(groupField, sortWithinGroup, initialSize);
} else {
allGroupHeadsCollector = null;
}
}
final Collector firstRound;
if (allGroupHeads || allGroups) {
List<Collector> collectors = new ArrayList<Collector>();
collectors.add(firstPassCollector);
if (allGroups) {
collectors.add(allGroupsCollector);
}
if (allGroupHeads) {
collectors.add(allGroupHeadsCollector);
}
firstRound = MultiCollector.wrap(collectors.toArray(new Collector[collectors.size()]));
} else {
firstRound = firstPassCollector;
}
CachingCollector cachedCollector = null;
if (maxCacheRAMMB != null || maxDocsToCache != null) {
if (maxCacheRAMMB != null) {
cachedCollector = CachingCollector.create(firstRound, cacheScores, maxCacheRAMMB);
} else {
cachedCollector = CachingCollector.create(firstRound, cacheScores, maxDocsToCache);
}
searcher.search(query, filter, cachedCollector);
} else {
searcher.search(query, filter, firstRound);
}
if (allGroups) {
matchingGroups = allGroupsCollector.getGroups();
} else {
matchingGroups = Collections.emptyList();
}
if (allGroupHeads) {
matchingGroupHeads = allGroupHeadsCollector.retrieveGroupHeads(searcher.getIndexReader().maxDoc());
} else {
matchingGroupHeads = new Bits.MatchNoBits(searcher.getIndexReader().maxDoc());
}
Collection<SearchGroup> topSearchGroups = firstPassCollector.getTopGroups(groupOffset, fillSortFields);
if (topSearchGroups == null) {
return new TopGroups(new SortField[0], new SortField[0], 0, 0, new GroupDocs[0]);
}
int topNInsideGroup = groupDocsOffset + groupDocsLimit;
AbstractSecondPassGroupingCollector secondPassCollector;
if (groupFunction != null) {
secondPassCollector = new FunctionSecondPassGroupingCollector((Collection) topSearchGroups, groupSort, sortWithinGroup, topNInsideGroup, includeScores, includeMaxScore, fillSortFields, groupFunction, valueSourceContext);
} else if (docValuesType != null) {
secondPassCollector = DVSecondPassGroupingCollector.create(groupField, diskResidentDocValues, docValuesType, (Collection) topSearchGroups, groupSort, sortWithinGroup, topNInsideGroup, includeScores, includeMaxScore, fillSortFields);
} else {
secondPassCollector = new TermSecondPassGroupingCollector(groupField, (Collection) topSearchGroups, groupSort, sortWithinGroup, topNInsideGroup, includeScores, includeMaxScore, fillSortFields);
}
if (cachedCollector != null && cachedCollector.isCached()) {
cachedCollector.replay(secondPassCollector);
} else {
searcher.search(query, filter, secondPassCollector);
}
if (allGroups) {
return new TopGroups(secondPassCollector.getTopGroups(groupDocsOffset), matchingGroups.size());
} else {
return secondPassCollector.getTopGroups(groupDocsOffset);
}
}
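groupByFieldOrFunction above wires the first-pass, optional all-groups/all-group-heads, and second-pass collectors together; callers normally reach it through the public search methods. A minimal sketch, assuming the usual Lucene imports, an existing IndexSearcher and an indexed "category" field:
GroupingSearch groupingSearch = new GroupingSearch("category");
groupingSearch.setGroupSort(Sort.RELEVANCE);
groupingSearch.setGroupDocsLimit(5);
groupingSearch.setAllGroups(true); // also count the total number of distinct matching groups
TopGroups<BytesRef> topGroups =
    groupingSearch.search(searcher, new TermQuery(new Term("body", "lucene")), 0, 10);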
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java
protected TopGroups<?> groupByDocBlock(IndexSearcher searcher, Filter filter, Query query, int groupOffset, int groupLimit) throws IOException {
int topN = groupOffset + groupLimit;
BlockGroupingCollector c = new BlockGroupingCollector(groupSort, topN, includeScores, groupEndDocs);
searcher.search(query, filter, c);
int topNInsideGroup = groupDocsOffset + groupDocsLimit;
return c.getTopGroups(sortWithinGroup, groupOffset, groupDocsOffset, topNInsideGroup, fillSortFields);
}
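Block grouping only works when all documents of a group were indexed contiguously (for example via IndexWriter.addDocuments) and a filter marks the last document of each block. A hedged sketch with a made-up "groupEnd" marker field and an assumed searcher/query:
Filter lastDocInBlock =
    new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupEnd", "x"))));
BlockGroupingCollector blockCollector =
    new BlockGroupingCollector(Sort.RELEVANCE, 10, true /* needsScores */, lastDocInBlock);
searcher.search(query, blockCollector);
TopGroups<?> topGroups = blockCollector.getTopGroups(Sort.RELEVANCE, 0, 0, 5, true);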
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/function/FunctionAllGroupHeadsCollector.java
protected void retrieveGroupHeadAndAddIfNotExist(int doc) throws IOException {
filler.fillValue(doc);
GroupHead groupHead = groups.get(mval);
if (groupHead == null) {
MutableValue groupValue = mval.duplicate();
groupHead = new GroupHead(groupValue, sortWithinGroup, doc);
groups.put(groupValue, groupHead);
temporalResult.stop = true;
} else {
temporalResult.stop = false;
}
this.temporalResult.groupHead = groupHead;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/function/FunctionAllGroupHeadsCollector.java
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
for (GroupHead groupHead : groups.values()) {
for (FieldComparator<?> comparator : groupHead.comparators) {
comparator.setScorer(scorer);
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/function/FunctionAllGroupHeadsCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
this.readerContext = context;
FunctionValues values = groupBy.getValues(vsContext, context);
filler = values.getValueFiller();
mval = filler.getValue();
for (GroupHead groupHead : groups.values()) {
for (int i = 0; i < groupHead.comparators.length; i++) {
groupHead.comparators[i] = groupHead.comparators[i].setNextReader(context);
}
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/function/FunctionAllGroupHeadsCollector.java
public int compare(int compIDX, int doc) throws IOException {
return comparators[compIDX].compareBottom(doc);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/function/FunctionAllGroupHeadsCollector.java
public void updateDocHead(int doc) throws IOException {
for (FieldComparator<?> comparator : comparators) {
comparator.copy(0, doc);
comparator.setBottom(0);
}
this.doc = doc + readerContext.docBase;
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/function/FunctionFirstPassGroupingCollector.java
@Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
super.setNextReader(readerContext);
FunctionValues values = groupByVS.getValues(vsContext, readerContext);
filler = values.getValueFiller();
mval = filler.getValue();
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/function/FunctionDistinctValuesCollector.java
public void collect(int doc) throws IOException {
groupFiller.fillValue(doc);
GroupCount groupCount = groupMap.get(groupMval);
if (groupCount != null) {
countFiller.fillValue(doc);
groupCount.uniqueValues.add(countMval.duplicate());
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/function/FunctionDistinctValuesCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
FunctionValues values = groupSource.getValues(vsContext, context);
groupFiller = values.getValueFiller();
groupMval = groupFiller.getValue();
values = countSource.getValues(vsContext, context);
countFiller = values.getValueFiller();
countMval = countFiller.getValue();
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/function/FunctionAllGroupsCollector.java
public void collect(int doc) throws IOException {
filler.fillValue(doc);
if (!groups.contains(mval)) {
groups.add(mval.duplicate());
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/function/FunctionAllGroupsCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
FunctionValues values = groupBy.getValues(vsContext, context);
filler = values.getValueFiller();
mval = filler.getValue();
}
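The function-based collectors group by an arbitrary ValueSource instead of an indexed term. A sketch under the assumption that BytesRefFieldSource is a suitable ValueSource for the group field and that a searcher and query already exist:
ValueSource groupBy = new BytesRefFieldSource("category");
FunctionAllGroupsCollector functionGroups =
    new FunctionAllGroupsCollector(groupBy, new HashMap<Object, Object>());
searcher.search(query, functionGroups);
Collection<MutableValue> distinctGroups = functionGroups.getGroups();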
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/function/FunctionSecondPassGroupingCollector.java
protected SearchGroupDocs<MutableValue> retrieveGroup(int doc) throws IOException {
filler.fillValue(doc);
return groupMap.get(mval);
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/function/FunctionSecondPassGroupingCollector.java
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
super.setNextReader(readerContext);
FunctionValues values = groupByVS.getValues(vsContext, readerContext);
filler = values.getValueFiller();
mval = filler.getValue();
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupHeadsCollector.java
public void collect(int doc) throws IOException {
retrieveGroupHeadAndAddIfNotExist(doc);
if (temporalResult.stop) {
return;
}
GH groupHead = temporalResult.groupHead;
// OK, now we need to check whether the current doc is more relevant than the current group head for this group
for (int compIDX = 0; ; compIDX++) {
final int c = reversed[compIDX] * groupHead.compare(compIDX, doc);
if (c < 0) {
// Definitely not competitive. So don't even bother to continue
return;
} else if (c > 0) {
// Definitely competitive.
break;
} else if (compIDX == compIDXEnd) {
// Here c=0. If we're at the last comparator, this doc is not
// competitive, since docs are visited in doc Id order, which means
// this doc cannot compete with any other document in the queue.
return;
}
}
groupHead.updateDocHead(doc);
}
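The reversed[] multipliers consumed above flip the outcome of compare for descending sort fields. They are presumably derived from the within-group sort along these lines (illustrative only, not taken from the extracted sources):
SortField[] sortFields = sortWithinGroup.getSort();
int[] reversed = new int[sortFields.length];
for (int i = 0; i < sortFields.length; i++) {
reversed[i] = sortFields[i].getReverse() ? -1 : 1; // -1 inverts the comparator result for descending fields
}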
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractGroupFacetCollector.java
public GroupedFacetResult mergeSegmentResults(int size, int minCount, boolean orderByCount) throws IOException {
if (segmentFacetCounts != null) {
segmentResults.add(createSegmentResult());
segmentFacetCounts = null; // reset
}
int totalCount = 0;
int missingCount = 0;
SegmentResultPriorityQueue segments = new SegmentResultPriorityQueue(segmentResults.size());
for (SegmentResult segmentResult : segmentResults) {
missingCount += segmentResult.missing;
if (segmentResult.mergePos >= segmentResult.maxTermPos) {
continue;
}
totalCount += segmentResult.total;
segments.add(segmentResult);
}
GroupedFacetResult facetResult = new GroupedFacetResult(size, minCount, orderByCount, totalCount, missingCount);
while (segments.size() > 0) {
SegmentResult segmentResult = segments.top();
BytesRef currentFacetValue = BytesRef.deepCopyOf(segmentResult.mergeTerm);
int count = 0;
do {
count += segmentResult.counts[segmentResult.mergePos++];
if (segmentResult.mergePos < segmentResult.maxTermPos) {
segmentResult.nextTerm();
segmentResult = segments.updateTop();
} else {
segments.pop();
segmentResult = segments.top();
if (segmentResult == null) {
break;
}
}
} while (currentFacetValue.equals(segmentResult.mergeTerm));
facetResult.addFacetCount(currentFacetValue, count);
}
return facetResult;
}
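mergeSegmentResults folds the per-segment facet counts into a single GroupedFacetResult once the search has completed. A hedged end-to-end sketch using the term-based implementation; the field names, limits and surrounding searcher are made up for illustration:
TermGroupFacetCollector facetCollector =
    TermGroupFacetCollector.createTermGroupFacetCollector("hotel", "airport", true, null, 128);
searcher.search(query, facetCollector);
AbstractGroupFacetCollector.GroupedFacetResult facetResult =
    facetCollector.mergeSegmentResults(10 /* size */, 0 /* minCount */, true /* orderByCount */);
List<AbstractGroupFacetCollector.FacetEntry> entries = facetResult.getFacetEntries(0, 10);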
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractGroupFacetCollector.java
public void setScorer(Scorer scorer) throws IOException {
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java
public static <T> TopGroups<T> merge(TopGroups<T>[] shardGroups, Sort groupSort, Sort docSort, int docOffset, int docTopN)
throws IOException {
//System.out.println("TopGroups.merge");
if (shardGroups.length == 0) {
return null;
}
int totalHitCount = 0;
int totalGroupedHitCount = 0;
// Optionally merge the totalGroupCount.
Integer totalGroupCount = null;
final int numGroups = shardGroups[0].groups.length;
for(TopGroups<T> shard : shardGroups) {
if (numGroups != shard.groups.length) {
throw new IllegalArgumentException("number of groups differs across shards; you must pass same top groups to all shards' second-pass collector");
}
totalHitCount += shard.totalHitCount;
totalGroupedHitCount += shard.totalGroupedHitCount;
if (shard.totalGroupCount != null) {
if (totalGroupCount == null) {
totalGroupCount = 0;
}
totalGroupCount += shard.totalGroupCount;
}
}
@SuppressWarnings({"unchecked","rawtypes"})
final GroupDocs<T>[] mergedGroupDocs = new GroupDocs[numGroups];
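The merge method above is truncated in this extract; in a sharded setup it is what combines each shard's second-pass TopGroups. A minimal sketch, where the two shard results, groupSort and docSort are assumed to exist:
@SuppressWarnings("unchecked")
TopGroups<BytesRef>[] shardResults = new TopGroups[] { shard0TopGroups, shard1TopGroups };
TopGroups<BytesRef> merged = TopGroups.merge(shardResults, groupSort, docSort, 0 /* docOffset */, 10 /* docTopN */);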
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
public TopGroups<?> getTopGroups(Sort withinGroupSort, int groupOffset, int withinGroupOffset, int maxDocsPerGroup, boolean fillSortFields) throws IOException {
//if (queueFull) {
//System.out.println("getTopGroups groupOffset=" + groupOffset + " topNGroups=" + topNGroups);
//}
if (subDocUpto != 0) {
processGroup();
}
if (groupOffset >= groupQueue.size()) {
return null;
}
int totalGroupedHitCount = 0;
final FakeScorer fakeScorer = new FakeScorer();
@SuppressWarnings({"unchecked","rawtypes"})
final GroupDocs<Object>[] groups = new GroupDocs[groupQueue.size() - groupOffset];
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
for (FieldComparator<?> comparator : comparators) {
comparator.setScorer(scorer);
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
@Override
public void collect(int doc) throws IOException {
// System.out.println("C " + doc);
if (doc > groupEndDocID) {
// Group changed
if (subDocUpto != 0) {
processGroup();
}
groupEndDocID = lastDocPerGroupBits.advance(doc);
//System.out.println(" adv " + groupEndDocID + " " + lastDocPerGroupBits);
subDocUpto = 0;
groupCompetes = !queueFull;
}
totalHitCount++;
// Always cache doc/score within this group:
if (subDocUpto == pendingSubDocs.length) {
pendingSubDocs = ArrayUtil.grow(pendingSubDocs);
}
pendingSubDocs[subDocUpto] = doc;
if (needsScores) {
if (subDocUpto == pendingSubScores.length) {
pendingSubScores = ArrayUtil.grow(pendingSubScores);
}
pendingSubScores[subDocUpto] = scorer.score();
}
subDocUpto++;
if (groupCompetes) {
if (subDocUpto == 1) {
assert !queueFull;
//System.out.println(" init copy to bottomSlot=" + bottomSlot);
for (FieldComparator<?> fc : comparators) {
fc.copy(bottomSlot, doc);
fc.setBottom(bottomSlot);
}
topGroupDoc = doc;
} else {
// Compare to bottomSlot
for (int compIDX = 0;; compIDX++) {
final int c = reversed[compIDX] * comparators[compIDX].compareBottom(doc);
if (c < 0) {
// Definitely not competitive -- done
return;
} else if (c > 0) {
// Definitely competitive.
break;
} else if (compIDX == compIDXEnd) {
// Ties with bottom, except we know this docID is
// > docID in the queue (docs are visited in
// order), so not competitive:
return;
}
}
//System.out.println(" best w/in group!");
for (FieldComparator<?> fc : comparators) {
fc.copy(bottomSlot, doc);
// Necessary because some comparators cache
// details of bottom slot; this forces them to
// re-cache:
fc.setBottom(bottomSlot);
}
topGroupDoc = doc;
}
} else {
// We're not sure this group will make it into the
// queue yet
for (int compIDX = 0;; compIDX++) {
final int c = reversed[compIDX] * comparators[compIDX].compareBottom(doc);
if (c < 0) {
// Definitely not competitive -- done
//System.out.println(" doc doesn't compete w/ top groups");
return;
} else if (c > 0) {
// Definitely competitive.
break;
} else if (compIDX == compIDXEnd) {
// Ties with bottom, except we know this docID is
// > docID in the queue (docs are visited in
// order), so not competitive:
//System.out.println(" doc doesn't compete w/ top groups");
return;
}
}
groupCompetes = true;
for (FieldComparator<?> fc : comparators) {
fc.copy(bottomSlot, doc);
// Necessary because some comparators cache
// details of bottom slot; this forces them to
// re-cache:
fc.setBottom(bottomSlot);
}
topGroupDoc = doc;
//System.out.println(" doc competes w/ top groups");
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
@Override
public void setNextReader(AtomicReaderContext readerContext) throws IOException {
if (subDocUpto != 0) {
processGroup();
}
subDocUpto = 0;
docBase = readerContext.docBase;
//System.out.println("setNextReader base=" + docBase + " r=" + readerContext.reader);
lastDocPerGroupBits = lastDocPerGroup.getDocIdSet(readerContext, readerContext.reader().getLiveDocs()).iterator();
groupEndDocID = -1;
currentReaderContext = readerContext;
for (int i=0; i<comparators.length; i++) {
comparators[i] = comparators[i].setNextReader(readerContext);
}
}
// in lucene/grouping/src/java/org/apache/lucene/search/grouping/AbstractAllGroupsCollector.java
public void setScorer(Scorer scorer) throws IOException {}
// in lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new ToChildBlockJoinWeight(this, parentQuery.createWeight(searcher), parentsFilter, doScores);
}
// in lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
@Override
public float getValueForNormalization() throws IOException {
return parentWeight.getValueForNormalization() * joinQuery.getBoost() * joinQuery.getBoost();
}
// in lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
@Override
public Scorer scorer(AtomicReaderContext readerContext, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
// Pass scoreDocsInOrder true, topScorer false to our sub:
final Scorer parentScorer = parentWeight.scorer(readerContext, true, false, null);
if (parentScorer == null) {
// No matches
return null;
}
// NOTE: we cannot pass acceptDocs here because this
// will (most likely, justifiably) cause the filter to
// not return a FixedBitSet but rather a
// BitsFilteredDocIdSet. Instead, we filter by
// acceptDocs when we score:
final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, null);
if (parents == null) {
// No matches
return null;
}
if (!(parents instanceof FixedBitSet)) {
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
}
return new ToChildBlockJoinScorer(this, parentScorer, (FixedBitSet) parents, doScores, acceptDocs);
}
// in lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
@Override
public Explanation explain(AtomicReaderContext reader, int doc) throws IOException {
// TODO
throw new UnsupportedOperationException(getClass().getName() +
" cannot explain match on parent document");
}
// in lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
@Override
public int nextDoc() throws IOException {
//System.out.println("Q.nextDoc() parentDoc=" + parentDoc + " childDoc=" + childDoc);
// Loop until we hit a childDoc that's accepted
nextChildDoc:
while (true) {
if (childDoc+1 == parentDoc) {
// OK, we are done iterating through all children
// matching this one parent doc, so we now nextDoc()
// the parent. Use a while loop because we may have
// to skip over some number of parents w/ no
// children:
while (true) {
parentDoc = parentScorer.nextDoc();
if (parentDoc == 0) {
// Degenerate but allowed: parent has no children
// TODO: would be nice to pull initial parent
// into ctor so we can skip this if... but it's
// tricky because scorer must return -1 for
// .doc() on init...
parentDoc = parentScorer.nextDoc();
}
if (parentDoc == NO_MORE_DOCS) {
childDoc = NO_MORE_DOCS;
//System.out.println(" END");
return childDoc;
}
childDoc = 1 + parentBits.prevSetBit(parentDoc-1);
if (acceptDocs != null && !acceptDocs.get(childDoc)) {
continue nextChildDoc;
}
if (childDoc < parentDoc) {
if (doScores) {
parentScore = parentScorer.score();
}
//System.out.println(" " + childDoc);
return childDoc;
} else {
// Degenerate but allowed: parent has no children
}
}
} else {
assert childDoc < parentDoc: "childDoc=" + childDoc + " parentDoc=" + parentDoc;
childDoc++;
if (acceptDocs != null && !acceptDocs.get(childDoc)) {
continue;
}
//System.out.println(" " + childDoc);
return childDoc;
}
}
}
// in lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
@Override
public float score() throws IOException {
return parentScore;
}
// in lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
@Override
public int advance(int childTarget) throws IOException {
assert childTarget >= parentBits.length() || !parentBits.get(childTarget);
//System.out.println("Q.advance childTarget=" + childTarget);
if (childTarget == NO_MORE_DOCS) {
//System.out.println(" END");
return childDoc = parentDoc = NO_MORE_DOCS;
}
assert childDoc == -1 || childTarget != parentDoc: "childTarget=" + childTarget;
if (childDoc == -1 || childTarget > parentDoc) {
// Advance to new parent:
parentDoc = parentScorer.advance(childTarget);
//System.out.println(" advance to parentDoc=" + parentDoc);
assert parentDoc > childTarget;
if (parentDoc == NO_MORE_DOCS) {
//System.out.println(" END");
return childDoc = NO_MORE_DOCS;
}
if (doScores) {
parentScore = parentScorer.score();
}
final int firstChild = parentBits.prevSetBit(parentDoc-1);
//System.out.println(" firstChild=" + firstChild);
childTarget = Math.max(childTarget, firstChild);
}
assert childTarget < parentDoc;
// Advance within children of current parent:
childDoc = childTarget;
//System.out.println(" " + childDoc);
if (acceptDocs != null && !acceptDocs.get(childDoc)) {
nextDoc();
}
return childDoc;
}
// in lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
@Override
public Query rewrite(IndexReader reader) throws IOException {
final Query parentRewrite = parentQuery.rewrite(reader);
if (parentRewrite != parentQuery) {
Query rewritten = new ToChildBlockJoinQuery(parentQuery,
parentRewrite,
parentsFilter,
doScores);
rewritten.setBoost(getBoost());
return rewritten;
} else {
return this;
}
}
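ToChildBlockJoinQuery maps hits on parent documents down to their children; it requires that parent/child blocks were indexed together and, as the scorer method above enforces, that the parents filter produces a FixedBitSet. A hedged sketch with made-up field names and an assumed searcher:
Filter parentsFilter =
    new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "parent"))));
Query parentQuery = new TermQuery(new Term("brand", "acme"));
ToChildBlockJoinQuery childJoinQuery =
    new ToChildBlockJoinQuery(parentQuery, parentsFilter, false /* doScores */);
TopDocs matchingChildren = searcher.search(childJoinQuery, 10);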
// in lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java
public void setScorer(Scorer scorer) throws IOException {
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java
public void collect(int doc) throws IOException {
reuse = docTermOrds.lookup(doc, reuse);
int[] buffer = new int[5];
int chunk;
do {
chunk = reuse.read(buffer);
if (chunk == 0) {
return;
}
for (int idx = 0; idx < chunk; idx++) {
int key = buffer[idx];
docTermsEnum.seekExact((long) key);
collectorTerms.add(docTermsEnum.term());
}
} while (chunk >= buffer.length);
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
docTermsEnum = docTermOrds.getOrdTermsEnum(context.reader());
reuse = null; // LUCENE-3377 needs to be fixed first, then this statement can be removed...
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java
public void collect(int doc) throws IOException {
collectorTerms.add(fromDocTerms.getTerm(doc, spare));
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field);
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java
@Override
public Query rewrite(IndexReader reader) throws IOException {
final Query originalQueryRewrite = originalQuery.rewrite(reader);
if (originalQueryRewrite != originalQuery) {
Query rewritten = new TermsIncludingScoreQuery(field, multipleValuesPerDocument, terms, scores,
ords, originalQueryRewrite, originalQuery);
rewritten.setBoost(getBoost());
return rewritten;
} else {
return this;
}
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
final Weight originalWeight = originalQuery.createWeight(searcher);
return new Weight() {
private TermsEnum segmentTermsEnum;
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
SVInnerScorer scorer = (SVInnerScorer) scorer(context, true, false, context.reader().getLiveDocs());
if (scorer != null) {
if (scorer.advance(doc) == doc) {
return scorer.explain();
}
}
return new ComplexExplanation(false, 0.0f, "Not a match");
}
public Query getQuery() {
return TermsIncludingScoreQuery.this;
}
public float getValueForNormalization() throws IOException {
return originalWeight.getValueForNormalization() * TermsIncludingScoreQuery.this.getBoost() * TermsIncludingScoreQuery.this.getBoost();
}
public void normalize(float norm, float topLevelBoost) {
originalWeight.normalize(norm, topLevelBoost * TermsIncludingScoreQuery.this.getBoost());
}
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
Terms terms = context.reader().terms(field);
if (terms == null) {
return null;
}
segmentTermsEnum = terms.iterator(segmentTermsEnum);
if (multipleValuesPerDocument) {
return new MVInnerScorer(this, acceptDocs, segmentTermsEnum, context.reader().maxDoc());
} else {
return new SVInnerScorer(this, acceptDocs, segmentTermsEnum);
}
}
};
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
SVInnerScorer scorer = (SVInnerScorer) scorer(context, true, false, context.reader().getLiveDocs());
if (scorer != null) {
if (scorer.advance(doc) == doc) {
return scorer.explain();
}
}
return new ComplexExplanation(false, 0.0f, "Not a match");
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java
public float getValueForNormalization() throws IOException {
return originalWeight.getValueForNormalization() * TermsIncludingScoreQuery.this.getBoost() * TermsIncludingScoreQuery.this.getBoost();
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
Terms terms = context.reader().terms(field);
if (terms == null) {
return null;
}
segmentTermsEnum = terms.iterator(segmentTermsEnum);
if (multipleValuesPerDocument) {
return new MVInnerScorer(this, acceptDocs, segmentTermsEnum, context.reader().maxDoc());
} else {
return new SVInnerScorer(this, acceptDocs, segmentTermsEnum);
}
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java
public float score() throws IOException {
return scores[ords[scoreUpto]];
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java
public Explanation explain() throws IOException {
return new ComplexExplanation(true, score(), "Score based on join value " + termsEnum.term().utf8ToString());
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java
public int nextDoc() throws IOException {
if (docsEnum != null) {
int docId = docsEnum.nextDoc();
if (docId == DocIdSetIterator.NO_MORE_DOCS) {
docsEnum = null;
} else {
return docId;
}
}
do {
if (upto == terms.size()) {
return DocIdSetIterator.NO_MORE_DOCS;
}
scoreUpto = upto;
TermsEnum.SeekStatus status = termsEnum.seekCeil(terms.get(ords[upto++], spare), true);
if (status == TermsEnum.SeekStatus.FOUND) {
docsEnum = reuse = termsEnum.docs(acceptDocs, reuse, false);
}
} while (docsEnum == null);
return docsEnum.nextDoc();
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java
public int advance(int target) throws IOException {
int docId;
do {
docId = nextDoc();
if (docId < target) {
int tempDocId = docsEnum.advance(target);
if (tempDocId == target) {
docId = tempDocId;
break;
}
} else if (docId == target) {
break;
}
docsEnum = null; // go to the next ord.
} while (docId != DocIdSetIterator.NO_MORE_DOCS);
return docId;
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java
public int nextDoc() throws IOException {
if (docsEnum != null) {
int docId;
do {
docId = docsEnum.nextDoc();
if (docId == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
} while (alreadyEmittedDocs.get(docId));
if (docId == DocIdSetIterator.NO_MORE_DOCS) {
docsEnum = null;
} else {
alreadyEmittedDocs.set(docId);
return docId;
}
}
for (;;) {
do {
if (upto == terms.size()) {
return DocIdSetIterator.NO_MORE_DOCS;
}
scoreUpto = upto;
TermsEnum.SeekStatus status = termsEnum.seekCeil(terms.get(ords[upto++], spare), true);
if (status == TermsEnum.SeekStatus.FOUND) {
docsEnum = reuse = termsEnum.docs(acceptDocs, reuse, false);
}
} while (docsEnum == null);
int docId;
do {
docId = docsEnum.nextDoc();
if (docId == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
} while (alreadyEmittedDocs.get(docId));
if (docId == DocIdSetIterator.NO_MORE_DOCS) {
docsEnum = null;
} else {
alreadyEmittedDocs.set(docId);
return docId;
}
}
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
public void collect(int doc) throws IOException {
int ord = collectedTerms.add(fromDocTerms.getTerm(doc, spare));
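// add() returns a negative value (-(existing ord) - 1) when the term was already collected;
// the branch below recovers that existing ord instead of growing the score array.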
if (ord < 0) {
ord = -ord - 1;
} else {
if (ord >= scoreSums.length) {
scoreSums = ArrayUtil.grow(scoreSums);
}
}
float current = scorer.score();
float existing = scoreSums[ord];
if (Float.compare(existing, 0.0f) == 0) {
scoreSums[ord] = current;
} else {
switch (scoreMode) {
case Total:
scoreSums[ord] = scoreSums[ord] + current;
break;
case Max:
if (current > existing) {
scoreSums[ord] = current;
}
}
}
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field);
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
@Override
public void collect(int doc) throws IOException {
int ord = collectedTerms.add(fromDocTerms.getTerm(doc, spare));
if (ord < 0) {
ord = -ord - 1;
} else {
if (ord >= scoreSums.length) {
scoreSums = ArrayUtil.grow(scoreSums);
scoreCounts = ArrayUtil.grow(scoreCounts);
}
}
float current = scorer.score();
float existing = scoreSums[ord];
if (Float.compare(existing, 0.0f) == 0) {
scoreSums[ord] = current;
scoreCounts[ord] = 1;
} else {
scoreSums[ord] = scoreSums[ord] + current;
scoreCounts[ord]++;
}
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
public void collect(int doc) throws IOException {
reuse = fromDocTermOrds.lookup(doc, reuse);
int[] buffer = new int[5];
int chunk;
do {
chunk = reuse.read(buffer);
if (chunk == 0) {
return;
}
for (int idx = 0; idx < chunk; idx++) {
int key = buffer[idx];
docTermsEnum.seekExact((long) key);
int ord = collectedTerms.add(docTermsEnum.term());
if (ord < 0) {
ord = -ord - 1;
} else {
if (ord >= scoreSums.length) {
scoreSums = ArrayUtil.grow(scoreSums);
}
}
final float current = scorer.score();
final float existing = scoreSums[ord];
if (Float.compare(existing, 0.0f) == 0) {
scoreSums[ord] = current;
} else {
switch (scoreMode) {
case Total:
scoreSums[ord] = existing + current;
break;
case Max:
if (current > existing) {
scoreSums[ord] = current;
}
}
}
}
} while (chunk >= buffer.length);
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
public void setNextReader(AtomicReaderContext context) throws IOException {
fromDocTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
docTermsEnum = fromDocTermOrds.getOrdTermsEnum(context.reader());
reuse = null; // LUCENE-3377 needs to be fixed first, then this statement can be removed...
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
@Override
public void collect(int doc) throws IOException {
reuse = fromDocTermOrds.lookup(doc, reuse);
int[] buffer = new int[5];
int chunk;
do {
chunk = reuse.read(buffer);
if (chunk == 0) {
return;
}
for (int idx = 0; idx < chunk; idx++) {
int key = buffer[idx];
docTermsEnum.seekExact((long) key);
int ord = collectedTerms.add(docTermsEnum.term());
if (ord < 0) {
ord = -ord - 1;
} else {
if (ord >= scoreSums.length) {
scoreSums = ArrayUtil.grow(scoreSums);
scoreCounts = ArrayUtil.grow(scoreCounts);
}
}
float current = scorer.score();
float existing = scoreSums[ord];
if (Float.compare(existing, 0.0f) == 0) {
scoreSums[ord] = current;
scoreCounts[ord] = 1;
} else {
scoreSums[ord] = scoreSums[ord] + current;
scoreCounts[ord]++;
}
}
} while (chunk >= buffer.length);
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsQuery.java
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
if (this.terms.size() == 0) {
return TermsEnum.EMPTY;
}
return new SeekingTermSetTermsEnum(terms.iterator(null), this.terms);
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsQuery.java
@Override
protected BytesRef nextSeekTerm(BytesRef currentTerm) throws IOException {
BytesRef temp = seekTerm;
seekTerm = null;
return temp;
}
// in lucene/join/src/java/org/apache/lucene/search/join/TermsQuery.java
protected AcceptStatus accept(BytesRef term) throws IOException {
if (comparator.compare(term, lastTerm) > 0) {
return AcceptStatus.END;
}
BytesRef currentTerm = terms.get(ords[upto], spare);
if (comparator.compare(term, currentTerm) == 0) {
if (upto == lastElement) {
return AcceptStatus.YES;
} else {
seekTerm = terms.get(ords[++upto], spare);
return AcceptStatus.YES_AND_SEEK;
}
} else {
if (upto == lastElement) {
return AcceptStatus.NO;
} else { // Our current term doesn't match the given term.
int cmp;
do { // We may be behind the given term by more than one step; keep incrementing till we're equal or higher.
if (upto == lastElement) {
return AcceptStatus.NO;
}
// typically the terms dict is a superset of the query's terms, so it's unusual that we have to skip many of
// our terms; hence we don't do a binary search here
seekTerm = terms.get(ords[++upto], spare);
} while ((cmp = comparator.compare(seekTerm, term)) < 0);
if (cmp == 0) {
if (upto == lastElement) {
return AcceptStatus.YES;
}
seekTerm = terms.get(ords[++upto], spare);
return AcceptStatus.YES_AND_SEEK;
} else {
return AcceptStatus.NO_AND_SEEK;
}
}
}
}
// in lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java
public static Query createJoinQuery(String fromField,
boolean multipleValuesPerDocument,
String toField,
Query fromQuery,
IndexSearcher fromSearcher,
ScoreMode scoreMode) throws IOException {
switch (scoreMode) {
case None:
TermsCollector termsCollector = TermsCollector.create(fromField, multipleValuesPerDocument);
fromSearcher.search(fromQuery, termsCollector);
return new TermsQuery(toField, termsCollector.getCollectorTerms());
case Total:
case Max:
case Avg:
TermsWithScoreCollector termsWithScoreCollector =
TermsWithScoreCollector.create(fromField, multipleValuesPerDocument, scoreMode);
fromSearcher.search(fromQuery, termsWithScoreCollector);
return new TermsIncludingScoreQuery(
toField,
multipleValuesPerDocument,
termsWithScoreCollector.getCollectedTerms(),
termsWithScoreCollector.getScoresPerTerm(),
fromQuery
);
default:
throw new IllegalArgumentException(String.format("Score mode %s isn't supported.", scoreMode));
}
}
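A brief usage sketch of the join entry point above (illustrative only; the field names, the "from" query, and the fromSearcher/toSearcher variables are assumptions, not from the source):
// illustrative usage sketch, not part of the Lucene sources
Query fromQuery = new TermQuery(new Term("type", "price"));   // selects the "from" side documents
Query joinQuery = JoinUtil.createJoinQuery(
    "productId",   // fromField: values read from documents matching fromQuery
    false,         // a single join value per document
    "productId",   // toField: field queried on the "to" side
    fromQuery,
    fromSearcher,  // IndexSearcher over the index holding the "from" documents
    ScoreMode.Max  // carry the highest "from" score into the joined documents
);
TopDocs hits = toSearcher.search(joinQuery, 10);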
// in lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new BlockJoinWeight(this, childQuery.createWeight(searcher), parentsFilter, scoreMode);
}
// in lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@Override
public float getValueForNormalization() throws IOException {
return childWeight.getValueForNormalization() * joinQuery.getBoost() * joinQuery.getBoost();
}
// in lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@Override
public Scorer scorer(AtomicReaderContext readerContext, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
// Pass scoreDocsInOrder true, topScorer false to our sub:
final Scorer childScorer = childWeight.scorer(readerContext, true, false, null);
if (childScorer == null) {
// No matches
return null;
}
final int firstChildDoc = childScorer.nextDoc();
if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS) {
// No matches
return null;
}
// NOTE: we cannot pass acceptDocs here because this
// will (most likely, justifiably) cause the filter to
// not return a FixedBitSet but rather a
// BitsFilteredDocIdSet. Instead, we filter by
// acceptDocs when we score:
final DocIdSet parents = parentsFilter.getDocIdSet(readerContext, null);
if (parents == null) {
// No matches
return null;
}
if (!(parents instanceof FixedBitSet)) {
throw new IllegalStateException("parentFilter must return FixedBitSet; got " + parents);
}
return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode, acceptDocs);
}
// in lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@Override
public Explanation explain(AtomicReaderContext reader, int doc) throws IOException {
// TODO
throw new UnsupportedOperationException(getClass().getName() +
" cannot explain match on parent document");
}
// in lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@Override
public int nextDoc() throws IOException {
//System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
// Loop until we hit a parentDoc that's accepted
while (true) {
if (nextChildDoc == NO_MORE_DOCS) {
//System.out.println(" end");
return parentDoc = NO_MORE_DOCS;
}
// Gather all children sharing the same parent as
// nextChildDoc
parentDoc = parentBits.nextSetBit(nextChildDoc);
//System.out.println(" parentDoc=" + parentDoc);
assert parentDoc != -1;
//System.out.println(" nextChildDoc=" + nextChildDoc);
if (acceptDocs != null && !acceptDocs.get(parentDoc)) {
// Parent doc not accepted; skip child docs until
// we hit a new parent doc:
do {
nextChildDoc = childScorer.nextDoc();
} while (nextChildDoc < parentDoc);
continue;
}
float totalScore = 0;
float maxScore = Float.NEGATIVE_INFINITY;
childDocUpto = 0;
do {
//System.out.println(" c=" + nextChildDoc);
if (pendingChildDocs.length == childDocUpto) {
pendingChildDocs = ArrayUtil.grow(pendingChildDocs);
}
if (scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) {
pendingChildScores = ArrayUtil.grow(pendingChildScores);
}
pendingChildDocs[childDocUpto] = nextChildDoc;
if (scoreMode != ScoreMode.None) {
// TODO: specialize this into dedicated classes per-scoreMode
final float childScore = childScorer.score();
pendingChildScores[childDocUpto] = childScore;
maxScore = Math.max(childScore, maxScore);
totalScore += childScore;
}
childDocUpto++;
nextChildDoc = childScorer.nextDoc();
} while (nextChildDoc < parentDoc);
// Parent & child docs are supposed to be orthogonal:
assert nextChildDoc != parentDoc;
switch(scoreMode) {
case Avg:
parentScore = totalScore / childDocUpto;
break;
case Max:
parentScore = maxScore;
break;
case Total:
parentScore = totalScore;
break;
case None:
break;
}
//System.out.println(" return parentDoc=" + parentDoc);
return parentDoc;
}
}
// in lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@Override
public float score() throws IOException {
return parentScore;
}
// in lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@Override
public int advance(int parentTarget) throws IOException {
//System.out.println("Q.advance parentTarget=" + parentTarget);
if (parentTarget == NO_MORE_DOCS) {
return parentDoc = NO_MORE_DOCS;
}
if (parentTarget == 0) {
// Callers should only be passing in a docID from
// the parent space, so this means this parent
// has no children (it got docID 0), so it cannot
// possibly match. We must handle this case
// separately otherwise we pass invalid -1 to
// prevSetBit below:
return nextDoc();
}
final int prevParentDoc = parentBits.prevSetBit(parentTarget-1);
//System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
assert prevParentDoc >= parentDoc;
if (prevParentDoc > nextChildDoc) {
nextChildDoc = childScorer.advance(prevParentDoc);
// System.out.println(" childScorer advanced to child docID=" + nextChildDoc);
//} else {
//System.out.println(" skip childScorer advance");
}
// Parent & child docs are supposed to be orthogonal:
assert nextChildDoc != prevParentDoc;
final int nd = nextDoc();
//System.out.println(" return nextParentDoc=" + nd);
return nd;
}
// in lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@Override
public Query rewrite(IndexReader reader) throws IOException {
final Query childRewrite = childQuery.rewrite(reader);
if (childRewrite != childQuery) {
Query rewritten = new ToParentBlockJoinQuery(childQuery,
childRewrite,
parentsFilter,
scoreMode);
rewritten.setBoost(getBoost());
return rewritten;
} else {
return this;
}
}
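ToParentBlockJoinQuery relies on each parent document having been indexed in the same block as its children, children first and the parent last, via IndexWriter.addDocuments. A minimal indexing sketch under that assumption (the field names and the "docType" marker are made up):
// illustrative indexing sketch, not part of the Lucene sources
List<Document> block = new ArrayList<Document>();
Document child1 = new Document();
child1.add(new StringField("skuColor", "red", Field.Store.NO));
block.add(child1);
Document child2 = new Document();
child2.add(new StringField("skuColor", "blue", Field.Store.NO));
block.add(child2);
Document parent = new Document();
parent.add(new StringField("docType", "parent", Field.Store.NO)); // marker the parents filter can match
block.add(parent);                                                 // parent must be the last doc in the block
writer.addDocuments(block);                                        // keeps the block contiguous in the segment
At query time the parentsFilter passed to the query typically wraps a cached filter over that marker (e.g. docType:parent) so that scorer() receives a FixedBitSet of parent documents, as the instanceof check above requires.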
// in lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java
@Override
public void collect(int parentDoc) throws IOException {
//System.out.println("C parentDoc=" + parentDoc);
totalHitCount++;
float score = Float.NaN;
if (trackMaxScore) {
score = scorer.score();
if (score > maxScore) {
maxScore = score;
}
}
// TODO: we could sweep all joinScorers here and
// aggregate total child hit count, so we can fill this
// in getTopGroups (we wire it to 0 now)
if (queueFull) {
//System.out.println(" queueFull");
// Fastmatch: return if this hit is not competitive
for (int i = 0;; i++) {
final int c = reverseMul[i] * comparators[i].compareBottom(parentDoc);
if (c < 0) {
// Definitely not competitive.
//System.out.println(" skip");
return;
} else if (c > 0) {
// Definitely competitive.
break;
} else if (i == compEnd) {
// Here c=0. If we're at the last comparator, this doc is not
// competitive, since docs are visited in doc Id order, which means
// this doc cannot compete with any other document in the queue.
//System.out.println(" skip");
return;
}
}
//System.out.println(" competes! doc=" + (docBase + parentDoc));
// This hit is competitive - replace bottom element in queue & adjustTop
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(bottom.slot, parentDoc);
}
if (!trackMaxScore && trackScores) {
score = scorer.score();
}
bottom.doc = docBase + parentDoc;
bottom.readerContext = currentReaderContext;
bottom.score = score;
copyGroups(bottom);
bottom = queue.updateTop();
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
} else {
// Startup transient: queue is not yet full:
final int comparatorSlot = totalHitCount - 1;
// Copy hit into queue
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(comparatorSlot, parentDoc);
}
//System.out.println(" startup: new OG doc=" + (docBase+parentDoc));
final OneGroup og = new OneGroup(comparatorSlot, docBase+parentDoc, score, joinScorers.length, trackScores);
og.readerContext = currentReaderContext;
copyGroups(og);
bottom = queue.add(og);
queueFull = totalHitCount == numParentHits;
if (queueFull) {
// End of startup transient: queue just filled up:
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
}
}
}
// in lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
currentReaderContext = context;
docBase = context.docBase;
for (int compIDX = 0; compIDX < comparators.length; compIDX++) {
queue.setComparator(compIDX, comparators[compIDX].setNextReader(context));
}
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java
public void addDefaultQueryTemplate(InputStream xslIs)
throws TransformerConfigurationException, ParserConfigurationException, SAXException, IOException {
defaultCompiledTemplates = getTemplates(xslIs);
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java
public void addQueryTemplate(String name, InputStream xslIs)
throws TransformerConfigurationException, ParserConfigurationException, SAXException, IOException {
compiledTemplatesCache.put(name, getTemplates(xslIs));
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java
public String getQueryAsXmlString(Properties formProperties, String queryTemplateName)
throws SAXException, IOException, ParserConfigurationException, TransformerException {
Templates ts = compiledTemplatesCache.get(queryTemplateName);
return getQueryAsXmlString(formProperties, ts);
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java
public Document getQueryAsDOM(Properties formProperties, String queryTemplateName)
throws SAXException, IOException, ParserConfigurationException, TransformerException {
Templates ts = compiledTemplatesCache.get(queryTemplateName);
return getQueryAsDOM(formProperties, ts);
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java
public String getQueryAsXmlString(Properties formProperties)
throws SAXException, IOException, ParserConfigurationException, TransformerException {
return getQueryAsXmlString(formProperties, defaultCompiledTemplates);
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java
public Document getQueryAsDOM(Properties formProperties)
throws SAXException, IOException, ParserConfigurationException, TransformerException {
return getQueryAsDOM(formProperties, defaultCompiledTemplates);
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java
public static String getQueryAsXmlString(Properties formProperties, Templates template)
throws SAXException, IOException, ParserConfigurationException, TransformerException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
StreamResult result = new StreamResult(baos);
transformCriteria(formProperties, template, result);
return baos.toString();
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java
public static String getQueryAsXmlString(Properties formProperties, InputStream xslIs)
throws SAXException, IOException, ParserConfigurationException, TransformerException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
StreamResult result = new StreamResult(baos);
transformCriteria(formProperties, xslIs, result);
return baos.toString();
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java
public static Document getQueryAsDOM(Properties formProperties, Templates template)
throws SAXException, IOException, ParserConfigurationException, TransformerException {
DOMResult result = new DOMResult();
transformCriteria(formProperties, template, result);
return (Document) result.getNode();
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java
public static Document getQueryAsDOM(Properties formProperties, InputStream xslIs)
throws SAXException, IOException, ParserConfigurationException, TransformerException {
DOMResult result = new DOMResult();
transformCriteria(formProperties, xslIs, result);
return (Document) result.getNode();
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java
public static void transformCriteria(Properties formProperties, InputStream xslIs, Result result)
throws SAXException, IOException, ParserConfigurationException, TransformerException {
dbf.setNamespaceAware(true);
DocumentBuilder builder = dbf.newDocumentBuilder();
org.w3c.dom.Document xslDoc = builder.parse(xslIs);
DOMSource ds = new DOMSource(xslDoc);
Transformer transformer = null;
synchronized (tFactory) {
transformer = tFactory.newTransformer(ds);
}
transformCriteria(formProperties, transformer, result);
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java
public static void transformCriteria(Properties formProperties, Templates template, Result result)
throws SAXException, IOException, ParserConfigurationException, TransformerException {
transformCriteria(formProperties, template.newTransformer(), result);
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java
public static void transformCriteria(Properties formProperties, Transformer transformer, Result result)
throws SAXException, IOException, ParserConfigurationException, TransformerException {
dbf.setNamespaceAware(true);
//Create an XML document representing the search index document.
DocumentBuilder db = dbf.newDocumentBuilder();
org.w3c.dom.Document doc = db.newDocument();
Element root = doc.createElement("Document");
doc.appendChild(root);
Enumeration keysEnum = formProperties.keys();
while (keysEnum.hasMoreElements()) {
String propName = (String) keysEnum.nextElement();
String value = formProperties.getProperty(propName);
if ((value != null) && (value.length() > 0)) {
DOMUtils.insertChild(root, propName, value);
}
}
//Use XSLT to transform into an XML query string using the queryTemplate
DOMSource xml = new DOMSource(doc);
transformer.transform(xml, result);
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/QueryTemplateManager.java
public static Templates getTemplates(InputStream xslIs)
throws ParserConfigurationException, SAXException, IOException, TransformerConfigurationException {
dbf.setNamespaceAware(true);
DocumentBuilder builder = dbf.newDocumentBuilder();
org.w3c.dom.Document xslDoc = builder.parse(xslIs);
DOMSource ds = new DOMSource(xslDoc);
return tFactory.newTemplates(ds);
}
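A minimal sketch of driving the static helpers above: form fields go into a Properties object, and an XSL template stream turns them into an XML query document. The property names and the template path below are assumptions:
// illustrative usage sketch, not part of the Lucene sources
Properties form = new Properties();
form.setProperty("artist", "Cher");        // hypothetical form field
form.setProperty("releaseDate", "2002");   // empty or null values are skipped by transformCriteria
InputStream xsl = new FileInputStream("albumBooleanQuery.xsl"); // assumed template location
org.w3c.dom.Document queryDoc = QueryTemplateManager.getQueryAsDOM(form, xsl);
// queryDoc can then be handed to the XML query parser (e.g. CoreParser) to build a Lucene Query.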
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/NumericRangeFilterBuilder.java
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
return null;
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/query/SimpleTermRewriteQuery.java
@Override
public Query rewrite(IndexReader reader) throws IOException {
final List<Query> luceneSubQueries = new ArrayList<Query>();
srndQuery.visitMatchingTerms(reader, fieldName,
new SimpleTerm.MatchingTermVisitor() {
public void visitMatchingTerm(Term term) throws IOException {
luceneSubQueries.add(qf.newTermQuery(term));
}
});
return (luceneSubQueries.size() == 0) ? SrndQuery.theEmptyLcnQuery
: (luceneSubQueries.size() == 1) ? luceneSubQueries.get(0)
: SrndBooleanQuery.makeBooleanQuery(
/* luceneSubQueries all have default weight */
luceneSubQueries, BooleanClause.Occur.SHOULD); /* OR the subquery terms */
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/query/SimpleTermRewriteQuery.java
public void visitMatchingTerm(Term term) throws IOException {
luceneSubQueries.add(qf.newTermQuery(term));
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/query/SrndTruncQuery.java
@Override
public void visitMatchingTerms(
IndexReader reader,
String fieldName,
MatchingTermVisitor mtv) throws IOException
{
int prefixLength = prefix.length();
Terms terms = MultiFields.getTerms(reader, fieldName);
if (terms != null) {
Matcher matcher = pattern.matcher("");
try {
TermsEnum termsEnum = terms.iterator(null);
TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);
BytesRef text;
if (status == TermsEnum.SeekStatus.FOUND) {
text = prefixRef;
} else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
text = termsEnum.term();
} else {
text = null;
}
while(text != null) {
if (text != null && StringHelper.startsWith(text, prefixRef)) {
String textString = text.utf8ToString();
matcher.reset(textString.substring(prefixLength));
if (matcher.matches()) {
mtv.visitMatchingTerm(new Term(fieldName, textString));
}
} else {
break;
}
text = termsEnum.next();
}
} finally {
matcher.reset();
}
}
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/query/SpanNearClauseFactory.java
public void addTermWeighted(Term t, float weight) throws IOException {
SpanTermQuery stq = qf.newSpanTermQuery(t);
/* CHECKME: wrap in Hashable...? */
addSpanQueryWeighted(stq, weight);
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/query/SrndPrefixQuery.java
@Override
public void visitMatchingTerms(
IndexReader reader,
String fieldName,
MatchingTermVisitor mtv) throws IOException
{
/* inspired by PrefixQuery.rewrite(): */
Terms terms = MultiFields.getTerms(reader, fieldName);
if (terms != null) {
TermsEnum termsEnum = terms.iterator(null);
boolean skip = false;
TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getPrefix()));
if (status == TermsEnum.SeekStatus.FOUND) {
mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName));
} else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
if (StringHelper.startsWith(termsEnum.term(), prefixRef)) {
mtv.visitMatchingTerm(new Term(fieldName, termsEnum.term().utf8ToString()));
} else {
skip = true;
}
} else {
// EOF
skip = true;
}
if (!skip) {
while(true) {
BytesRef text = termsEnum.next();
if (text != null && StringHelper.startsWith(text, prefixRef)) {
mtv.visitMatchingTerm(new Term(fieldName, text.utf8ToString()));
} else {
break;
}
}
}
}
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/query/OrQuery.java
public void addSpanQueries(SpanNearClauseFactory sncf) throws IOException {
Iterator sqi = getSubQueriesIterator();
while (sqi.hasNext()) {
((DistanceSubQuery)sqi.next()).addSpanQueries(sncf);
}
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/query/SimpleTerm.java
@Override
public void addSpanQueries(final SpanNearClauseFactory sncf) throws IOException {
visitMatchingTerms(
sncf.getIndexReader(),
sncf.getFieldName(),
new MatchingTermVisitor() {
public void visitMatchingTerm(Term term) throws IOException {
sncf.addTermWeighted(term, getWeight());
}
});
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/query/SimpleTerm.java
public void visitMatchingTerm(Term term) throws IOException {
sncf.addTermWeighted(term, getWeight());
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/query/DistanceRewriteQuery.java
@Override
public Query rewrite(IndexReader reader) throws IOException {
return srndQuery.getSpanNearQuery(reader, fieldName, getBoost(), qf);
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/query/SrndTermQuery.java
@Override
public void visitMatchingTerms(
IndexReader reader,
String fieldName,
MatchingTermVisitor mtv) throws IOException
{
/* check term presence in index here for symmetry with other SimpleTerm's */
Terms terms = MultiFields.getTerms(reader, fieldName);
if (terms != null) {
TermsEnum termsEnum = terms.iterator(null);
TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getTermText()));
if (status == TermsEnum.SeekStatus.FOUND) {
mtv.visitMatchingTerm(getLuceneTerm(fieldName));
}
}
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/query/DistanceQuery.java
@Override
public void addSpanQueries(SpanNearClauseFactory sncf) throws IOException {
Query snq = getSpanNearQuery(sncf.getIndexReader(),
sncf.getFieldName(),
getWeight(),
sncf.getBasicQueryFactory());
sncf.addSpanQuery(snq);
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/query/DistanceQuery.java
public Query getSpanNearQuery(
IndexReader reader,
String fieldName,
float boost,
BasicQueryFactory qf) throws IOException {
SpanQuery[] spanClauses = new SpanQuery[getNrSubQueries()];
Iterator<?> sqi = getSubQueriesIterator();
int qi = 0;
while (sqi.hasNext()) {
SpanNearClauseFactory sncf = new SpanNearClauseFactory(reader, fieldName, qf);
((DistanceSubQuery)sqi.next()).addSpanQueries(sncf);
if (sncf.size() == 0) { /* distance operator requires all sub queries */
while (sqi.hasNext()) { /* produce possible error messages but ignore results */
((DistanceSubQuery)sqi.next()).addSpanQueries(sncf);
sncf.clear();
}
return SrndQuery.theEmptyLcnQuery;
}
spanClauses[qi] = sncf.makeSpanClause();
qi++;
}
SpanNearQuery r = new SpanNearQuery(spanClauses, getOpDistance() - 1, subQueriesOrdered());
r.setBoost(boost);
return r;
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java
public final char readChar() throws IOException {
if (bufferPosition >= bufferLength)
refill();
return buffer[bufferPosition++];
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java
private final void refill() throws IOException {
int newPosition = bufferLength - tokenStart;
if (tokenStart == 0) { // token won't fit in buffer
if (buffer == null) { // first time: alloc buffer
buffer = new char[2048];
} else if (bufferLength == buffer.length) { // grow buffer
char[] newBuffer = new char[buffer.length*2];
System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
buffer = newBuffer;
}
} else { // shift token to front
System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
}
bufferLength = newPosition; // update state
bufferPosition = newPosition;
bufferStart += tokenStart;
tokenStart = 0;
int charsRead = // fill space in buffer
input.read(buffer, newPosition, buffer.length-newPosition);
if (charsRead == -1)
throw new IOException("read past eof");
else
bufferLength += charsRead;
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java
public final char BeginToken() throws IOException {
tokenStart = bufferPosition;
return readChar();
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/JavaCharStream.java
static final int hexval(char c) throws java.io.IOException {
switch(c)
{
case '0' :
return 0;
case '1' :
return 1;
case '2' :
return 2;
case '3' :
return 3;
case '4' :
return 4;
case '5' :
return 5;
case '6' :
return 6;
case '7' :
return 7;
case '8' :
return 8;
case '9' :
return 9;
case 'a' :
case 'A' :
return 10;
case 'b' :
case 'B' :
return 11;
case 'c' :
case 'C' :
return 12;
case 'd' :
case 'D' :
return 13;
case 'e' :
case 'E' :
return 14;
case 'f' :
case 'F' :
return 15;
}
throw new java.io.IOException(); // Should never come here
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/JavaCharStream.java
protected void FillBuff() throws java.io.IOException
{
int i;
if (maxNextCharInd == 4096)
maxNextCharInd = nextCharInd = 0;
try {
if ((i = inputStream.read(nextCharBuf, maxNextCharInd,
4096 - maxNextCharInd)) == -1)
{
inputStream.close();
throw new java.io.IOException();
}
else
maxNextCharInd += i;
return;
}
catch(java.io.IOException e) {
if (bufpos != 0)
{
--bufpos;
backup(0);
}
else
{
bufline[bufpos] = line;
bufcolumn[bufpos] = column;
}
throw e;
}
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/JavaCharStream.java
protected char ReadByte() throws java.io.IOException
{
if (++nextCharInd >= maxNextCharInd)
FillBuff();
return nextCharBuf[nextCharInd];
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/JavaCharStream.java
public char BeginToken() throws java.io.IOException
{
if (inBuf > 0)
{
--inBuf;
if (++bufpos == bufsize)
bufpos = 0;
tokenBegin = bufpos;
return buffer[bufpos];
}
tokenBegin = 0;
bufpos = -1;
return readChar();
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/JavaCharStream.java
public char readChar() throws java.io.IOException
{
if (inBuf > 0)
{
--inBuf;
if (++bufpos == bufsize)
bufpos = 0;
return buffer[bufpos];
}
char c;
if (++bufpos == available)
AdjustBuffSize();
if ((buffer[bufpos] = c = ReadByte()) == '\\')
{
UpdateLineColumn(c);
int backSlashCnt = 1;
for (;;) // Read all the backslashes
{
if (++bufpos == available)
AdjustBuffSize();
try
{
if ((buffer[bufpos] = c = ReadByte()) != '\\')
{
UpdateLineColumn(c);
// found a non-backslash char.
if ((c == 'u') && ((backSlashCnt & 1) == 1))
{
if (--bufpos < 0)
bufpos = bufsize - 1;
break;
}
backup(backSlashCnt);
return '\\';
}
}
catch(java.io.IOException e)
{
if (backSlashCnt > 1)
backup(backSlashCnt-1);
return '\\';
}
UpdateLineColumn(c);
backSlashCnt++;
}
// Here, we have seen an odd number of backslashes followed by a 'u'
try
{
while ((c = ReadByte()) == 'u')
++column;
buffer[bufpos] = c = (char)(hexval(c) << 12 |
hexval(ReadByte()) << 8 |
hexval(ReadByte()) << 4 |
hexval(ReadByte()));
column += 4;
}
catch(java.io.IOException e)
{
throw new Error("Invalid escape character at line " + line +
" column " + column + ".");
}
if (backSlashCnt == 1)
return c;
else
{
backup(backSlashCnt - 1);
return '\\';
}
}
else
{
UpdateLineColumn(c);
return c;
}
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java
public final char readChar() throws IOException {
if (bufferPosition >= bufferLength)
refill();
return buffer[bufferPosition++];
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java
private final void refill() throws IOException {
int newPosition = bufferLength - tokenStart;
if (tokenStart == 0) { // token won't fit in buffer
if (buffer == null) { // first time: alloc buffer
buffer = new char[2048];
} else if (bufferLength == buffer.length) { // grow buffer
char[] newBuffer = new char[buffer.length*2];
System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
buffer = newBuffer;
}
} else { // shift token to front
System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
}
bufferLength = newPosition; // update state
bufferPosition = newPosition;
bufferStart += tokenStart;
tokenStart = 0;
int charsRead = // fill space in buffer
input.read(buffer, newPosition, buffer.length-newPosition);
if (charsRead == -1)
throw new IOException("read past eof");
else
bufferLength += charsRead;
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java
public final char BeginToken() throws IOException {
tokenStart = bufferPosition;
return readChar();
}
// in lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
@Override
public Query rewrite(IndexReader reader) throws IOException {
// ArrayList spanClauses = new ArrayList();
if (contents instanceof TermQuery) {
return contents;
}
// Build a sequence of Span clauses arranged in a SpanNear - child
// clauses can be complex
// Booleans e.g. nots and ors etc
int numNegatives = 0;
if (!(contents instanceof BooleanQuery)) {
throw new IllegalArgumentException("Unknown query type \""
+ contents.getClass().getName()
+ "\" found in phrase query string \"" + phrasedQueryStringContents
+ "\"");
}
BooleanQuery bq = (BooleanQuery) contents;
BooleanClause[] bclauses = bq.getClauses();
SpanQuery[] allSpanClauses = new SpanQuery[bclauses.length];
// For all clauses e.g. one* two~
for (int i = 0; i < bclauses.length; i++) {
// HashSet bclauseterms=new HashSet();
Query qc = bclauses[i].getQuery();
// Rewrite this clause e.g one* becomes (one OR onerous)
qc = qc.rewrite(reader);
if (bclauses[i].getOccur().equals(BooleanClause.Occur.MUST_NOT)) {
numNegatives++;
}
if (qc instanceof BooleanQuery) {
ArrayList<SpanQuery> sc = new ArrayList<SpanQuery>();
addComplexPhraseClause(sc, (BooleanQuery) qc);
if (sc.size() > 0) {
allSpanClauses[i] = sc.get(0);
} else {
// Insert fake term e.g. phrase query was for "Fred Smithe*" and
// there were no "Smithe*" terms - need to
// prevent match on just "Fred".
allSpanClauses[i] = new SpanTermQuery(new Term(field,
"Dummy clause because no terms found - must match nothing"));
}
} else {
if (qc instanceof TermQuery) {
TermQuery tq = (TermQuery) qc;
allSpanClauses[i] = new SpanTermQuery(tq.getTerm());
} else {
throw new IllegalArgumentException("Unknown query type \""
+ qc.getClass().getName()
+ "\" found in phrase query string \""
+ phrasedQueryStringContents + "\"");
}
}
}
if (numNegatives == 0) {
// The simple case - no negative elements in phrase
return new SpanNearQuery(allSpanClauses, slopFactor, true);
}
// Complex case - we have mixed positives and negatives in the
// sequence.
// Need to return a SpanNotQuery
ArrayList<SpanQuery> positiveClauses = new ArrayList<SpanQuery>();
for (int j = 0; j < allSpanClauses.length; j++) {
if (!bclauses[j].getOccur().equals(BooleanClause.Occur.MUST_NOT)) {
positiveClauses.add(allSpanClauses[j]);
}
}
SpanQuery[] includeClauses = positiveClauses
.toArray(new SpanQuery[positiveClauses.size()]);
SpanQuery include = null;
if (includeClauses.length == 1) {
include = includeClauses[0]; // only one positive clause
} else {
// need to increase slop factor based on gaps introduced by
// negatives
include = new SpanNearQuery(includeClauses, slopFactor + numNegatives,
true);
}
// Use sequence of positive and negative values as the exclude.
SpanNearQuery exclude = new SpanNearQuery(allSpanClauses, slopFactor,
true);
SpanNotQuery snot = new SpanNotQuery(include, exclude);
return snot;
}
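For context, a hedged usage sketch of ComplexPhraseQueryParser, whose phrase clauses are rewritten by the method above into SpanNear/SpanNot queries; the version constant, field name and analyzer are assumptions:
// illustrative usage sketch, not part of the Lucene sources
ComplexPhraseQueryParser parser =
    new ComplexPhraseQueryParser(Version.LUCENE_40, "name", new StandardAnalyzer(Version.LUCENE_40));
Query q = parser.parse("\"john smyth~\"");  // wildcard and fuzzy terms are allowed inside the phrase
TopDocs hits = searcher.search(q, 10);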
// in lucene/misc/src/java/org/apache/lucene/document/LazyDocument.java
@Override
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
if (num == 0) {
return getDocument().getField(name).tokenStream(analyzer);
} else {
return getDocument().getFields(name)[num].tokenStream(analyzer);
}
}
// in lucene/misc/src/java/org/apache/lucene/misc/IndexMergeTool.java
public static void main(String[] args) throws IOException {
if (args.length < 3) {
System.err.println("Usage: IndexMergeTool <mergedIndex> <index1> <index2> [index3] ...");
System.exit(1);
}
FSDirectory mergedIndex = FSDirectory.open(new File(args[0]));
IndexWriter writer = new IndexWriter(mergedIndex, new IndexWriterConfig(
Version.LUCENE_CURRENT, null)
.setOpenMode(OpenMode.CREATE));
Directory[] indexes = new Directory[args.length - 1];
for (int i = 1; i < args.length; i++) {
indexes[i - 1] = FSDirectory.open(new File(args[i]));
}
System.out.println("Merging...");
writer.addIndexes(indexes);
System.out.println("Full merge...");
writer.forceMerge(1);
writer.close();
System.out.println("Done.");
}
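An invocation sketch for the merge tool above; the index paths are placeholders. The tool merges all source indexes into the first directory and then force-merges it to a single segment:
// illustrative invocation, not part of the Lucene sources
// command line: java org.apache.lucene.misc.IndexMergeTool /indexes/merged /indexes/part1 /indexes/part2
IndexMergeTool.main(new String[] { "/indexes/merged", "/indexes/part1", "/indexes/part2" });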
// in lucene/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java
public static long getTotalTermFreq(IndexReader reader, final String field, final BytesRef termText) throws Exception {
final long totalTF[] = new long[1];
new ReaderUtil.Gather(reader) {
@Override
protected void add(int base, AtomicReader r) throws IOException {
Bits liveDocs = r.getLiveDocs();
if (liveDocs == null) {
// TODO: we could do this up front, during the scan
// (next()), instead of after-the-fact here w/ seek,
// if the codec supports it and there are no del
// docs...
final long totTF = r.totalTermFreq(field, termText);
if (totTF != -1) {
totalTF[0] += totTF;
return;
}
}
DocsEnum de = r.termDocsEnum(liveDocs, field, termText, true);
if (de != null) {
while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
totalTF[0] += de.freq();
}
}
}.run();
return totalTF[0];
}
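A brief usage sketch (the field name and term below are made up): the helper sums the total term frequency across all sub-readers, falling back to a per-document walk when a segment has deletions:
// illustrative usage sketch, not part of the Lucene sources
long totalTF = HighFreqTerms.getTotalTermFreq(reader, "body", new BytesRef("lucene"));
System.out.println("total occurrences of 'lucene' in field 'body': " + totalTF);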
// in lucene/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java
@Override
protected void add(int base, AtomicReader r) throws IOException {
Bits liveDocs = r.getLiveDocs();
if (liveDocs == null) {
// TODO: we could do this up front, during the scan
// (next()), instead of after-the-fact here w/ seek,
// if the codec supports it and there are no del
// docs...
final long totTF = r.totalTermFreq(field, termText);
if (totTF != -1) {
totalTF[0] += totTF;
return;
}
}
DocsEnum de = r.termDocsEnum(liveDocs, field, termText, true);
if (de != null) {
while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
totalTF[0] += de.freq();
}
}
// in lucene/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java
protected void fill(String field, TermsEnum termsEnum) throws IOException {
while (true) {
BytesRef term = termsEnum.next();
if (term != null) {
insertWithOverflow(new TermStats(field, term, termsEnum.docFreq()));
} else {
break;
}
}
}
// in lucene/misc/src/java/org/apache/lucene/store/WindowsDirectory.java
@Override
public IndexInput openInput(String name, IOContext context) throws IOException {
ensureOpen();
return new WindowsIndexInput(new File(getDirectory(), name), Math.max(BufferedIndexInput.bufferSize(context), DEFAULT_BUFFERSIZE));
}
// in lucene/misc/src/java/org/apache/lucene/store/WindowsDirectory.java
@Override
protected void readInternal(byte[] b, int offset, int length) throws IOException {
int bytesRead;
try {
bytesRead = WindowsDirectory.read(fd, b, offset, length, getFilePointer());
} catch (IOException ioe) {
throw new IOException(ioe.getMessage() + ": " + this, ioe);
}
if (bytesRead != length) {
throw new EOFException("read past EOF: " + this);
}
}
// in lucene/misc/src/java/org/apache/lucene/store/WindowsDirectory.java
@Override
protected void seekInternal(long pos) throws IOException {
}
// in lucene/misc/src/java/org/apache/lucene/store/WindowsDirectory.java
@Override
public synchronized void close() throws IOException {
// NOTE: we synchronize and track "isOpen" because Lucene sometimes closes IIs twice!
if (!isClone && isOpen) {
WindowsDirectory.close(fd);
isOpen = false;
}
}
// in lucene/misc/src/java/org/apache/lucene/store/NativePosixUtil.java
public static void advise(FileDescriptor fd, long offset, long len, int advise) throws IOException {
final int code = posix_fadvise(fd, offset, len, advise);
if (code != 0) {
throw new RuntimeException("posix_fadvise failed code=" + code);
}
}
// in lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
@Override
public IndexInput openInput(String name, IOContext context) throws IOException {
ensureOpen();
if (context.context != Context.MERGE || context.mergeInfo.estimatedMergeBytes < minBytesDirect || fileLength(name) < minBytesDirect) {
return delegate.openInput(name, context);
} else {
return new NativeUnixIndexInput(new File(getDirectory(), name), mergeBufferSize);
}
}
// in lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
@Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
ensureOpen();
if (context.context != Context.MERGE || context.mergeInfo.estimatedMergeBytes < minBytesDirect) {
return delegate.createOutput(name, context);
} else {
ensureCanWrite(name);
return new NativeUnixIndexOutput(new File(getDirectory(), name), mergeBufferSize);
}
}
// in lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
@Override
public void writeByte(byte b) throws IOException {
assert bufferPos == buffer.position(): "bufferPos=" + bufferPos + " vs buffer.position()=" + buffer.position();
buffer.put(b);
if (++bufferPos == bufferSize) {
dump();
}
}
// in lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
@Override
public void writeBytes(byte[] src, int offset, int len) throws IOException {
int toWrite = len;
while(true) {
final int left = bufferSize - bufferPos;
if (left <= toWrite) {
buffer.put(src, offset, left);
toWrite -= left;
offset += left;
bufferPos = bufferSize;
dump();
} else {
buffer.put(src, offset, toWrite);
bufferPos += toWrite;
break;
}
}
}
// in lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
@Override
public void flush() throws IOException {
// TODO -- I don't think this method is necessary?
}
// in lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
private void dump() throws IOException {
buffer.flip();
final long limit = filePos + buffer.limit();
if (limit > fileLength) {
// this dump extends the file
fileLength = limit;
} else {
// we had seek'd back & wrote some changes
}
// must always round to next block
buffer.limit((int) ((buffer.limit() + ALIGN - 1) & ALIGN_NOT_MASK));
assert (buffer.limit() & ALIGN_NOT_MASK) == buffer.limit() : "limit=" + buffer.limit() + " vs " + (buffer.limit() & ALIGN_NOT_MASK);
assert (filePos & ALIGN_NOT_MASK) == filePos;
//System.out.println(Thread.currentThread().getName() + ": dump to " + filePos + " limit=" + buffer.limit() + " fos=" + fos);
channel.write(buffer, filePos);
filePos += bufferPos;
bufferPos = 0;
buffer.clear();
//System.out.println("dump: done");
// TODO: the case where we'd seek'd back, wrote an
// entire buffer, we must here read the next buffer;
// likely Lucene won't trip on this since we only
// write smallish amounts on seeking back
}
// in lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
@Override
public void seek(long pos) throws IOException {
if (pos != getFilePointer()) {
dump();
final long alignedPos = pos & ALIGN_NOT_MASK;
filePos = alignedPos;
int n = (int) NativePosixUtil.pread(fos.getFD(), filePos, buffer);
if (n < bufferSize) {
buffer.limit(n);
}
//System.out.println("seek refill=" + n);
final int delta = (int) (pos - alignedPos);
buffer.position(delta);
bufferPos = delta;
}
}
// in lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
@Override
public long length() throws IOException {
return fileLength + bufferPos;
}
// in lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
@Override
public void close() throws IOException {
if (isOpen) {
isOpen = false;
try {
dump();
} finally {
try {
//System.out.println("direct close set len=" + fileLength + " vs " + channel.size() + " path=" + path);
channel.truncate(fileLength);
//System.out.println(" now: " + channel.size());
} finally {
try {
channel.close();
} finally {
fos.close();
//System.out.println(" final len=" + path.length());
}
}
}
}
}
// in lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
@Override
public void close() throws IOException {
if (isOpen && !isClone) {
try {
channel.close();
} finally {
if (!isClone) {
fis.close();
}
}
}
}
// in lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
@Override
public void seek(long pos) throws IOException {
if (pos != getFilePointer()) {
final long alignedPos = pos & ALIGN_NOT_MASK;
filePos = alignedPos-bufferSize;
final int delta = (int) (pos - alignedPos);
if (delta != 0) {
refill();
buffer.position(delta);
bufferPos = delta;
} else {
// force refill on next read
bufferPos = bufferSize;
}
}
}
// in lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
@Override
public byte readByte() throws IOException {
// NOTE: we don't guard against EOF here... ie the
// "final" buffer will typically be filled to less
// than bufferSize
if (bufferPos == bufferSize) {
refill();
}
assert bufferPos == buffer.position() : "bufferPos=" + bufferPos + " vs buffer.position()=" + buffer.position();
bufferPos++;
return buffer.get();
}
// in lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
private void refill() throws IOException {
buffer.clear();
filePos += bufferSize;
bufferPos = 0;
assert (filePos & ALIGN_NOT_MASK) == filePos : "filePos=" + filePos + " anded=" + (filePos & ALIGN_NOT_MASK);
//System.out.println("X refill filePos=" + filePos);
int n;
try {
n = channel.read(buffer, filePos);
} catch (IOException ioe) {
throw new IOException(ioe.getMessage() + ": " + this, ioe);
}
if (n < 0) {
throw new EOFException("read past EOF: " + this);
}
buffer.rewind();
}
// in lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
@Override
public void readBytes(byte[] dst, int offset, int len) throws IOException {
int toRead = len;
//System.out.println("\nX readBytes len=" + len + " fp=" + getFilePointer() + " size=" + length() + " this=" + this);
while(true) {
final int left = bufferSize - bufferPos;
if (left < toRead) {
//System.out.println(" copy " + left);
buffer.get(dst, offset, left);
toRead -= left;
offset += left;
refill();
} else {
//System.out.println(" copy " + toRead);
buffer.get(dst, offset, toRead);
bufferPos += toRead;
//System.out.println(" readBytes done");
break;
}
}
}
// in lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java
public void listSegments() throws IOException {
DecimalFormat formatter = new DecimalFormat("###,###.###");
for (int x = 0; x < infos.size(); x++) {
SegmentInfoPerCommit info = infos.info(x);
String sizeStr = formatter.format(info.sizeInBytes());
System.out.println(info.info.name + " " + sizeStr);
}
}
// in lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java
public void remove(String[] segs) throws IOException {
for (String n : segs) {
int idx = getIdx(n);
infos.remove(idx);
}
infos.changed();
infos.commit(fsDir);
}
// in lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java
public void split(File destDir, String[] segs) throws IOException {
destDir.mkdirs();
FSDirectory destFSDir = FSDirectory.open(destDir);
SegmentInfos destInfos = new SegmentInfos();
destInfos.counter = infos.counter;
for (String n : segs) {
SegmentInfoPerCommit infoPerCommit = getInfo(n);
SegmentInfo info = infoPerCommit.info;
// Same info just changing the dir:
SegmentInfo newInfo = new SegmentInfo(destFSDir, info.getVersion(), info.name, info.getDocCount(),
info.getUseCompoundFile(),
info.getCodec(), info.getDiagnostics(), info.attributes());
destInfos.add(new SegmentInfoPerCommit(newInfo, infoPerCommit.getDelCount(), infoPerCommit.getDelGen()));
// now copy files over
Collection<String> files = infoPerCommit.files();
for (final String srcName : files) {
File srcFile = new File(dir, srcName);
File destFile = new File(destDir, srcName);
copyFile(srcFile, destFile);
}
}
destInfos.changed();
destInfos.commit(destFSDir);
// System.out.println("destDir:"+destDir.getAbsolutePath());
}
// in lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java
private static void copyFile(File src, File dst) throws IOException {
InputStream in = new FileInputStream(src);
OutputStream out = new FileOutputStream(dst);
int len;
while ((len = in.read(copyBuffer)) > 0) {
out.write(copyBuffer, 0, len);
}
in.close();
out.close();
}
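A hedged usage sketch of IndexSplitter; the File-based constructor is an assumption based on the class's command-line usage, and the paths and segment names are placeholders:
// illustrative usage sketch, not part of the Lucene sources
IndexSplitter splitter = new IndexSplitter(new File("/indexes/source")); // assumed File-based constructor
splitter.listSegments();                                                  // print segment names and sizes
splitter.split(new File("/indexes/dest"), new String[] { "_0", "_3" });   // copy the named segments out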
// in lucene/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java
public void split(Version version, IndexReader in, Directory[] outputs, boolean seq) throws IOException {
if (outputs == null || outputs.length < 2) {
throw new IOException("Invalid number of outputs.");
}
if (in == null || in.numDocs() < 2) {
throw new IOException("Not enough documents for splitting");
}
int numParts = outputs.length;
// wrap a potentially read-only input
// this way we don't have to preserve original deletions because neither
// deleteDocument(int) nor undeleteAll() is applied to the wrapped input index.
FakeDeleteIndexReader input = new FakeDeleteIndexReader(in);
int maxDoc = input.maxDoc();
int partLen = maxDoc / numParts;
for (int i = 0; i < numParts; i++) {
input.undeleteAll();
if (seq) { // sequential range
int lo = partLen * i;
int hi = lo + partLen;
// below range
for (int j = 0; j < lo; j++) {
input.deleteDocument(j);
}
// above range - last part collects all ids that remained due to
// integer rounding errors
if (i < numParts - 1) {
for (int j = hi; j < maxDoc; j++) {
input.deleteDocument(j);
}
}
} else {
// round-robin
for (int j = 0; j < maxDoc; j++) {
if ((j + numParts - i) % numParts != 0) {
input.deleteDocument(j);
}
}
}
IndexWriter w = new IndexWriter(outputs[i], new IndexWriterConfig(
version,
null)
.setOpenMode(OpenMode.CREATE));
System.err.println("Writing part " + (i + 1) + " ...");
// pass the subreaders directly, as our wrapper's numDocs/hasDeletions are not up-to-date
w.addIndexes(input.getSequentialSubReaders());
w.close();
}
System.err.println("Done.");
}
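And a sketch of calling MultiPassIndexSplitter directly, splitting one reader round-robin (seq=false) into three output directories; the directories, version constant and no-arg constructor are assumptions:
// illustrative usage sketch, not part of the Lucene sources
Directory[] outputs = new Directory[] {
    FSDirectory.open(new File("/indexes/part0")),
    FSDirectory.open(new File("/indexes/part1")),
    FSDirectory.open(new File("/indexes/part2"))
};
IndexReader in = DirectoryReader.open(FSDirectory.open(new File("/indexes/source")));
new MultiPassIndexSplitter().split(Version.LUCENE_40, in, outputs, false);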
// in lucene/misc/src/java/org/apache/lucene/index/MultiPassIndexSplitter.java
private static AtomicReader[] initSubReaders(IndexReader reader) throws IOException {
final ArrayList<AtomicReader> subs = new ArrayList<AtomicReader>();
new ReaderUtil.Gather(reader) {
@Override
protected void add(int base, AtomicReader r) {
subs.add(new FakeDeleteAtomicIndexReader(r));
}
}.run();
return subs.toArray(new AtomicReader[subs.size()]);
}
// in lucene/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java
public void split() throws IOException {
boolean success = false;
DirectoryReader reader = DirectoryReader.open(input);
try {
// pass an individual config in here since one config can not be reused!
createIndex(config1, dir1, reader, docsInFirstIndex, false);
createIndex(config2, dir2, reader, docsInFirstIndex, true);
success = true;
} finally {
if (success) {
IOUtils.close(reader);
} else {
IOUtils.closeWhileHandlingException(reader);
}
}
}
// in lucene/misc/src/java/org/apache/lucene/index/PKIndexSplitter.java
private void createIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, boolean negateFilter) throws IOException {
boolean success = false;
final IndexWriter w = new IndexWriter(target, config);
try {
final AtomicReaderContext[] leaves = reader.getTopReaderContext().leaves();
final IndexReader[] subReaders = new IndexReader[leaves.length];
for (int i = 0; i < leaves.length; i++) {
subReaders[i] = new DocumentFilteredAtomicIndexReader(leaves[i], preserveFilter, negateFilter);
}
w.addIndexes(subReaders);
success = true;
} finally {
if (success) {
IOUtils.close(w);
} else {
IOUtils.closeWhileHandlingException(w);
}
}
}
// in lucene/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java
@Override
protected long size(SegmentInfoPerCommit info) throws IOException {
long byteSize = info.sizeInBytes();
float delRatio = (info.info.getDocCount() <= 0 ? 0.0f : ((float)info.getDelCount() / (float)info.info.getDocCount()));
return (info.info.getDocCount() <= 0 ? byteSize : (long)((1.0f - delRatio) * byteSize));
}
// in lucene/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java
@Override
public MergeSpecification findForcedMerges(SegmentInfos infos, int maxNumSegments, Map<SegmentInfoPerCommit,Boolean> segmentsToMerge) throws IOException {
assert maxNumSegments > 0;
MergeSpecification spec = null;
if (!isMerged(infos, maxNumSegments, segmentsToMerge)) {
// Find the newest (rightmost) segment that needs to
// be merged (other segments may have been flushed
// since the merge started):
int last = infos.size();
while(last > 0) {
final SegmentInfoPerCommit info = infos.info(--last);
if (segmentsToMerge.containsKey(info)) {
last++;
break;
}
}
if (last > 0) {
if (maxNumSegments == 1) {
// Since we must merge down to 1 segment, the
// choice is simple:
if (last > 1 || !isMerged(infos.info(0))) {
spec = new MergeSpecification();
spec.add(new OneMerge(infos.asList().subList(0, last)));
}
} else if (last > maxNumSegments) {
// find most balanced merges
spec = findBalancedMerges(infos, last, maxNumSegments, _partialExpunge);
}
}
}
return spec;
}
// in lucene/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java
private MergeSpecification findBalancedMerges(SegmentInfos infos, int infoLen, int maxNumSegments, boolean partialExpunge)
throws IOException {
if (infoLen <= maxNumSegments) return null;
MergeSpecification spec = new MergeSpecification();
// use Viterbi algorithm to find the best segmentation.
// we will try to minimize the size variance of resulting segments.
double[][] variance = createVarianceTable(infos, infoLen, maxNumSegments);
final int maxMergeSegments = infoLen - maxNumSegments + 1;
double[] sumVariance = new double[maxMergeSegments];
int[][] backLink = new int[maxNumSegments][maxMergeSegments];
for(int i = (maxMergeSegments - 1); i >= 0; i--) {
sumVariance[i] = variance[0][i];
backLink[0][i] = 0;
}
for(int i = 1; i < maxNumSegments; i++) {
for(int j = (maxMergeSegments - 1); j >= 0; j--) {
double minV = Double.MAX_VALUE;
int minK = 0;
for(int k = j; k >= 0; k--) {
double v = sumVariance[k] + variance[i + k][j - k];
if(v < minV) {
minV = v;
minK = k;
}
}
sumVariance[j] = minV;
backLink[i][j] = minK;
}
}
// now, trace back the back links to find all merges,
// also find a candidate for partial expunge if requested
int mergeEnd = infoLen;
int prev = maxMergeSegments - 1;
int expungeCandidate = -1;
int maxDelCount = 0;
for(int i = maxNumSegments - 1; i >= 0; i--) {
prev = backLink[i][prev];
int mergeStart = i + prev;
if((mergeEnd - mergeStart) > 1) {
spec.add(new OneMerge(infos.asList().subList(mergeStart, mergeEnd)));
} else {
if(partialExpunge) {
SegmentInfoPerCommit info = infos.info(mergeStart);
int delCount = info.getDelCount();
if(delCount > maxDelCount) {
expungeCandidate = mergeStart;
maxDelCount = delCount;
}
}
}
mergeEnd = mergeStart;
}
if(partialExpunge && maxDelCount > 0) {
// expunge deletes
spec.add(new OneMerge(Collections.singletonList(infos.info(expungeCandidate))));
}
return spec;
}
// in lucene/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java
private double[][] createVarianceTable(SegmentInfos infos, int last, int maxNumSegments) throws IOException {
int maxMergeSegments = last - maxNumSegments + 1;
double[][] variance = new double[last][maxMergeSegments];
// compute the optimal segment size
long optSize = 0;
long[] sizeArr = new long[last];
for(int i = 0; i < sizeArr.length; i++) {
sizeArr[i] = size(infos.info(i));
optSize += sizeArr[i];
}
optSize = (optSize / maxNumSegments);
for(int i = 0; i < last; i++) {
long size = 0;
for(int j = 0; j < maxMergeSegments; j++) {
if((i + j) < last) {
size += sizeArr[i + j];
double residual = ((double)size/(double)optSize) - 1.0d;
variance[i][j] = residual * residual;
} else {
variance[i][j] = Double.NaN;
}
}
}
return variance;
}
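// [Illustrative arithmetic, not from the catalogued sources] How createVarianceTable() above
// scores a candidate merge: the optimal segment size is the total size divided by maxNumSegments,
// and a merge is scored by its squared relative deviation from that optimum.
public class VarianceTableDemo {
  public static void main(String[] args) {
    long[] sizes = {40, 30, 20, 10};   // hypothetical segment sizes
    int maxNumSegments = 2;
    long optSize = 0;
    for (long s : sizes) optSize += s;
    optSize /= maxNumSegments;         // (40+30+20+10) / 2 = 50
    long merged = sizes[1] + sizes[2] + sizes[3];                   // candidate merge of segments 1..3 = 60
    double residual = ((double) merged / (double) optSize) - 1.0d;  // 0.2
    System.out.println("variance = " + residual * residual);        // ~0.04
  }
}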
// in lucene/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java
Override
public MergeSpecification findForcedDeletesMerges(SegmentInfos infos)
throws CorruptIndexException, IOException {
final int numSegs = infos.size();
final int numLargeSegs = (numSegs < _numLargeSegments ? numSegs : _numLargeSegments);
MergeSpecification spec = null;
if(numLargeSegs < numSegs) {
// hack to create a shallow sub-range as SegmentInfos instance,
// it does not clone all metadata, but LogMerge does not need it
final SegmentInfos smallSegments = new SegmentInfos();
smallSegments.rollbackSegmentInfos(infos.asList().subList(numLargeSegs, numSegs));
spec = super.findForcedDeletesMerges(smallSegments);
}
if(spec == null) spec = new MergeSpecification();
for(int i = 0; i < numLargeSegs; i++) {
SegmentInfoPerCommit info = infos.info(i);
if (info.hasDeletions()) {
spec.add(new OneMerge(Collections.singletonList(infos.info(i))));
}
}
return spec;
}
// in lucene/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java
Override
public MergeSpecification findMerges(SegmentInfos infos) throws IOException {
final int numSegs = infos.size();
final int numLargeSegs = _numLargeSegments;
if (numSegs <= numLargeSegs) {
return null;
}
long totalLargeSegSize = 0;
long totalSmallSegSize = 0;
SegmentInfoPerCommit info;
// compute the total size of large segments
for(int i = 0; i < numLargeSegs; i++) {
info = infos.info(i);
totalLargeSegSize += size(info);
}
// compute the total size of small segments
for(int i = numLargeSegs; i < numSegs; i++) {
info = infos.info(i);
totalSmallSegSize += size(info);
}
long targetSegSize = (totalLargeSegSize / (numLargeSegs - 1));
if(targetSegSize <= totalSmallSegSize) {
// the total size of small segments is big enough,
// promote the small segments to a large segment and do balanced merge,
if(totalSmallSegSize < targetSegSize * 2) {
MergeSpecification spec = findBalancedMerges(infos, numLargeSegs, (numLargeSegs - 1), _partialExpunge);
if(spec == null) spec = new MergeSpecification(); // should not happen
spec.add(new OneMerge(infos.asList().subList(numLargeSegs, numSegs)));
return spec;
} else {
return findBalancedMerges(infos, numSegs, numLargeSegs, _partialExpunge);
}
} else if (_maxSegments < numSegs) {
// we have more than _maxSegments, merge small segments smaller than targetSegSize/4
MergeSpecification spec = new MergeSpecification();
int startSeg = numLargeSegs;
long sizeThreshold = (targetSegSize / 4);
while(startSeg < numSegs) {
info = infos.info(startSeg);
if(size(info) < sizeThreshold) break;
startSeg++;
}
spec.add(new OneMerge(infos.asList().subList(startSeg, numSegs)));
return spec;
} else {
// hack to create a shallow sub-range as SegmentInfos instance,
// it does not clone all metadata, but LogMerge does not need it
final SegmentInfos smallSegments = new SegmentInfos();
smallSegments.rollbackSegmentInfos(infos.asList().subList(numLargeSegs, numSegs));
MergeSpecification spec = super.findMerges(smallSegments);
if(_partialExpunge) {
OneMerge expunge = findOneSegmentToExpunge(infos, numLargeSegs);
if(expunge != null) {
if(spec == null) spec = new MergeSpecification();
spec.add(expunge);
}
}
return spec;
}
}
// in lucene/misc/src/java/org/apache/lucene/index/BalancedSegmentMergePolicy.java
private OneMerge findOneSegmentToExpunge(SegmentInfos infos, int maxNumSegments) throws IOException {
int expungeCandidate = -1;
int maxDelCount = 0;
for(int i = maxNumSegments - 1; i >= 0; i--) {
SegmentInfoPerCommit info = infos.info(i);
int delCount = info.getDelCount();
if (delCount > maxDelCount) {
expungeCandidate = i;
maxDelCount = delCount;
}
}
if (maxDelCount > 0) {
return new OneMerge(Collections.singletonList(infos.info(expungeCandidate)));
}
return null;
}
// in lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptDocs) throws IOException {
AtomicReader reader = ctx.reader();
OpenBitSet bits = new OpenBitSet(reader.maxDoc());
Terms terms = reader.terms(fieldName);
if (terms == null)
return null;
TermsEnum termsEnum = terms.iterator(null);
DocsEnum docsEnum = null;//cached for termsEnum.docs() calls
Node scanCell = null;
//cells is treated like a stack. LinkedList conveniently has bulk add to beginning. It's in sorted order so that we
// always advance forward through the termsEnum index.
LinkedList<Node> cells = new LinkedList<Node>(
grid.getWorldNode().getSubCells(queryShape) );
//This is a recursive algorithm that starts with one or more "big" cells, and then recursively dives down into the
// first such cell that intersects with the query shape. It's a depth first traversal because we don't move onto
// the next big cell (breadth) until we're completely done considering all smaller cells beneath it. For a given
// cell, if it's *within* the query shape then we can conveniently short-circuit the depth traversal and
// grab all documents assigned to this cell/term. For an intersection of the cell and query shape, we either
// recursively step down another grid level or we decide heuristically (via prefixGridScanLevel) that there aren't
// that many points, and so we scan through all terms within this cell (i.e. the term starts with the cell's term),
// seeing which ones are within the query shape.
while(!cells.isEmpty()) {
final Node cell = cells.removeFirst();
final BytesRef cellTerm = new BytesRef(cell.getTokenBytes());
TermsEnum.SeekStatus seekStat = termsEnum.seekCeil(cellTerm);
if (seekStat == TermsEnum.SeekStatus.END)
break;
if (seekStat == TermsEnum.SeekStatus.NOT_FOUND)
continue;
if (cell.getLevel() == detailLevel || cell.isLeaf()) {
docsEnum = termsEnum.docs(acceptDocs, docsEnum, false);
addDocs(docsEnum,bits);
} else {//any other intersection
//If the next indexed term is the leaf marker, then add all of them
BytesRef nextCellTerm = termsEnum.next();
assert StringHelper.startsWith(nextCellTerm, cellTerm);
scanCell = grid.getNode(nextCellTerm.bytes, nextCellTerm.offset, nextCellTerm.length, scanCell);
if (scanCell.isLeaf()) {
docsEnum = termsEnum.docs(acceptDocs, docsEnum, false);
addDocs(docsEnum,bits);
termsEnum.next();//move pointer to avoid potential redundant addDocs() below
}
//Decide whether to continue to divide & conquer, or whether it's time to scan through terms beneath this cell.
// Scanning is a performance optimization trade-off.
boolean scan = cell.getLevel() >= prefixGridScanLevel;//simple heuristic
if (!scan) {
//Divide & conquer
cells.addAll(0, cell.getSubCells(queryShape));//add to beginning
} else {
//Scan through all terms within this cell to see if they are within the queryShape. No seek()s.
for(BytesRef term = termsEnum.term(); term != null && StringHelper.startsWith(term,cellTerm); term = termsEnum.next()) {
scanCell = grid.getNode(term.bytes, term.offset, term.length, scanCell);
int termLevel = scanCell.getLevel();
if (termLevel > detailLevel)
continue;
if (termLevel == detailLevel || scanCell.isLeaf()) {
//TODO should put more thought into implications of box vs point
Shape cShape = termLevel == grid.getMaxLevels() ? scanCell.getCenter() : scanCell.getShape();
if(queryShape.relate(cShape, grid.getSpatialContext()) == SpatialRelation.DISJOINT)
continue;
docsEnum = termsEnum.docs(acceptDocs, docsEnum, false);
addDocs(docsEnum,bits);
}
}//term loop
}
}
}//cell loop
return bits;
}
// in lucene/spatial/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeFilter.java
private void addDocs(DocsEnum docsEnum, OpenBitSet bits) throws IOException {
int docid;
while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
bits.fastSet(docid);
}
}
// in lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java
Override
public boolean incrementToken() throws IOException {
clearAttributes();
if (nextTokenStringNeedingLeaf != null) {
termAtt.append(nextTokenStringNeedingLeaf);
termAtt.append((char) Node.LEAF_BYTE);
nextTokenStringNeedingLeaf = null;
return true;
}
if (iter.hasNext()) {
Node cell = iter.next();
CharSequence token = cell.getTokenString();
termAtt.append(token);
if (cell.isLeaf())
nextTokenStringNeedingLeaf = token;
return true;
}
return false;
}
// in lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixCellsTokenizer.java
Override
public final boolean incrementToken() throws IOException {
clearAttributes();
int length = 0;
char[] buffer = termAtt.buffer();
while (true) {
int c = input.read(); // read as int so the end-of-stream marker (-1) stays detectable
if (c < 0) break;
if (c == 'a' || c == 'A') {
buffer[length++] = 'A';
continue;
}
if (c == 'b' || c == 'B') {
buffer[length++] = 'B';
continue;
}
if (c == 'c' || c == 'C') {
buffer[length++] = 'C';
continue;
}
if (c == 'd' || c == 'D') {
buffer[length++] = 'D';
continue;
}
if (c == '*') {
buffer[length++] = '*';
continue;
}
if (c == '+') {
buffer[length++] = '+';
continue;
}
if (length > 0) {
// Skip any other character
break;
}
}
termAtt.setLength(length);
return length > 0; // should only happen at the end
}
// in lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixCellsTokenizer.java
Override
public void reset(Reader input) throws IOException {
super.reset(input);
}
// in lucene/spatial/src/java/org/apache/lucene/spatial/vector/DistanceValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
AtomicReader reader = readerContext.reader();
final double[] ptX = FieldCache.DEFAULT.getDoubles(reader, fields.getFieldNameX(), true);
final double[] ptY = FieldCache.DEFAULT.getDoubles(reader, fields.getFieldNameY(), true);
final Bits validX = FieldCache.DEFAULT.getDocsWithField(reader, fields.getFieldNameX());
final Bits validY = FieldCache.DEFAULT.getDocsWithField(reader, fields.getFieldNameY());
return new FunctionValues() {
@Override
public float floatVal(int doc) {
return (float) doubleVal(doc);
}
@Override
public double doubleVal(int doc) {
// make sure the document has a value for both the x and the y field
if (validX.get(doc) && validY.get(doc)) {
PointImpl pt = new PointImpl( ptX[doc], ptY[doc] );
return calculator.distance(from, pt);
}
return 0;
}
@Override
public String toString(int doc) {
return description() + "=" + floatVal(doc);
}
};
}
// in lucene/spatial/src/java/org/apache/lucene/spatial/util/StringListTokenizer.java
Override
public void reset() throws IOException {
super.reset();
iter = tokens.iterator();
}
// in lucene/spatial/src/java/org/apache/lucene/spatial/util/TruncateFilter.java
Override
public final boolean incrementToken() throws IOException {
if (!input.incrementToken()) {
return false;
}
if (termAttr.length() > maxTokenLength) {
termAttr.setLength(maxTokenLength);
}
return true;
}
// in lucene/spatial/src/java/org/apache/lucene/spatial/util/ValueSourceFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final FunctionValues values = source.getValues( null, context );
return new FilteredDocIdSet(startingFilter.getDocIdSet(context, acceptDocs)) {
@Override
public boolean match(int doc) {
double val = values.doubleVal( doc );
return val > min && val < max;
}
};
}
// in lucene/spatial/src/java/org/apache/lucene/spatial/util/CachingDoubleValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final int base = readerContext.docBase;
final FunctionValues vals = source.getValues(context,readerContext);
return new FunctionValues() {
@Override
public double doubleVal(int doc) {
Integer key = Integer.valueOf( base+doc );
Double v = cache.get( key );
if( v == null ) {
v = Double.valueOf( vals.doubleVal(doc) );
cache.put( key, v );
}
return v.doubleValue();
}
@Override
public float floatVal(int doc) {
return (float)doubleVal(doc);
}
@Override
public String toString(int doc) {
return doubleVal(doc)+"";
}
};
}
// in lucene/spatial/src/java/org/apache/lucene/spatial/util/CachedDistanceValueSource.java
Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
final ShapeFieldCache<Point> cache =
provider.getCache(readerContext.reader());
return new FunctionValues() {
@Override
public float floatVal(int doc) {
return (float) doubleVal(doc);
}
@Override
public double doubleVal(int doc) {
List<Point> vals = cache.getShapes( doc );
if( vals != null ) {
double v = calculator.distance(from, vals.get(0));
for( int i=1; i<vals.size(); i++ ) {
v = Math.min(v, calculator.distance(from, vals.get(i)));
}
return v;
}
return Double.NaN; // ?? maybe max?
}
@Override
public String toString(int doc) {
return description() + "=" + floatVal(doc);
}
};
}
// in lucene/spatial/src/java/org/apache/lucene/spatial/util/ShapeFieldCacheProvider.java
public synchronized ShapeFieldCache<T> getCache(AtomicReader reader) throws IOException {
ShapeFieldCache<T> idx = sidx.get(reader);
if (idx != null) {
return idx;
}
long startTime = System.currentTimeMillis();
log.fine("Building Cache [" + reader.maxDoc() + "]");
idx = new ShapeFieldCache<T>(reader.maxDoc(),defaultSize);
int count = 0;
DocsEnum docs = null;
Terms terms = reader.terms(shapeField);
TermsEnum te = null;
if (terms != null) {
te = terms.iterator(te);
BytesRef term = te.next();
while (term != null) {
T shape = readShape(term);
if( shape != null ) {
docs = te.docs(null, docs, false);
int docid = docs.nextDoc();
while (docid != DocIdSetIterator.NO_MORE_DOCS) {
idx.add( docid, shape );
docid = docs.nextDoc();
count++;
}
}
term = te.next();
}
}
sidx.put(reader, idx);
long elapsed = System.currentTimeMillis() - startTime;
log.fine("Cached: [" + count + " in " + elapsed + "ms] " + idx);
return idx;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
Override
public BytesRef next() throws IOException {
if (done) {
return null;
}
line = in.readLine();
if (line != null) {
String[] fields = line.split("\t");
if (fields.length > 1) {
// keep reading floats for bw compat
try {
curFreq = Long.parseLong(fields[1]);
} catch (NumberFormatException e) {
curFreq = (long)Double.parseDouble(fields[1]);
}
spare.copyChars(fields[0]);
} else {
spare.copyChars(line);
curFreq = 1;
}
return spare;
} else {
done = true;
IOUtils.close(in);
return null;
}
}
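// [Illustrative sketch, not from the catalogued sources] The line format FileDictionary.next()
// above accepts: one entry per line, optionally followed by a tab and a weight. Integral weights
// are preferred, a float is parsed for backwards compatibility, and a missing weight defaults to 1.
public class FileDictionaryFormatDemo {
  public static void main(String[] args) {
    String[] lines = {"apple\t42", "banana\t3.5", "cherry"};
    for (String line : lines) {
      String[] fields = line.split("\t");
      long weight = 1;
      if (fields.length > 1) {
        try {
          weight = Long.parseLong(fields[1]);
        } catch (NumberFormatException e) {
          weight = (long) Double.parseDouble(fields[1]);  // float weight kept for back-compat
        }
      }
      System.out.println(fields[0] + " -> " + weight);    // apple -> 42, banana -> 3, cherry -> 1
    }
  }
}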
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
Override
public BytesRef next() throws IOException {
boolean success = false;
if (done) {
return null;
}
try {
ByteArrayDataInput input = new ByteArrayDataInput();
if (reader.read(scratch)) {
weight = decode(scratch, input);
success = true;
return scratch;
}
close();
success = done = true;
return null;
} finally {
if (!success) {
done = true;
close();
}
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
private Sort.ByteSequencesReader sort(Comparator<BytesRef> comparator) throws IOException {
String prefix = getClass().getSimpleName();
File directory = Sort.defaultTempDir();
tempInput = File.createTempFile(prefix, ".input", directory);
tempSorted = File.createTempFile(prefix, ".sorted", directory);
final Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
boolean success = false;
try {
BytesRef spare;
byte[] buffer = new byte[0];
ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
while ((spare = source.next()) != null) {
encode(writer, output, buffer, spare, source.weight());
}
writer.close();
new Sort(comparator).sort(tempInput, tempSorted);
ByteSequencesReader reader = new Sort.ByteSequencesReader(tempSorted);
success = true;
return reader;
} finally {
if (success) {
IOUtils.close(writer);
} else {
try {
IOUtils.closeWhileHandlingException(writer);
} finally {
close();
}
}
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
private void close() throws IOException {
if (tempInput != null) {
tempInput.delete();
}
if (tempSorted != null) {
tempSorted.delete();
}
IOUtils.close(reader);
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedTermFreqIteratorWrapper.java
protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
if (spare.length + 8 >= buffer.length) {
buffer = ArrayUtil.grow(buffer, spare.length + 8);
}
output.reset(buffer);
output.writeBytes(spare.bytes, spare.offset, spare.length);
output.writeLong(weight);
writer.write(buffer, 0, output.getPosition());
}
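// [Illustrative sketch, not the catalogued decode] encode() above appends the 8-byte weight after
// the term bytes, so a matching decoder reads the trailing long and treats the remaining prefix as
// the term. Shown here with java.nio.ByteBuffer instead of Lucene's data input/output classes.
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class TermWeightLayoutDemo {
  public static void main(String[] args) {
    byte[] term = "hello".getBytes(StandardCharsets.UTF_8);
    long weight = 123L;
    ByteBuffer buf = ByteBuffer.allocate(term.length + 8);
    buf.put(term).putLong(weight);          // same order as encode(): term bytes, then the weight
    byte[] record = buf.array();
    // Decode: the last 8 bytes hold the weight, everything before them is the term.
    long decodedWeight = ByteBuffer.wrap(record, record.length - 8, 8).getLong();
    String decodedTerm = new String(record, 0, record.length - 8, StandardCharsets.UTF_8);
    System.out.println(decodedTerm + " / " + decodedWeight);  // hello / 123
  }
}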
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java
public BytesRefIterator iterator(final Comparator<BytesRef> comp) {
final BytesRef spare = new BytesRef();
final int size = size();
final int[] ords = comp == null ? null : sort(comp);
return new BytesRefIterator() {
int pos = 0;
@Override
public BytesRef next() throws IOException {
if (pos < size) {
return get(spare, ords == null ? pos++ : ords[pos++]);
}
return null;
}
@Override
public Comparator<BytesRef> getComparator() {
return comp;
}
};
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/BytesRefList.java
Override
public BytesRef next() throws IOException {
if (pos < size) {
return get(spare, ords == null ? pos++ : ords[pos++]);
}
return null;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedTermFreqIteratorWrapper.java
Override
public BytesRef next() throws IOException {
if (++curPos < entries.size()) {
return entries.get(spare, (currentOrd = ords[curPos]));
}
return null;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
Override
public void build(TermFreqIterator tfit) throws IOException {
root = new TernaryTreeNode();
// buffer first
if (tfit.getComparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) {
// make sure it's sorted and the comparator uses UTF16 sort order
tfit = new SortedTermFreqIteratorWrapper(tfit, BytesRef.getUTF8SortedAsUTF16Comparator());
}
ArrayList<String> tokens = new ArrayList<String>();
ArrayList<Number> vals = new ArrayList<Number>();
BytesRef spare;
CharsRef charsSpare = new CharsRef();
while ((spare = tfit.next()) != null) {
charsSpare.grow(spare.length);
UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
tokens.add(charsSpare.toString());
vals.add(Long.valueOf(tfit.weight()));
}
autocomplete.balancedTree(tokens.toArray(), vals.toArray(), 0, tokens.size() - 1, root);
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
private void readRecursively(DataInputStream in, TernaryTreeNode node) throws IOException {
node.splitchar = in.readChar();
byte mask = in.readByte();
if ((mask & HAS_TOKEN) != 0) {
node.token = in.readUTF();
}
if ((mask & HAS_VALUE) != 0) {
node.val = Long.valueOf(in.readLong());
}
if ((mask & LO_KID) != 0) {
node.loKid = new TernaryTreeNode();
readRecursively(in, node.loKid);
}
if ((mask & EQ_KID) != 0) {
node.eqKid = new TernaryTreeNode();
readRecursively(in, node.eqKid);
}
if ((mask & HI_KID) != 0) {
node.hiKid = new TernaryTreeNode();
readRecursively(in, node.hiKid);
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
private void writeRecursively(DataOutputStream out, TernaryTreeNode node) throws IOException {
// write out the current node
out.writeChar(node.splitchar);
// prepare a mask of kids
byte mask = 0;
if (node.eqKid != null) mask |= EQ_KID;
if (node.loKid != null) mask |= LO_KID;
if (node.hiKid != null) mask |= HI_KID;
if (node.token != null) mask |= HAS_TOKEN;
if (node.val != null) mask |= HAS_VALUE;
out.writeByte(mask);
if (node.token != null) out.writeUTF(node.token);
if (node.val != null) out.writeLong(((Number)node.val).longValue());
// recurse and write kids
if (node.loKid != null) {
writeRecursively(out, node.loKid);
}
if (node.eqKid != null) {
writeRecursively(out, node.eqKid);
}
if (node.hiKid != null) {
writeRecursively(out, node.hiKid);
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
Override
public synchronized boolean store(OutputStream output) throws IOException {
DataOutputStream out = new DataOutputStream(output);
try {
writeRecursively(out, root);
out.flush();
} finally {
IOUtils.close(output);
}
return true;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
Override
public synchronized boolean load(InputStream input) throws IOException {
DataInputStream in = new DataInputStream(input);
root = new TernaryTreeNode();
try {
readRecursively(in, root);
} finally {
IOUtils.close(in);
}
return true;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferingTermFreqIteratorWrapper.java
Override
public BytesRef next() throws IOException {
if (++curPos < entries.size()) {
entries.get(spare, curPos);
return spare;
}
return null;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
public void add(BytesRef utf8, int bucket) throws IOException {
if (bucket < 0 || bucket >= buckets) {
throw new IllegalArgumentException(
"Bucket outside of the allowed range [0, " + buckets + "): " + bucket);
}
if (scratch.bytes.length < utf8.length + 1) {
scratch.grow(utf8.length + 10);
}
scratch.length = 1;
scratch.bytes[0] = (byte) bucket;
scratch.append(utf8);
sorter.add(scratch);
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
public FSTCompletion build() throws IOException {
this.automaton = buildAutomaton(sorter);
if (sorter instanceof Closeable) {
((Closeable) sorter).close();
}
return new FSTCompletion(automaton);
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException {
// Build the automaton.
final Outputs<Object> outputs = NoOutputs.getSingleton();
final Object empty = outputs.getNoOutput();
final Builder<Object> builder = new Builder<Object>(
FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
shareMaxTailLength, outputs, null, false);
BytesRef scratch = new BytesRef();
BytesRef entry;
final IntsRef scratchIntsRef = new IntsRef();
int count = 0;
BytesRefIterator iter = sorter.iterator();
while((entry = iter.next()) != null) {
count++;
if (scratch.compareTo(entry) != 0) {
builder.add(Util.toIntsRef(entry, scratchIntsRef), empty);
scratch.copyBytes(entry);
}
}
return count == 0 ? null : builder.finish();
}
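// [Hedged usage sketch; the surrounding setup is not taken from the sources] Feeding weighted
// entries into buckets with add() and building the automaton with build(), both listed above. The
// no-argument FSTCompletionBuilder constructor is assumed to default to an in-memory sorter and
// 10 buckets.
import org.apache.lucene.search.suggest.fst.FSTCompletion;
import org.apache.lucene.search.suggest.fst.FSTCompletionBuilder;
import org.apache.lucene.util.BytesRef;

public class FSTCompletionBuilderDemo {
  public static void main(String[] args) throws Exception {
    FSTCompletionBuilder builder = new FSTCompletionBuilder();
    builder.add(new BytesRef("foobar"), 7);  // higher bucket = suggested earlier
    builder.add(new BytesRef("foo"), 3);
    builder.add(new BytesRef("bar"), 1);
    FSTCompletion completion = builder.build();
    // completion can now serve prefix lookups, ordered by bucket as in lookupSortedByWeight() below.
  }
}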
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java
private List<Completion> lookupSortedAlphabetically(BytesRef key, int num)
throws IOException {
// Greedily get num results from each weight branch.
List<Completion> res = lookupSortedByWeight(key, num, true);
// Sort and trim.
Collections.sort(res);
if (res.size() > num) {
res = res.subList(0, num);
}
return res;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java
private ArrayList<Completion> lookupSortedByWeight(BytesRef key,
int num, boolean collectAll) throws IOException {
// Don't overallocate the results buffers. This also serves the purpose of
// allowing the user of this class to request all matches using Integer.MAX_VALUE as
// the number of results.
final ArrayList<Completion> res = new ArrayList<Completion>(Math.min(10, num));
final BytesRef output = BytesRef.deepCopyOf(key);
for (int i = 0; i < rootArcs.length; i++) {
final FST.Arc<Object> rootArc = rootArcs[i];
final FST.Arc<Object> arc = new FST.Arc<Object>().copyFrom(rootArc);
// Descend into the automaton using the key as prefix.
if (descendWithPrefix(arc, key)) {
// A subgraph starting from the current node has the completions
// of the key prefix. The arc we're at is the last key's byte,
// so we will collect it too.
output.length = key.length - 1;
if (collect(res, num, rootArc.label, output, arc) && !collectAll) {
// We have enough suggestions to return immediately. Keep on looking
// for an exact match, if requested.
if (exactFirst) {
if (!checkExistingAndReorder(res, key)) {
int exactMatchBucket = getExactMatchStartingFromRootArc(i, key);
if (exactMatchBucket != -1) {
// Insert as the first result and truncate at num.
while (res.size() >= num) {
res.remove(res.size() - 1);
}
res.add(0, new Completion(key, exactMatchBucket));
}
}
}
break;
}
}
}
return res;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java
private boolean descendWithPrefix(Arc<Object> arc, BytesRef utf8)
throws IOException {
final int max = utf8.offset + utf8.length;
// Cannot save as instance var since multiple threads
// can use FSTCompletion at once...
final FST.BytesReader fstReader = automaton.getBytesReader(0);
for (int i = utf8.offset; i < max; i++) {
if (automaton.findTargetArc(utf8.bytes[i] & 0xff, arc, arc, fstReader) == null) {
// No matching prefixes, return an empty result.
return false;
}
}
return true;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java
private boolean collect(List<Completion> res, int num, int bucket,
BytesRef output, Arc<Object> arc) throws IOException {
if (output.length == output.bytes.length) {
output.bytes = ArrayUtil.grow(output.bytes);
}
assert output.offset == 0;
output.bytes[output.length++] = (byte) arc.label;
automaton.readFirstTargetArc(arc, arc);
while (true) {
if (arc.label == FST.END_LABEL) {
res.add(new Completion(output, bucket));
if (res.size() >= num) return true;
} else {
int save = output.length;
if (collect(res, num, bucket, output, new Arc<Object>().copyFrom(arc))) {
return true;
}
output.length = save;
}
if (arc.isLast()) {
break;
}
automaton.readNextArc(arc);
}
return false;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
Override
public void add(BytesRef utf8) throws IOException {
if (writer == null) throw new IllegalStateException();
writer.write(utf8);
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
public BytesRefIterator iterator() throws IOException {
if (sorted == null) {
closeWriter();
sorted = File.createTempFile("RefSorter-", ".sorted",
Sort.defaultTempDir());
sort.sort(input, sorted);
input.delete();
input = null;
}
return new ByteSequenceIterator(new Sort.ByteSequencesReader(sorted),
sort.getComparator());
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
private void closeWriter() throws IOException {
if (writer != null) {
writer.close();
writer = null;
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
Override
public void close() throws IOException {
try {
closeWriter();
} finally {
if (input != null) input.delete();
if (sorted != null) sorted.delete();
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/ExternalRefSorter.java
Override
public BytesRef next() throws IOException {
if (scratch == null) {
return null;
}
boolean success = false;
try {
byte[] next = reader.read();
if (next != null) {
scratch.bytes = next;
scratch.length = next.length;
scratch.offset = 0;
} else {
IOUtils.close(reader);
scratch = null;
}
success = true;
return scratch;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(reader);
}
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
public SortInfo sort(File input, File output) throws IOException {
sortInfo = new SortInfo();
sortInfo.totalTime = System.currentTimeMillis();
output.delete();
ArrayList<File> merges = new ArrayList<File>();
ByteSequencesReader is = new ByteSequencesReader(input);
boolean success = false;
try {
int lines = 0;
while ((lines = readPartition(is)) > 0) {
merges.add(sortPartition(lines));
sortInfo.tempMergeFiles++;
sortInfo.lines += lines;
// Handle intermediate merges.
if (merges.size() == maxTempFiles) {
File intermediate = File.createTempFile("sort", "intermediate", tempDirectory);
mergePartitions(merges, intermediate);
for (File file : merges) {
file.delete();
}
merges.clear();
merges.add(intermediate);
sortInfo.tempMergeFiles++;
}
}
success = true;
} finally {
if (success)
IOUtils.close(is);
else
IOUtils.closeWhileHandlingException(is);
}
// A single partition: try to rename it into place, and fall back to copying if the rename fails.
if (merges.size() == 1) {
// If simple rename doesn't work this means the output is
// on a different volume or something. Copy the input then.
if (!merges.get(0).renameTo(output)) {
copy(merges.get(0), output);
}
} else {
// otherwise merge the partitions with a priority queue.
mergePartitions(merges, output);
for (File file : merges) {
file.delete();
}
}
sortInfo.totalTime = (System.currentTimeMillis() - sortInfo.totalTime);
return sortInfo;
}
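// [Illustrative usage sketch; the temp-file handling is an assumption] The external sort above
// used end-to-end: records are written length-prefixed with ByteSequencesWriter, sorted on disk
// with sort(File, File), and read back with ByteSequencesReader - the same read/write methods
// listed below.
import org.apache.lucene.search.suggest.fst.Sort;
import org.apache.lucene.util.BytesRef;
import java.io.File;

public class ExternalSortDemo {
  public static void main(String[] args) throws Exception {
    File input = File.createTempFile("demo", ".input", Sort.defaultTempDir());
    File output = File.createTempFile("demo", ".sorted", Sort.defaultTempDir());
    Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(input);
    writer.write(new BytesRef("pear"));
    writer.write(new BytesRef("apple"));
    writer.write(new BytesRef("orange"));
    writer.close();
    new Sort().sort(input, output);          // default comparator: unsigned byte order
    Sort.ByteSequencesReader reader = new Sort.ByteSequencesReader(output);
    BytesRef scratch = new BytesRef();
    while (reader.read(scratch)) {
      System.out.println(scratch.utf8ToString());  // apple, orange, pear
    }
    reader.close();
    input.delete();
    output.delete();
  }
}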
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
public static File defaultTempDir() throws IOException {
String tempDirPath = System.getProperty("java.io.tmpdir");
if (tempDirPath == null)
throw new IOException("Java has no temporary folder property (java.io.tmpdir)?");
File tempDirectory = new File(tempDirPath);
if (!tempDirectory.exists() || !tempDirectory.canWrite()) {
throw new IOException("Java's temporary folder not present or writeable?: "
+ tempDirectory.getAbsolutePath());
}
return tempDirectory;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
private static void copy(File file, File output) throws IOException {
// 16kb copy buffer (empirical pick).
byte [] buffer = new byte [16 * 1024];
InputStream is = null;
OutputStream os = null;
try {
is = new FileInputStream(file);
os = new FileOutputStream(output);
int length;
while ((length = is.read(buffer)) > 0) {
os.write(buffer, 0, length);
}
} finally {
IOUtils.close(is, os);
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
protected File sortPartition(int len) throws IOException {
BytesRefList data = this.buffer;
File tempFile = File.createTempFile("sort", "partition", tempDirectory);
long start = System.currentTimeMillis();
sortInfo.sortTime += (System.currentTimeMillis() - start);
final ByteSequencesWriter out = new ByteSequencesWriter(tempFile);
BytesRef spare;
try {
BytesRefIterator iter = buffer.iterator(comparator);
while((spare = iter.next()) != null) {
assert spare.length <= Short.MAX_VALUE;
out.write(spare);
}
out.close();
// Clean up the buffer for the next partition.
data.clear();
return tempFile;
} finally {
IOUtils.close(out);
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
void mergePartitions(List<File> merges, File outputFile) throws IOException {
long start = System.currentTimeMillis();
ByteSequencesWriter out = new ByteSequencesWriter(outputFile);
PriorityQueue<FileAndTop> queue = new PriorityQueue<FileAndTop>(merges.size()) {
protected boolean lessThan(FileAndTop a, FileAndTop b) {
return comparator.compare(a.current, b.current) < 0;
}
};
ByteSequencesReader [] streams = new ByteSequencesReader [merges.size()];
try {
// Open streams and read the top for each file
for (int i = 0; i < merges.size(); i++) {
streams[i] = new ByteSequencesReader(merges.get(i));
byte line[] = streams[i].read();
if (line != null) {
queue.insertWithOverflow(new FileAndTop(i, line));
}
}
// Unix utility sort() uses ordered array of files to pick the next line from, updating
// it as it reads new lines. The PQ used here is a more elegant solution and has
// a nicer theoretical complexity bound :) The entire sorting process is I/O bound anyway
// so it shouldn't make much of a difference (didn't check).
FileAndTop top;
while ((top = queue.top()) != null) {
out.write(top.current);
if (!streams[top.fd].read(top.current)) {
queue.pop();
} else {
queue.updateTop();
}
}
sortInfo.mergeTime += System.currentTimeMillis() - start;
sortInfo.mergeRounds++;
} finally {
// The logic below is: if an exception occurs in closing out, it has a priority over exceptions
// happening in closing streams.
try {
IOUtils.close(streams);
} finally {
IOUtils.close(out);
}
}
}
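// [Illustrative sketch, not from the sources] The same k-way merge idea as mergePartitions(),
// shown with java.util.PriorityQueue over in-memory sorted lists instead of temporary files.
import java.util.*;

public class KWayMergeDemo {
  public static void main(String[] args) {
    List<Iterator<String>> streams = new ArrayList<Iterator<String>>();
    streams.add(Arrays.asList("ant", "fox").iterator());
    streams.add(Arrays.asList("bee", "cat").iterator());
    streams.add(Arrays.asList("dog").iterator());
    // Each queue entry is {streamIndex, currentValue}; the queue orders entries by current value.
    PriorityQueue<Object[]> queue = new PriorityQueue<Object[]>(streams.size(), new Comparator<Object[]>() {
      public int compare(Object[] a, Object[] b) {
        return ((String) a[1]).compareTo((String) b[1]);
      }
    });
    for (int i = 0; i < streams.size(); i++) {
      if (streams.get(i).hasNext()) queue.add(new Object[] {i, streams.get(i).next()});
    }
    while (!queue.isEmpty()) {
      Object[] top = queue.poll();
      System.out.println(top[1]);            // ant, bee, cat, dog, fox
      Iterator<String> stream = streams.get((Integer) top[0]);
      if (stream.hasNext()) queue.add(new Object[] {top[0], stream.next()});
    }
  }
}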
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
int readPartition(ByteSequencesReader reader) throws IOException {
long start = System.currentTimeMillis();
final BytesRef scratch = new BytesRef();
while ((scratch.bytes = reader.read()) != null) {
scratch.length = scratch.bytes.length;
buffer.append(scratch);
// Account for the created objects.
// (buffer slots do not count toward the buffer size.)
if (ramBufferSize.bytes < buffer.bytesUsed()) {
break;
}
}
sortInfo.readTime += (System.currentTimeMillis() - start);
return buffer.size();
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
public void write(BytesRef ref) throws IOException {
assert ref != null;
write(ref.bytes, ref.offset, ref.length);
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
public void write(byte [] bytes) throws IOException {
write(bytes, 0, bytes.length);
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
public void write(byte [] bytes, int off, int len) throws IOException {
assert bytes != null;
assert off >= 0 && off + len <= bytes.length;
assert len >= 0;
os.writeShort(len);
os.write(bytes, off, len);
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
Override
public void close() throws IOException {
if (os instanceof Closeable) {
((Closeable) os).close();
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
public boolean read(BytesRef ref) throws IOException {
short length;
try {
length = is.readShort();
} catch (EOFException e) {
return false;
}
ref.grow(length);
ref.offset = 0;
ref.length = length;
is.readFully(ref.bytes, 0, length);
return true;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
public byte[] read() throws IOException {
short length;
try {
length = is.readShort();
} catch (EOFException e) {
return null;
}
assert length >= 0 : "Sanity: sequence length < 0: " + length;
byte [] result = new byte [length];
is.readFully(result);
return result;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/Sort.java
Override
public void close() throws IOException {
if (is instanceof Closeable) {
((Closeable) is).close();
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
Override
public void build(TermFreqIterator tfit) throws IOException {
File tempInput = File.createTempFile(
FSTCompletionLookup.class.getSimpleName(), ".input", Sort.defaultTempDir());
File tempSorted = File.createTempFile(
FSTCompletionLookup.class.getSimpleName(), ".sorted", Sort.defaultTempDir());
Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
Sort.ByteSequencesReader reader = null;
// Push floats up front before sequences to sort them. For now, assume they are non-negative.
// If negative floats are allowed some trickery needs to be done to find their byte order.
boolean success = false;
try {
byte [] buffer = new byte [0];
ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
BytesRef spare;
while ((spare = tfit.next()) != null) {
if (spare.length + 4 >= buffer.length) {
buffer = ArrayUtil.grow(buffer, spare.length + 4);
}
output.reset(buffer);
output.writeInt(encodeWeight(tfit.weight()));
output.writeBytes(spare.bytes, spare.offset, spare.length);
writer.write(buffer, 0, output.getPosition());
}
writer.close();
// We don't know the distribution of scores and we need to bucket them, so we'll sort
// and divide into equal buckets.
SortInfo info = new Sort().sort(tempInput, tempSorted);
tempInput.delete();
FSTCompletionBuilder builder = new FSTCompletionBuilder(
buckets, new ExternalRefSorter(new Sort()), sharedTailLength);
final int inputLines = info.lines;
reader = new Sort.ByteSequencesReader(tempSorted);
long line = 0;
int previousBucket = 0;
int previousScore = 0;
ByteArrayDataInput input = new ByteArrayDataInput();
BytesRef tmp1 = new BytesRef();
BytesRef tmp2 = new BytesRef();
while (reader.read(tmp1)) {
input.reset(tmp1.bytes);
int currentScore = input.readInt();
int bucket;
if (line > 0 && currentScore == previousScore) {
bucket = previousBucket;
} else {
bucket = (int) (line * buckets / inputLines);
}
previousScore = currentScore;
previousBucket = bucket;
// Only append the input, discard the weight.
tmp2.bytes = tmp1.bytes;
tmp2.offset = input.getPosition();
tmp2.length = tmp1.length - input.getPosition();
builder.add(tmp2, bucket);
line++;
}
// The two FSTCompletions share the same automaton.
this.higherWeightsCompletion = builder.build();
this.normalCompletion = new FSTCompletion(
higherWeightsCompletion.getFST(), false, exactMatchFirst);
success = true;
} finally {
if (success)
IOUtils.close(reader, writer);
else
IOUtils.closeWhileHandlingException(reader, writer);
tempInput.delete();
tempSorted.delete();
}
}
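// [Worked example with illustrative numbers] The bucket assignment in build() above: entries
// arrive sorted by ascending weight, so with 10 buckets and 1000 input lines, early lines land in
// low buckets and late lines in high buckets, while a line repeating the previous score keeps the
// previous bucket.
public class BucketAssignmentDemo {
  public static void main(String[] args) {
    int buckets = 10;
    int inputLines = 1000;
    long[] lines = {0, 250, 500, 999};
    for (long line : lines) {
      System.out.println("line " + line + " -> bucket " + (int) (line * buckets / inputLines));
      // line 0 -> bucket 0, line 250 -> bucket 2, line 500 -> bucket 5, line 999 -> bucket 9
    }
  }
}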
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
Override
public synchronized boolean store(OutputStream output) throws IOException {
try {
if (this.normalCompletion == null || normalCompletion.getFST() == null)
return false;
normalCompletion.getFST().save(new OutputStreamDataOutput(output));
} finally {
IOUtils.close(output);
}
return true;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
Override
public synchronized boolean load(InputStream input) throws IOException {
try {
this.higherWeightsCompletion = new FSTCompletion(new FST<Object>(
new InputStreamDataInput(input), NoOutputs.getSingleton()));
this.normalCompletion = new FSTCompletion(
higherWeightsCompletion.getFST(), false, exactMatchFirst);
} finally {
IOUtils.close(input);
}
return true;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
Override
public void build(TermFreqIterator iterator) throws IOException {
BytesRef scratch = new BytesRef();
TermFreqIterator iter = new WFSTTermFreqIteratorWrapper(iterator,
BytesRef.getUTF8SortedAsUnicodeComparator());
IntsRef scratchInts = new IntsRef();
BytesRef previous = null;
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
while ((scratch = iter.next()) != null) {
long cost = iter.weight();
if (previous == null) {
previous = new BytesRef();
} else if (scratch.equals(previous)) {
continue; // for duplicate suggestions, the best weight is actually added
}
Util.toIntsRef(scratch, scratchInts);
builder.add(scratchInts, cost);
previous.copyBytes(scratch);
}
fst = builder.finish();
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
Override
public boolean store(OutputStream output) throws IOException {
try {
if (fst == null) {
return false;
}
fst.save(new OutputStreamDataOutput(output));
} finally {
IOUtils.close(output);
}
return true;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
Override
public boolean load(InputStream input) throws IOException {
try {
this.fst = new FST<Long>(new InputStreamDataInput(input), PositiveIntOutputs.getSingleton(true));
} finally {
IOUtils.close(input);
}
return true;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
private Long lookupPrefix(BytesRef scratch, Arc<Long> arc) throws /*Bogus*/IOException {
assert 0 == fst.outputs.getNoOutput().longValue();
long output = 0;
BytesReader bytesReader = fst.getBytesReader(0);
fst.getFirstArc(arc);
byte[] bytes = scratch.bytes;
int pos = scratch.offset;
int end = pos + scratch.length;
while (pos < end) {
if (fst.findTargetArc(bytes[pos++] & 0xff, arc, arc, bytesReader) == null) {
return null;
} else {
output += arc.output.longValue();
}
}
return output;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
Override
protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, long weight) throws IOException {
if (spare.length + 5 >= buffer.length) {
buffer = ArrayUtil.grow(buffer, spare.length + 5);
}
output.reset(buffer);
output.writeBytes(spare.bytes, spare.offset, spare.length);
output.writeByte((byte)0); // separator: not used, just for sort order
output.writeInt(encodeWeight(weight));
writer.write(buffer, 0, output.getPosition());
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
Override
public void build(TermFreqIterator tfit) throws IOException {
if (tfit.getComparator() != null) {
// make sure it's unsorted
// WTF - this could result in yet another sorted iteration....
tfit = new UnsortedTermFreqIteratorWrapper(tfit);
}
trie = new JaspellTernarySearchTrie();
trie.setMatchAlmostDiff(editDistance);
BytesRef spare;
final CharsRef charsSpare = new CharsRef();
while ((spare = tfit.next()) != null) {
final long weight = tfit.weight();
if (spare.length == 0) {
continue;
}
charsSpare.grow(spare.length);
UnicodeUtil.UTF8toUTF16(spare.bytes, spare.offset, spare.length, charsSpare);
trie.put(charsSpare.toString(), Long.valueOf(weight));
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
private void readRecursively(DataInputStream in, TSTNode node) throws IOException {
node.splitchar = in.readChar();
byte mask = in.readByte();
if ((mask & HAS_VALUE) != 0) {
node.data = Long.valueOf(in.readLong());
}
if ((mask & LO_KID) != 0) {
TSTNode kid = trie.new TSTNode('\0', node);
node.relatives[TSTNode.LOKID] = kid;
readRecursively(in, kid);
}
if ((mask & EQ_KID) != 0) {
TSTNode kid = trie.new TSTNode('\0', node);
node.relatives[TSTNode.EQKID] = kid;
readRecursively(in, kid);
}
if ((mask & HI_KID) != 0) {
TSTNode kid = trie.new TSTNode('\0', node);
node.relatives[TSTNode.HIKID] = kid;
readRecursively(in, kid);
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
private void writeRecursively(DataOutputStream out, TSTNode node) throws IOException {
if (node == null) {
return;
}
out.writeChar(node.splitchar);
byte mask = 0;
if (node.relatives[TSTNode.LOKID] != null) mask |= LO_KID;
if (node.relatives[TSTNode.EQKID] != null) mask |= EQ_KID;
if (node.relatives[TSTNode.HIKID] != null) mask |= HI_KID;
if (node.data != null) mask |= HAS_VALUE;
out.writeByte(mask);
if (node.data != null) {
out.writeLong(((Number)node.data).longValue());
}
writeRecursively(out, node.relatives[TSTNode.LOKID]);
writeRecursively(out, node.relatives[TSTNode.EQKID]);
writeRecursively(out, node.relatives[TSTNode.HIKID]);
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
Override
public boolean store(OutputStream output) throws IOException {
TSTNode root = trie.getRoot();
if (root == null) { // empty tree
return false;
}
DataOutputStream out = new DataOutputStream(output);
try {
writeRecursively(out, root);
out.flush();
} finally {
IOUtils.close(out);
}
return true;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
Override
public boolean load(InputStream input) throws IOException {
DataInputStream in = new DataInputStream(input);
TSTNode root = trie.new TSTNode('\0', null);
try {
readRecursively(in, root);
trie.setRoot(root);
} finally {
IOUtils.close(in);
}
return true;
}
// in lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
public void build(Dictionary dict) throws IOException {
BytesRefIterator it = dict.getWordsIterator();
TermFreqIterator tfit;
if (it instanceof TermFreqIterator) {
tfit = (TermFreqIterator)it;
} else {
tfit = new TermFreqIterator.TermFreqIteratorWrapper(it);
}
build(tfit);
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
public final BytesRefIterator getWordsIterator() throws IOException {
return new HighFrequencyIterator();
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
Override
public BytesRef next() throws IOException {
if (termsEnum != null) {
BytesRef next;
while((next = termsEnum.next()) != null) {
if (isFrequent(termsEnum.docFreq())) {
freq = termsEnum.docFreq();
spare.copyBytes(next);
return spare;
}
}
}
return null;
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/TermFreqIterator.java
public BytesRef next() throws IOException {
return wrapped.next();
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java
public SuggestWord[] suggestSimilar(Term term, int numSug, IndexReader ir)
throws IOException {
return suggestSimilar(term, numSug, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java
public SuggestWord[] suggestSimilar(Term term, int numSug, IndexReader ir,
SuggestMode suggestMode) throws IOException {
return suggestSimilar(term, numSug, ir, suggestMode, this.accuracy);
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java
public SuggestWord[] suggestSimilar(Term term, int numSug, IndexReader ir,
SuggestMode suggestMode, float accuracy) throws IOException {
final CharsRef spare = new CharsRef();
String text = term.text();
if (minQueryLength > 0 && text.codePointCount(0, text.length()) < minQueryLength)
return new SuggestWord[0];
if (lowerCaseTerms) {
term = new Term(term.field(), text.toLowerCase(Locale.ENGLISH));
}
int docfreq = ir.docFreq(term);
if (suggestMode==SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && docfreq > 0) {
return new SuggestWord[0];
}
int maxDoc = ir.maxDoc();
if (maxQueryFrequency >= 1f && docfreq > maxQueryFrequency) {
return new SuggestWord[0];
} else if (docfreq > (int) Math.ceil(maxQueryFrequency * (float)maxDoc)) {
return new SuggestWord[0];
}
if (suggestMode!=SuggestMode.SUGGEST_MORE_POPULAR) docfreq = 0;
if (thresholdFrequency >= 1f) {
docfreq = Math.max(docfreq, (int) thresholdFrequency);
} else if (thresholdFrequency > 0f) {
docfreq = Math.max(docfreq, (int)(thresholdFrequency * (float)maxDoc)-1);
}
Collection<ScoreTerm> terms = null;
int inspections = numSug * maxInspections;
// try ed=1 first, in case we get lucky
terms = suggestSimilar(term, inspections, ir, docfreq, 1, accuracy, spare);
if (maxEdits > 1 && terms.size() < inspections) {
HashSet<ScoreTerm> moreTerms = new HashSet<ScoreTerm>();
moreTerms.addAll(terms);
moreTerms.addAll(suggestSimilar(term, inspections, ir, docfreq, maxEdits, accuracy, spare));
terms = moreTerms;
}
// create the suggestword response, sort it, and trim it to size.
SuggestWord suggestions[] = new SuggestWord[terms.size()];
int index = suggestions.length - 1;
for (ScoreTerm s : terms) {
SuggestWord suggestion = new SuggestWord();
if (s.termAsString == null) {
UnicodeUtil.UTF8toUTF16(s.term, spare);
s.termAsString = spare.toString();
}
suggestion.string = s.termAsString;
suggestion.score = s.score;
suggestion.freq = s.docfreq;
suggestions[index--] = suggestion;
}
ArrayUtil.mergeSort(suggestions, Collections.reverseOrder(comparator));
if (numSug < suggestions.length) {
SuggestWord trimmed[] = new SuggestWord[numSug];
System.arraycopy(suggestions, 0, trimmed, 0, numSug);
suggestions = trimmed;
}
return suggestions;
}
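// [Hedged usage sketch; the index directory and field name are assumptions] Calling the public
// suggestSimilar() entry points above: open a reader over an existing index and ask for the top
// five corrections of a query term, using DirectSpellChecker's default settings.
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.store.FSDirectory;
import java.io.File;

public class DirectSpellCheckerDemo {
  public static void main(String[] args) throws Exception {
    DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File("index")));
    try {
      DirectSpellChecker checker = new DirectSpellChecker();
      SuggestWord[] suggestions = checker.suggestSimilar(new Term("body", "luncene"), 5, reader);
      for (SuggestWord s : suggestions) {
        System.out.println(s.string + " (freq=" + s.freq + ", score=" + s.score + ")");
      }
    } finally {
      reader.close();
    }
  }
}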
// in lucene/suggest/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java
private Collection<ScoreTerm> suggestSimilar(Term term, int numSug,
IndexReader ir, int docfreq, int editDistance, float accuracy, final CharsRef spare) throws IOException {
AttributeSource atts = new AttributeSource();
MaxNonCompetitiveBoostAttribute maxBoostAtt =
atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
Terms terms = MultiFields.getTerms(ir, term.field());
if (terms == null) {
return Collections.emptyList();
}
FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.max(minPrefix, editDistance-1), true);
final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
BytesRef queryTerm = new BytesRef(term.text());
BytesRef candidateTerm;
ScoreTerm st = new ScoreTerm();
BoostAttribute boostAtt =
e.attributes().addAttribute(BoostAttribute.class);
while ((candidateTerm = e.next()) != null) {
final float boost = boostAtt.getBoost();
// ignore uncompetitive hits
if (stQueue.size() >= numSug && boost <= stQueue.peek().boost)
continue;
// ignore exact match of the same term
if (queryTerm.bytesEquals(candidateTerm))
continue;
int df = e.docFreq();
// check docFreq if required
if (df <= docfreq)
continue;
final float score;
final String termAsString;
if (distance == INTERNAL_LEVENSHTEIN) {
// delay creating strings until the end
termAsString = null;
// undo FuzzyTermsEnum's scale factor for a real scaled lev score
score = boost / e.getScaleFactor() + e.getMinSimilarity();
} else {
UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
termAsString = spare.toString();
score = distance.getDistance(term.text(), termAsString);
}
if (score < accuracy)
continue;
// add new entry in PQ
st.term = BytesRef.deepCopyOf(candidateTerm);
st.boost = boost;
st.docfreq = df;
st.termAsString = termAsString;
st.score = score;
stQueue.offer(st);
// possibly drop entries from queue
st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
}
return stQueue;
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/WordBreakSpellChecker.java
public CombineSuggestion[] suggestWordCombinations(Term[] terms,
int maxSuggestions, IndexReader ir, SuggestMode suggestMode)
throws IOException {
if (maxSuggestions < 1) {
return new CombineSuggestion[0];
}
int[] origFreqs = null;
if (suggestMode != SuggestMode.SUGGEST_ALWAYS) {
origFreqs = new int[terms.length];
for (int i = 0; i < terms.length; i++) {
origFreqs[i] = ir.docFreq(terms[i]);
}
}
int queueInitialCapacity = maxSuggestions > 10 ? 10 : maxSuggestions;
Comparator<CombineSuggestionWrapper> queueComparator = new CombinationsThenFreqComparator();
Queue<CombineSuggestionWrapper> suggestions = new PriorityQueue<CombineSuggestionWrapper>(
queueInitialCapacity, queueComparator);
int thisTimeEvaluations = 0;
BytesRef reuse = new BytesRef();
for (int i = 0; i < terms.length - 1; i++) {
if (terms[i].equals(SEPARATOR_TERM)) {
continue;
}
int byteLength = terms[i].bytes().length;
if (byteLength > maxCombineWordLength) {
continue;
}
reuse.grow(byteLength);
reuse.length = byteLength;
System.arraycopy(terms[i].bytes().bytes, terms[i].bytes().offset,
reuse.bytes, 0, byteLength);
int maxFreq = 0;
int minFreq = Integer.MAX_VALUE;
if (origFreqs != null) {
maxFreq = origFreqs[i];
minFreq = origFreqs[i];
}
for (int j = i + 1; j < terms.length && j - i <= maxChanges; j++) {
if (terms[j].equals(SEPARATOR_TERM)) {
break;
}
byteLength += terms[j].bytes().length;
if (byteLength > maxCombineWordLength) {
break;
}
if (origFreqs != null) {
maxFreq = Math.max(maxFreq, origFreqs[j]);
minFreq = Math.min(minFreq, origFreqs[j]);
}
reuse.grow(byteLength);
System.arraycopy(terms[j].bytes().bytes, terms[j].bytes().offset,
reuse.bytes, reuse.length, terms[j].bytes().length);
reuse.length = byteLength;
Term combinedTerm = new Term(terms[0].field(), reuse);
int combinedTermFreq = ir.docFreq(combinedTerm);
if (suggestMode != SuggestMode.SUGGEST_MORE_POPULAR
|| combinedTermFreq >= maxFreq) {
if (suggestMode != SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX
|| minFreq == 0) {
if (combinedTermFreq >= minSuggestionFrequency) {
int[] origIndexes = new int[j - i + 1];
origIndexes[0] = i;
for (int k = 1; k < origIndexes.length; k++) {
origIndexes[k] = i + k;
}
SuggestWord word = new SuggestWord();
word.freq = combinedTermFreq;
word.score = origIndexes.length - 1;
word.string = combinedTerm.text();
CombineSuggestionWrapper suggestion = new CombineSuggestionWrapper(
new CombineSuggestion(word, origIndexes),
(origIndexes.length - 1));
suggestions.offer(suggestion);
if (suggestions.size() > maxSuggestions) {
suggestions.poll();
}
}
}
}
thisTimeEvaluations++;
if (thisTimeEvaluations == maxEvaluations) {
break;
}
}
}
CombineSuggestion[] combineSuggestions = new CombineSuggestion[suggestions
.size()];
for (int i = suggestions.size() - 1; i >= 0; i--) {
combineSuggestions[i] = suggestions.remove().combineSuggestion;
}
return combineSuggestions;
}
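// [Hedged usage sketch; the index directory and field name are assumptions] suggestWordCombinations()
// above asks whether adjacent query terms, e.g. "data" + "base", also exist as a single indexed word.
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spell.CombineSuggestion;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.search.spell.WordBreakSpellChecker;
import org.apache.lucene.store.FSDirectory;
import java.io.File;

public class WordCombinationDemo {
  public static void main(String[] args) throws Exception {
    DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File("index")));
    try {
      WordBreakSpellChecker checker = new WordBreakSpellChecker();
      Term[] terms = { new Term("body", "data"), new Term("body", "base") };
      CombineSuggestion[] combined =
          checker.suggestWordCombinations(terms, 5, reader, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
      System.out.println(combined.length + " combination suggestion(s)");
    } finally {
      reader.close();
    }
  }
}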
// in lucene/suggest/src/java/org/apache/lucene/search/spell/WordBreakSpellChecker.java
private int generateBreakUpSuggestions(Term term, IndexReader ir,
int numberBreaks, int maxSuggestions, int useMinSuggestionFrequency,
SuggestWord[] prefix, Queue<SuggestWordArrayWrapper> suggestions,
int totalEvaluations, BreakSuggestionSortMethod sortMethod)
throws IOException {
int termLength = term.bytes().length;
int useMinBreakWordLength = minBreakWordLength;
if (useMinBreakWordLength < 1) {
useMinBreakWordLength = 1;
}
if (termLength <= (useMinBreakWordLength * 2)) {
return 0;
}
int thisTimeEvaluations = 0;
BytesRef termBytes = term.bytes().clone();
for (int i = useMinBreakWordLength; i < (termLength - useMinBreakWordLength); i++) {
SuggestWord leftWord = generateSuggestWord(ir, termBytes, 0, i, term
.field());
if (leftWord.freq >= useMinSuggestionFrequency) {
SuggestWord rightWord = generateSuggestWord(ir, termBytes, i,
termLength - i, term.field());
if (rightWord.freq >= useMinSuggestionFrequency) {
SuggestWordArrayWrapper suggestion = new SuggestWordArrayWrapper(
newSuggestion(prefix, leftWord, rightWord));
suggestions.offer(suggestion);
if (suggestions.size() > maxSuggestions) {
suggestions.poll();
}
}
int newNumberBreaks = numberBreaks + 1;
if (newNumberBreaks <= maxChanges) {
int evaluations = generateBreakUpSuggestions(new Term(term.field(),
rightWord.string), ir, newNumberBreaks, maxSuggestions,
useMinSuggestionFrequency, newPrefix(prefix, leftWord),
suggestions, totalEvaluations, sortMethod);
totalEvaluations += evaluations;
}
}
thisTimeEvaluations++;
totalEvaluations++;
if (totalEvaluations >= maxEvaluations) {
break;
}
}
return thisTimeEvaluations;
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/WordBreakSpellChecker.java
private SuggestWord generateSuggestWord(IndexReader ir, BytesRef bytes,
int offset, int length, String fieldname) throws IOException {
bytes.offset = offset;
bytes.length = length;
Term term = new Term(fieldname, bytes);
int freq = ir.docFreq(term);
SuggestWord word = new SuggestWord();
word.freq = freq;
word.score = 1;
word.string = term.text();
return word;
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
public BytesRefIterator getWordsIterator() throws IOException {
return new FileIterator();
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/PlainTextDictionary.java
@Override
public BytesRef next() throws IOException {
if (done) {
return null;
}
boolean success = false;
BytesRef result;
try {
String line;
if ((line = in.readLine()) != null) {
spare.copyChars(line);
result = spare;
} else {
done = true;
IOUtils.close(in);
result = null;
}
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(in);
}
}
return result;
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/LuceneDictionary.java
public final BytesRefIterator getWordsIterator() throws IOException {
final Terms terms = MultiFields.getTerms(reader, field);
if (terms != null) {
return terms.iterator(null);
} else {
return BytesRefIterator.EMPTY;
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
public void setSpellIndex(Directory spellIndexDir) throws IOException {
// this could be the same directory as the current spellIndex
// modifications to the directory should be synchronized
synchronized (modifyCurrentIndexLock) {
ensureOpen();
if (!DirectoryReader.indexExists(spellIndexDir)) {
IndexWriter writer = new IndexWriter(spellIndexDir,
new IndexWriterConfig(Version.LUCENE_CURRENT,
null));
writer.close();
}
swapSearcher(spellIndexDir);
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
public String[] suggestSimilar(String word, int numSug) throws IOException {
return this.suggestSimilar(word, numSug, null, null, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
public String[] suggestSimilar(String word, int numSug, float accuracy) throws IOException {
return this.suggestSimilar(word, numSug, null, null, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, accuracy);
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
String field, SuggestMode suggestMode) throws IOException {
return suggestSimilar(word, numSug, ir, field, suggestMode, this.accuracy);
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
String field, SuggestMode suggestMode, float accuracy) throws IOException {
// obtainSearcher calls ensureOpen
final IndexSearcher indexSearcher = obtainSearcher();
try {
if (ir == null || field == null) {
suggestMode = SuggestMode.SUGGEST_ALWAYS;
}
if (suggestMode == SuggestMode.SUGGEST_ALWAYS) {
ir = null;
field = null;
}
final int lengthWord = word.length();
final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0;
final int goalFreq = suggestMode==SuggestMode.SUGGEST_MORE_POPULAR ? freq : 0;
// if the word exists in the real index and we don't care for word frequency, return the word itself
if (suggestMode==SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX && freq > 0) {
return new String[] { word };
}
BooleanQuery query = new BooleanQuery();
String[] grams;
String key;
for (int ng = getMin(lengthWord); ng <= getMax(lengthWord); ng++) {
key = "gram" + ng; // form key
grams = formGrams(word, ng); // form word into ngrams (allow dups too)
if (grams.length == 0) {
continue; // hmm
}
if (bStart > 0) { // should we boost prefixes?
add(query, "start" + ng, grams[0], bStart); // matches start of word
}
if (bEnd > 0) { // should we boost suffixes
add(query, "end" + ng, grams[grams.length - 1], bEnd); // matches end of word
}
for (int i = 0; i < grams.length; i++) {
add(query, key, grams[i]);
}
}
int maxHits = 10 * numSug;
// System.out.println("Q: " + query);
ScoreDoc[] hits = indexSearcher.search(query, null, maxHits).scoreDocs;
// System.out.println("HITS: " + hits.length());
SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparator);
// go thru more than 'maxr' matches in case the distance filter triggers
int stop = Math.min(hits.length, maxHits);
SuggestWord sugWord = new SuggestWord();
for (int i = 0; i < stop; i++) {
sugWord.string = indexSearcher.doc(hits[i].doc).get(F_WORD); // get orig word
// don't suggest a word for itself, that would be silly
if (sugWord.string.equals(word)) {
continue;
}
// edit distance
sugWord.score = sd.getDistance(word,sugWord.string);
if (sugWord.score < accuracy) {
continue;
}
if (ir != null && field != null) { // use the user index
sugWord.freq = ir.docFreq(new Term(field, sugWord.string)); // freq in the index
// don't suggest a word that is not present in the field
if ((suggestMode==SuggestMode.SUGGEST_MORE_POPULAR && goalFreq > sugWord.freq) || sugWord.freq < 1) {
continue;
}
}
sugQueue.insertWithOverflow(sugWord);
if (sugQueue.size() == numSug) {
// if queue full, maintain the minScore score
accuracy = sugQueue.top().score;
}
sugWord = new SuggestWord();
}
// convert to an array of strings
String[] list = new String[sugQueue.size()];
for (int i = sugQueue.size() - 1; i >= 0; i--) {
list[i] = sugQueue.pop().string;
}
return list;
} finally {
releaseSearcher(indexSearcher);
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
public void clearIndex() throws IOException {
synchronized (modifyCurrentIndexLock) {
ensureOpen();
final Directory dir = this.spellIndex;
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
Version.LUCENE_CURRENT,
null)
.setOpenMode(OpenMode.CREATE));
writer.close();
swapSearcher(dir);
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
public boolean exist(String word) throws IOException {
// obtainSearcher calls ensureOpen
final IndexSearcher indexSearcher = obtainSearcher();
try{
// TODO: we should use ReaderUtil+seekExact, we dont care about the docFreq
// this is just an existence check
return indexSearcher.getIndexReader().docFreq(new Term(F_WORD, word)) > 0;
} finally {
releaseSearcher(indexSearcher);
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
public final void indexDictionary(Dictionary dict, IndexWriterConfig config, boolean fullMerge) throws IOException {
synchronized (modifyCurrentIndexLock) {
ensureOpen();
final Directory dir = this.spellIndex;
final IndexWriter writer = new IndexWriter(dir, config);
IndexSearcher indexSearcher = obtainSearcher();
final List<TermsEnum> termsEnums = new ArrayList<TermsEnum>();
final IndexReader reader = searcher.getIndexReader();
if (reader.maxDoc() > 0) {
new ReaderUtil.Gather(reader) {
@Override
protected void add(int base, AtomicReader r) throws IOException {
Terms terms = r.terms(F_WORD);
if (terms != null)
termsEnums.add(terms.iterator(null));
}
}.run();
}
boolean isEmpty = termsEnums.isEmpty();
try {
BytesRefIterator iter = dict.getWordsIterator();
BytesRef currentTerm;
terms: while ((currentTerm = iter.next()) != null) {
String word = currentTerm.utf8ToString();
int len = word.length();
if (len < 3) {
continue; // too short we bail but "too long" is fine...
}
if (!isEmpty) {
for (TermsEnum te : termsEnums) {
if (te.seekExact(currentTerm, false)) {
continue terms;
}
}
}
// ok index the word
Document doc = createDocument(word, getMin(len), getMax(len));
writer.addDocument(doc);
}
} finally {
releaseSearcher(indexSearcher);
}
if (fullMerge) {
writer.forceMerge(1);
}
// close writer
writer.close();
// TODO: this isn't that great, maybe in the future SpellChecker should take
// IWC in its ctor / keep its writer open?
// also re-open the spell index to see our own changes when the next suggestion
// is fetched:
swapSearcher(dir);
}
}
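// Hedged usage sketch (not part of the sources above): building a spelling index from an
// existing field with indexDictionary() and then asking for suggestions. The directory
// paths, the "contents" field and the misspelled query word are assumptions for illustration.
import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class SpellCheckerSketch {
  public static void main(String[] args) throws Exception {
    Directory mainIndex = FSDirectory.open(new File("/path/to/main/index"));   // assumed
    Directory spellIndex = FSDirectory.open(new File("/path/to/spell/index")); // assumed
    IndexReader reader = DirectoryReader.open(mainIndex);
    SpellChecker spellChecker = new SpellChecker(spellIndex);
    // index every term of the "contents" field into the spelling index, then force-merge it
    spellChecker.indexDictionary(
        new LuceneDictionary(reader, "contents"),
        new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)),
        true);
    for (String s : spellChecker.suggestSimilar("lucenne", 5)) {
      System.out.println(s);
    }
    spellChecker.close();
    reader.close();
  }
}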
// in lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
@Override
protected void add(int base, AtomicReader r) throws IOException {
Terms terms = r.terms(F_WORD);
if (terms != null)
termsEnums.add(terms.iterator(null));
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
private void releaseSearcher(final IndexSearcher aSearcher) throws IOException{
// don't check if open - always decRef
// don't decrement the private searcher - could have been swapped
aSearcher.getIndexReader().decRef();
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
public void close() throws IOException {
synchronized (searcherLock) {
ensureOpen();
closed = true;
if (searcher != null) {
searcher.getIndexReader().close();
}
searcher = null;
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
private void swapSearcher(final Directory dir) throws IOException {
/*
* opening a searcher is possibly very expensive.
* We rather close it again if the Spellchecker was closed during
* this operation than block access to the current searcher while opening.
*/
final IndexSearcher indexSearcher = createSearcher(dir);
synchronized (searcherLock) {
if(closed){
indexSearcher.getIndexReader().close();
throw new AlreadyClosedException("Spellchecker has been closed");
}
if (searcher != null) {
searcher.getIndexReader().close();
}
// set the spellindex in the sync block - ensure consistency.
searcher = indexSearcher;
this.spellIndex = dir;
}
}
// in lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
IndexSearcher createSearcher(final Directory dir) throws IOException{
return new IndexSearcher(DirectoryReader.open(dir));
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
public final String getBestFragment(Analyzer analyzer, String fieldName,String text)
throws IOException, InvalidTokenOffsetsException
{
TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
return getBestFragment(tokenStream, text);
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
public final String getBestFragment(TokenStream tokenStream, String text)
throws IOException, InvalidTokenOffsetsException
{
String[] results = getBestFragments(tokenStream,text, 1);
if (results.length > 0)
{
return results[0];
}
return null;
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
public final String[] getBestFragments(
Analyzer analyzer,
String fieldName,
String text,
int maxNumFragments)
throws IOException, InvalidTokenOffsetsException
{
TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
return getBestFragments(tokenStream, text, maxNumFragments);
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
public final String[] getBestFragments(
TokenStream tokenStream,
String text,
int maxNumFragments)
throws IOException, InvalidTokenOffsetsException
{
maxNumFragments = Math.max(1, maxNumFragments); //sanity check
TextFragment[] frag =getBestTextFragments(tokenStream,text, true,maxNumFragments);
//Get text
ArrayList<String> fragTexts = new ArrayList<String>();
for (int i = 0; i < frag.length; i++)
{
if ((frag[i] != null) && (frag[i].getScore() > 0))
{
fragTexts.add(frag[i].toString());
}
}
return fragTexts.toArray(new String[0]);
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
public final TextFragment[] getBestTextFragments(
TokenStream tokenStream,
String text,
boolean mergeContiguousFragments,
int maxNumFragments)
throws IOException, InvalidTokenOffsetsException
{
ArrayList<TextFragment> docFrags = new ArrayList<TextFragment>();
StringBuilder newText=new StringBuilder();
CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
tokenStream.addAttribute(PositionIncrementAttribute.class);
tokenStream.reset();
TextFragment currentFrag = new TextFragment(newText,newText.length(), docFrags.size());
if (fragmentScorer instanceof QueryScorer) {
((QueryScorer) fragmentScorer).setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
}
TokenStream newStream = fragmentScorer.init(tokenStream);
if(newStream != null) {
tokenStream = newStream;
}
fragmentScorer.startFragment(currentFrag);
docFrags.add(currentFrag);
FragmentQueue fragQueue = new FragmentQueue(maxNumFragments);
try
{
String tokenText;
int startOffset;
int endOffset;
int lastEndOffset = 0;
textFragmenter.start(text, tokenStream);
TokenGroup tokenGroup=new TokenGroup(tokenStream);
for (boolean next = tokenStream.incrementToken(); next && (offsetAtt.startOffset()< maxDocCharsToAnalyze);
next = tokenStream.incrementToken())
{
if( (offsetAtt.endOffset()>text.length())
||
(offsetAtt.startOffset()>text.length())
)
{
throw new InvalidTokenOffsetsException("Token "+ termAtt.toString()
+" exceeds length of provided text sized "+text.length());
}
if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct()))
{
//the current token is distinct from previous tokens -
// markup the cached token group info
startOffset = tokenGroup.matchStartOffset;
endOffset = tokenGroup.matchEndOffset;
tokenText = text.substring(startOffset, endOffset);
String markedUpText=formatter.highlightTerm(encoder.encodeText(tokenText), tokenGroup);
//store any whitespace etc from between this and last group
if (startOffset > lastEndOffset)
newText.append(encoder.encodeText(text.substring(lastEndOffset, startOffset)));
newText.append(markedUpText);
lastEndOffset=Math.max(endOffset, lastEndOffset);
tokenGroup.clear();
//check if current token marks the start of a new fragment
if(textFragmenter.isNewFragment())
{
currentFrag.setScore(fragmentScorer.getFragmentScore());
//record stats for a new fragment
currentFrag.textEndPos = newText.length();
currentFrag =new TextFragment(newText, newText.length(), docFrags.size());
fragmentScorer.startFragment(currentFrag);
docFrags.add(currentFrag);
}
}
tokenGroup.addToken(fragmentScorer.getTokenScore());
// if(lastEndOffset>maxDocBytesToAnalyze)
// {
// break;
// }
}
currentFrag.setScore(fragmentScorer.getFragmentScore());
if(tokenGroup.numTokens>0)
{
//flush the accumulated text (same code as in above loop)
startOffset = tokenGroup.matchStartOffset;
endOffset = tokenGroup.matchEndOffset;
tokenText = text.substring(startOffset, endOffset);
String markedUpText=formatter.highlightTerm(encoder.encodeText(tokenText), tokenGroup);
//store any whitespace etc from between this and last group
if (startOffset > lastEndOffset)
newText.append(encoder.encodeText(text.substring(lastEndOffset, startOffset)));
newText.append(markedUpText);
lastEndOffset=Math.max(lastEndOffset,endOffset);
}
//Test what remains of the original text beyond the point where we stopped analyzing
if (
// if there is text beyond the last token considered..
(lastEndOffset < text.length())
&&
// and that text is not too large...
(text.length()<= maxDocCharsToAnalyze)
)
{
//append it to the last fragment
newText.append(encoder.encodeText(text.substring(lastEndOffset)));
}
currentFrag.textEndPos = newText.length();
//sort the most relevant sections of the text
for (Iterator<TextFragment> i = docFrags.iterator(); i.hasNext();)
{
currentFrag = i.next();
//If you are running with a version of Lucene before 11th Sept 03
// you do not have PriorityQueue.insert() - so uncomment the code below
/*
if (currentFrag.getScore() >= minScore)
{
fragQueue.put(currentFrag);
if (fragQueue.size() > maxNumFragments)
{ // if hit queue overfull
fragQueue.pop(); // remove lowest in hit queue
minScore = ((TextFragment) fragQueue.top()).getScore(); // reset minScore
}
}
*/
//The above code caused a problem as a result of Christoph Goller's 11th Sept 03
//fix to PriorityQueue. The correct method to use here is the new "insert" method
// USE ABOVE CODE IF THIS DOES NOT COMPILE!
fragQueue.insertWithOverflow(currentFrag);
}
//return the most relevant fragments
TextFragment frag[] = new TextFragment[fragQueue.size()];
for (int i = frag.length - 1; i >= 0; i--)
{
frag[i] = fragQueue.pop();
}
//merge any contiguous fragments to improve readability
if(mergeContiguousFragments)
{
mergeContiguousFragments(frag);
ArrayList<TextFragment> fragTexts = new ArrayList<TextFragment>();
for (int i = 0; i < frag.length; i++)
{
if ((frag[i] != null) && (frag[i].getScore() > 0))
{
fragTexts.add(frag[i]);
}
}
frag= fragTexts.toArray(new TextFragment[0]);
}
return frag;
}
finally
{
if (tokenStream != null)
{
try
{
tokenStream.end();
tokenStream.close();
}
catch (Exception e)
{
}
}
}
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
public final String getBestFragments(
TokenStream tokenStream,
String text,
int maxNumFragments,
String separator)
throws IOException, InvalidTokenOffsetsException
{
String sections[] = getBestFragments(tokenStream,text, maxNumFragments);
StringBuilder result = new StringBuilder();
for (int i = 0; i < sections.length; i++)
{
if (i > 0)
{
result.append(separator);
}
result.append(sections[i]);
}
return result.toString();
}
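// Hedged usage sketch (not part of the sources above): the common way to drive the
// getBestFragments() methods above, wiring a QueryScorer and SimpleHTMLFormatter into a
// Highlighter. The "contents" field, the query term and the sample text are assumptions
// for illustration.
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.Version;

public class HighlighterSketch {
  public static void main(String[] args) throws Exception {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
    Query query = new TermQuery(new Term("contents", "lucene")); // assumed field/term
    Highlighter highlighter =
        new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), new QueryScorer(query));
    String text = "Apache Lucene is a full-text search library written in Java.";
    // joins the best fragments with "..." between them
    String snippet = highlighter.getBestFragments(
        analyzer.tokenStream("contents", new StringReader(text)), text, 3, "...");
    System.out.println(snippet);
  }
}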
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/OffsetLimitTokenFilter.java
@Override
public boolean incrementToken() throws IOException {
if (offsetCount < offsetLimit && input.incrementToken()) {
int offsetLength = offsetAttrib.endOffset() - offsetAttrib.startOffset();
offsetCount += offsetLength;
return true;
}
return false;
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/OffsetLimitTokenFilter.java
@Override
public void reset() throws IOException {
super.reset();
offsetCount = 0;
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
@Override
public boolean incrementToken() throws IOException {
if (this.tokensAtCurrentPosition.hasNext()) {
final Token next = this.tokensAtCurrentPosition.next();
clearAttributes();
termAttribute.setEmpty().append(next);
positionIncrementAttribute.setPositionIncrement(next
.getPositionIncrement());
offsetAttribute.setOffset(next.startOffset(), next.endOffset());
return true;
}
return false;
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
@Override
public void reset() throws IOException {
this.tokensAtCurrentPosition = this.positionedTokens.iterator();
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
String field, Document doc, Analyzer analyzer) throws IOException {
TokenStream ts = null;
Fields vectors = reader.getTermVectors(docId);
if (vectors != null) {
Terms vector = vectors.terms(field);
if (vector != null) {
ts = getTokenStream(vector);
}
}
// No token info stored so fall back to analyzing raw content
if (ts == null) {
ts = getTokenStream(doc, field, analyzer);
}
return ts;
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
String field, Analyzer analyzer) throws IOException {
TokenStream ts = null;
Fields vectors = reader.getTermVectors(docId);
if (vectors != null) {
Terms vector = vectors.terms(field);
if (vector != null) {
ts = getTokenStream(vector);
}
}
// No token info stored so fall back to analyzing raw content
if (ts == null) {
ts = getTokenStream(reader, docId, field, analyzer);
}
return ts;
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
public static TokenStream getTokenStream(Terms vector) throws IOException {
// assumes the worst and makes no assumptions about token position
// sequences.
return getTokenStream(vector, false);
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
private static boolean hasPositions(Terms vector) throws IOException {
final TermsEnum termsEnum = vector.iterator(null);
if (termsEnum.next() != null) {
DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, false);
if (dpEnum != null) {
int pos = dpEnum.nextPosition();
if (pos >= 0) {
return true;
}
}
}
return false;
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
public static TokenStream getTokenStream(Terms tpv,
boolean tokenPositionsGuaranteedContiguous)
throws IOException {
if (!tokenPositionsGuaranteedContiguous && hasPositions(tpv)) {
return new TokenStreamFromTermPositionVector(tpv);
}
// an object used to iterate across an array of tokens
final class StoredTokenStream extends TokenStream {
Token tokens[];
int currentToken = 0;
CharTermAttribute termAtt;
OffsetAttribute offsetAtt;
PositionIncrementAttribute posincAtt;
StoredTokenStream(Token tokens[]) {
this.tokens = tokens;
termAtt = addAttribute(CharTermAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
posincAtt = addAttribute(PositionIncrementAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (currentToken >= tokens.length) {
return false;
}
Token token = tokens[currentToken++];
clearAttributes();
termAtt.setEmpty().append(token);
offsetAtt.setOffset(token.startOffset(), token.endOffset());
posincAtt
.setPositionIncrement(currentToken <= 1
|| tokens[currentToken - 1].startOffset() > tokens[currentToken - 2]
.startOffset() ? 1 : 0);
return true;
}
}
// code to reconstruct the original sequence of Tokens
TermsEnum termsEnum = tpv.iterator(null);
int totalTokens = 0;
while(termsEnum.next() != null) {
totalTokens += (int) termsEnum.totalTermFreq();
}
Token tokensInOriginalOrder[] = new Token[totalTokens];
ArrayList<Token> unsortedTokens = null;
termsEnum = tpv.iterator(null);
BytesRef text;
DocsAndPositionsEnum dpEnum = null;
while ((text = termsEnum.next()) != null) {
dpEnum = termsEnum.docsAndPositions(null, dpEnum, true);
if (dpEnum == null) {
throw new IllegalArgumentException(
"Required TermVector Offset information was not found");
}
final String term = text.utf8ToString();
dpEnum.nextDoc();
final int freq = dpEnum.freq();
for(int posUpto=0;posUpto<freq;posUpto++) {
final int pos = dpEnum.nextPosition();
final Token token = new Token(term,
dpEnum.startOffset(),
dpEnum.endOffset());
if (tokenPositionsGuaranteedContiguous && pos != -1) {
// We have positions stored and a guarantee that the token position
// information is contiguous
// This may be fast BUT won't work if Tokenizers are used which create >1
// token in the same position or create jumps in position numbers - this code
// would fail under those circumstances
// tokens stored with positions - can use this to index straight into
// sorted array
tokensInOriginalOrder[pos] = token;
} else {
// tokens NOT stored with positions or not guaranteed contiguous - must
// add to list and sort later
if (unsortedTokens == null) {
unsortedTokens = new ArrayList<Token>();
}
unsortedTokens.add(token);
}
}
}
// If the field has been stored without position data we must perform a sort
if (unsortedTokens != null) {
tokensInOriginalOrder = unsortedTokens.toArray(new Token[unsortedTokens
.size()]);
ArrayUtil.mergeSort(tokensInOriginalOrder, new Comparator<Token>() {
public int compare(Token t1, Token t2) {
if (t1.startOffset() == t2.startOffset()) return t1.endOffset()
- t2.endOffset();
else return t1.startOffset() - t2.startOffset();
}
});
}
return new StoredTokenStream(tokensInOriginalOrder);
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
@Override
public boolean incrementToken() throws IOException {
if (currentToken >= tokens.length) {
return false;
}
Token token = tokens[currentToken++];
clearAttributes();
termAtt.setEmpty().append(token);
offsetAtt.setOffset(token.startOffset(), token.endOffset());
posincAtt
.setPositionIncrement(currentToken <= 1
|| tokens[currentToken - 1].startOffset() > tokens[currentToken - 2]
.startOffset() ? 1 : 0);
return true;
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
public static TokenStream getTokenStream(IndexReader reader, int docId,
String field) throws IOException {
Fields vectors = reader.getTermVectors(docId);
if (vectors == null) {
throw new IllegalArgumentException(field + " in doc #" + docId
+ "does not have any term position data stored");
}
Terms vector = vectors.terms(field);
if (vector == null) {
throw new IllegalArgumentException(field + " in doc #" + docId
+ "does not have any term position data stored");
}
if (!hasPositions(vector)) {
throw new IllegalArgumentException(field + " in doc #" + docId
+ "does not have any term position data stored");
}
return getTokenStream(vector);
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
public static TokenStream getTokenStream(IndexReader reader, int docId,
String field, Analyzer analyzer) throws IOException {
Document doc = reader.document(docId);
return getTokenStream(doc, field, analyzer);
}
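// Hedged usage sketch (not part of the sources above): letting TokenSources pick the
// cheapest token stream for highlighting - a stored term vector when one exists, falling
// back to re-analysis of the stored value otherwise. The reader, docId and the "contents"
// field are assumptions for illustration.
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.util.Version;

public class TokenSourcesSketch {
  static TokenStream streamFor(IndexReader reader, int docId) throws Exception {
    Document doc = reader.document(docId);
    // uses the term vector of "contents" if stored, otherwise re-analyzes the stored text
    return TokenSources.getAnyTokenStream(
        reader, docId, "contents", doc, new StandardAnalyzer(Version.LUCENE_40));
  }
}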
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
protected void extract(Query query, Map<String,WeightedSpanTerm> terms) throws IOException {
if (query instanceof BooleanQuery) {
BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses();
for (int i = 0; i < queryClauses.length; i++) {
if (!queryClauses[i].isProhibited()) {
extract(queryClauses[i].getQuery(), terms);
}
}
} else if (query instanceof PhraseQuery) {
PhraseQuery phraseQuery = ((PhraseQuery) query);
Term[] phraseQueryTerms = phraseQuery.getTerms();
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
for (int i = 0; i < phraseQueryTerms.length; i++) {
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
}
int slop = phraseQuery.getSlop();
int[] positions = phraseQuery.getPositions();
// add largest position increment to slop
if (positions.length > 0) {
int lastPos = positions[0];
int largestInc = 0;
int sz = positions.length;
for (int i = 1; i < sz; i++) {
int pos = positions[i];
int inc = pos - lastPos;
if (inc > largestInc) {
largestInc = inc;
}
lastPos = pos;
}
if(largestInc > 1) {
slop += largestInc;
}
}
boolean inorder = false;
if (slop == 0) {
inorder = true;
}
SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
sp.setBoost(query.getBoost());
extractWeightedSpanTerms(terms, sp);
} else if (query instanceof TermQuery) {
extractWeightedTerms(terms, query);
} else if (query instanceof SpanQuery) {
extractWeightedSpanTerms(terms, (SpanQuery) query);
} else if (query instanceof FilteredQuery) {
extract(((FilteredQuery) query).getQuery(), terms);
} else if (query instanceof DisjunctionMaxQuery) {
for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
extract(iterator.next(), terms);
}
} else if (query instanceof MultiTermQuery && expandMultiTermQuery) {
MultiTermQuery mtq = ((MultiTermQuery)query);
if(mtq.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
mtq = (MultiTermQuery) mtq.clone();
mtq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
query = mtq;
}
if (mtq.getField() != null) {
IndexReader ir = getLeafContextForField(mtq.getField()).reader();
extract(query.rewrite(ir), terms);
}
} else if (query instanceof MultiPhraseQuery) {
final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
final List<Term[]> termArrays = mpq.getTermArrays();
final int[] positions = mpq.getPositions();
if (positions.length > 0) {
int maxPosition = positions[positions.length - 1];
for (int i = 0; i < positions.length - 1; ++i) {
if (positions[i] > maxPosition) {
maxPosition = positions[i];
}
}
@SuppressWarnings("unchecked")
final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
int distinctPositions = 0;
for (int i = 0; i < termArrays.size(); ++i) {
final Term[] termArray = termArrays.get(i);
List<SpanQuery> disjuncts = disjunctLists[positions[i]];
if (disjuncts == null) {
disjuncts = (disjunctLists[positions[i]] = new ArrayList<SpanQuery>(termArray.length));
++distinctPositions;
}
for (int j = 0; j < termArray.length; ++j) {
disjuncts.add(new SpanTermQuery(termArray[j]));
}
}
int positionGaps = 0;
int position = 0;
final SpanQuery[] clauses = new SpanQuery[distinctPositions];
for (int i = 0; i < disjunctLists.length; ++i) {
List<SpanQuery> disjuncts = disjunctLists[i];
if (disjuncts != null) {
clauses[position++] = new SpanOrQuery(disjuncts
.toArray(new SpanQuery[disjuncts.size()]));
} else {
++positionGaps;
}
}
final int slop = mpq.getSlop();
final boolean inorder = (slop == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
sp.setBoost(query.getBoost());
extractWeightedSpanTerms(terms, sp);
}
}
extractUnknownQuery(query, terms);
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
protected void extractUnknownQuery(Query query,
Map<String, WeightedSpanTerm> terms) throws IOException {
// for sub-classing to extract custom queries
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
protected void extractWeightedSpanTerms(Map<String,WeightedSpanTerm> terms, SpanQuery spanQuery) throws IOException {
Set<String> fieldNames;
if (fieldName == null) {
fieldNames = new HashSet<String>();
collectSpanQueryFields(spanQuery, fieldNames);
} else {
fieldNames = new HashSet<String>(1);
fieldNames.add(fieldName);
}
// To support the use of the default field name
if (defaultField != null) {
fieldNames.add(defaultField);
}
Map<String, SpanQuery> queries = new HashMap<String, SpanQuery>();
Set<Term> nonWeightedTerms = new HashSet<Term>();
final boolean mustRewriteQuery = mustRewriteQuery(spanQuery);
if (mustRewriteQuery) {
for (final String field : fieldNames) {
final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContextForField(field).reader());
queries.put(field, rewrittenQuery);
rewrittenQuery.extractTerms(nonWeightedTerms);
}
} else {
spanQuery.extractTerms(nonWeightedTerms);
}
List<PositionSpan> spanPositions = new ArrayList<PositionSpan>();
for (final String field : fieldNames) {
final SpanQuery q;
if (mustRewriteQuery) {
q = queries.get(field);
} else {
q = spanQuery;
}
AtomicReaderContext context = getLeafContextForField(field);
Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
TreeSet<Term> extractedTerms = new TreeSet<Term>();
q.extractTerms(extractedTerms);
for (Term term : extractedTerms) {
termContexts.put(term, TermContext.build(context, term, true));
}
Bits acceptDocs = context.reader().getLiveDocs();
final Spans spans = q.getSpans(context, acceptDocs, termContexts);
// collect span positions
while (spans.next()) {
spanPositions.add(new PositionSpan(spans.start(), spans.end() - 1));
}
}
if (spanPositions.size() == 0) {
// no spans found
return;
}
for (final Term queryTerm : nonWeightedTerms) {
if (fieldNameComparator(queryTerm.field())) {
WeightedSpanTerm weightedSpanTerm = terms.get(queryTerm.text());
if (weightedSpanTerm == null) {
weightedSpanTerm = new WeightedSpanTerm(spanQuery.getBoost(), queryTerm.text());
weightedSpanTerm.addPositionSpans(spanPositions);
weightedSpanTerm.positionSensitive = true;
terms.put(queryTerm.text(), weightedSpanTerm);
} else {
if (spanPositions.size() > 0) {
weightedSpanTerm.addPositionSpans(spanPositions);
}
}
}
}
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
protected void extractWeightedTerms(Map<String,WeightedSpanTerm> terms, Query query) throws IOException {
Set<Term> nonWeightedTerms = new HashSet<Term>();
query.extractTerms(nonWeightedTerms);
for (final Term queryTerm : nonWeightedTerms) {
if (fieldNameComparator(queryTerm.field())) {
WeightedSpanTerm weightedSpanTerm = new WeightedSpanTerm(query.getBoost(), queryTerm.text());
terms.put(queryTerm.text(), weightedSpanTerm);
}
}
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
protected AtomicReaderContext getLeafContextForField(String field) throws IOException {
if(wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) {
tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
cachedTokenStream = true;
}
AtomicReaderContext context = readers.get(field);
if (context == null) {
MemoryIndex indexer = new MemoryIndex();
indexer.addField(field, new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
tokenStream.reset();
IndexSearcher searcher = indexer.createSearcher();
// MEM index has only atomic ctx
context = (AtomicReaderContext) searcher.getTopReaderContext();
readers.put(field, context);
}
return context;
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
public Map<String,WeightedSpanTerm> getWeightedSpanTerms(Query query, TokenStream tokenStream)
throws IOException {
return getWeightedSpanTerms(query, tokenStream, null);
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
public Map<String,WeightedSpanTerm> getWeightedSpanTerms(Query query, TokenStream tokenStream,
String fieldName) throws IOException {
if (fieldName != null) {
this.fieldName = fieldName;
} else {
this.fieldName = null;
}
Map<String,WeightedSpanTerm> terms = new PositionCheckingMap<String>();
this.tokenStream = tokenStream;
try {
extract(query, terms);
} finally {
closeReaders();
}
return terms;
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
public Map<String,WeightedSpanTerm> getWeightedSpanTermsWithScores(Query query, TokenStream tokenStream, String fieldName,
IndexReader reader) throws IOException {
if (fieldName != null) {
this.fieldName = fieldName;
} else {
this.fieldName = null;
}
this.tokenStream = tokenStream;
Map<String,WeightedSpanTerm> terms = new PositionCheckingMap<String>();
extract(query, terms);
int totalNumDocs = reader.numDocs();
Set<String> weightedTerms = terms.keySet();
Iterator<String> it = weightedTerms.iterator();
try {
while (it.hasNext()) {
WeightedSpanTerm weightedSpanTerm = terms.get(it.next());
int docFreq = reader.docFreq(new Term(fieldName, weightedSpanTerm.term));
// docFreq counts deletes
if(totalNumDocs < docFreq) {
docFreq = totalNumDocs;
}
// IDF algorithm taken from DefaultSimilarity class
float idf = (float) (Math.log((float) totalNumDocs / (double) (docFreq + 1)) + 1.0);
weightedSpanTerm.weight *= idf;
}
} finally {
closeReaders();
}
return terms;
}
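// Hedged usage sketch (not part of the sources above): extracting per-term weights for a
// query so a custom formatter or debugging tool can inspect them. The query, the
// "contents" field and the sample token stream are assumptions for illustration.
import java.io.StringReader;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.WeightedSpanTerm;
import org.apache.lucene.search.highlight.WeightedSpanTermExtractor;
import org.apache.lucene.util.Version;

public class WeightedSpanTermSketch {
  public static void main(String[] args) throws Exception {
    Query query = new TermQuery(new Term("contents", "lucene")); // assumed field/term
    TokenStream ts = new StandardAnalyzer(Version.LUCENE_40)
        .tokenStream("contents", new StringReader("lucene in action"));
    Map<String, WeightedSpanTerm> terms =
        new WeightedSpanTermExtractor().getWeightedSpanTerms(query, ts, "contents");
    for (Map.Entry<String, WeightedSpanTerm> e : terms.entrySet()) {
      System.out.println(e.getKey() + " -> " + e.getValue().getWeight());
    }
  }
}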
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
public TokenStream init(TokenStream tokenStream) throws IOException {
position = -1;
termAtt = tokenStream.addAttribute(CharTermAttribute.class);
posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
if(!skipInitExtractor) {
if(fieldWeightedSpanTerms != null) {
fieldWeightedSpanTerms.clear();
}
return initExtractor(tokenStream);
}
return null;
}
// in lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
private TokenStream initExtractor(TokenStream tokenStream) throws IOException {
WeightedSpanTermExtractor qse = newTermExtractor(defaultField);
qse.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
qse.setExpandMultiTermQuery(expandMultiTermQuery);
qse.setWrapIfNotCachingTokenFilter(wrapToCaching);
if (reader == null) {
this.fieldWeightedSpanTerms = qse.getWeightedSpanTerms(query,
tokenStream, field);
} else {
this.fieldWeightedSpanTerms = qse.getWeightedSpanTermsWithScores(query,
tokenStream, field, reader);
}
if(qse.isCachedTokenStream()) {
return qse.getTokenStream();
}
return null;
}
// in lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
void flatten( Query sourceQuery, IndexReader reader, Collection<Query> flatQueries ) throws IOException{
if( sourceQuery instanceof BooleanQuery ){
BooleanQuery bq = (BooleanQuery)sourceQuery;
for( BooleanClause clause : bq.getClauses() ){
if( !clause.isProhibited() )
flatten( clause.getQuery(), reader, flatQueries );
}
}
else if( sourceQuery instanceof DisjunctionMaxQuery ){
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery;
for( Query query : dmq ){
flatten( query, reader, flatQueries );
}
}
else if( sourceQuery instanceof TermQuery ){
if( !flatQueries.contains( sourceQuery ) )
flatQueries.add( sourceQuery );
}
else if (sourceQuery instanceof MultiTermQuery && reader != null) {
MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone();
copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS));
BooleanQuery mtqTerms = (BooleanQuery) copy.rewrite(reader);
flatten(mtqTerms, reader, flatQueries);
}
else if( sourceQuery instanceof PhraseQuery ){
if( !flatQueries.contains( sourceQuery ) ){
PhraseQuery pq = (PhraseQuery)sourceQuery;
if( pq.getTerms().length > 1 )
flatQueries.add( pq );
else if( pq.getTerms().length == 1 ){
flatQueries.add( new TermQuery( pq.getTerms()[0] ) );
}
}
}
// else discard queries
}
// in lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
void saveTerms( Collection<Query> flatQueries, IndexReader reader ) throws IOException{
for( Query query : flatQueries ){
Set<String> termSet = getTermSet( query );
if( query instanceof TermQuery )
termSet.add( ((TermQuery)query).getTerm().text() );
else if( query instanceof PhraseQuery ){
for( Term term : ((PhraseQuery)query).getTerms() )
termSet.add( term.text() );
}
else if (query instanceof MultiTermQuery && reader != null) {
BooleanQuery mtqTerms = (BooleanQuery) query.rewrite(reader);
for (BooleanClause clause : mtqTerms.getClauses()) {
termSet.add (((TermQuery) clause.getQuery()).getTerm().text());
}
}
else
throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
}
}
// in lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java
void add( Query query, IndexReader reader ) throws IOException {
if( query instanceof TermQuery ){
addTerm( ((TermQuery)query).getTerm(), query.getBoost() );
}
else if( query instanceof PhraseQuery ){
PhraseQuery pq = (PhraseQuery)query;
Term[] terms = pq.getTerms();
Map<String, QueryPhraseMap> map = subMap;
QueryPhraseMap qpm = null;
for( Term term : terms ){
qpm = getOrNewMap( map, term.text() );
map = qpm.subMap;
}
qpm.markTerminal( pq.getSlop(), pq.getBoost() );
}
else
throw new RuntimeException( "query \"" + query.toString() + "\" must be flatten first." );
}
// in lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
public String createFragment( IndexReader reader, int docId,
String fieldName, FieldFragList fieldFragList ) throws IOException {
return createFragment( reader, docId, fieldName, fieldFragList,
preTags, postTags, NULL_ENCODER );
}
// in lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
public String[] createFragments( IndexReader reader, int docId,
String fieldName, FieldFragList fieldFragList, int maxNumFragments )
throws IOException {
return createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments,
preTags, postTags, NULL_ENCODER );
}
// in lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
public String createFragment( IndexReader reader, int docId,
String fieldName, FieldFragList fieldFragList, String[] preTags, String[] postTags,
Encoder encoder ) throws IOException {
String[] fragments = createFragments( reader, docId, fieldName, fieldFragList, 1,
preTags, postTags, encoder );
if( fragments == null || fragments.length == 0 ) return null;
return fragments[0];
}
// in lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
public String[] createFragments( IndexReader reader, int docId,
String fieldName, FieldFragList fieldFragList, int maxNumFragments,
String[] preTags, String[] postTags, Encoder encoder ) throws IOException {
if( maxNumFragments < 0 )
throw new IllegalArgumentException( "maxNumFragments(" + maxNumFragments + ") must be positive number." );
List<WeightedFragInfo> fragInfos = getWeightedFragInfoList( fieldFragList.getFragInfos() );
List<String> fragments = new ArrayList<String>( maxNumFragments );
Field[] values = getFields( reader, docId, fieldName );
if( values.length == 0 ) return null;
StringBuilder buffer = new StringBuilder();
int[] nextValueIndex = { 0 };
for( int n = 0; n < maxNumFragments && n < fragInfos.size(); n++ ){
WeightedFragInfo fragInfo = fragInfos.get( n );
fragments.add( makeFragment( buffer, nextValueIndex, values, fragInfo, preTags, postTags, encoder ) );
}
return fragments.toArray( new String[fragments.size()] );
}
// in lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
protected Field[] getFields( IndexReader reader, int docId, final String fieldName) throws IOException {
// according to javadoc, doc.getFields(fieldName) cannot be used with lazy loaded field???
final List<Field> fields = new ArrayList<Field>();
reader.document(docId, new StoredFieldVisitor() {
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.hasVectors());
fields.add(new Field(fieldInfo.name, value, ft));
}
@Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
return fieldInfo.name.equals(fieldName) ? Status.YES : Status.NO;
}
});
return fields.toArray(new Field[fields.size()]);
}
// in lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.hasVectors());
fields.add(new Field(fieldInfo.name, value, ft));
}
// in lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
@Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
return fieldInfo.name.equals(fieldName) ? Status.YES : Status.NO;
}
// in lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java
public FieldQuery getFieldQuery( Query query, IndexReader reader ) throws IOException {
return new FieldQuery( query, reader, phraseHighlight, fieldMatch );
}
// in lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java
public final String getBestFragment( final FieldQuery fieldQuery, IndexReader reader, int docId,
String fieldName, int fragCharSize ) throws IOException {
FieldFragList fieldFragList =
getFieldFragList( fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize );
return fragmentsBuilder.createFragment( reader, docId, fieldName, fieldFragList );
}
// in lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java
public final String[] getBestFragments( final FieldQuery fieldQuery, IndexReader reader, int docId,
String fieldName, int fragCharSize, int maxNumFragments ) throws IOException {
FieldFragList fieldFragList =
getFieldFragList( fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize );
return fragmentsBuilder.createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments );
}
// in lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java
public final String getBestFragment( final FieldQuery fieldQuery, IndexReader reader, int docId,
String fieldName, int fragCharSize,
FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder,
String[] preTags, String[] postTags, Encoder encoder ) throws IOException {
FieldFragList fieldFragList = getFieldFragList( fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize );
return fragmentsBuilder.createFragment( reader, docId, fieldName, fieldFragList, preTags, postTags, encoder );
}
// in lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java
public final String[] getBestFragments( final FieldQuery fieldQuery, IndexReader reader, int docId,
String fieldName, int fragCharSize, int maxNumFragments,
FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder,
String[] preTags, String[] postTags, Encoder encoder ) throws IOException {
FieldFragList fieldFragList =
getFieldFragList( fragListBuilder, fieldQuery, reader, docId, fieldName, fragCharSize );
return fragmentsBuilder.createFragments( reader, docId, fieldName, fieldFragList, maxNumFragments,
preTags, postTags, encoder );
}
// in lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FastVectorHighlighter.java
private FieldFragList getFieldFragList( FragListBuilder fragListBuilder,
final FieldQuery fieldQuery, IndexReader reader, int docId,
String fieldName, int fragCharSize ) throws IOException {
FieldTermStack fieldTermStack = new FieldTermStack( reader, docId, fieldName, fieldQuery );
FieldPhraseList fieldPhraseList = new FieldPhraseList( fieldTermStack, fieldQuery, phraseLimit );
return fragListBuilder.createFieldFragList( fieldPhraseList, fragCharSize );
}
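// Hedged usage sketch (not part of the sources above): the FastVectorHighlighter entry
// points above, run against a field that was indexed with term vectors plus positions and
// offsets. The reader, docId and the "contents" field are assumptions for illustration.
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.search.vectorhighlight.FieldQuery;

public class FastVectorHighlighterSketch {
  static String[] snippets(IndexReader reader, Query query, int docId) throws Exception {
    FastVectorHighlighter fvh = new FastVectorHighlighter(); // default phrase/field-match settings
    FieldQuery fieldQuery = fvh.getFieldQuery(query, reader);
    // up to 3 fragments of roughly 100 characters each from the "contents" field
    return fvh.getBestFragments(fieldQuery, reader, docId, "contents", 100, 3);
  }
}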
// in lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java
static void indexDocs(IndexWriter writer, File file)
throws IOException {
// do not try to index files that cannot be read
if (file.canRead()) {
if (file.isDirectory()) {
String[] files = file.list();
// an IO error could occur
if (files != null) {
for (int i = 0; i < files.length; i++) {
indexDocs(writer, new File(file, files[i]));
}
}
} else {
FileInputStream fis;
try {
fis = new FileInputStream(file);
} catch (FileNotFoundException fnfe) {
// at least on windows, some temporary files raise this exception with an "access denied" message
// checking if the file can be read doesn't help
return;
}
try {
// make a new, empty document
Document doc = new Document();
// Add the path of the file as a field named "path". Use a
// field that is indexed (i.e. searchable), but don't tokenize
// the field into separate words and don't index term frequency
// or positional information:
Field pathField = new Field("path", file.getPath(), StringField.TYPE_STORED);
doc.add(pathField);
// Add the last modified date of the file as a field named "modified".
// Use a LongField that is indexed (i.e. efficiently filterable with
// NumericRangeFilter). This indexes to milli-second resolution, which
// is often too fine. You could instead create a number based on
// year/month/day/hour/minutes/seconds, down to the resolution you require.
// For example the long value 2011021714 would mean
// February 17, 2011, 2-3 PM.
doc.add(new LongField("modified", file.lastModified()));
// Add the contents of the file to a field named "contents". Specify a Reader,
// so that the text of the file is tokenized and indexed, but not stored.
// Note that FileReader expects the file to be in UTF-8 encoding.
// If that's not the case searching for special characters will fail.
doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));
if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
// New index, so we just add the document (no old document can be there):
System.out.println("adding " + file);
writer.addDocument(doc);
} else {
// Existing index (an old copy of this document may have been indexed) so
// we use updateDocument instead to replace the old one matching the exact
// path, if present:
System.out.println("updating " + file);
writer.updateDocument(new Term("path", file.getPath()), doc);
}
} finally {
fis.close();
}
}
}
}
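// Hedged usage sketch (not part of the demo sources above): the IndexWriter setup that
// indexDocs() expects. The index and docs directories are assumptions for illustration,
// and the sketch sits in the org.apache.lucene.demo package because indexDocs() is
// package-private.
package org.apache.lucene.demo;

import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class IndexFilesDriverSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("index")); // assumed index directory
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40,
        new StandardAnalyzer(Version.LUCENE_40));
    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); // create a new index or add to an existing one
    IndexWriter writer = new IndexWriter(dir, iwc);
    // walk the docs tree and add/update one Document per file (see indexDocs above)
    IndexFiles.indexDocs(writer, new File("docs")); // assumed docs directory
    writer.close();
  }
}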
// in lucene/demo/src/java/org/apache/lucene/demo/SearchFiles.java
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query,
int hitsPerPage, boolean raw, boolean interactive) throws IOException {
// Collect enough docs to show 5 pages
TopDocs results = searcher.search(query, 5 * hitsPerPage);
ScoreDoc[] hits = results.scoreDocs;
int numTotalHits = results.totalHits;
System.out.println(numTotalHits + " total matching documents");
int start = 0;
int end = Math.min(numTotalHits, hitsPerPage);
while (true) {
if (end > hits.length) {
System.out.println("Only results 1 - " + hits.length +" of " + numTotalHits + " total matching documents collected.");
System.out.println("Collect more (y/n) ?");
String line = in.readLine();
if (line.length() == 0 || line.charAt(0) == 'n') {
break;
}
hits = searcher.search(query, numTotalHits).scoreDocs;
}
end = Math.min(hits.length, start + hitsPerPage);
for (int i = start; i < end; i++) {
if (raw) { // output raw format
System.out.println("doc="+hits[i].doc+" score="+hits[i].score);
continue;
}
Document doc = searcher.doc(hits[i].doc);
String path = doc.get("path");
if (path != null) {
System.out.println((i+1) + ". " + path);
String title = doc.get("title");
if (title != null) {
System.out.println(" Title: " + doc.get("title"));
}
} else {
System.out.println((i+1) + ". " + "No path for this document");
}
}
if (!interactive || end == 0) {
break;
}
if (numTotalHits >= end) {
boolean quit = false;
while (true) {
System.out.print("Press ");
if (start - hitsPerPage >= 0) {
System.out.print("(p)revious page, ");
}
if (start + hitsPerPage < numTotalHits) {
System.out.print("(n)ext page, ");
}
System.out.println("(q)uit or enter number to jump to a page.");
String line = in.readLine();
if (line.length() == 0 || line.charAt(0)=='q') {
quit = true;
break;
}
if (line.charAt(0) == 'p') {
start = Math.max(0, start - hitsPerPage);
break;
} else if (line.charAt(0) == 'n') {
if (start + hitsPerPage < numTotalHits) {
start+=hitsPerPage;
}
break;
} else {
int page = Integer.parseInt(line);
if ((page - 1) * hitsPerPage < numTotalHits) {
start = (page - 1) * hitsPerPage;
break;
} else {
System.out.println("No such page");
}
}
}
if (quit) break;
end = Math.min(numTotalHits, start + hitsPerPage);
}
}
}
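// Hedged usage sketch (not part of the demo sources above): wiring a reader, analyzer and
// QueryParser to doPagingSearch(). The index path, the "contents" field and the query text
// are assumptions for illustration.
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.demo.SearchFiles;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class SearchFilesDriverSketch {
  public static void main(String[] args) throws Exception {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index"))); // assumed
    IndexSearcher searcher = new IndexSearcher(reader);
    QueryParser parser = new QueryParser(Version.LUCENE_40, "contents",
        new StandardAnalyzer(Version.LUCENE_40));
    Query query = parser.parse("lucene"); // assumed query text
    BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    // page through hits interactively, 10 per page (see doPagingSearch above)
    SearchFiles.doPagingSearch(in, searcher, query, 10, false, true);
    reader.close();
  }
}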
// in lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java
@Override
protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
//Take all completed form fields and add to a Properties object
Properties completedFormFields = new Properties();
Enumeration<?> pNames = request.getParameterNames();
while (pNames.hasMoreElements()) {
String propName = (String) pNames.nextElement();
String value = request.getParameter(propName);
if ((value != null) && (value.trim().length() > 0)) {
completedFormFields.setProperty(propName, value);
}
}
try {
//Create an XML query by populating template with given user criteria
org.w3c.dom.Document xmlQuery = queryTemplateManager.getQueryAsDOM(completedFormFields);
//Parse the XML to produce a Lucene query
Query query = xmlParser.getQuery(xmlQuery.getDocumentElement());
//Run the query
TopDocs topDocs = searcher.search(query, 10);
//and package the results and forward to JSP
if (topDocs != null) {
ScoreDoc[] sd = topDocs.scoreDocs;
Document[] results = new Document[sd.length];
for (int i = 0; i < results.length; i++) {
results[i] = searcher.doc(sd[i].doc);
request.setAttribute("results", results);
}
}
RequestDispatcher dispatcher = getServletContext().getRequestDispatcher("/index.jsp");
dispatcher.forward(request, response);
}
catch (Exception e) {
throw new ServletException("Error processing query", e);
}
}
// in lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java
private void openExampleIndex() throws CorruptIndexException, IOException {
//Create a RAM-based index from our test data file
RAMDirectory rd = new RAMDirectory();
IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
IndexWriter writer = new IndexWriter(rd, iwConfig);
InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv");
BufferedReader br = new BufferedReader(new InputStreamReader(dataIn));
String line = br.readLine();
final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED);
textNoNorms.setOmitNorms(true);
while (line != null) {
line = line.trim();
if (line.length() > 0) {
//parse row and create a document
StringTokenizer st = new StringTokenizer(line, "\t");
Document doc = new Document();
doc.add(new Field("location", st.nextToken(), textNoNorms));
doc.add(new Field("salary", st.nextToken(), textNoNorms));
doc.add(new Field("type", st.nextToken(), textNoNorms));
doc.add(new Field("description", st.nextToken(), textNoNorms));
writer.addDocument(doc);
}
line = br.readLine();
}
writer.close();
//open searcher
// this example never closes its reader!
IndexReader reader = DirectoryReader.open(rd);
searcher = new IndexSearcher(reader);
}
// in lucene/core/src/java/org/apache/lucene/analysis/TokenFilter.java
@Override
public void end() throws IOException {
input.end();
}
// in lucene/core/src/java/org/apache/lucene/analysis/TokenFilter.java
@Override
public void close() throws IOException {
input.close();
}
// in lucene/core/src/java/org/apache/lucene/analysis/TokenFilter.java
@Override
public void reset() throws IOException {
input.reset();
}
// in lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
public void end() throws IOException {
// do nothing by default
}
// in lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
public void reset() throws IOException {}
// in lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
public void close() throws IOException {}
// in lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java
@Override
public void close() throws IOException {
if (input != null) {
input.close();
// LUCENE-2387: don't hold onto Reader after close, so
// GC can reclaim
input = null;
}
}
// in lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java
public void reset(Reader input) throws IOException {
assert input != null: "input must not be null";
this.input = input;
}
// in lucene/core/src/java/org/apache/lucene/analysis/CharReader.java
@Override
public void close() throws IOException {
input.close();
}
// in lucene/core/src/java/org/apache/lucene/analysis/CharReader.java
@Override
public int read(char[] cbuf, int off, int len) throws IOException {
return input.read(cbuf, off, len);
}
// in lucene/core/src/java/org/apache/lucene/analysis/CharReader.java
@Override
public int read() throws IOException {
return input.read();
}
// in lucene/core/src/java/org/apache/lucene/analysis/CharReader.java
@Override
public void mark( int readAheadLimit ) throws IOException {
input.mark(readAheadLimit);
}
// in lucene/core/src/java/org/apache/lucene/analysis/CharReader.java
@Override
public void reset() throws IOException {
input.reset();
}
// in lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
public final TokenStream tokenStream(final String fieldName,
final Reader reader) throws IOException {
TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName);
final Reader r = initReader(reader);
if (components == null) {
components = createComponents(fieldName, r);
reuseStrategy.setReusableComponents(fieldName, components);
} else {
components.reset(r);
}
return components.getTokenStream();
}
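// Hedged usage sketch (not part of the sources above): the reset/incrementToken/end/close
// contract that consumers of Analyzer.tokenStream() are expected to follow. The analyzer,
// field name and sample text are assumptions for illustration.
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class ConsumeTokenStreamSketch {
  public static void main(String[] args) throws Exception {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
    TokenStream ts = analyzer.tokenStream("contents", new StringReader("The Quick Brown Fox"));
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                 // must be called before the first incrementToken()
    while (ts.incrementToken()) {
      System.out.println(termAtt.toString());
    }
    ts.end();                   // records the final offset state
    ts.close();
  }
}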
// in lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
protected void reset(final Reader reader) throws IOException {
source.reset(reader);
}
// in lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java
@Override
public final boolean incrementToken() throws IOException {
if (cache == null) {
// fill cache lazily
cache = new LinkedList<AttributeSource.State>();
fillCache();
iterator = cache.iterator();
}
if (!iterator.hasNext()) {
// the cache is exhausted, return false
return false;
}
// Since the TokenFilter can be reset, the tokens need to be preserved as immutable.
restoreState(iterator.next());
return true;
}
// in lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java
@Override
public final void end() throws IOException {
if (finalState != null) {
restoreState(finalState);
}
}
// in lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java
@Override
public void reset() throws IOException {
if(cache != null) {
iterator = cache.iterator();
}
}
// in lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java
private void fillCache() throws IOException {
while(input.incrementToken()) {
cache.add(captureState());
}
// capture final state
input.end();
finalState = captureState();
}
// in lucene/core/src/java/org/apache/lucene/document/DocumentStoredFieldVisitor.java
@Override
public void binaryField(FieldInfo fieldInfo, byte[] value, int offset, int length) throws IOException {
doc.add(new StoredField(fieldInfo.name, value));
}
// in lucene/core/src/java/org/apache/lucene/document/DocumentStoredFieldVisitor.java
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
final FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.hasVectors());
ft.setIndexed(fieldInfo.isIndexed());
ft.setOmitNorms(fieldInfo.omitsNorms());
ft.setIndexOptions(fieldInfo.getIndexOptions());
doc.add(new Field(fieldInfo.name, value, ft));
}
// in lucene/core/src/java/org/apache/lucene/document/DocumentStoredFieldVisitor.java
@Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
return fieldsToAdd == null || fieldsToAdd.contains(fieldInfo.name) ? Status.YES : Status.NO;
}
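// Hedged usage sketch (not part of the sources above): loading only selected stored fields
// of a document through the visitor above, skipping everything else. The reader, docId and
// the "title"/"path" field names are assumptions for illustration.
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.index.IndexReader;

public class StoredFieldVisitorSketch {
  static Document loadTitleAndPath(IndexReader reader, int docId) throws IOException {
    // only "title" and "path" are materialized; all other stored fields are skipped
    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("title", "path");
    reader.document(docId, visitor);
    return visitor.getDocument();
  }
}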
// in lucene/core/src/java/org/apache/lucene/document/Field.java
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
if (!fieldType().indexed()) {
return null;
}
final NumericType numericType = fieldType().numericType();
if (numericType != null) {
if (numericTokenStream == null) {
// lazy init the TokenStream as it is heavy to instantiate
// (attributes,...) if not needed (stored field loading)
numericTokenStream = new NumericTokenStream(type.numericPrecisionStep());
// initialize value in TokenStream
final Number val = (Number) fieldsData;
switch (numericType) {
case INT:
numericTokenStream.setIntValue(val.intValue());
break;
case LONG:
numericTokenStream.setLongValue(val.longValue());
break;
case FLOAT:
numericTokenStream.setFloatValue(val.floatValue());
break;
case DOUBLE:
numericTokenStream.setDoubleValue(val.doubleValue());
break;
default:
assert false : "Should never get here";
}
} else {
// OK -- previously cached and we already updated if
// setters were called.
}
return numericTokenStream;
}
if (!fieldType().tokenized()) {
if (stringValue() == null) {
throw new IllegalArgumentException("Non-Tokenized Fields must have a String value");
}
return new TokenStream() {
CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
boolean used;
@Override
public boolean incrementToken() throws IOException {
if (used) {
return false;
}
termAttribute.setEmpty().append(stringValue());
offsetAttribute.setOffset(0, stringValue().length());
used = true;
return true;
}
@Override
public void reset() throws IOException {
used = false;
}
};
}
if (tokenStream != null) {
return tokenStream;
} else if (readerValue() != null) {
return analyzer.tokenStream(name(), readerValue());
} else if (stringValue() != null) {
return analyzer.tokenStream(name(), new StringReader(stringValue()));
}
throw new IllegalArgumentException("Field must have either TokenStream, String, Reader or Number value");
}
// in lucene/core/src/java/org/apache/lucene/document/Field.java
Override
public boolean incrementToken() throws IOException {
if (used) {
return false;
}
termAttribute.setEmpty().append(stringValue());
offsetAttribute.setOffset(0, stringValue().length());
used = true;
return true;
}
// in lucene/core/src/java/org/apache/lucene/document/Field.java
Override
public void reset() throws IOException {
used = false;
}
// in lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
Override
public Weight createWeight(final IndexSearcher searcher) throws IOException {
final Weight weight = query.createWeight (searcher);
return new Weight() {
@Override
public boolean scoresDocsOutOfOrder() {
// TODO: Support out-of-order scoring!
// For now we return false here, as we always get the scorer in order
return false;
}
@Override
public float getValueForNormalization() throws IOException {
return weight.getValueForNormalization() * getBoost() * getBoost(); // boost sub-weight
}
@Override
public void normalize (float norm, float topLevelBoost) {
weight.normalize(norm, topLevelBoost * getBoost()); // incorporate boost
}
@Override
public Explanation explain (AtomicReaderContext ir, int i) throws IOException {
Explanation inner = weight.explain (ir, i);
Filter f = FilteredQuery.this.filter;
DocIdSet docIdSet = f.getDocIdSet(ir, ir.reader().getLiveDocs());
DocIdSetIterator docIdSetIterator = docIdSet == null ? DocIdSet.EMPTY_DOCIDSET.iterator() : docIdSet.iterator();
if (docIdSetIterator == null) {
docIdSetIterator = DocIdSet.EMPTY_DOCIDSET.iterator();
}
if (docIdSetIterator.advance(i) == i) {
return inner;
} else {
Explanation result = new Explanation
(0.0f, "failure to match filter: " + f.toString());
result.addDetail(inner);
return result;
}
}
// return this query
@Override
public Query getQuery() { return FilteredQuery.this; }
// return a filtering scorer
@Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
assert filter != null;
final DocIdSet filterDocIdSet = filter.getDocIdSet(context, acceptDocs);
if (filterDocIdSet == null) {
// this means the filter does not accept any documents.
return null;
}
final DocIdSetIterator filterIter = filterDocIdSet.iterator();
if (filterIter == null) {
// this means the filter does not accept any documents.
return null;
}
final int firstFilterDoc = filterIter.nextDoc();
if (firstFilterDoc == DocIdSetIterator.NO_MORE_DOCS) {
return null;
}
final Bits filterAcceptDocs = filterDocIdSet.bits();
final boolean useRandomAccess = (filterAcceptDocs != null && FilteredQuery.this.useRandomAccess(filterAcceptDocs, firstFilterDoc));
if (useRandomAccess) {
// if we are using random access, we return the inner scorer, just with other acceptDocs
// TODO, replace this by when BooleanWeight is fixed to be consistent with its scorer implementations:
// return weight.scorer(context, scoreDocsInOrder, topScorer, filterAcceptDocs);
return weight.scorer(context, true, topScorer, filterAcceptDocs);
} else {
assert firstFilterDoc > -1;
// we are gonna advance() this scorer, so we set inorder=true/toplevel=false
// we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice
final Scorer scorer = weight.scorer(context, true, false, null);
return (scorer == null) ? null : new Scorer(this) {
private int scorerDoc = -1, filterDoc = firstFilterDoc;
// optimization: we are topScorer and collect directly using short-circuited algo
@Override
public void score(Collector collector) throws IOException {
int filterDoc = firstFilterDoc;
int scorerDoc = scorer.advance(filterDoc);
// the normalization trick already applies the boost of this query,
// so we can use the wrapped scorer directly:
collector.setScorer(scorer);
for (;;) {
if (scorerDoc == filterDoc) {
// Check if scorer has exhausted, only before collecting.
if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
collector.collect(scorerDoc);
filterDoc = filterIter.nextDoc();
scorerDoc = scorer.advance(filterDoc);
} else if (scorerDoc > filterDoc) {
filterDoc = filterIter.advance(scorerDoc);
} else {
scorerDoc = scorer.advance(filterDoc);
}
}
}
private int advanceToNextCommonDoc() throws IOException {
for (;;) {
if (scorerDoc < filterDoc) {
scorerDoc = scorer.advance(filterDoc);
} else if (scorerDoc == filterDoc) {
return scorerDoc;
} else {
filterDoc = filterIter.advance(scorerDoc);
}
}
}
@Override
public int nextDoc() throws IOException {
// don't go to next doc on first call
// (because filterIter is already on first doc):
if (scorerDoc != -1) {
filterDoc = filterIter.nextDoc();
}
return advanceToNextCommonDoc();
}
@Override
public int advance(int target) throws IOException {
if (target > filterDoc) {
filterDoc = filterIter.advance(target);
}
return advanceToNextCommonDoc();
}
@Override
public int docID() {
return scorerDoc;
}
@Override
public float score() throws IOException {
return scorer.score();
}
};
}
}
};
}
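// ---- illustrative sketch, not part of the Lucene/Solr sources ----
// The non-random-access branch above is a leapfrog intersection: the scorer and the
// filter iterator repeatedly advance() to each other's docID until they agree. The same
// pattern over two plain DocIdSetIterators (the print is a stand-in for collecting):
static void leapfrog(DocIdSetIterator a, DocIdSetIterator b) throws IOException {
  int docA = a.nextDoc();
  int docB = b.nextDoc();
  while (docA != DocIdSetIterator.NO_MORE_DOCS && docB != DocIdSetIterator.NO_MORE_DOCS) {
    if (docA == docB) {
      System.out.println("match: " + docA);  // both iterators agree on this doc
      docA = a.nextDoc();
    } else if (docA < docB) {
      docA = a.advance(docB);                // jump a forward to b's position
    } else {
      docB = b.advance(docA);                // jump b forward to a's position
    }
  }
}
// ---- end sketch ----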
// in lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
Override
public float getValueForNormalization() throws IOException {
return weight.getValueForNormalization() * getBoost() * getBoost(); // boost sub-weight
}
// in lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
Override
public Explanation explain (AtomicReaderContext ir, int i) throws IOException {
Explanation inner = weight.explain (ir, i);
Filter f = FilteredQuery.this.filter;
DocIdSet docIdSet = f.getDocIdSet(ir, ir.reader().getLiveDocs());
DocIdSetIterator docIdSetIterator = docIdSet == null ? DocIdSet.EMPTY_DOCIDSET.iterator() : docIdSet.iterator();
if (docIdSetIterator == null) {
docIdSetIterator = DocIdSet.EMPTY_DOCIDSET.iterator();
}
if (docIdSetIterator.advance(i) == i) {
return inner;
} else {
Explanation result = new Explanation
(0.0f, "failure to match filter: " + f.toString());
result.addDetail(inner);
return result;
}
}
// in lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
assert filter != null;
final DocIdSet filterDocIdSet = filter.getDocIdSet(context, acceptDocs);
if (filterDocIdSet == null) {
// this means the filter does not accept any documents.
return null;
}
final DocIdSetIterator filterIter = filterDocIdSet.iterator();
if (filterIter == null) {
// this means the filter does not accept any documents.
return null;
}
final int firstFilterDoc = filterIter.nextDoc();
if (firstFilterDoc == DocIdSetIterator.NO_MORE_DOCS) {
return null;
}
final Bits filterAcceptDocs = filterDocIdSet.bits();
final boolean useRandomAccess = (filterAcceptDocs != null && FilteredQuery.this.useRandomAccess(filterAcceptDocs, firstFilterDoc));
if (useRandomAccess) {
// if we are using random access, we return the inner scorer, just with other acceptDocs
// TODO, replace this by when BooleanWeight is fixed to be consistent with its scorer implementations:
// return weight.scorer(context, scoreDocsInOrder, topScorer, filterAcceptDocs);
return weight.scorer(context, true, topScorer, filterAcceptDocs);
} else {
assert firstFilterDoc > -1;
// we are gonna advance() this scorer, so we set inorder=true/toplevel=false
// we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice
final Scorer scorer = weight.scorer(context, true, false, null);
return (scorer == null) ? null : new Scorer(this) {
private int scorerDoc = -1, filterDoc = firstFilterDoc;
// optimization: we are topScorer and collect directly using short-circuited algo
@Override
public void score(Collector collector) throws IOException {
int filterDoc = firstFilterDoc;
int scorerDoc = scorer.advance(filterDoc);
// the normalization trick already applies the boost of this query,
// so we can use the wrapped scorer directly:
collector.setScorer(scorer);
for (;;) {
if (scorerDoc == filterDoc) {
// Check if scorer has exhausted, only before collecting.
if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
collector.collect(scorerDoc);
filterDoc = filterIter.nextDoc();
scorerDoc = scorer.advance(filterDoc);
} else if (scorerDoc > filterDoc) {
filterDoc = filterIter.advance(scorerDoc);
} else {
scorerDoc = scorer.advance(filterDoc);
}
}
}
private int advanceToNextCommonDoc() throws IOException {
for (;;) {
if (scorerDoc < filterDoc) {
scorerDoc = scorer.advance(filterDoc);
} else if (scorerDoc == filterDoc) {
return scorerDoc;
} else {
filterDoc = filterIter.advance(scorerDoc);
}
}
}
@Override
public int nextDoc() throws IOException {
// don't go to next doc on first call
// (because filterIter is already on first doc):
if (scorerDoc != -1) {
filterDoc = filterIter.nextDoc();
}
return advanceToNextCommonDoc();
}
@Override
public int advance(int target) throws IOException {
if (target > filterDoc) {
filterDoc = filterIter.advance(target);
}
return advanceToNextCommonDoc();
}
@Override
public int docID() {
return scorerDoc;
}
@Override
public float score() throws IOException {
return scorer.score();
}
};
}
}
// in lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
Override
public void score(Collector collector) throws IOException {
int filterDoc = firstFilterDoc;
int scorerDoc = scorer.advance(filterDoc);
// the normalization trick already applies the boost of this query,
// so we can use the wrapped scorer directly:
collector.setScorer(scorer);
for (;;) {
if (scorerDoc == filterDoc) {
// Check if scorer has exhausted, only before collecting.
if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
collector.collect(scorerDoc);
filterDoc = filterIter.nextDoc();
scorerDoc = scorer.advance(filterDoc);
} else if (scorerDoc > filterDoc) {
filterDoc = filterIter.advance(scorerDoc);
} else {
scorerDoc = scorer.advance(filterDoc);
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
private int advanceToNextCommonDoc() throws IOException {
for (;;) {
if (scorerDoc < filterDoc) {
scorerDoc = scorer.advance(filterDoc);
} else if (scorerDoc == filterDoc) {
return scorerDoc;
} else {
filterDoc = filterIter.advance(scorerDoc);
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
Override
public int nextDoc() throws IOException {
// don't go to next doc on first call
// (because filterIter is already on first doc):
if (scorerDoc != -1) {
filterDoc = filterIter.nextDoc();
}
return advanceToNextCommonDoc();
}
// in lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
Override
public int advance(int target) throws IOException {
if (target > filterDoc) {
filterDoc = filterIter.advance(target);
}
return advanceToNextCommonDoc();
}
// in lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
Override
public float score() throws IOException {
return scorer.score();
}
// in lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
final Query queryRewritten = query.rewrite(reader);
if (queryRewritten instanceof MatchAllDocsQuery) {
// Special case: If the query is a MatchAllDocsQuery, we only
// return a CSQ(filter).
final Query rewritten = new ConstantScoreQuery(filter);
// Combine boost of MatchAllDocsQuery and the wrapped rewritten query:
rewritten.setBoost(this.getBoost() * queryRewritten.getBoost());
return rewritten;
}
if (queryRewritten != query) {
// rewrite to a new FilteredQuery wrapping the rewritten query
final Query rewritten = new FilteredQuery(queryRewritten, filter);
rewritten.setBoost(this.getBoost());
return rewritten;
} else {
// nothing to rewrite, we are done!
return this;
}
}
// in lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java
Override
public boolean score(Collector collector, int max, int firstDocID) throws IOException {
return scorer.score(collector, max, firstDocID);
}
// in lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java
Override
public float score() throws IOException {
int doc = scorer.docID();
if (doc != curDoc) {
curScore = scorer.score();
curDoc = doc;
}
return curScore;
}
// in lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java
Override
public int nextDoc() throws IOException {
return scorer.nextDoc();
}
// in lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java
Override
public void score(Collector collector) throws IOException {
scorer.score(collector);
}
// in lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java
Override
public int advance(int target) throws IOException {
return scorer.advance(target);
}
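// ---- illustrative sketch, not part of the Lucene/Solr sources ----
// ScoreCachingWrappingScorer is intended for collectors that call score() more than once
// per document (e.g. to track both a running sum and a maximum); the wrapper above only
// recomputes when docID() changes. A collector would typically install it in setScorer()
// like this (the surrounding collector class is assumed):
@Override
public void setScorer(Scorer scorer) throws IOException {
  // wrap once per segment; repeated score() calls for the same doc hit the cache
  this.scorer = new ScoreCachingWrappingScorer(scorer);
}
// ---- end sketch ----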
// in lucene/core/src/java/org/apache/lucene/search/SearcherFactory.java
public IndexSearcher newSearcher(IndexReader reader) throws IOException {
return new IndexSearcher(reader);
}
// in lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
Override
public int nextDoc() throws IOException {
return reqScorer.nextDoc();
}
// in lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
Override
public int advance(int target) throws IOException {
return reqScorer.advance(target);
}
// in lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
Override
public float score() throws IOException {
int curDoc = reqScorer.docID();
float reqScore = reqScorer.score();
if (optScorer == null) {
return reqScore;
}
int optScorerDoc = optScorer.docID();
if (optScorerDoc < curDoc && (optScorerDoc = optScorer.advance(curDoc)) == NO_MORE_DOCS) {
optScorer = null;
return reqScore;
}
return optScorerDoc == curDoc ? reqScore + optScorer.score() : reqScore;
}
// in lucene/core/src/java/org/apache/lucene/search/ReferenceManager.java
private synchronized void swapReference(G newReference) throws IOException {
ensureOpen();
final G oldReference = current;
current = newReference;
release(oldReference);
}
// in lucene/core/src/java/org/apache/lucene/search/ReferenceManager.java
public final synchronized void close() throws IOException {
if (current != null) {
// make sure we can call this more than once
// closeable javadoc says:
// if this is already closed then invoking this method has no effect.
swapReference(null);
afterClose();
}
}
// in lucene/core/src/java/org/apache/lucene/search/ReferenceManager.java
protected void afterClose() throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/search/ReferenceManager.java
private void doMaybeRefresh() throws IOException {
// it's ok to call lock() here (blocking) because we're supposed to get here
// from either maybeRefresh() or maybeRefreshBlocking(), after the lock has
// already been obtained. Doing that protects us from an accidental bug
// where this method will be called outside the scope of refreshLock.
// Per ReentrantLock's javadoc, calling lock() by the same thread more than
// once is ok, as long as unlock() is called a matching number of times.
refreshLock.lock();
try {
final G reference = acquire();
try {
G newReference = refreshIfNeeded(reference);
if (newReference != null) {
assert newReference != reference : "refreshIfNeeded should return null if refresh wasn't needed";
boolean success = false;
try {
swapReference(newReference);
success = true;
} finally {
if (!success) {
release(newReference);
}
}
}
} finally {
release(reference);
}
afterRefresh();
} finally {
refreshLock.unlock();
}
}
// in lucene/core/src/java/org/apache/lucene/search/ReferenceManager.java
public final boolean maybeRefresh() throws IOException {
ensureOpen();
// Ensure only 1 thread does reopen at once; other threads just return immediately:
final boolean doTryRefresh = refreshLock.tryLock();
if (doTryRefresh) {
try {
doMaybeRefresh();
} finally {
refreshLock.unlock();
}
}
return doTryRefresh;
}
// in lucene/core/src/java/org/apache/lucene/search/ReferenceManager.java
public final void maybeRefreshBlocking() throws IOException, InterruptedException {
ensureOpen();
// Ensure only 1 thread does reopen at once
refreshLock.lock();
try {
doMaybeRefresh();
} finally {
refreshLock.unlock();
}
}
// in lucene/core/src/java/org/apache/lucene/search/ReferenceManager.java
protected void afterRefresh() throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/search/ReferenceManager.java
public final void release(G reference) throws IOException {
assert reference != null;
decRef(reference);
}
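// ---- illustrative sketch, not part of the Lucene/Solr sources ----
// Typical acquire/maybeRefresh/release cycle against SearcherManager, the most common
// ReferenceManager subclass. The Directory argument is an assumption for the example.
static void searchAndRefresh(Directory dir) throws IOException {
  SearcherManager mgr = new SearcherManager(dir, new SearcherFactory());
  IndexSearcher searcher = mgr.acquire();        // incRef'd reference
  try {
    // run queries against 'searcher' here
  } finally {
    mgr.release(searcher);                       // decRef; never use it afterwards
  }
  mgr.maybeRefresh();                            // non-blocking; see maybeRefresh() above
  mgr.close();
}
// ---- end sketch ----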
// in lucene/core/src/java/org/apache/lucene/search/Query.java
public Weight createWeight(IndexSearcher searcher) throws IOException {
throw new UnsupportedOperationException("Query " + this + " does not implement createWeight");
}
// in lucene/core/src/java/org/apache/lucene/search/Query.java
public Query rewrite(IndexReader reader) throws IOException {
return this;
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
if (query != null) {
Query rewritten = query.rewrite(reader);
if (rewritten != query) {
rewritten = new ConstantScoreQuery(rewritten);
rewritten.setBoost(this.getBoost());
return rewritten;
}
}
return this;
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
Override
public float getValueForNormalization() throws IOException {
// we calculate sumOfSquaredWeights of the inner weight, but ignore it (just to initialize everything)
if (innerWeight != null) innerWeight.getValueForNormalization();
queryWeight = getBoost();
return queryWeight * queryWeight;
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, final Bits acceptDocs) throws IOException {
final DocIdSetIterator disi;
if (filter != null) {
assert query == null;
final DocIdSet dis = filter.getDocIdSet(context, acceptDocs);
if (dis == null) {
return null;
}
disi = dis.iterator();
} else {
assert query != null && innerWeight != null;
disi = innerWeight.scorer(context, scoreDocsInOrder, topScorer, acceptDocs);
}
if (disi == null) {
return null;
}
return new ConstantScorer(disi, this, queryWeight);
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
final Scorer cs = scorer(context, true, false, context.reader().getLiveDocs());
final boolean exists = (cs != null && cs.advance(doc) == doc);
final ComplexExplanation result = new ComplexExplanation();
if (exists) {
result.setDescription(ConstantScoreQuery.this.toString() + ", product of:");
result.setValue(queryWeight);
result.setMatch(Boolean.TRUE);
result.addDetail(new Explanation(getBoost(), "boost"));
result.addDetail(new Explanation(queryNorm, "queryNorm"));
} else {
result.setDescription(ConstantScoreQuery.this.toString() + " doesn't match id " + doc);
result.setValue(0);
result.setMatch(Boolean.FALSE);
}
return result;
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
Override
public int nextDoc() throws IOException {
return docIdSetIterator.nextDoc();
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
Override
public float score() throws IOException {
return theScore;
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
Override
public int advance(int target) throws IOException {
return docIdSetIterator.advance(target);
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
private Collector wrapCollector(final Collector collector) {
return new Collector() {
@Override
public void setScorer(Scorer scorer) throws IOException {
// we must wrap again here, but using the scorer passed in as parameter:
collector.setScorer(new ConstantScorer(scorer, ConstantScorer.this.weight, ConstantScorer.this.theScore));
}
@Override
public void collect(int doc) throws IOException {
collector.collect(doc);
}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
collector.setNextReader(context);
}
@Override
public boolean acceptsDocsOutOfOrder() {
return collector.acceptsDocsOutOfOrder();
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
Override
public void setScorer(Scorer scorer) throws IOException {
// we must wrap again here, but using the scorer passed in as parameter:
collector.setScorer(new ConstantScorer(scorer, ConstantScorer.this.weight, ConstantScorer.this.theScore));
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
Override
public void collect(int doc) throws IOException {
collector.collect(doc);
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
Override
public void setNextReader(AtomicReaderContext context) throws IOException {
collector.setNextReader(context);
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
Override
public void score(Collector collector) throws IOException {
if (docIdSetIterator instanceof Scorer) {
((Scorer) docIdSetIterator).score(wrapCollector(collector));
} else {
super.score(collector);
}
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
Override
public boolean score(Collector collector, int max, int firstDocID) throws IOException {
if (docIdSetIterator instanceof Scorer) {
return ((Scorer) docIdSetIterator).score(wrapCollector(collector), max, firstDocID);
} else {
return super.score(collector, max, firstDocID);
}
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new ConstantScoreQuery.ConstantWeight(searcher);
}
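// ---- illustrative sketch, not part of the Lucene/Solr sources ----
// ConstantScoreQuery is usually built around a Filter (or another Query) so every match
// gets the same score, namely the boost; the field and term here are assumptions.
static Query constantScoredBooks() {
  Filter filter = new QueryWrapperFilter(new TermQuery(new Term("category", "book")));
  Query q = new ConstantScoreQuery(filter);
  q.setBoost(2.0f);  // every matching document gets exactly this score
  return q;
}
// ---- end sketch ----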
// in lucene/core/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java
Override
public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
ExactSimScorer subScorers[] = new ExactSimScorer[sims.length];
for (int i = 0; i < subScorers.length; i++) {
subScorers[i] = sims[i].exactSimScorer(((MultiStats)stats).subStats[i], context);
}
return new MultiExactDocScorer(subScorers);
}
// in lucene/core/src/java/org/apache/lucene/search/similarities/MultiSimilarity.java
Override
public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
SloppySimScorer subScorers[] = new SloppySimScorer[sims.length];
for (int i = 0; i < subScorers.length; i++) {
subScorers[i] = sims[i].sloppySimScorer(((MultiStats)stats).subStats[i], context);
}
return new MultiSloppyDocScorer(subScorers);
}
// in lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
Override
public ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
if (stats instanceof MultiSimilarity.MultiStats) {
// a multi term query (e.g. phrase). return the summation,
// scoring almost as if it were boolean query
SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
ExactSimScorer subScorers[] = new ExactSimScorer[subStats.length];
for (int i = 0; i < subScorers.length; i++) {
BasicStats basicstats = (BasicStats) subStats[i];
subScorers[i] = new BasicExactDocScorer(basicstats, context.reader().normValues(basicstats.field));
}
return new MultiSimilarity.MultiExactDocScorer(subScorers);
} else {
BasicStats basicstats = (BasicStats) stats;
return new BasicExactDocScorer(basicstats, context.reader().normValues(basicstats.field));
}
}
// in lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java
Override
public SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
if (stats instanceof MultiSimilarity.MultiStats) {
// a multi term query (e.g. phrase). return the summation,
// scoring almost as if it were boolean query
SimWeight subStats[] = ((MultiSimilarity.MultiStats) stats).subStats;
SloppySimScorer subScorers[] = new SloppySimScorer[subStats.length];
for (int i = 0; i < subScorers.length; i++) {
BasicStats basicstats = (BasicStats) subStats[i];
subScorers[i] = new BasicSloppyDocScorer(basicstats, context.reader().normValues(basicstats.field));
}
return new MultiSimilarity.MultiSloppyDocScorer(subScorers);
} else {
BasicStats basicstats = (BasicStats) stats;
return new BasicSloppyDocScorer(basicstats, context.reader().normValues(basicstats.field));
}
}
// in lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
Override
public final ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
IDFStats idfstats = (IDFStats) stats;
return new ExactTFIDFDocScorer(idfstats, context.reader().normValues(idfstats.field));
}
// in lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
Override
public final SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
IDFStats idfstats = (IDFStats) stats;
return new SloppyTFIDFDocScorer(idfstats, context.reader().normValues(idfstats.field));
}
// in lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java
Override
public final ExactSimScorer exactSimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
BM25Stats bm25stats = (BM25Stats) stats;
final DocValues norms = context.reader().normValues(bm25stats.field);
return norms == null
? new ExactBM25DocScorerNoNorms(bm25stats)
: new ExactBM25DocScorer(bm25stats, norms);
}
// in lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java
Override
public final SloppySimScorer sloppySimScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
BM25Stats bm25stats = (BM25Stats) stats;
return new SloppyBM25DocScorer(bm25stats, context.reader().normValues(bm25stats.field));
}
// in lucene/core/src/java/org/apache/lucene/search/similarities/PerFieldSimilarityWrapper.java
Override
public final ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
PerFieldSimWeight perFieldWeight = (PerFieldSimWeight) weight;
return perFieldWeight.delegate.exactSimScorer(perFieldWeight.delegateWeight, context);
}
// in lucene/core/src/java/org/apache/lucene/search/similarities/PerFieldSimilarityWrapper.java
Override
public final SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
PerFieldSimWeight perFieldWeight = (PerFieldSimWeight) weight;
return perFieldWeight.delegate.sloppySimScorer(perFieldWeight.delegateWeight, context);
}
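// ---- illustrative sketch, not part of the Lucene/Solr sources ----
// The two delegating methods above route through a per-field lookup, so a concrete
// wrapper only has to implement get(String). The field name and the choice of BM25 for
// "body" are assumptions for the example.
static Similarity perFieldSimilarity() {
  return new PerFieldSimilarityWrapper() {
    private final Similarity bm25 = new BM25Similarity();
    private final Similarity defaults = new DefaultSimilarity();
    @Override
    public Similarity get(String field) {
      return "body".equals(field) ? bm25 : defaults;
    }
  };
}
// ---- end sketch ----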
// in lucene/core/src/java/org/apache/lucene/search/TermScorer.java
Override
public float freq() throws IOException {
return docsEnum.freq();
}
// in lucene/core/src/java/org/apache/lucene/search/TermScorer.java
Override
public int nextDoc() throws IOException {
return docsEnum.nextDoc();
}
// in lucene/core/src/java/org/apache/lucene/search/TermScorer.java
Override
public float score() throws IOException {
assert docID() != NO_MORE_DOCS;
return docScorer.score(docsEnum.docID(), docsEnum.freq());
}
// in lucene/core/src/java/org/apache/lucene/search/TermScorer.java
Override
public int advance(int target) throws IOException {
return docsEnum.advance(target);
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java
Override
protected AcceptStatus acceptPosition(Spans spans) throws IOException {
boolean result = spans.isPayloadAvailable();
if (result == true){
Collection<byte[]> candidate = spans.getPayload();
if (candidate.size() == payloadToMatch.size()){
//TODO: check the byte arrays are the same
Iterator<byte[]> toMatchIter = payloadToMatch.iterator();
//check each of the byte arrays, in order
//hmm, can't rely on order here
for (byte[] candBytes : candidate) {
//if one is a mismatch, then return false
if (Arrays.equals(candBytes, toMatchIter.next()) == false){
return AcceptStatus.NO;
}
}
//we've verified all the bytes
return AcceptStatus.YES;
} else {
return AcceptStatus.NO;
}
}
return AcceptStatus.YES;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
Override
public boolean next() throws IOException {
return adjust(spans.next());
}
// in lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
Override
public boolean skipTo(int target) throws IOException {
return adjust(spans.skipTo(target));
}
// in lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
Override
public Collection<byte[]> getPayload() throws IOException {
return new ArrayList<byte[]>(spans.getPayload());
}
// in lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
Override
public boolean next() throws IOException {
if (firstTime) {
initList(true);
listToQueue(); // initialize queue
firstTime = false;
} else if (more) {
if (min().next()) { // trigger further scanning
queue.updateTop(); // maintain queue
} else {
more = false;
}
}
while (more) {
boolean queueStale = false;
if (min().doc() != max.doc()) { // maintain list
queueToList();
queueStale = true;
}
// skip to doc w/ all clauses
while (more && first.doc() < last.doc()) {
more = first.skipTo(last.doc()); // skip first upto last
firstToLast(); // and move it to the end
queueStale = true;
}
if (!more) return false;
// found doc w/ all clauses
if (queueStale) { // maintain the queue
listToQueue();
queueStale = false;
}
if (atMatch()) {
return true;
}
more = min().next();
if (more) {
queue.updateTop(); // maintain queue
}
}
return false; // no more matches
}
// in lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
Override
public boolean skipTo(int target) throws IOException {
if (firstTime) { // initialize
initList(false);
for (SpansCell cell = first; more && cell!=null; cell=cell.next) {
more = cell.skipTo(target); // skip all
}
if (more) {
listToQueue();
}
firstTime = false;
} else { // normal case
while (more && min().doc() < target) { // skip as needed
if (min().skipTo(target)) {
queue.updateTop();
} else {
more = false;
}
}
}
return more && (atMatch() || next());
}
// in lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
Override
public Collection<byte[]> getPayload() throws IOException {
Set<byte[]> matchPayload = new HashSet<byte[]>();
for (SpansCell cell = first; cell != null; cell = cell.next) {
if (cell.isPayloadAvailable()) {
matchPayload.addAll(cell.getPayload());
}
}
return matchPayload;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
private void initList(boolean next) throws IOException {
for (int i = 0; more && i < ordered.size(); i++) {
SpansCell cell = ordered.get(i);
if (next)
more = cell.next(); // move to first entry
if (more) {
addToList(cell); // add to list
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
private void addToList(SpansCell cell) throws IOException {
if (last != null) { // add next to end of list
last.next = cell;
} else
first = cell;
last = cell;
cell.next = null;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
private void queueToList() throws IOException {
last = first = null;
while (queue.top() != null) {
addToList(queue.pop());
}
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
SpanOrQuery clone = null;
for (int i = 0 ; i < clauses.size(); i++) {
SpanQuery c = clauses.get(i);
SpanQuery query = (SpanQuery) c.rewrite(reader);
if (query != c) { // clause rewrote: must clone
if (clone == null)
clone = this.clone();
clone.clauses.set(i,query);
}
}
if (clone != null) {
return clone; // some clauses rewrote
} else {
return this; // no clauses rewrote
}
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
Override
public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
if (clauses.size() == 1) // optimize 1-clause case
return (clauses.get(0)).getSpans(context, acceptDocs, termContexts);
return new Spans() {
private SpanQueue queue = null;
private boolean initSpanQueue(int target) throws IOException {
queue = new SpanQueue(clauses.size());
Iterator<SpanQuery> i = clauses.iterator();
while (i.hasNext()) {
Spans spans = i.next().getSpans(context, acceptDocs, termContexts);
if ( ((target == -1) && spans.next())
|| ((target != -1) && spans.skipTo(target))) {
queue.add(spans);
}
}
return queue.size() != 0;
}
@Override
public boolean next() throws IOException {
if (queue == null) {
return initSpanQueue(-1);
}
if (queue.size() == 0) { // all done
return false;
}
if (top().next()) { // move to next
queue.updateTop();
return true;
}
queue.pop(); // exhausted a clause
return queue.size() != 0;
}
private Spans top() { return queue.top(); }
@Override
public boolean skipTo(int target) throws IOException {
if (queue == null) {
return initSpanQueue(target);
}
boolean skipCalled = false;
while (queue.size() != 0 && top().doc() < target) {
if (top().skipTo(target)) {
queue.updateTop();
} else {
queue.pop();
}
skipCalled = true;
}
if (skipCalled) {
return queue.size() != 0;
}
return next();
}
@Override
public int doc() { return top().doc(); }
@Override
public int start() { return top().start(); }
@Override
public int end() { return top().end(); }
@Override
public Collection<byte[]> getPayload() throws IOException {
ArrayList<byte[]> result = null;
Spans theTop = top();
if (theTop != null && theTop.isPayloadAvailable()) {
result = new ArrayList<byte[]>(theTop.getPayload());
}
return result;
}
@Override
public boolean isPayloadAvailable() {
Spans top = top();
return top != null && top.isPayloadAvailable();
}
@Override
public String toString() {
return "spans("+SpanOrQuery.this+")@"+
((queue == null)?"START"
:(queue.size()>0?(doc()+":"+start()+"-"+end()):"END"));
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
private boolean initSpanQueue(int target) throws IOException {
queue = new SpanQueue(clauses.size());
Iterator<SpanQuery> i = clauses.iterator();
while (i.hasNext()) {
Spans spans = i.next().getSpans(context, acceptDocs, termContexts);
if ( ((target == -1) && spans.next())
|| ((target != -1) && spans.skipTo(target))) {
queue.add(spans);
}
}
return queue.size() != 0;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
Override
public boolean next() throws IOException {
if (queue == null) {
return initSpanQueue(-1);
}
if (queue.size() == 0) { // all done
return false;
}
if (top().next()) { // move to next
queue.updateTop();
return true;
}
queue.pop(); // exhausted a clause
return queue.size() != 0;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
Override
public boolean skipTo(int target) throws IOException {
if (queue == null) {
return initSpanQueue(target);
}
boolean skipCalled = false;
while (queue.size() != 0 && top().doc() < target) {
if (top().skipTo(target)) {
queue.updateTop();
} else {
queue.pop();
}
skipCalled = true;
}
if (skipCalled) {
return queue.size() != 0;
}
return next();
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
Override
public Collection<byte[]> getPayload() throws IOException {
ArrayList<byte[]> result = null;
Spans theTop = top();
if (theTop != null && theTop.isPayloadAvailable()) {
result = new ArrayList<byte[]>(theTop.getPayload());
}
return result;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanQuery.java
Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new SpanWeight(this, searcher);
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java
Override
public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
throw new UnsupportedOperationException("Query should have been rewritten");
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java
Override
public Query rewrite(IndexReader reader) throws IOException {
final Query q = query.rewrite(reader);
if (!(q instanceof SpanQuery))
throw new UnsupportedOperationException("You can only use SpanMultiTermQueryWrapper with a suitable SpanRewriteMethod.");
return q;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java
Override
public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
return delegate.rewrite(reader, query);
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java
Override
public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
return delegate.rewrite(reader, query);
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java
Override
public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
return new PositionCheckSpan(context, acceptDocs, termContexts);
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
SpanPositionCheckQuery clone = null;
SpanQuery rewritten = (SpanQuery) match.rewrite(reader);
if (rewritten != match) {
clone = (SpanPositionCheckQuery) this.clone();
clone.match = rewritten;
}
if (clone != null) {
return clone; // some clauses rewrote
} else {
return this; // no clauses rewrote
}
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java
Override
public boolean next() throws IOException {
if (!spans.next())
return false;
return doNext();
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java
Override
public boolean skipTo(int target) throws IOException {
if (!spans.skipTo(target))
return false;
return doNext();
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java
protected boolean doNext() throws IOException {
for (;;) {
switch(acceptPosition(this)) {
case YES: return true;
case NO:
if (!spans.next())
return false;
break;
case NO_AND_ADVANCE:
if (!spans.skipTo(spans.doc()+1))
return false;
break;
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java
Override
public Collection<byte[]> getPayload() throws IOException {
ArrayList<byte[]> result = null;
if (spans.isPayloadAvailable()) {
result = new ArrayList<byte[]>(spans.getPayload());
}
return result;//TODO: any way to avoid the new construction?
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionRangeQuery.java
Override
protected AcceptStatus acceptPosition(Spans spans) throws IOException {
assert spans.start() != spans.end();
if (spans.start() >= end)
return AcceptStatus.NO_AND_ADVANCE;
else if (spans.start() >= start && spans.end() <= end)
return AcceptStatus.YES;
else
return AcceptStatus.NO;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
Override
public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
if (clauses.size() == 0) // optimize 0-clause case
return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs, termContexts);
if (clauses.size() == 1) // optimize 1-clause case
return clauses.get(0).getSpans(context, acceptDocs, termContexts);
return inOrder
? (Spans) new NearSpansOrdered(this, context, acceptDocs, termContexts, collectPayloads)
: (Spans) new NearSpansUnordered(this, context, acceptDocs, termContexts);
}
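// ---- illustrative sketch, not part of the Lucene/Solr sources ----
// Building the query whose getSpans() is shown above: an ordered near query over two
// terms with a slop of 2. Field and term values are assumptions for the example.
static SpanNearQuery quickFoxNear() {
  SpanQuery[] clauses = new SpanQuery[] {
    new SpanTermQuery(new Term("body", "quick")),
    new SpanTermQuery(new Term("body", "fox"))
  };
  return new SpanNearQuery(clauses, 2, true);  // slop = 2, inOrder = true
}
// ---- end sketch ----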
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
SpanNearQuery clone = null;
for (int i = 0 ; i < clauses.size(); i++) {
SpanQuery c = clauses.get(i);
SpanQuery query = (SpanQuery) c.rewrite(reader);
if (query != c) { // clause rewrote: must clone
if (clone == null)
clone = this.clone();
clone.clauses.set(i,query);
}
}
if (clone != null) {
return clone; // some clauses rewrote
} else {
return this; // no clauses rewrote
}
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java
Override
protected AcceptStatus acceptPosition(Spans spans) throws IOException {
boolean result = spans.isPayloadAvailable();
if (result == true) {
Collection<byte[]> candidate = spans.getPayload();
if (candidate.size() == payloadToMatch.size()) {
//TODO: check the byte arrays are the same
//hmm, can't rely on order here
int matches = 0;
for (byte[] candBytes : candidate) {
//Unfortunately, we can't rely on order, so we need to compare all
for (byte[] payBytes : payloadToMatch) {
if (Arrays.equals(candBytes, payBytes) == true) {
matches++;
break;
}
}
}
if (matches == payloadToMatch.size()){
//we've verified all the bytes
return AcceptStatus.YES;
} else {
return AcceptStatus.NO;
}
} else {
return AcceptStatus.NO;
}
}
return AcceptStatus.NO;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java
Override
public int nextDoc() throws IOException {
if (!setFreqCurrentDoc()) {
doc = NO_MORE_DOCS;
}
return doc;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java
Override
public int advance(int target) throws IOException {
if (!more) {
return doc = NO_MORE_DOCS;
}
if (spans.doc() < target) { // setFreqCurrentDoc() leaves spans.doc() ahead
more = spans.skipTo(target);
}
if (!setFreqCurrentDoc()) {
doc = NO_MORE_DOCS;
}
return doc;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java
protected boolean setFreqCurrentDoc() throws IOException {
if (!more) {
return false;
}
doc = spans.doc();
freq = 0.0f;
do {
int matchLength = spans.end() - spans.start();
freq += docScorer.computeSlopFactor(matchLength);
more = spans.next();
} while (more && (doc == spans.doc()));
return true;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java
Override
public float score() throws IOException {
return docScorer.score(doc, freq);
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanScorer.java
Override
public float freq() throws IOException {
return freq;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java
Override
public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
return maskedQuery.getSpans(context, acceptDocs, termContexts);
}
// in lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java
Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return maskedQuery.createWeight(searcher);
}
// in lucene/core/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
FieldMaskingSpanQuery clone = null;
SpanQuery rewritten = (SpanQuery) maskedQuery.rewrite(reader);
if (rewritten != maskedQuery) {
clone = (FieldMaskingSpanQuery) this.clone();
clone.maskedQuery = rewritten;
}
if (clone != null) {
return clone;
} else {
return this;
}
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java
Override
public float getValueForNormalization() throws IOException {
return stats == null ? 1.0f : stats.getValueForNormalization();
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
if (stats == null) {
return null;
} else {
return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppySimScorer(stats, context));
}
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java
Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context, true, false, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
result.addDetail(scoreExplanation);
result.setValue(scoreExplanation.getValue());
result.setMatch(true);
return result;
}
}
return new ComplexExplanation(false, 0.0f, "no matching term");
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java
Override
public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
TermContext termContext = termContexts.get(term);
final TermState state;
if (termContext == null) {
// this happens with span-not query, as it doesn't include the NOT side in extractTerms()
// so we seek to the term now in this segment..., this sucks because its ugly mostly!
final Fields fields = context.reader().fields();
if (fields != null) {
final Terms terms = fields.terms(term.field());
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term.bytes(), true)) {
state = termsEnum.termState();
} else {
state = null;
}
} else {
state = null;
}
} else {
state = null;
}
} else {
state = termContext.get(context.ord);
}
if (state == null) { // term is not present in that reader
return TermSpans.EMPTY_TERM_SPANS;
}
final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
termsEnum.seekExact(term.bytes(), state);
final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null, false);
if (postings != null) {
return new TermSpans(postings, term);
} else {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");
}
}
// in lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
Override
public Collection<byte[]> getPayload() throws IOException {
return matchPayload;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
Override
public boolean next() throws IOException {
if (firstTime) {
firstTime = false;
for (int i = 0; i < subSpans.length; i++) {
if (! subSpans[i].next()) {
more = false;
return false;
}
}
more = true;
}
if(collectPayloads) {
matchPayload.clear();
}
return advanceAfterOrdered();
}
// in lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
Override
public boolean skipTo(int target) throws IOException {
if (firstTime) {
firstTime = false;
for (int i = 0; i < subSpans.length; i++) {
if (! subSpans[i].skipTo(target)) {
more = false;
return false;
}
}
more = true;
} else if (more && (subSpans[0].doc() < target)) {
if (subSpans[0].skipTo(target)) {
inSameDoc = false;
} else {
more = false;
return false;
}
}
if(collectPayloads) {
matchPayload.clear();
}
return advanceAfterOrdered();
}
// in lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
private boolean advanceAfterOrdered() throws IOException {
while (more && (inSameDoc || toSameDoc())) {
if (stretchToOrder() && shrinkToAfterShortestMatch()) {
return true;
}
}
return false; // no more matches
}
// in lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
private boolean toSameDoc() throws IOException {
ArrayUtil.mergeSort(subSpansByDoc, spanDocComparator);
int firstIndex = 0;
int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
while (subSpansByDoc[firstIndex].doc() != maxDoc) {
if (! subSpansByDoc[firstIndex].skipTo(maxDoc)) {
more = false;
inSameDoc = false;
return false;
}
maxDoc = subSpansByDoc[firstIndex].doc();
if (++firstIndex == subSpansByDoc.length) {
firstIndex = 0;
}
}
for (int i = 0; i < subSpansByDoc.length; i++) {
assert (subSpansByDoc[i].doc() == maxDoc)
: " NearSpansOrdered.toSameDoc() spans " + subSpansByDoc[0]
+ "\n at doc " + subSpansByDoc[i].doc()
+ ", but should be at " + maxDoc;
}
inSameDoc = true;
return true;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
private boolean stretchToOrder() throws IOException {
matchDoc = subSpans[0].doc();
for (int i = 1; inSameDoc && (i < subSpans.length); i++) {
while (! docSpansOrdered(subSpans[i-1], subSpans[i])) {
if (! subSpans[i].next()) {
inSameDoc = false;
more = false;
break;
} else if (matchDoc != subSpans[i].doc()) {
inSameDoc = false;
break;
}
}
}
return inSameDoc;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java
private boolean shrinkToAfterShortestMatch() throws IOException {
matchStart = subSpans[subSpans.length - 1].start();
matchEnd = subSpans[subSpans.length - 1].end();
Set<byte[]> possibleMatchPayloads = new HashSet<byte[]>();
if (subSpans[subSpans.length - 1].isPayloadAvailable()) {
possibleMatchPayloads.addAll(subSpans[subSpans.length - 1].getPayload());
}
Collection<byte[]> possiblePayload = null;
int matchSlop = 0;
int lastStart = matchStart;
int lastEnd = matchEnd;
for (int i = subSpans.length - 2; i >= 0; i--) {
Spans prevSpans = subSpans[i];
if (collectPayloads && prevSpans.isPayloadAvailable()) {
Collection<byte[]> payload = prevSpans.getPayload();
possiblePayload = new ArrayList<byte[]>(payload.size());
possiblePayload.addAll(payload);
}
int prevStart = prevSpans.start();
int prevEnd = prevSpans.end();
while (true) { // Advance prevSpans until after (lastStart, lastEnd)
if (! prevSpans.next()) {
inSameDoc = false;
more = false;
break; // Check remaining subSpans for final match.
} else if (matchDoc != prevSpans.doc()) {
inSameDoc = false; // The last subSpans is not advanced here.
break; // Check remaining subSpans for last match in this document.
} else {
int ppStart = prevSpans.start();
int ppEnd = prevSpans.end(); // Cannot avoid invoking .end()
if (! docSpansOrdered(ppStart, ppEnd, lastStart, lastEnd)) {
break; // Check remaining subSpans.
} else { // prevSpans still before (lastStart, lastEnd)
prevStart = ppStart;
prevEnd = ppEnd;
if (collectPayloads && prevSpans.isPayloadAvailable()) {
Collection<byte[]> payload = prevSpans.getPayload();
possiblePayload = new ArrayList<byte[]>(payload.size());
possiblePayload.addAll(payload);
}
}
}
}
if (collectPayloads && possiblePayload != null) {
possibleMatchPayloads.addAll(possiblePayload);
}
assert prevStart <= matchStart;
if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
matchSlop += (matchStart - prevEnd);
}
/* Do not break on (matchSlop > allowedSlop) here to make sure
* that subSpans[0] is advanced after the match, if any.
*/
matchStart = prevStart;
lastStart = prevStart;
lastEnd = prevEnd;
}
boolean match = matchSlop <= allowedSlop;
if(collectPayloads && match && possibleMatchPayloads.size() > 0) {
matchPayload.addAll(possibleMatchPayloads);
}
return match; // ordered and allowed slop
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanFirstQuery.java
Override
protected AcceptStatus acceptPosition(Spans spans) throws IOException {
assert spans.start() != spans.end() : "start equals end: " + spans.start();
if (spans.start() >= end)
return AcceptStatus.NO_AND_ADVANCE;
else if (spans.end() <= end)
return AcceptStatus.YES;
else
return AcceptStatus.NO;
}
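// ---- illustrative sketch, not part of the Lucene/Solr sources ----
// acceptPosition() above keeps only spans that end at or before 'end', so SpanFirstQuery
// restricts a match to the first N positions of a field; the field, term and limit here
// are assumptions for the example.
static SpanFirstQuery inOpening() {
  SpanQuery match = new SpanTermQuery(new Term("title", "introduction"));
  return new SpanFirstQuery(match, 3);  // accept only spans ending within the first 3 positions
}
// ---- end sketch ----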
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
Override
public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
return new Spans() {
private Spans includeSpans = include.getSpans(context, acceptDocs, termContexts);
private boolean moreInclude = true;
private Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts);
private boolean moreExclude = excludeSpans.next();
@Override
public boolean next() throws IOException {
if (moreInclude) // move to next include
moreInclude = includeSpans.next();
while (moreInclude && moreExclude) {
if (includeSpans.doc() > excludeSpans.doc()) // skip exclude
moreExclude = excludeSpans.skipTo(includeSpans.doc());
while (moreExclude // while exclude is before
&& includeSpans.doc() == excludeSpans.doc()
&& excludeSpans.end() <= includeSpans.start()) {
moreExclude = excludeSpans.next(); // increment exclude
}
if (!moreExclude // if no intersection
|| includeSpans.doc() != excludeSpans.doc()
|| includeSpans.end() <= excludeSpans.start())
break; // we found a match
moreInclude = includeSpans.next(); // intersected: keep scanning
}
return moreInclude;
}
@Override
public boolean skipTo(int target) throws IOException {
if (moreInclude) // skip include
moreInclude = includeSpans.skipTo(target);
if (!moreInclude)
return false;
if (moreExclude // skip exclude
&& includeSpans.doc() > excludeSpans.doc())
moreExclude = excludeSpans.skipTo(includeSpans.doc());
while (moreExclude // while exclude is before
&& includeSpans.doc() == excludeSpans.doc()
&& excludeSpans.end() <= includeSpans.start()) {
moreExclude = excludeSpans.next(); // increment exclude
}
if (!moreExclude // if no intersection
|| includeSpans.doc() != excludeSpans.doc()
|| includeSpans.end() <= excludeSpans.start())
return true; // we found a match
return next(); // scan to next match
}
@Override
public int doc() { return includeSpans.doc(); }
@Override
public int start() { return includeSpans.start(); }
@Override
public int end() { return includeSpans.end(); }
// TODO: Remove warning after API has been finalized
@Override
public Collection<byte[]> getPayload() throws IOException {
ArrayList<byte[]> result = null;
if (includeSpans.isPayloadAvailable()) {
result = new ArrayList<byte[]>(includeSpans.getPayload());
}
return result;
}
// TODO: Remove warning after API has been finalized
@Override
public boolean isPayloadAvailable() {
return includeSpans.isPayloadAvailable();
}
@Override
public String toString() {
return "spans(" + SpanNotQuery.this.toString() + ")";
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
Override
public boolean next() throws IOException {
if (moreInclude) // move to next include
moreInclude = includeSpans.next();
while (moreInclude && moreExclude) {
if (includeSpans.doc() > excludeSpans.doc()) // skip exclude
moreExclude = excludeSpans.skipTo(includeSpans.doc());
while (moreExclude // while exclude is before
&& includeSpans.doc() == excludeSpans.doc()
&& excludeSpans.end() <= includeSpans.start()) {
moreExclude = excludeSpans.next(); // increment exclude
}
if (!moreExclude // if no intersection
|| includeSpans.doc() != excludeSpans.doc()
|| includeSpans.end() <= excludeSpans.start())
break; // we found a match
moreInclude = includeSpans.next(); // intersected: keep scanning
}
return moreInclude;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
Override
public boolean skipTo(int target) throws IOException {
if (moreInclude) // skip include
moreInclude = includeSpans.skipTo(target);
if (!moreInclude)
return false;
if (moreExclude // skip exclude
&& includeSpans.doc() > excludeSpans.doc())
moreExclude = excludeSpans.skipTo(includeSpans.doc());
while (moreExclude // while exclude is before
&& includeSpans.doc() == excludeSpans.doc()
&& excludeSpans.end() <= includeSpans.start()) {
moreExclude = excludeSpans.next(); // increment exclude
}
if (!moreExclude // if no intersection
|| includeSpans.doc() != excludeSpans.doc()
|| includeSpans.end() <= excludeSpans.start())
return true; // we found a match
return next(); // scan to next match
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
Override
public Collection<byte[]> getPayload() throws IOException {
ArrayList<byte[]> result = null;
if (includeSpans.isPayloadAvailable()) {
result = new ArrayList<byte[]>(includeSpans.getPayload());
}
return result;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
SpanNotQuery clone = null;
SpanQuery rewrittenInclude = (SpanQuery) include.rewrite(reader);
if (rewrittenInclude != include) {
clone = this.clone();
clone.include = rewrittenInclude;
}
SpanQuery rewrittenExclude = (SpanQuery) exclude.rewrite(reader);
if (rewrittenExclude != exclude) {
if (clone == null) clone = this.clone();
clone.exclude = rewrittenExclude;
}
if (clone != null) {
return clone; // some clauses rewrote
} else {
return this; // no clauses rewrote
}
}
// in lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java
Override
public boolean next() throws IOException {
if (count == freq) {
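// all positions in the current doc consumed (or iteration not yet started): move to the next doc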
if (postings == null) {
return false;
}
doc = postings.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
return false;
}
freq = postings.freq();
count = 0;
}
position = postings.nextPosition();
count++;
return true;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java
Override
public boolean skipTo(int target) throws IOException {
doc = postings.advance(target);
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
return false;
}
freq = postings.freq();
count = 0;
position = postings.nextPosition();
count++;
return true;
}
// in lucene/core/src/java/org/apache/lucene/search/spans/TermSpans.java
Override
public Collection<byte[]> getPayload() throws IOException {
final BytesRef payload = postings.getPayload();
final byte[] bytes;
if (payload != null) {
bytes = new byte[payload.length];
System.arraycopy(payload.bytes, payload.offset, bytes, 0, payload.length);
} else {
bytes = null;
}
return Collections.singletonList(bytes);
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
Override
public float getValueForNormalization() throws IOException {
float sum = 0.0f;
for (int i = 0 ; i < weights.size(); i++) {
// call sumOfSquaredWeights for all clauses in case of side effects
float s = weights.get(i).getValueForNormalization(); // sum sub weights
if (!clauses.get(i).isProhibited())
// only add to sum for non-prohibited clauses
sum += s;
}
sum *= getBoost() * getBoost(); // boost each sub-weight
return sum ;
}
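// Worked example of the sum above (illustrative numbers only): with two non-prohibited
// sub-weights returning 0.25 and 0.16 and a query boost of 2.0, the normalization value
// is (0.25 + 0.16) * 2.0 * 2.0 = 1.64; prohibited clauses contribute nothing to the sum.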
// in lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
Override
public Explanation explain(AtomicReaderContext context, int doc)
throws IOException {
final int minShouldMatch =
BooleanQuery.this.getMinimumNumberShouldMatch();
ComplexExplanation sumExpl = new ComplexExplanation();
sumExpl.setDescription("sum of:");
int coord = 0;
float sum = 0.0f;
boolean fail = false;
int shouldMatchCount = 0;
Iterator<BooleanClause> cIter = clauses.iterator();
for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext();) {
Weight w = wIter.next();
BooleanClause c = cIter.next();
if (w.scorer(context, true, true, context.reader().getLiveDocs()) == null) {
if (c.isRequired()) {
fail = true;
Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
sumExpl.addDetail(r);
}
continue;
}
Explanation e = w.explain(context, doc);
if (e.isMatch()) {
if (!c.isProhibited()) {
sumExpl.addDetail(e);
sum += e.getValue();
coord++;
} else {
Explanation r =
new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")");
r.addDetail(e);
sumExpl.addDetail(r);
fail = true;
}
if (c.getOccur() == Occur.SHOULD)
shouldMatchCount++;
} else if (c.isRequired()) {
Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
r.addDetail(e);
sumExpl.addDetail(r);
fail = true;
}
}
if (fail) {
sumExpl.setMatch(Boolean.FALSE);
sumExpl.setValue(0.0f);
sumExpl.setDescription
("Failure to meet condition(s) of required/prohibited clause(s)");
return sumExpl;
} else if (shouldMatchCount < minShouldMatch) {
sumExpl.setMatch(Boolean.FALSE);
sumExpl.setValue(0.0f);
sumExpl.setDescription("Failure to match minimum number "+
"of optional clauses: " + minShouldMatch);
return sumExpl;
}
sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
sumExpl.setValue(sum);
final float coordFactor = disableCoord ? 1.0f : coord(coord, maxCoord);
if (coordFactor == 1.0f) {
return sumExpl; // eliminate wrapper
} else {
ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(),
sum*coordFactor,
"product of:");
result.addDetail(sumExpl);
result.addDetail(new Explanation(coordFactor,
"coord("+coord+"/"+maxCoord+")"));
return result;
}
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs)
throws IOException {
if (termConjunction) {
// specialized scorer for term conjunctions
return createConjunctionTermScorer(context, acceptDocs);
}
List<Scorer> required = new ArrayList<Scorer>();
List<Scorer> prohibited = new ArrayList<Scorer>();
List<Scorer> optional = new ArrayList<Scorer>();
Iterator<BooleanClause> cIter = clauses.iterator();
for (Weight w : weights) {
BooleanClause c = cIter.next();
Scorer subScorer = w.scorer(context, true, false, acceptDocs);
if (subScorer == null) {
if (c.isRequired()) {
return null;
}
} else if (c.isRequired()) {
required.add(subScorer);
} else if (c.isProhibited()) {
prohibited.add(subScorer);
} else {
optional.add(subScorer);
}
}
// Check if we can return a BooleanScorer
if (!scoreDocsInOrder && topScorer && required.size() == 0) {
return new BooleanScorer(this, disableCoord, minNrShouldMatch, optional, prohibited, maxCoord);
}
if (required.size() == 0 && optional.size() == 0) {
// no required and optional clauses.
return null;
} else if (optional.size() < minNrShouldMatch) {
// either >1 req scorer, or there are 0 req scorers and at least 1
// optional scorer. Therefore if there are not enough optional scorers
// no documents will be matched by the query
return null;
}
// Return a BooleanScorer2
return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord);
}
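// Illustrative only (field and term names are assumptions): a query whose clauses would
// be routed into the required/optional/prohibited lists built above.
BooleanQuery bq = new BooleanQuery();
bq.add(new TermQuery(new Term("body", "lucene")), BooleanClause.Occur.MUST);     // -> required
bq.add(new TermQuery(new Term("body", "search")), BooleanClause.Occur.SHOULD);   // -> optional
bq.add(new TermQuery(new Term("body", "draft")), BooleanClause.Occur.MUST_NOT);  // -> prohibited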
// in lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
private Scorer createConjunctionTermScorer(AtomicReaderContext context, Bits acceptDocs)
throws IOException {
// TODO: fix scorer API to specify "needsScores" up
// front, so we can do match-only if caller doesn't
// need scores
final DocsAndFreqs[] docsAndFreqs = new DocsAndFreqs[weights.size()];
for (int i = 0; i < docsAndFreqs.length; i++) {
final TermWeight weight = (TermWeight) weights.get(i);
final TermsEnum termsEnum = weight.getTermsEnum(context);
if (termsEnum == null) {
return null;
}
final ExactSimScorer docScorer = weight.createDocScorer(context);
final DocsEnum docsAndFreqsEnum = termsEnum.docs(acceptDocs, null, true);
if (docsAndFreqsEnum == null) {
// TODO: we could carry over TermState from the
// terms we already seek'd to, to save re-seeking
// to make the match-only scorer, but it's likely
// rare that BQ mixes terms from omitTf and
// non-omitTF fields:
// At least one sub cannot provide freqs; abort
// and fallback to full match-only scorer:
return createMatchOnlyConjunctionTermScorer(context, acceptDocs);
}
docsAndFreqs[i] = new DocsAndFreqs(docsAndFreqsEnum,
docsAndFreqsEnum,
termsEnum.docFreq(), docScorer);
}
return new ConjunctionTermScorer(this, disableCoord ? 1.0f : coord(
docsAndFreqs.length, docsAndFreqs.length), docsAndFreqs);
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
private Scorer createMatchOnlyConjunctionTermScorer(AtomicReaderContext context, Bits acceptDocs)
throws IOException {
final DocsAndFreqs[] docsAndFreqs = new DocsAndFreqs[weights.size()];
for (int i = 0; i < docsAndFreqs.length; i++) {
final TermWeight weight = (TermWeight) weights.get(i);
final TermsEnum termsEnum = weight.getTermsEnum(context);
if (termsEnum == null) {
return null;
}
final ExactSimScorer docScorer = weight.createDocScorer(context);
docsAndFreqs[i] = new DocsAndFreqs(null,
termsEnum.docs(acceptDocs, null, false),
termsEnum.docFreq(), docScorer);
}
return new MatchOnlyConjunctionTermScorer(this, disableCoord ? 1.0f : coord(
docsAndFreqs.length, docsAndFreqs.length), docsAndFreqs);
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new BooleanWeight(searcher, disableCoord);
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
if (minNrShouldMatch == 0 && clauses.size() == 1) { // optimize 1-clause queries
BooleanClause c = clauses.get(0);
if (!c.isProhibited()) { // just return clause
Query query = c.getQuery().rewrite(reader); // rewrite first
if (getBoost() != 1.0f) { // incorporate boost
if (query == c.getQuery()) { // if rewrite was no-op
query = query.clone(); // then clone before boost
}
// Since the BooleanQuery only has 1 clause, the BooleanQuery will be
// written out. Therefore the rewritten Query's boost must incorporate both
// the clause's boost, and the boost of the BooleanQuery itself
query.setBoost(getBoost() * query.getBoost());
}
return query;
}
}
BooleanQuery clone = null; // recursively rewrite
for (int i = 0 ; i < clauses.size(); i++) {
BooleanClause c = clauses.get(i);
Query query = c.getQuery().rewrite(reader);
if (query != c.getQuery()) { // clause rewrote: must clone
if (clone == null) {
// The BooleanQuery clone is lazily initialized so only initialize
// it if a rewritten clause differs from the original clause (and hasn't been
// initialized already). If nothing differs, the clone isn't needlessly created
clone = this.clone();
}
clone.clauses.set(i, new BooleanClause(query, c.getOccur()));
}
}
if (clone != null) {
return clone; // some clauses rewrote
} else
return this; // no clauses rewrote
}
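// Sketch of the single-clause optimization above (boost values are illustrative): a
// one-clause, non-prohibited BooleanQuery with boost 2.0 wrapping a TermQuery with boost
// 3.0 rewrites to that TermQuery (cloned if necessary) carrying the combined boost
// 2.0 * 3.0 = 6.0.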
// in lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
Override
public void collect(int doc) throws IOException {
float score = scorer.score();
// This collector cannot handle these scores:
assert score != Float.NEGATIVE_INFINITY;
assert !Float.isNaN(score);
totalHits++;
if (score <= pqTop.score) {
// Since docs are returned in-order (i.e., increasing doc Id), a document
// with equal score to pqTop.score cannot compete since HitQueue favors
// documents with lower doc Ids. Therefore reject those docs too.
return;
}
pqTop.doc = doc + docBase;
pqTop.score = score;
pqTop = pq.updateTop();
}
// in lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
Override
public void collect(int doc) throws IOException {
float score = scorer.score();
// This collector cannot handle these scores:
assert score != Float.NEGATIVE_INFINITY;
assert !Float.isNaN(score);
totalHits++;
if (score > after.score || (score == after.score && doc <= afterDoc)) {
// hit was collected on a previous page
return;
}
if (score <= pqTop.score) {
// Since docs are returned in-order (i.e., increasing doc Id), a document
// with equal score to pqTop.score cannot compete since HitQueue favors
// documents with lower doc Ids. Therefore reject those docs too.
return;
}
collectedHits++;
pqTop.doc = doc + docBase;
pqTop.score = score;
pqTop = pq.updateTop();
}
// in lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
Override
public void collect(int doc) throws IOException {
float score = scorer.score();
// This collector cannot handle NaN
assert !Float.isNaN(score);
totalHits++;
if (score < pqTop.score) {
// Doesn't compete w/ bottom entry in queue
return;
}
doc += docBase;
if (score == pqTop.score && doc > pqTop.doc) {
// Break tie in score by doc ID:
return;
}
pqTop.doc = doc;
pqTop.score = score;
pqTop = pq.updateTop();
}
// in lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
Override
public void collect(int doc) throws IOException {
float score = scorer.score();
// This collector cannot handle NaN
assert !Float.isNaN(score);
totalHits++;
if (score > after.score || (score == after.score && doc <= afterDoc)) {
// hit was collected on a previous page
return;
}
if (score < pqTop.score) {
// Doesn't compete w/ bottom entry in queue
return;
}
doc += docBase;
if (score == pqTop.score && doc > pqTop.doc) {
// Break tie in score by doc ID:
return;
}
collectedHits++;
pqTop.doc = doc;
pqTop.score = score;
pqTop = pq.updateTop();
}
// in lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
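// Typical driver for the collect()/setScorer() methods above (a sketch; "searcher" and
// "query" are assumed to exist, and 10 is an arbitrary page size):
TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);
searcher.search(query, collector);
ScoreDoc[] hits = collector.topDocs().scoreDocs;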
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheDocIdSet.java
Override
public final DocIdSetIterator iterator() throws IOException {
if (acceptDocs == null) {
// Specialization optimization: disregard acceptDocs
return new DocIdSetIterator() {
private int doc = -1;
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() {
do {
doc++;
if (doc >= maxDoc) {
return doc = NO_MORE_DOCS;
}
} while (!matchDoc(doc));
return doc;
}
@Override
public int advance(int target) {
for(doc=target; doc<maxDoc; doc++) {
if (matchDoc(doc)) {
return doc;
}
}
return doc = NO_MORE_DOCS;
}
};
} else if (acceptDocs instanceof FixedBitSet || acceptDocs instanceof OpenBitSet) {
// special case for FixedBitSet / OpenBitSet: use the iterator and filter it
// (used e.g. when Filters are chained by FilteredQuery)
return new FilteredDocIdSetIterator(((DocIdSet) acceptDocs).iterator()) {
@Override
protected boolean match(int doc) {
return FieldCacheDocIdSet.this.matchDoc(doc);
}
};
} else {
// Stupid consultation of acceptDocs and matchDoc()
return new DocIdSetIterator() {
private int doc = -1;
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() {
do {
doc++;
if (doc >= maxDoc) {
return doc = NO_MORE_DOCS;
}
} while (!(matchDoc(doc) && acceptDocs.get(doc)));
return doc;
}
@Override
public int advance(int target) {
for(doc=target; doc<maxDoc; doc++) {
if (matchDoc(doc) && acceptDocs.get(doc)) {
return doc;
}
}
return doc = NO_MORE_DOCS;
}
};
}
}
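// Generic consumption pattern (a sketch) for any of the iterators returned above;
// "docIdSet" stands in for a FieldCacheDocIdSet instance obtained elsewhere.
DocIdSetIterator it = docIdSet.iterator();
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
  // process the matching doc id
}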
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public Object get(AtomicReader reader, Entry key, boolean setDocsWithField) throws IOException {
Map<Entry,Object> innerCache;
Object value;
final Object readerKey = reader.getCoreCacheKey();
synchronized (readerCache) {
innerCache = readerCache.get(readerKey);
if (innerCache == null) {
// First time this reader is using FieldCache
innerCache = new HashMap<Entry,Object>();
readerCache.put(readerKey, innerCache);
wrapper.initReader(reader);
value = null;
} else {
value = innerCache.get(key);
}
if (value == null) {
value = new CreationPlaceholder();
innerCache.put(key, value);
}
}
if (value instanceof CreationPlaceholder) {
synchronized (value) {
CreationPlaceholder progress = (CreationPlaceholder) value;
if (progress.value == null) {
progress.value = createValue(reader, key, setDocsWithField);
synchronized (readerCache) {
innerCache.put(key, progress.value);
}
// Only check if key.custom (the parser) is
// non-null; else, we check twice for a single
// call to FieldCache.getXXX
if (key.custom != null && wrapper != null) {
final PrintStream infoStream = wrapper.getInfoStream();
if (infoStream != null) {
printNewInsanity(infoStream, progress.value);
}
}
}
return progress.value;
}
}
return value;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public byte[] getBytes (AtomicReader reader, String field, boolean setDocsWithField) throws IOException {
return getBytes(reader, field, null, setDocsWithField);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public byte[] getBytes(AtomicReader reader, String field, ByteParser parser, boolean setDocsWithField)
throws IOException {
return (byte[]) caches.get(Byte.TYPE).get(reader, new Entry(field, parser), setDocsWithField);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField)
throws IOException {
String field = entryKey.field;
ByteParser parser = (ByteParser) entryKey.custom;
if (parser == null) {
return wrapper.getBytes(reader, field, FieldCache.DEFAULT_BYTE_PARSER, setDocsWithField);
}
final int maxDoc = reader.maxDoc();
final byte[] retArray = new byte[maxDoc];
Terms terms = reader.terms(field);
FixedBitSet docsWithField = null;
if (terms != null) {
if (setDocsWithField) {
final int termsDocCount = terms.getDocCount();
assert termsDocCount <= maxDoc;
if (termsDocCount == maxDoc) {
// Fast case: all docs have this field:
wrapper.setDocsWithField(reader, field, new Bits.MatchAllBits(maxDoc));
setDocsWithField = false;
}
}
final TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
try {
while(true) {
final BytesRef term = termsEnum.next();
if (term == null) {
break;
}
final byte termval = parser.parseByte(term);
docs = termsEnum.docs(null, docs, false);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
retArray[docID] = termval;
if (setDocsWithField) {
if (docsWithField == null) {
// Lazy init
docsWithField = new FixedBitSet(maxDoc);
}
docsWithField.set(docID);
}
}
}
} catch (StopFillCacheException stop) {
}
}
if (setDocsWithField) {
wrapper.setDocsWithField(reader, field, docsWithField);
}
return retArray;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public short[] getShorts (AtomicReader reader, String field, boolean setDocsWithField) throws IOException {
return getShorts(reader, field, null, setDocsWithField);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public short[] getShorts(AtomicReader reader, String field, ShortParser parser, boolean setDocsWithField)
throws IOException {
return (short[]) caches.get(Short.TYPE).get(reader, new Entry(field, parser), setDocsWithField);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField)
throws IOException {
String field = entryKey.field;
ShortParser parser = (ShortParser) entryKey.custom;
if (parser == null) {
return wrapper.getShorts(reader, field, FieldCache.DEFAULT_SHORT_PARSER, setDocsWithField);
}
final int maxDoc = reader.maxDoc();
final short[] retArray = new short[maxDoc];
Terms terms = reader.terms(field);
FixedBitSet docsWithField = null;
if (terms != null) {
if (setDocsWithField) {
final int termsDocCount = terms.getDocCount();
assert termsDocCount <= maxDoc;
if (termsDocCount == maxDoc) {
// Fast case: all docs have this field:
wrapper.setDocsWithField(reader, field, new Bits.MatchAllBits(maxDoc));
setDocsWithField = false;
}
}
final TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
try {
while(true) {
final BytesRef term = termsEnum.next();
if (term == null) {
break;
}
final short termval = parser.parseShort(term);
docs = termsEnum.docs(null, docs, false);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
retArray[docID] = termval;
if (setDocsWithField) {
if (docsWithField == null) {
// Lazy init
docsWithField = new FixedBitSet(maxDoc);
}
docsWithField.set(docID);
}
}
}
} catch (StopFillCacheException stop) {
}
}
if (setDocsWithField) {
wrapper.setDocsWithField(reader, field, docsWithField);
}
return retArray;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public int[] getInts (AtomicReader reader, String field, boolean setDocsWithField) throws IOException {
return getInts(reader, field, null, setDocsWithField);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public int[] getInts(AtomicReader reader, String field, IntParser parser, boolean setDocsWithField)
throws IOException {
return (int[]) caches.get(Integer.TYPE).get(reader, new Entry(field, parser), setDocsWithField);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField)
throws IOException {
String field = entryKey.field;
IntParser parser = (IntParser) entryKey.custom;
if (parser == null) {
try {
return wrapper.getInts(reader, field, DEFAULT_INT_PARSER, setDocsWithField);
} catch (NumberFormatException ne) {
return wrapper.getInts(reader, field, NUMERIC_UTILS_INT_PARSER, setDocsWithField);
}
}
final int maxDoc = reader.maxDoc();
int[] retArray = null;
Terms terms = reader.terms(field);
FixedBitSet docsWithField = null;
if (terms != null) {
if (setDocsWithField) {
final int termsDocCount = terms.getDocCount();
assert termsDocCount <= maxDoc;
if (termsDocCount == maxDoc) {
// Fast case: all docs have this field:
wrapper.setDocsWithField(reader, field, new Bits.MatchAllBits(maxDoc));
setDocsWithField = false;
}
}
final TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
try {
while(true) {
final BytesRef term = termsEnum.next();
if (term == null) {
break;
}
final int termval = parser.parseInt(term);
if (retArray == null) {
// late init so numeric fields don't double allocate
retArray = new int[maxDoc];
}
docs = termsEnum.docs(null, docs, false);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
retArray[docID] = termval;
if (setDocsWithField) {
if (docsWithField == null) {
// Lazy init
docsWithField = new FixedBitSet(maxDoc);
}
docsWithField.set(docID);
}
}
}
} catch (StopFillCacheException stop) {
}
}
if (retArray == null) {
// no values
retArray = new int[maxDoc];
}
if (setDocsWithField) {
wrapper.setDocsWithField(reader, field, docsWithField);
}
return retArray;
}
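// Usage sketch for the entry points above ("price" and docId are assumptions; the reader
// must be an AtomicReader, e.g. a single leaf of a composite reader):
int[] prices = FieldCache.DEFAULT.getInts(atomicReader, "price", false);
int priceOfDoc = prices[docId]; // 0 for documents that have no value in this field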
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public Bits getDocsWithField(AtomicReader reader, String field)
throws IOException {
return (Bits) caches.get(DocsWithFieldCache.class).get(reader, new Entry(field, null), false);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField /* ignored */)
throws IOException {
final String field = entryKey.field;
FixedBitSet res = null;
Terms terms = reader.terms(field);
final int maxDoc = reader.maxDoc();
if (terms != null) {
final int termsDocCount = terms.getDocCount();
assert termsDocCount <= maxDoc;
if (termsDocCount == maxDoc) {
// Fast case: all docs have this field:
return new Bits.MatchAllBits(maxDoc);
}
final TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
while(true) {
final BytesRef term = termsEnum.next();
if (term == null) {
break;
}
if (res == null) {
// lazy init
res = new FixedBitSet(maxDoc);
}
docs = termsEnum.docs(null, docs, false);
// TODO: use bulk API
while (true) {
final int docID = docs.nextDoc();
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
res.set(docID);
}
}
}
if (res == null) {
return new Bits.MatchNoBits(maxDoc);
}
final int numSet = res.cardinality();
if (numSet >= maxDoc) {
// The cardinality of the BitSet is maxDoc if all documents have a value.
assert numSet == maxDoc;
return new Bits.MatchAllBits(maxDoc);
}
return res;
}
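// Companion sketch: distinguishing "no value" from "value 0" with the Bits built above
// (field name and docId are assumptions, matching the getInts sketch earlier):
Bits docsWithPrice = FieldCache.DEFAULT.getDocsWithField(atomicReader, "price");
boolean hasPrice = docsWithPrice.get(docId);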
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public float[] getFloats (AtomicReader reader, String field, boolean setDocsWithField)
throws IOException {
return getFloats(reader, field, null, setDocsWithField);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public float[] getFloats(AtomicReader reader, String field, FloatParser parser, boolean setDocsWithField)
throws IOException {
return (float[]) caches.get(Float.TYPE).get(reader, new Entry(field, parser), setDocsWithField);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField)
throws IOException {
String field = entryKey.field;
FloatParser parser = (FloatParser) entryKey.custom;
if (parser == null) {
try {
return wrapper.getFloats(reader, field, DEFAULT_FLOAT_PARSER, setDocsWithField);
} catch (NumberFormatException ne) {
return wrapper.getFloats(reader, field, NUMERIC_UTILS_FLOAT_PARSER, setDocsWithField);
}
}
final int maxDoc = reader.maxDoc();
float[] retArray = null;
Terms terms = reader.terms(field);
FixedBitSet docsWithField = null;
if (terms != null) {
if (setDocsWithField) {
final int termsDocCount = terms.getDocCount();
assert termsDocCount <= maxDoc;
if (termsDocCount == maxDoc) {
// Fast case: all docs have this field:
wrapper.setDocsWithField(reader, field, new Bits.MatchAllBits(maxDoc));
setDocsWithField = false;
}
}
final TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
try {
while(true) {
final BytesRef term = termsEnum.next();
if (term == null) {
break;
}
final float termval = parser.parseFloat(term);
if (retArray == null) {
// late init so numeric fields don't double allocate
retArray = new float[maxDoc];
}
docs = termsEnum.docs(null, docs, false);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
retArray[docID] = termval;
if (setDocsWithField) {
if (docsWithField == null) {
// Lazy init
docsWithField = new FixedBitSet(maxDoc);
}
docsWithField.set(docID);
}
}
}
} catch (StopFillCacheException stop) {
}
}
if (retArray == null) {
// no values
retArray = new float[maxDoc];
}
if (setDocsWithField) {
wrapper.setDocsWithField(reader, field, docsWithField);
}
return retArray;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public long[] getLongs(AtomicReader reader, String field, boolean setDocsWithField) throws IOException {
return getLongs(reader, field, null, setDocsWithField);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public long[] getLongs(AtomicReader reader, String field, FieldCache.LongParser parser, boolean setDocsWithField)
throws IOException {
return (long[]) caches.get(Long.TYPE).get(reader, new Entry(field, parser), setDocsWithField);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField)
throws IOException {
String field = entryKey.field;
FieldCache.LongParser parser = (FieldCache.LongParser) entryKey.custom;
if (parser == null) {
try {
return wrapper.getLongs(reader, field, DEFAULT_LONG_PARSER, setDocsWithField);
} catch (NumberFormatException ne) {
return wrapper.getLongs(reader, field, NUMERIC_UTILS_LONG_PARSER, setDocsWithField);
}
}
final int maxDoc = reader.maxDoc();
long[] retArray = null;
Terms terms = reader.terms(field);
FixedBitSet docsWithField = null;
if (terms != null) {
if (setDocsWithField) {
final int termsDocCount = terms.getDocCount();
assert termsDocCount <= maxDoc;
if (termsDocCount == maxDoc) {
// Fast case: all docs have this field:
wrapper.setDocsWithField(reader, field, new Bits.MatchAllBits(maxDoc));
setDocsWithField = false;
}
}
final TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
try {
while(true) {
final BytesRef term = termsEnum.next();
if (term == null) {
break;
}
final long termval = parser.parseLong(term);
if (retArray == null) {
// late init so numeric fields don't double allocate
retArray = new long[maxDoc];
}
docs = termsEnum.docs(null, docs, false);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
retArray[docID] = termval;
if (setDocsWithField) {
if (docsWithField == null) {
// Lazy init
docsWithField = new FixedBitSet(maxDoc);
}
docsWithField.set(docID);
}
}
}
} catch (StopFillCacheException stop) {
}
}
if (retArray == null) {
// no values
retArray = new long[maxDoc];
}
if (setDocsWithField) {
wrapper.setDocsWithField(reader, field, docsWithField);
}
return retArray;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public double[] getDoubles(AtomicReader reader, String field, boolean setDocsWithField)
throws IOException {
return getDoubles(reader, field, null, setDocsWithField);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public double[] getDoubles(AtomicReader reader, String field, FieldCache.DoubleParser parser, boolean setDocsWithField)
throws IOException {
return (double[]) caches.get(Double.TYPE).get(reader, new Entry(field, parser), setDocsWithField);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField)
throws IOException {
String field = entryKey.field;
FieldCache.DoubleParser parser = (FieldCache.DoubleParser) entryKey.custom;
if (parser == null) {
try {
return wrapper.getDoubles(reader, field, DEFAULT_DOUBLE_PARSER, setDocsWithField);
} catch (NumberFormatException ne) {
return wrapper.getDoubles(reader, field, NUMERIC_UTILS_DOUBLE_PARSER, setDocsWithField);
}
}
final int maxDoc = reader.maxDoc();
double[] retArray = null;
Terms terms = reader.terms(field);
FixedBitSet docsWithField = null;
if (terms != null) {
if (setDocsWithField) {
final int termsDocCount = terms.getDocCount();
assert termsDocCount <= maxDoc;
if (termsDocCount == maxDoc) {
// Fast case: all docs have this field:
wrapper.setDocsWithField(reader, field, new Bits.MatchAllBits(maxDoc));
setDocsWithField = false;
}
}
final TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
try {
while(true) {
final BytesRef term = termsEnum.next();
if (term == null) {
break;
}
final double termval = parser.parseDouble(term);
if (retArray == null) {
// late init so numeric fields don't double allocate
retArray = new double[maxDoc];
}
docs = termsEnum.docs(null, docs, false);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
retArray[docID] = termval;
if (setDocsWithField) {
if (docsWithField == null) {
// Lazy init
docsWithField = new FixedBitSet(maxDoc);
}
docsWithField.set(docID);
}
}
}
} catch (StopFillCacheException stop) {
}
}
if (retArray == null) { // no values
retArray = new double[maxDoc];
}
if (setDocsWithField) {
wrapper.setDocsWithField(reader, field, docsWithField);
}
return retArray;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
// binary search over term ords; ord 0 is reserved for the "unset" entry, so start at 1
int low = 1;
int high = numOrd-1;
while (low <= high) {
int mid = (low + high) >>> 1;
seekExact(mid);
int cmp = term.compareTo(text);
if (cmp < 0)
low = mid + 1;
else if (cmp > 0)
high = mid - 1;
else
return SeekStatus.FOUND; // key found
}
if (low == numOrd) {
return SeekStatus.END;
} else {
seekExact(low);
return SeekStatus.NOT_FOUND;
}
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public void seekExact(long ord) throws IOException {
assert(ord >= 0 && ord <= numOrd);
// TODO: if gap is small, could iterate from current position? Or let user decide that?
currentBlockNumber = bytes.fillAndGetIndex(term, termOrdToBytesOffset.get((int)ord));
end = blockEnds[currentBlockNumber];
currentOrd = (int)ord;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
public BytesRef next() throws IOException {
int start = term.offset + term.length;
if (start >= end) {
// switch byte blocks
if (currentBlockNumber +1 >= blocks.length) {
return null;
}
currentBlockNumber++;
term.bytes = blocks[currentBlockNumber];
end = blockEnds[currentBlockNumber];
start = 0;
if (end<=0) return null; // special case of empty last array
}
currentOrd++;
byte[] block = term.bytes;
if ((block[start] & 128) == 0) {
term.length = block[start];
term.offset = start+1;
} else {
term.length = (((block[start] & 0x7f)) << 8) | (block[1+start] & 0xff);
term.offset = start+2;
}
return term;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
public BytesRef term() throws IOException {
return term;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
public long ord() throws IOException {
return currentOrd;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null && state instanceof OrdTermState;
this.seekExact(((OrdTermState)state).ord);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
public TermState termState() throws IOException {
OrdTermState state = new OrdTermState();
state.ord = currentOrd;
return state;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public DocTermsIndex getTermsIndex(AtomicReader reader, String field) throws IOException {
return getTermsIndex(reader, field, PackedInts.FAST);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public DocTermsIndex getTermsIndex(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException {
return (DocTermsIndex) caches.get(DocTermsIndex.class).get(reader, new Entry(field, acceptableOverheadRatio), false);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField /* ignored */)
throws IOException {
Terms terms = reader.terms(entryKey.field);
final float acceptableOverheadRatio = ((Float) entryKey.custom).floatValue();
final PagedBytes bytes = new PagedBytes(15);
int startBytesBPV;
int startTermsBPV;
int startNumUniqueTerms;
int maxDoc = reader.maxDoc();
final int termCountHardLimit;
if (maxDoc == Integer.MAX_VALUE) {
termCountHardLimit = Integer.MAX_VALUE;
} else {
termCountHardLimit = maxDoc+1;
}
if (terms != null) {
// Try for coarse estimate for number of bits; this
// should be an underestimate most of the time, which
// is fine -- GrowableWriter will reallocate as needed
long numUniqueTerms = 0;
try {
numUniqueTerms = terms.size();
} catch (UnsupportedOperationException uoe) {
numUniqueTerms = -1;
}
if (numUniqueTerms != -1) {
if (numUniqueTerms > termCountHardLimit) {
// app is misusing the API (there is more than
// one term per doc); in this case we make best
// effort to load what we can (see LUCENE-2142)
numUniqueTerms = termCountHardLimit;
}
startBytesBPV = PackedInts.bitsRequired(numUniqueTerms*4);
startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
startNumUniqueTerms = (int) numUniqueTerms;
} else {
startBytesBPV = 1;
startTermsBPV = 1;
startNumUniqueTerms = 1;
}
} else {
startBytesBPV = 1;
startTermsBPV = 1;
startNumUniqueTerms = 1;
}
GrowableWriter termOrdToBytesOffset = new GrowableWriter(startBytesBPV, 1+startNumUniqueTerms, acceptableOverheadRatio);
final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);
// 0 is reserved for "unset"
bytes.copyUsingLengthPrefix(new BytesRef());
int termOrd = 1;
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
while(true) {
final BytesRef term = termsEnum.next();
if (term == null) {
break;
}
if (termOrd >= termCountHardLimit) {
break;
}
if (termOrd == termOrdToBytesOffset.size()) {
// NOTE: this code only runs if the incoming
// reader impl doesn't implement
// size (which should be uncommon)
termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1));
}
termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term));
docs = termsEnum.docs(null, docs, false);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
docToTermOrd.set(docID, termOrd);
}
termOrd++;
}
if (termOrdToBytesOffset.size() > termOrd) {
termOrdToBytesOffset = termOrdToBytesOffset.resize(termOrd);
}
}
// maybe an int-only impl?
return new DocTermsIndexImpl(bytes.freeze(true), termOrdToBytesOffset.getMutable(), docToTermOrd.getMutable(), termOrd);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public DocTerms getTerms(AtomicReader reader, String field) throws IOException {
return getTerms(reader, field, PackedInts.FAST);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public DocTerms getTerms(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException {
return (DocTerms) caches.get(DocTerms.class).get(reader, new Entry(field, acceptableOverheadRatio), false);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField /* ignored */)
throws IOException {
Terms terms = reader.terms(entryKey.field);
final float acceptableOverheadRatio = ((Float) entryKey.custom).floatValue();
final int termCountHardLimit = reader.maxDoc();
// Holds the actual term data, expanded.
final PagedBytes bytes = new PagedBytes(15);
int startBPV;
if (terms != null) {
// Try for coarse estimate for number of bits; this
// should be an underestimate most of the time, which
// is fine -- GrowableWriter will reallocate as needed
long numUniqueTerms = 0;
try {
numUniqueTerms = terms.size();
} catch (UnsupportedOperationException uoe) {
numUniqueTerms = -1;
}
if (numUniqueTerms != -1) {
if (numUniqueTerms > termCountHardLimit) {
numUniqueTerms = termCountHardLimit;
}
startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
} else {
startBPV = 1;
}
} else {
startBPV = 1;
}
final GrowableWriter docToOffset = new GrowableWriter(startBPV, reader.maxDoc(), acceptableOverheadRatio);
// pointer==0 means not set
bytes.copyUsingLengthPrefix(new BytesRef());
if (terms != null) {
int termCount = 0;
final TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
while(true) {
if (termCount++ == termCountHardLimit) {
// app is misusing the API (there is more than
// one term per doc); in this case we make best
// effort to load what we can (see LUCENE-2142)
break;
}
final BytesRef term = termsEnum.next();
if (term == null) {
break;
}
final long pointer = bytes.copyUsingLengthPrefix(term);
docs = termsEnum.docs(null, docs, false);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
docToOffset.set(docID, pointer);
}
}
}
// maybe an int-only impl?
return new DocTermsImpl(bytes.freeze(true), docToOffset.getMutable());
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
public DocTermOrds getDocTermOrds(AtomicReader reader, String field) throws IOException {
return (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new Entry(field, null), false);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java
Override
protected Object createValue(AtomicReader reader, Entry entryKey, boolean setDocsWithField /* ignored */)
throws IOException {
return new DocTermOrds(reader, entryKey.field);
}
// in lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java
Override
public int nextDoc() throws IOException {
return advance(max.doc);
}
// in lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java
Override
public float score() throws IOException {
return docScorer.score(max.doc, freq);
}
// in lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java
private boolean advanceMin(int target) throws IOException {
if (!min.skipTo(target)) {
max.doc = NO_MORE_DOCS; // for further calls to docID()
return false;
}
min = min.next; // cyclic
max = max.next; // cyclic
return true;
}
// in lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java
Override
public int advance(int target) throws IOException {
freq = 0.0f;
if (!advanceMin(target)) {
return NO_MORE_DOCS;
}
boolean restart=false;
while (freq == 0.0f) {
while (min.doc < max.doc || restart) {
restart = false;
if (!advanceMin(max.doc)) {
return NO_MORE_DOCS;
}
}
// found a doc with all of the terms
freq = phraseFreq(); // check for phrase
restart = true;
}
// found a match
return max.doc;
}
// in lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
private int doNext() throws IOException {
int first = 0;
int doc = scorers[scorers.length - 1].docID();
Scorer firstScorer;
// leapfrog: advance whichever scorer lags behind the current candidate doc, cycling
// through the scorers until they all land on the same doc
while ((firstScorer = scorers[first]).docID() < doc) {
doc = firstScorer.advance(doc);
first = first == scorers.length - 1 ? 0 : first + 1;
}
return doc;
}
// in lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
Override
public int advance(int target) throws IOException {
if (lastDoc == NO_MORE_DOCS) {
return lastDoc;
} else if (scorers[(scorers.length - 1)].docID() < target) {
scorers[(scorers.length - 1)].advance(target);
}
return lastDoc = doNext();
}
// in lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
Override
public int nextDoc() throws IOException {
if (lastDoc == NO_MORE_DOCS) {
return lastDoc;
} else if (lastDoc == -1) {
return lastDoc = scorers[scorers.length - 1].docID();
}
scorers[(scorers.length - 1)].nextDoc();
return lastDoc = doNext();
}
// in lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
Override
public float score() throws IOException {
float sum = 0.0f;
for (int i = 0; i < scorers.length; i++) {
sum += scorers[i].score();
}
return sum * coord;
}
// in lucene/core/src/java/org/apache/lucene/search/PrefixQuery.java
Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
TermsEnum tenum = terms.iterator(null);
if (prefix.bytes().length == 0) {
// no prefix -- match all terms for this field:
return tenum;
}
return new PrefixTermsEnum(tenum, prefix.bytes());
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
Override
protected void decRef(IndexSearcher reference) throws IOException {
reference.getIndexReader().decRef();
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
public long updateDocument(Term t, Iterable<? extends IndexableField> d, Analyzer a) throws IOException {
writer.updateDocument(t, d, a);
// Return gen as of when indexing finished:
return indexingGen.get();
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
public long updateDocument(Term t, Iterable<? extends IndexableField> d) throws IOException {
writer.updateDocument(t, d);
// Return gen as of when indexing finished:
return indexingGen.get();
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
public long updateDocuments(Term t, Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer a) throws IOException {
writer.updateDocuments(t, docs, a);
// Return gen as of when indexing finished:
return indexingGen.get();
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
public long updateDocuments(Term t, Iterable<? extends Iterable<? extends IndexableField>> docs) throws IOException {
writer.updateDocuments(t, docs);
// Return gen as of when indexing finished:
return indexingGen.get();
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
public long deleteDocuments(Term t) throws IOException {
writer.deleteDocuments(t);
// Return gen as of when indexing finished:
return indexingGen.get();
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
public long deleteDocuments(Term... terms) throws IOException {
writer.deleteDocuments(terms);
// Return gen as of when indexing finished:
return indexingGen.get();
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
public long deleteDocuments(Query q) throws IOException {
writer.deleteDocuments(q);
// Return gen as of when indexing finished:
return indexingGen.get();
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
public long deleteDocuments(Query... queries) throws IOException {
writer.deleteDocuments(queries);
// Return gen as of when indexing finished:
return indexingGen.get();
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
public long deleteAll() throws IOException {
writer.deleteAll();
// Return gen as of when indexing finished:
return indexingGen.get();
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
public long addDocument(Iterable<? extends IndexableField> d, Analyzer a) throws IOException {
writer.addDocument(d, a);
// Return gen as of when indexing finished:
return indexingGen.get();
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
public long addDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer a) throws IOException {
writer.addDocuments(docs, a);
// Return gen as of when indexing finished:
return indexingGen.get();
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
public long addDocument(Iterable<? extends IndexableField> d) throws IOException {
writer.addDocument(d);
// Return gen as of when indexing finished:
return indexingGen.get();
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
public long addDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs) throws IOException {
writer.addDocuments(docs);
// Return gen as of when indexing finished:
return indexingGen.get();
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
public long addIndexes(Directory... dirs) throws CorruptIndexException, IOException {
writer.addIndexes(dirs);
// Return gen as of when indexing finished:
return indexingGen.get();
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
public long addIndexes(IndexReader... readers) throws CorruptIndexException, IOException {
writer.addIndexes(readers);
// Return gen as of when indexing finished:
return indexingGen.get();
}
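// Hedged usage sketch of the generation contract above: every mutating call returns the
// indexing generation, which a caller can wait on before acquiring a searcher. "doc" is
// an assumed document; waitForGeneration reflects the NRTManager API of this era and is
// shown as an assumption, while acquire()/release() appear in the methods further below.
long gen = nrtManager.addDocument(doc);
nrtManager.waitForGeneration(gen); // block until a searcher covering gen is available
IndexSearcher s = nrtManager.acquire();
try {
  // searches against s now see the document added above
} finally {
  nrtManager.release(s);
}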
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
Override
protected IndexSearcher refreshIfNeeded(IndexSearcher referenceToRefresh) throws IOException {
// Record gen as of when reopen started:
lastRefreshGen = writer.getAndIncrementGeneration();
final IndexReader r = referenceToRefresh.getIndexReader();
assert r instanceof DirectoryReader: "searcher's IndexReader should be a DirectoryReader, but got " + r;
final DirectoryReader dirReader = (DirectoryReader) r;
IndexSearcher newSearcher = null;
if (!dirReader.isCurrent()) {
final IndexReader newReader = DirectoryReader.openIfChanged(dirReader);
if (newReader != null) {
newSearcher = SearcherManager.getSearcher(searcherFactory, newReader);
}
}
return newSearcher;
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
Override
protected synchronized void afterClose() throws IOException {
genLock.lock();
try {
// max it out to make sure nobody can wait on another gen
searchingGen = MAX_SEARCHER_GEN;
newGeneration.signalAll();
} finally {
genLock.unlock();
}
}
// in lucene/core/src/java/org/apache/lucene/search/NRTManager.java
public boolean isSearcherCurrent() throws IOException {
final IndexSearcher searcher = acquire();
try {
final IndexReader r = searcher.getIndexReader();
assert r instanceof DirectoryReader: "searcher's IndexReader should be a DirectoryReader, but got " + r;
return ((DirectoryReader) r).isCurrent();
} finally {
release(searcher);
}
}
// in lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
private ScorerDocQueue initScorerDocQueue() throws IOException {
final ScorerDocQueue docQueue = new ScorerDocQueue(nrScorers);
for (final Scorer se : subScorers) {
if (se.nextDoc() != NO_MORE_DOCS) {
docQueue.insert(se);
}
}
return docQueue;
}
// in lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
Override
public void score(Collector collector) throws IOException {
collector.setScorer(this);
while (nextDoc() != NO_MORE_DOCS) {
collector.collect(currentDoc);
}
}
// in lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
Override
public boolean score(Collector collector, int max, int firstDocID) throws IOException {
// firstDocID is ignored since nextDoc() sets 'currentDoc'
collector.setScorer(this);
while (currentDoc < max) {
collector.collect(currentDoc);
if (nextDoc() == NO_MORE_DOCS) {
return false;
}
}
return true;
}
// in lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
Override
public int nextDoc() throws IOException {
if (scorerDocQueue.size() < minimumNrMatchers || !advanceAfterCurrent()) {
currentDoc = NO_MORE_DOCS;
}
return currentDoc;
}
// in lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
protected boolean advanceAfterCurrent() throws IOException {
do { // repeat until minimum nr of matchers
currentDoc = scorerDocQueue.topDoc();
currentScore = scorerDocQueue.topScore();
nrMatchers = 1;
do { // Until all subscorers are after currentDoc
if (!scorerDocQueue.topNextAndAdjustElsePop()) {
if (scorerDocQueue.size() == 0) {
break; // nothing more to advance, check for last match.
}
}
if (scorerDocQueue.topDoc() != currentDoc) {
break; // All remaining subscorers are after currentDoc.
}
currentScore += scorerDocQueue.topScore();
nrMatchers++;
} while (true);
if (nrMatchers >= minimumNrMatchers) {
return true;
} else if (scorerDocQueue.size() < minimumNrMatchers) {
return false;
}
} while (true);
}
// in lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
Override
public float score() throws IOException { return (float)currentScore; }
// in lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
Override
public int advance(int target) throws IOException {
if (scorerDocQueue.size() < minimumNrMatchers) {
return currentDoc = NO_MORE_DOCS;
}
if (target <= currentDoc) {
return currentDoc;
}
do {
if (scorerDocQueue.topDoc() >= target) {
return advanceAfterCurrent() ? currentDoc : (currentDoc = NO_MORE_DOCS);
} else if (!scorerDocQueue.topSkipToAndAdjustElsePop(target)) {
if (scorerDocQueue.size() < minimumNrMatchers) {
return currentDoc = NO_MORE_DOCS;
}
}
} while (true);
}
// in lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java
protected TermsEnum getTermsEnum(MultiTermQuery query, Terms terms, AttributeSource atts) throws IOException {
return query.getTermsEnum(terms, atts); // allow RewriteMethod subclasses to pull a TermsEnum from the MTQ
}
// in lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java
protected final TermsEnum getTermsEnum(Terms terms) throws IOException {
return getTermsEnum(terms, new AttributeSource());
}
// in lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java
Override
public final Query rewrite(IndexReader reader) throws IOException {
return rewriteMethod.rewrite(reader, this);
}
// in lucene/core/src/java/org/apache/lucene/search/DocIdSet.java
Override
public int advance(int target) throws IOException { return NO_MORE_DOCS; }
// in lucene/core/src/java/org/apache/lucene/search/DocIdSet.java
Override
public int nextDoc() throws IOException { return NO_MORE_DOCS; }
// in lucene/core/src/java/org/apache/lucene/search/DocIdSet.java
Override
public Bits bits() throws IOException {
return null;
}
// in lucene/core/src/java/org/apache/lucene/search/DocIdSet.java
public Bits bits() throws IOException {
return null;
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
Override
public float score() throws IOException {
int doc = docID();
if (doc >= lastScoredDoc) {
if (doc > lastScoredDoc) {
lastDocScore = scorer.score();
lastScoredDoc = doc;
}
coordinator.nrMatchers++;
}
return lastDocScore;
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
Override
public int nextDoc() throws IOException {
return scorer.nextDoc();
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
Override
public int advance(int target) throws IOException {
return scorer.advance(target);
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
private Scorer countingDisjunctionSumScorer(final List<Scorer> scorers,
int minNrShouldMatch) throws IOException {
// each scorer from the list counted as a single matcher
return new DisjunctionSumScorer(weight, scorers, minNrShouldMatch) {
private int lastScoredDoc = -1;
// Save the score of lastScoredDoc, so that we don't compute it more than
// once in score().
private float lastDocScore = Float.NaN;
@Override public float score() throws IOException {
int doc = docID();
if (doc >= lastScoredDoc) {
if (doc > lastScoredDoc) {
lastDocScore = super.score();
lastScoredDoc = doc;
}
coordinator.nrMatchers += super.nrMatchers;
}
return lastDocScore;
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
Override public float score() throws IOException {
int doc = docID();
if (doc >= lastScoredDoc) {
if (doc > lastScoredDoc) {
lastDocScore = super.score();
lastScoredDoc = doc;
}
coordinator.nrMatchers += super.nrMatchers;
}
return lastDocScore;
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
private Scorer countingConjunctionSumScorer(boolean disableCoord,
List<Scorer> requiredScorers) throws IOException {
// each scorer from the list counted as a single matcher
final int requiredNrMatchers = requiredScorers.size();
return new ConjunctionScorer(weight, disableCoord ? 1.0f : ((BooleanWeight)weight).coord(requiredScorers.size(), requiredScorers.size()), requiredScorers) {
private int lastScoredDoc = -1;
// Save the score of lastScoredDoc, so that we don't compute it more than
// once in score().
private float lastDocScore = Float.NaN;
@Override public float score() throws IOException {
int doc = docID();
if (doc >= lastScoredDoc) {
if (doc > lastScoredDoc) {
lastDocScore = super.score();
lastScoredDoc = doc;
}
coordinator.nrMatchers += requiredNrMatchers;
}
// All scorers match, so defaultSimilarity super.score() always has 1 as
// the coordination factor.
// Therefore the sum of the scores of the requiredScorers
// is used as score.
return lastDocScore;
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
Override public float score() throws IOException {
int doc = docID();
if (doc >= lastScoredDoc) {
if (doc > lastScoredDoc) {
lastDocScore = super.score();
lastScoredDoc = doc;
}
coordinator.nrMatchers += requiredNrMatchers;
}
// All scorers match, so defaultSimilarity super.score() always has 1 as
// the coordination factor.
// Therefore the sum of the scores of the requiredScorers
// is used as score.
return lastDocScore;
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
private Scorer dualConjunctionSumScorer(boolean disableCoord,
Scorer req1, Scorer req2) throws IOException { // non counting.
return new ConjunctionScorer(weight, disableCoord ? 1.0f : ((BooleanWeight)weight).coord(2, 2), req1, req2);
// All scorers match, so defaultSimilarity always has 1 as
// the coordination factor.
// Therefore the sum of the scores of two scorers
// is used as score.
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
private Scorer makeCountingSumScorer(boolean disableCoord) throws IOException { // each scorer counted as a single matcher
return (requiredScorers.size() == 0)
? makeCountingSumScorerNoReq(disableCoord)
: makeCountingSumScorerSomeReq(disableCoord);
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
private Scorer makeCountingSumScorerNoReq(boolean disableCoord) throws IOException { // No required scorers
// minNrShouldMatch optional scorers are required, but at least 1
int nrOptRequired = (minNrShouldMatch < 1) ? 1 : minNrShouldMatch;
Scorer requiredCountingSumScorer;
if (optionalScorers.size() > nrOptRequired)
requiredCountingSumScorer = countingDisjunctionSumScorer(optionalScorers, nrOptRequired);
else if (optionalScorers.size() == 1)
requiredCountingSumScorer = new SingleMatchScorer(optionalScorers.get(0));
else {
requiredCountingSumScorer = countingConjunctionSumScorer(disableCoord, optionalScorers);
}
return addProhibitedScorers(requiredCountingSumScorer);
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
private Scorer makeCountingSumScorerSomeReq(boolean disableCoord) throws IOException { // At least one required scorer.
if (optionalScorers.size() == minNrShouldMatch) { // all optional scorers also required.
ArrayList<Scorer> allReq = new ArrayList<Scorer>(requiredScorers);
allReq.addAll(optionalScorers);
return addProhibitedScorers(countingConjunctionSumScorer(disableCoord, allReq));
} else { // optionalScorers.size() > minNrShouldMatch, and at least one required scorer
Scorer requiredCountingSumScorer =
requiredScorers.size() == 1
? new SingleMatchScorer(requiredScorers.get(0))
: countingConjunctionSumScorer(disableCoord, requiredScorers);
if (minNrShouldMatch > 0) { // use a required disjunction scorer over the optional scorers
return addProhibitedScorers(
dualConjunctionSumScorer( // non counting
disableCoord,
requiredCountingSumScorer,
countingDisjunctionSumScorer(
optionalScorers,
minNrShouldMatch)));
} else { // minNrShouldMatch == 0
return new ReqOptSumScorer(
addProhibitedScorers(requiredCountingSumScorer),
optionalScorers.size() == 1
? new SingleMatchScorer(optionalScorers.get(0))
// require 1 in combined, optional scorer.
: countingDisjunctionSumScorer(optionalScorers, 1));
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
private Scorer addProhibitedScorers(Scorer requiredCountingSumScorer) throws IOException
{
return (prohibitedScorers.size() == 0)
? requiredCountingSumScorer // no prohibited
: new ReqExclScorer(requiredCountingSumScorer,
((prohibitedScorers.size() == 1)
? prohibitedScorers.get(0)
: new DisjunctionSumScorer(weight, prohibitedScorers)));
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
Override
public void score(Collector collector) throws IOException {
collector.setScorer(this);
while ((doc = countingSumScorer.nextDoc()) != NO_MORE_DOCS) {
collector.collect(doc);
}
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
Override
public boolean score(Collector collector, int max, int firstDocID) throws IOException {
doc = firstDocID;
collector.setScorer(this);
while (doc < max) {
collector.collect(doc);
doc = countingSumScorer.nextDoc();
}
return doc != NO_MORE_DOCS;
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
Override
public int nextDoc() throws IOException {
return doc = countingSumScorer.nextDoc();
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
Override
public float score() throws IOException {
coordinator.nrMatchers = 0;
float sum = countingSumScorer.score();
return sum * coordinator.coordFactors[coordinator.nrMatchers];
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
Override
public int advance(int target) throws IOException {
return doc = countingSumScorer.advance(target);
}
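// Illustrative sketch (not from BooleanScorer2): the overrides above follow the plain
// DocIdSetIterator contract -- nextDoc()/advance() position the scorer, and score() is only
// valid for the doc just returned. A minimal way to drain any Scorer by hand; the method name
// is made up and imports from org.apache.lucene.search are assumed.
static void drainScorer(Scorer scorer) throws IOException {
int doc;
while ((doc = scorer.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
float score = scorer.score();   // valid only for the doc just returned
System.out.println("doc=" + doc + " score=" + score);
}
}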
// in lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
final boolean next() throws IOException { // increments to next doc
doc = postings.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
return false;
}
return true;
}
// in lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
final boolean skipTo(int target) throws IOException {
doc = postings.advance(target);
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
return false;
}
return true;
}
// in lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
final void firstPosition() throws IOException {
count = postings.freq(); // read first pos
nextPosition();
}
// in lucene/core/src/java/org/apache/lucene/search/PhrasePositions.java
final boolean nextPosition() throws IOException {
if (count-- > 0) { // read subsequent pos's
position = postings.nextPosition() - offset;
return true;
} else
return false;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
public static FieldCacheRangeFilter<String> newStringRange(String field, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
return new FieldCacheRangeFilter<String>(field, null, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
final BytesRef spare = new BytesRef();
final int lowerPoint = fcsi.binarySearchLookup(lowerVal == null ? null : new BytesRef(lowerVal), spare);
final int upperPoint = fcsi.binarySearchLookup(upperVal == null ? null : new BytesRef(upperVal), spare);
final int inclusiveLowerPoint, inclusiveUpperPoint;
// Hints:
// * binarySearchLookup returns 0 if the value was null.
// * the value is <0 if no exact hit was found; the returned value
// is (-(insertion point) - 1)
if (lowerPoint == 0) {
assert lowerVal == null;
inclusiveLowerPoint = 1;
} else if (includeLower && lowerPoint > 0) {
inclusiveLowerPoint = lowerPoint;
} else if (lowerPoint > 0) {
inclusiveLowerPoint = lowerPoint + 1;
} else {
inclusiveLowerPoint = Math.max(1, -lowerPoint - 1);
}
if (upperPoint == 0) {
assert upperVal == null;
inclusiveUpperPoint = Integer.MAX_VALUE;
} else if (includeUpper && upperPoint > 0) {
inclusiveUpperPoint = upperPoint;
} else if (upperPoint > 0) {
inclusiveUpperPoint = upperPoint - 1;
} else {
inclusiveUpperPoint = -upperPoint - 2;
}
if (inclusiveUpperPoint <= 0 || inclusiveLowerPoint > inclusiveUpperPoint)
return DocIdSet.EMPTY_DOCIDSET;
assert inclusiveLowerPoint > 0 && inclusiveUpperPoint > 0;
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected final boolean matchDoc(int doc) {
final int docOrd = fcsi.getOrd(doc);
return docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint;
}
};
}
};
}
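// Illustrative usage sketch for the factory above (field name, bounds, and the searcher are
// made up; assumes a Lucene 4.x-era IndexSearcher and imports from org.apache.lucene.search):
static TopDocs titlesBetween(IndexSearcher searcher) throws IOException {
Filter range = FieldCacheRangeFilter.newStringRange(
"title", "apple", "banana", /*includeLower=*/true, /*includeUpper=*/true);
// FilteredQuery applies the ord-based DocIdSet built above on top of a match-all query.
return searcher.search(new FilteredQuery(new MatchAllDocsQuery(), range), 10);
}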
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final FieldCache.DocTermsIndex fcsi = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
final BytesRef spare = new BytesRef();
final int lowerPoint = fcsi.binarySearchLookup(lowerVal == null ? null : new BytesRef(lowerVal), spare);
final int upperPoint = fcsi.binarySearchLookup(upperVal == null ? null : new BytesRef(upperVal), spare);
final int inclusiveLowerPoint, inclusiveUpperPoint;
// Hints:
// * binarySearchLookup returns 0 if the value was null.
// * the value is <0 if no exact hit was found; the returned value
// is (-(insertion point) - 1)
if (lowerPoint == 0) {
assert lowerVal == null;
inclusiveLowerPoint = 1;
} else if (includeLower && lowerPoint > 0) {
inclusiveLowerPoint = lowerPoint;
} else if (lowerPoint > 0) {
inclusiveLowerPoint = lowerPoint + 1;
} else {
inclusiveLowerPoint = Math.max(1, -lowerPoint - 1);
}
if (upperPoint == 0) {
assert upperVal == null;
inclusiveUpperPoint = Integer.MAX_VALUE;
} else if (includeUpper && upperPoint > 0) {
inclusiveUpperPoint = upperPoint;
} else if (upperPoint > 0) {
inclusiveUpperPoint = upperPoint - 1;
} else {
inclusiveUpperPoint = -upperPoint - 2;
}
if (inclusiveUpperPoint <= 0 || inclusiveLowerPoint > inclusiveUpperPoint)
return DocIdSet.EMPTY_DOCIDSET;
assert inclusiveLowerPoint > 0 && inclusiveUpperPoint > 0;
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected final boolean matchDoc(int doc) {
final int docOrd = fcsi.getOrd(doc);
return docOrd >= inclusiveLowerPoint && docOrd <= inclusiveUpperPoint;
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
public static FieldCacheRangeFilter<Byte> newByteRange(String field, FieldCache.ByteParser parser, Byte lowerVal, Byte upperVal, boolean includeLower, boolean includeUpper) {
return new FieldCacheRangeFilter<Byte>(field, parser, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final byte inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
final byte i = lowerVal.byteValue();
if (!includeLower && i == Byte.MAX_VALUE)
return DocIdSet.EMPTY_DOCIDSET;
inclusiveLowerPoint = (byte) (includeLower ? i : (i + 1));
} else {
inclusiveLowerPoint = Byte.MIN_VALUE;
}
if (upperVal != null) {
final byte i = upperVal.byteValue();
if (!includeUpper && i == Byte.MIN_VALUE)
return DocIdSet.EMPTY_DOCIDSET;
inclusiveUpperPoint = (byte) (includeUpper ? i : (i - 1));
} else {
inclusiveUpperPoint = Byte.MAX_VALUE;
}
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocIdSet.EMPTY_DOCIDSET;
final byte[] values = FieldCache.DEFAULT.getBytes(context.reader(), field, (FieldCache.ByteParser) parser, false);
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
}
};
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final byte inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
final byte i = lowerVal.byteValue();
if (!includeLower && i == Byte.MAX_VALUE)
return DocIdSet.EMPTY_DOCIDSET;
inclusiveLowerPoint = (byte) (includeLower ? i : (i + 1));
} else {
inclusiveLowerPoint = Byte.MIN_VALUE;
}
if (upperVal != null) {
final byte i = upperVal.byteValue();
if (!includeUpper && i == Byte.MIN_VALUE)
return DocIdSet.EMPTY_DOCIDSET;
inclusiveUpperPoint = (byte) (includeUpper ? i : (i - 1));
} else {
inclusiveUpperPoint = Byte.MAX_VALUE;
}
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocIdSet.EMPTY_DOCIDSET;
final byte[] values = FieldCache.DEFAULT.getBytes(context.reader(), field, (FieldCache.ByteParser) parser, false);
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
public static FieldCacheRangeFilter<Short> newShortRange(String field, FieldCache.ShortParser parser, Short lowerVal, Short upperVal, boolean includeLower, boolean includeUpper) {
return new FieldCacheRangeFilter<Short>(field, parser, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final short inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
short i = lowerVal.shortValue();
if (!includeLower && i == Short.MAX_VALUE)
return DocIdSet.EMPTY_DOCIDSET;
inclusiveLowerPoint = (short) (includeLower ? i : (i + 1));
} else {
inclusiveLowerPoint = Short.MIN_VALUE;
}
if (upperVal != null) {
short i = upperVal.shortValue();
if (!includeUpper && i == Short.MIN_VALUE)
return DocIdSet.EMPTY_DOCIDSET;
inclusiveUpperPoint = (short) (includeUpper ? i : (i - 1));
} else {
inclusiveUpperPoint = Short.MAX_VALUE;
}
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocIdSet.EMPTY_DOCIDSET;
final short[] values = FieldCache.DEFAULT.getShorts(context.reader(), field, (FieldCache.ShortParser) parser, false);
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
}
};
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final short inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
short i = lowerVal.shortValue();
if (!includeLower && i == Short.MAX_VALUE)
return DocIdSet.EMPTY_DOCIDSET;
inclusiveLowerPoint = (short) (includeLower ? i : (i + 1));
} else {
inclusiveLowerPoint = Short.MIN_VALUE;
}
if (upperVal != null) {
short i = upperVal.shortValue();
if (!includeUpper && i == Short.MIN_VALUE)
return DocIdSet.EMPTY_DOCIDSET;
inclusiveUpperPoint = (short) (includeUpper ? i : (i - 1));
} else {
inclusiveUpperPoint = Short.MAX_VALUE;
}
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocIdSet.EMPTY_DOCIDSET;
final short[] values = FieldCache.DEFAULT.getShorts(context.reader(), field, (FieldCache.ShortParser) parser, false);
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
public static FieldCacheRangeFilter<Integer> newIntRange(String field, FieldCache.IntParser parser, Integer lowerVal, Integer upperVal, boolean includeLower, boolean includeUpper) {
return new FieldCacheRangeFilter<Integer>(field, parser, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final int inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
int i = lowerVal.intValue();
if (!includeLower && i == Integer.MAX_VALUE)
return DocIdSet.EMPTY_DOCIDSET;
inclusiveLowerPoint = includeLower ? i : (i + 1);
} else {
inclusiveLowerPoint = Integer.MIN_VALUE;
}
if (upperVal != null) {
int i = upperVal.intValue();
if (!includeUpper && i == Integer.MIN_VALUE)
return DocIdSet.EMPTY_DOCIDSET;
inclusiveUpperPoint = includeUpper ? i : (i - 1);
} else {
inclusiveUpperPoint = Integer.MAX_VALUE;
}
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocIdSet.EMPTY_DOCIDSET;
final int[] values = FieldCache.DEFAULT.getInts(context.reader(), field, (FieldCache.IntParser) parser, false);
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
}
};
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final int inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
int i = lowerVal.intValue();
if (!includeLower && i == Integer.MAX_VALUE)
return DocIdSet.EMPTY_DOCIDSET;
inclusiveLowerPoint = includeLower ? i : (i + 1);
} else {
inclusiveLowerPoint = Integer.MIN_VALUE;
}
if (upperVal != null) {
int i = upperVal.intValue();
if (!includeUpper && i == Integer.MIN_VALUE)
return DocIdSet.EMPTY_DOCIDSET;
inclusiveUpperPoint = includeUpper ? i : (i - 1);
} else {
inclusiveUpperPoint = Integer.MAX_VALUE;
}
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocIdSet.EMPTY_DOCIDSET;
final int[] values = FieldCache.DEFAULT.getInts(context.reader(), field, (FieldCache.IntParser) parser, false);
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
public static FieldCacheRangeFilter<Long> newLongRange(String field, FieldCache.LongParser parser, Long lowerVal, Long upperVal, boolean includeLower, boolean includeUpper) {
return new FieldCacheRangeFilter<Long>(field, parser, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final long inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
long i = lowerVal.longValue();
if (!includeLower && i == Long.MAX_VALUE)
return DocIdSet.EMPTY_DOCIDSET;
inclusiveLowerPoint = includeLower ? i : (i + 1L);
} else {
inclusiveLowerPoint = Long.MIN_VALUE;
}
if (upperVal != null) {
long i = upperVal.longValue();
if (!includeUpper && i == Long.MIN_VALUE)
return DocIdSet.EMPTY_DOCIDSET;
inclusiveUpperPoint = includeUpper ? i : (i - 1L);
} else {
inclusiveUpperPoint = Long.MAX_VALUE;
}
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocIdSet.EMPTY_DOCIDSET;
final long[] values = FieldCache.DEFAULT.getLongs(context.reader(), field, (FieldCache.LongParser) parser, false);
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
}
};
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final long inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
long i = lowerVal.longValue();
if (!includeLower && i == Long.MAX_VALUE)
return DocIdSet.EMPTY_DOCIDSET;
inclusiveLowerPoint = includeLower ? i : (i + 1L);
} else {
inclusiveLowerPoint = Long.MIN_VALUE;
}
if (upperVal != null) {
long i = upperVal.longValue();
if (!includeUpper && i == Long.MIN_VALUE)
return DocIdSet.EMPTY_DOCIDSET;
inclusiveUpperPoint = includeUpper ? i : (i - 1L);
} else {
inclusiveUpperPoint = Long.MAX_VALUE;
}
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocIdSet.EMPTY_DOCIDSET;
final long[] values = FieldCache.DEFAULT.getLongs(context.reader(), field, (FieldCache.LongParser) parser, false);
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
public static FieldCacheRangeFilter<Float> newFloatRange(String field, FieldCache.FloatParser parser, Float lowerVal, Float upperVal, boolean includeLower, boolean includeUpper) {
return new FieldCacheRangeFilter<Float>(field, parser, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
// we transform the floating point numbers to sortable integers
// using NumericUtils so it is easier to find the next bigger/lower value
final float inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
float f = lowerVal.floatValue();
if (!includeUpper && f > 0.0f && Float.isInfinite(f))
return DocIdSet.EMPTY_DOCIDSET;
int i = NumericUtils.floatToSortableInt(f);
inclusiveLowerPoint = NumericUtils.sortableIntToFloat( includeLower ? i : (i + 1) );
} else {
inclusiveLowerPoint = Float.NEGATIVE_INFINITY;
}
if (upperVal != null) {
float f = upperVal.floatValue();
if (!includeUpper && f < 0.0f && Float.isInfinite(f))
return DocIdSet.EMPTY_DOCIDSET;
int i = NumericUtils.floatToSortableInt(f);
inclusiveUpperPoint = NumericUtils.sortableIntToFloat( includeUpper ? i : (i - 1) );
} else {
inclusiveUpperPoint = Float.POSITIVE_INFINITY;
}
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocIdSet.EMPTY_DOCIDSET;
final float[] values = FieldCache.DEFAULT.getFloats(context.reader(), field, (FieldCache.FloatParser) parser, false);
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
}
};
}
};
}
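// Illustrative sketch of the sortable-int trick the comment above refers to: NumericUtils
// maps floats to ints that sort in the same order, so an exclusive bound becomes an inclusive
// bound on the adjacent sortable value (the literal 1.5f is made up; assumes
// org.apache.lucene.util.NumericUtils):
static void sortableFloatDemo() {
int sortable = NumericUtils.floatToSortableInt(1.5f);
float nextUp = NumericUtils.sortableIntToFloat(sortable + 1);   // smallest float > 1.5f
float nextDown = NumericUtils.sortableIntToFloat(sortable - 1); // largest float < 1.5f
assert nextUp > 1.5f && nextDown < 1.5f;
}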
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
// we transform the floating point numbers to sortable integers
// using NumericUtils so it is easier to find the next bigger/lower value
final float inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
float f = lowerVal.floatValue();
if (!includeUpper && f > 0.0f && Float.isInfinite(f))
return DocIdSet.EMPTY_DOCIDSET;
int i = NumericUtils.floatToSortableInt(f);
inclusiveLowerPoint = NumericUtils.sortableIntToFloat( includeLower ? i : (i + 1) );
} else {
inclusiveLowerPoint = Float.NEGATIVE_INFINITY;
}
if (upperVal != null) {
float f = upperVal.floatValue();
if (!includeUpper && f < 0.0f && Float.isInfinite(f))
return DocIdSet.EMPTY_DOCIDSET;
int i = NumericUtils.floatToSortableInt(f);
inclusiveUpperPoint = NumericUtils.sortableIntToFloat( includeUpper ? i : (i - 1) );
} else {
inclusiveUpperPoint = Float.POSITIVE_INFINITY;
}
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocIdSet.EMPTY_DOCIDSET;
final float[] values = FieldCache.DEFAULT.getFloats(context.reader(), field, (FieldCache.FloatParser) parser, false);
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
public static FieldCacheRangeFilter<Double> newDoubleRange(String field, FieldCache.DoubleParser parser, Double lowerVal, Double upperVal, boolean includeLower, boolean includeUpper) {
return new FieldCacheRangeFilter<Double>(field, parser, lowerVal, upperVal, includeLower, includeUpper) {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
// we transform the floating point numbers to sortable integers
// using NumericUtils so it is easier to find the next bigger/lower value
final double inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
double f = lowerVal.doubleValue();
if (!includeUpper && f > 0.0 && Double.isInfinite(f))
return DocIdSet.EMPTY_DOCIDSET;
long i = NumericUtils.doubleToSortableLong(f);
inclusiveLowerPoint = NumericUtils.sortableLongToDouble( includeLower ? i : (i + 1L) );
} else {
inclusiveLowerPoint = Double.NEGATIVE_INFINITY;
}
if (upperVal != null) {
double f = upperVal.doubleValue();
if (!includeUpper && f < 0.0 && Double.isInfinite(f))
return DocIdSet.EMPTY_DOCIDSET;
long i = NumericUtils.doubleToSortableLong(f);
inclusiveUpperPoint = NumericUtils.sortableLongToDouble( includeUpper ? i : (i - 1L) );
} else {
inclusiveUpperPoint = Double.POSITIVE_INFINITY;
}
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocIdSet.EMPTY_DOCIDSET;
final double[] values = FieldCache.DEFAULT.getDoubles(context.reader(), field, (FieldCache.DoubleParser) parser, false);
// ignore deleted docs if range doesn't contain 0
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
}
};
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
// we transform the floating point numbers to sortable integers
// using NumericUtils so it is easier to find the next bigger/lower value
final double inclusiveLowerPoint, inclusiveUpperPoint;
if (lowerVal != null) {
double f = lowerVal.doubleValue();
if (!includeUpper && f > 0.0 && Double.isInfinite(f))
return DocIdSet.EMPTY_DOCIDSET;
long i = NumericUtils.doubleToSortableLong(f);
inclusiveLowerPoint = NumericUtils.sortableLongToDouble( includeLower ? i : (i + 1L) );
} else {
inclusiveLowerPoint = Double.NEGATIVE_INFINITY;
}
if (upperVal != null) {
double f = upperVal.doubleValue();
if (!includeUpper && f < 0.0 && Double.isInfinite(f))
return DocIdSet.EMPTY_DOCIDSET;
long i = NumericUtils.doubleToSortableLong(f);
inclusiveUpperPoint = NumericUtils.sortableLongToDouble( includeUpper ? i : (i - 1L) );
} else {
inclusiveUpperPoint = Double.POSITIVE_INFINITY;
}
if (inclusiveLowerPoint > inclusiveUpperPoint)
return DocIdSet.EMPTY_DOCIDSET;
final double[] values = FieldCache.DEFAULT.getDoubles(context.reader(), field, (FieldCache.DoubleParser) parser, false);
// ignore deleted docs if range doesn't contain 0
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected boolean matchDoc(int doc) {
return values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint;
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/FilteredDocIdSet.java
Override
public Bits bits() throws IOException {
final Bits bits = _innerSet.bits();
return (bits == null) ? null : new Bits() {
public boolean get(int docid) {
return bits.get(docid) && FilteredDocIdSet.this.match(docid);
}
public int length() {
return bits.length();
}
};
}
// in lucene/core/src/java/org/apache/lucene/search/FilteredDocIdSet.java
Override
public DocIdSetIterator iterator() throws IOException {
final DocIdSetIterator iterator = _innerSet.iterator();
if (iterator == null) {
return null;
}
return new FilteredDocIdSetIterator(iterator) {
@Override
protected boolean match(int docid) {
return FilteredDocIdSet.this.match(docid);
}
};
}
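// Illustrative sketch of subclassing FilteredDocIdSet: match() is consulted by both bits()
// and iterator() above. The even-doc predicate is purely made up:
static DocIdSet evenDocsOnly(DocIdSet inner) {
return new FilteredDocIdSet(inner) {
@Override
protected boolean match(int docid) {
return (docid & 1) == 0;   // keep only even doc ids
}
};
}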
// in lucene/core/src/java/org/apache/lucene/search/PositiveScoresOnlyCollector.java
Override
public void collect(int doc) throws IOException {
if (scorer.score() > 0) {
c.collect(doc);
}
}
// in lucene/core/src/java/org/apache/lucene/search/PositiveScoresOnlyCollector.java
Override
public void setNextReader(AtomicReaderContext context) throws IOException {
c.setNextReader(context);
}
// in lucene/core/src/java/org/apache/lucene/search/PositiveScoresOnlyCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
// Set a ScoreCachingWrappingScorer in case the wrapped Collector will call
// score() also.
this.scorer = new ScoreCachingWrappingScorer(scorer);
c.setScorer(this.scorer);
}
// in lucene/core/src/java/org/apache/lucene/search/SearcherLifetimeManager.java
Override
public synchronized void close() throws IOException {
searcher.getIndexReader().decRef();
}
// in lucene/core/src/java/org/apache/lucene/search/SearcherLifetimeManager.java
public long record(IndexSearcher searcher) throws IOException {
ensureOpen();
// TODO: we don't have to use IR.getVersion to track;
// could be risky (if it's buggy); we could get better
// bug isolation if we assign our own private ID:
final long version = ((DirectoryReader) searcher.getIndexReader()).getVersion();
SearcherTracker tracker = searchers.get(version);
if (tracker == null) {
//System.out.println("RECORD version=" + version + " ms=" + System.currentTimeMillis());
tracker = new SearcherTracker(searcher);
if (searchers.putIfAbsent(version, tracker) != null) {
// Another thread beat us -- must decRef to undo
// incRef done by SearcherTracker ctor:
tracker.close();
}
} else if (tracker.searcher != searcher) {
throw new IllegalArgumentException("the provided searcher has the same underlying reader version yet the searcher instance differs from before (new=" + searcher + " vs old=" + tracker.searcher + ")");
}
return version;
}
// in lucene/core/src/java/org/apache/lucene/search/SearcherLifetimeManager.java
public void release(IndexSearcher s) throws IOException {
s.getIndexReader().decRef();
}
// in lucene/core/src/java/org/apache/lucene/search/SearcherLifetimeManager.java
public synchronized void prune(Pruner pruner) throws IOException {
// Cannot just pass searchers.values() to ArrayList ctor
// (not thread-safe since the values can change while
// ArrayList is init'ing itself); must instead iterate
// ourselves:
final List<SearcherTracker> trackers = new ArrayList<SearcherTracker>();
for(SearcherTracker tracker : searchers.values()) {
trackers.add(tracker);
}
Collections.sort(trackers);
double lastRecordTimeSec = 0.0;
final double now = System.nanoTime()/NANOS_PER_SEC;
for (SearcherTracker tracker: trackers) {
final double ageSec;
if (lastRecordTimeSec == 0.0) {
ageSec = 0.0;
} else {
ageSec = now - lastRecordTimeSec;
}
// First tracker is always age 0.0 sec, since it's
// still "live"; second tracker's age (= seconds since
// it was "live") is now minus first tracker's
// recordTime, etc:
if (pruner.doPrune(ageSec, tracker.searcher)) {
//System.out.println("PRUNE version=" + tracker.version + " age=" + ageSec + " ms=" + System.currentTimeMillis());
searchers.remove(tracker.version);
tracker.close();
}
lastRecordTimeSec = tracker.recordTimeSec;
}
}
// in lucene/core/src/java/org/apache/lucene/search/SearcherLifetimeManager.java
Override
public synchronized void close() throws IOException {
closed = true;
final List<SearcherTracker> toClose = new ArrayList<SearcherTracker>(searchers.values());
// Remove up front in case exc below, so we don't
// over-decRef on double-close:
for(SearcherTracker tracker : toClose) {
searchers.remove(tracker.version);
}
IOUtils.close(toClose);
// Make some effort to catch mis-use:
if (searchers.size() != 0) {
throw new IllegalStateException("another thread called record while this SearcherLifetimeManager instance was being closed; not all searchers were closed");
}
}
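// Hedged usage sketch for the record/acquire/release/prune lifecycle above (the pruning
// policy and the 'currentSearcher'/'token' plumbing are illustrative; assumes the Lucene
// 4.x-era SearcherLifetimeManager API, including acquire() and PruneByAge):
static void pagingExample(SearcherLifetimeManager mgr, IndexSearcher currentSearcher)
throws IOException {
long token = mgr.record(currentSearcher);     // remember this point-in-time view
// ... later, e.g. when the user asks for the next results page:
IndexSearcher same = mgr.acquire(token);      // null if it was already pruned
if (same != null) {
try {
// run the follow-on search against the same point-in-time view
} finally {
mgr.release(same);
}
}
mgr.prune(new SearcherLifetimeManager.PruneByAge(600.0)); // drop searchers idle ~10 minutes
}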
// in lucene/core/src/java/org/apache/lucene/search/TimeLimitingCollector.java
Override
public void collect(final int doc) throws IOException {
final long time = clock.get();
if (timeout < time) {
if (greedy) {
//System.out.println(this+" greedy: before failing, collecting doc: "+(docBase + doc)+" "+(time-t0));
collector.collect(doc);
}
//System.out.println(this+" failing on: "+(docBase + doc)+" "+(time-t0));
throw new TimeExceededException( timeout-t0, time-t0, docBase + doc );
}
//System.out.println(this+" collecting: "+(docBase + doc)+" "+(time-t0));
collector.collect(doc);
}
// in lucene/core/src/java/org/apache/lucene/search/TimeLimitingCollector.java
Override
public void setNextReader(AtomicReaderContext context) throws IOException {
collector.setNextReader(context);
this.docBase = context.docBase;
if (Long.MIN_VALUE == t0) {
setBaseline();
}
}
// in lucene/core/src/java/org/apache/lucene/search/TimeLimitingCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
collector.setScorer(scorer);
}
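// Hedged usage sketch for the time-limited collection above (the tick budget and query are
// illustrative; assumes the 4.x-era counter-based constructor and getGlobalCounter()):
static TopDocs timeBoxedSearch(IndexSearcher searcher, Query query) throws IOException {
TopScoreDocCollector top = TopScoreDocCollector.create(10, true);
TimeLimitingCollector limited =
new TimeLimitingCollector(top, TimeLimitingCollector.getGlobalCounter(), 1000L);
try {
searcher.search(query, limited);
} catch (TimeLimitingCollector.TimeExceededException e) {
// partial results only; collect() above threw once the budget was exceeded
}
return top.topDocs();
}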
// in lucene/core/src/java/org/apache/lucene/search/TermRangeQuery.java
Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
if (lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
return TermsEnum.EMPTY;
}
TermsEnum tenum = terms.iterator(null);
if ((lowerTerm == null || (includeLower && lowerTerm.length == 0)) && upperTerm == null) {
return tenum;
}
return new TermRangeTermsEnum(tenum,
lowerTerm, upperTerm, includeLower, includeUpper);
}
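// Illustrative construction of a TermRangeQuery that exercises getTermsEnum() above
// (field and bounds are made up; newStringRange is the 4.x-era convenience factory):
static final Query AUTHORS_BA_TO_BZ = TermRangeQuery.newStringRange(
"author", "ba", "bz", /*includeLower=*/true, /*includeUpper=*/false);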
// in lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
Override
public int nextDoc() throws IOException {
if (numScorers == 0) return doc = NO_MORE_DOCS;
while (subScorers[0].docID() == doc) {
if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
heapAdjust(0);
} else {
heapRemoveRoot();
if (numScorers == 0) {
return doc = NO_MORE_DOCS;
}
}
}
return doc = subScorers[0].docID();
}
// in lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
Override
public float score() throws IOException {
int doc = subScorers[0].docID();
scoreSum = scoreMax = subScorers[0].score();
int size = numScorers;
scoreAll(1, size, doc);
scoreAll(2, size, doc);
return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier;
}
// in lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
private void scoreAll(int root, int size, int doc) throws IOException {
if (root < size && subScorers[root].docID() == doc) {
float sub = subScorers[root].score();
scoreSum += sub;
scoreMax = Math.max(scoreMax, sub);
scoreAll((root<<1)+1, size, doc);
scoreAll((root<<1)+2, size, doc);
}
}
// in lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
Override
public int advance(int target) throws IOException {
if (numScorers == 0) return doc = NO_MORE_DOCS;
while (subScorers[0].docID() < target) {
if (subScorers[0].advance(target) != NO_MORE_DOCS) {
heapAdjust(0);
} else {
heapRemoveRoot();
if (numScorers == 0) {
return doc = NO_MORE_DOCS;
}
}
}
return doc = subScorers[0].docID();
}
// in lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
Override
public void collect(int doc) throws IOException {
if (curDocs == null) {
// Cache was too large
cachedScorer.score = scorer.score();
cachedScorer.doc = doc;
other.collect(doc);
return;
}
// Allocate a bigger array or abort caching
if (upto == curDocs.length) {
base += upto;
// Compute next array length - don't allocate too big arrays
int nextLength = 8*curDocs.length;
if (nextLength > MAX_ARRAY_SIZE) {
nextLength = MAX_ARRAY_SIZE;
}
if (base + nextLength > maxDocsToCache) {
// try to allocate a smaller array
nextLength = maxDocsToCache - base;
if (nextLength <= 0) {
// Too many docs to collect -- clear cache
curDocs = null;
curScores = null;
cachedSegs.clear();
cachedDocs.clear();
cachedScores.clear();
cachedScorer.score = scorer.score();
cachedScorer.doc = doc;
other.collect(doc);
return;
}
}
curDocs = new int[nextLength];
cachedDocs.add(curDocs);
curScores = new float[nextLength];
cachedScores.add(curScores);
upto = 0;
}
curDocs[upto] = doc;
cachedScorer.score = curScores[upto] = scorer.score();
upto++;
cachedScorer.doc = doc;
other.collect(doc);
}
// in lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
Override
public void replay(Collector other) throws IOException {
replayInit(other);
int curUpto = 0;
int curBase = 0;
int chunkUpto = 0;
curDocs = EMPTY_INT_ARRAY;
for (SegStart seg : cachedSegs) {
other.setNextReader(seg.readerContext);
other.setScorer(cachedScorer);
while (curBase + curUpto < seg.end) {
if (curUpto == curDocs.length) {
curBase += curDocs.length;
curDocs = cachedDocs.get(chunkUpto);
curScores = cachedScores.get(chunkUpto);
chunkUpto++;
curUpto = 0;
}
cachedScorer.score = curScores[curUpto];
cachedScorer.doc = curDocs[curUpto];
other.collect(curDocs[curUpto++]);
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
other.setScorer(cachedScorer);
}
// in lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
Override
public void collect(int doc) throws IOException {
if (curDocs == null) {
// Cache was too large
other.collect(doc);
return;
}
// Allocate a bigger array or abort caching
if (upto == curDocs.length) {
base += upto;
// Compute next array length - don't allocate too big arrays
int nextLength = 8*curDocs.length;
if (nextLength > MAX_ARRAY_SIZE) {
nextLength = MAX_ARRAY_SIZE;
}
if (base + nextLength > maxDocsToCache) {
// try to allocate a smaller array
nextLength = maxDocsToCache - base;
if (nextLength <= 0) {
// Too many docs to collect -- clear cache
curDocs = null;
cachedSegs.clear();
cachedDocs.clear();
other.collect(doc);
return;
}
}
curDocs = new int[nextLength];
cachedDocs.add(curDocs);
upto = 0;
}
curDocs[upto] = doc;
upto++;
other.collect(doc);
}
// in lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
Override
public void replay(Collector other) throws IOException {
replayInit(other);
int curUpto = 0;
int curbase = 0;
int chunkUpto = 0;
curDocs = EMPTY_INT_ARRAY;
for (SegStart seg : cachedSegs) {
other.setNextReader(seg.readerContext);
while (curbase + curUpto < seg.end) {
if (curUpto == curDocs.length) {
curbase += curDocs.length;
curDocs = cachedDocs.get(chunkUpto);
chunkUpto++;
curUpto = 0;
}
other.collect(curDocs[curUpto++]);
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
other.setScorer(scorer);
}
// in lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
public static CachingCollector create(final boolean acceptDocsOutOfOrder, boolean cacheScores, double maxRAMMB) {
Collector other = new Collector() {
@Override
public boolean acceptsDocsOutOfOrder() {
return acceptDocsOutOfOrder;
}
@Override
public void setScorer(Scorer scorer) throws IOException {}
@Override
public void collect(int doc) throws IOException {}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {}
};
return create(other, cacheScores, maxRAMMB);
}
// in lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {}
// in lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
Override
public void collect(int doc) throws IOException {}
// in lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
Override
public void setNextReader(AtomicReaderContext context) throws IOException {}
// in lucene/core/src/java/org/apache/lucene/search/CachingCollector.java
Override
public void setNextReader(AtomicReaderContext context) throws IOException {
other.setNextReader(context);
if (lastReaderContext != null) {
cachedSegs.add(new SegStart(lastReaderContext, base+upto));
}
lastReaderContext = context;
}
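// Hedged sketch of the cache-then-replay pattern that collect()/replay() above implement
// (collector choices and the RAM budget are illustrative; assumes the 4.x-era
// CachingCollector.create(Collector, boolean, double) and isCached()):
static void cacheAndReplay(IndexSearcher searcher, Query query) throws IOException {
TopScoreDocCollector first = TopScoreDocCollector.create(10, true);
CachingCollector cache = CachingCollector.create(first, /*cacheScores=*/true, /*maxRAMMB=*/64.0);
searcher.search(query, cache);            // collects into 'first' while caching docs (and scores)
if (cache.isCached()) {
TotalHitCountCollector second = new TotalHitCountCollector();
cache.replay(second);                   // replays cached hits without re-running the query
}
}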
// in lucene/core/src/java/org/apache/lucene/search/Sort.java
public Sort rewrite(IndexSearcher searcher) throws IOException {
boolean changed = false;
SortField[] rewrittenSortFields = new SortField[fields.length];
for (int i = 0; i < fields.length; i++) {
rewrittenSortFields[i] = fields[i].rewrite(searcher);
if (fields[i] != rewrittenSortFields[i]) {
changed = true;
}
}
return (changed) ? new Sort(rewrittenSortFields) : this;
}
// in lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
assert !termArrays.isEmpty();
final AtomicReader reader = context.reader();
final Bits liveDocs = acceptDocs;
PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()];
final Terms fieldTerms = reader.terms(field);
if (fieldTerms == null) {
return null;
}
// Reuse single TermsEnum below:
final TermsEnum termsEnum = fieldTerms.iterator(null);
for (int pos=0; pos<postingsFreqs.length; pos++) {
Term[] terms = termArrays.get(pos);
final DocsAndPositionsEnum postingsEnum;
int docFreq;
if (terms.length > 1) {
postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum);
// coarse -- this overcounts since a given doc can
// have more than one term:
docFreq = 0;
for(int termIdx=0;termIdx<terms.length;termIdx++) {
final Term term = terms[termIdx];
TermState termState = termContexts.get(term).get(context.ord);
if (termState == null) {
// Term not in reader
continue;
}
termsEnum.seekExact(term.bytes(), termState);
docFreq += termsEnum.docFreq();
}
if (docFreq == 0) {
// None of the terms are in this reader
return null;
}
} else {
final Term term = terms[0];
TermState termState = termContexts.get(term).get(context.ord);
if (termState == null) {
// Term not in reader
return null;
}
termsEnum.seekExact(term.bytes(), termState);
postingsEnum = termsEnum.docsAndPositions(liveDocs, null, false);
if (postingsEnum == null) {
// term does exist, but has no positions
assert termsEnum.docs(liveDocs, null, false) != null: "termstate found but no term exists in reader";
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
}
docFreq = termsEnum.docFreq();
}
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms);
}
// sort by increasing docFreq order
if (slop == 0) {
ArrayUtil.mergeSort(postingsFreqs);
}
if (slop == 0) {
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactSimScorer(stats, context));
if (s.noDocs) {
return null;
} else {
return s;
}
} else {
return new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppySimScorer(stats, context));
}
}
// in lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context, true, false, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
result.addDetail(scoreExplanation);
result.setValue(scoreExplanation.getValue());
result.setMatch(true);
return result;
}
}
return new ComplexExplanation(false, 0.0f, "no matching term");
}
// in lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new MultiPhraseWeight(searcher);
}
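// Illustrative construction of a MultiPhraseQuery for the weight/scorer above: the phrase
// "microsoft (app OR apps)" with one term at the first position and two alternatives at the
// second (field and terms are made up):
static Query microsoftApps() {
MultiPhraseQuery mpq = new MultiPhraseQuery();
mpq.add(new Term("body", "microsoft"));
mpq.add(new Term[] { new Term("body", "app"), new Term("body", "apps") });
return mpq;
}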
// in lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
Override
public final int nextDoc() throws IOException {
if (_queue.size() == 0) {
return NO_MORE_DOCS;
}
// TODO: move this init into positions(): if the search
// doesn't need the positions for this doc then don't
// waste CPU merging them:
_posList.clear();
_doc = _queue.top().docID();
// merge sort all positions together
DocsAndPositionsEnum postings;
do {
postings = _queue.top();
final int freq = postings.freq();
for (int i = 0; i < freq; i++) {
_posList.add(postings.nextPosition());
}
if (postings.nextDoc() != NO_MORE_DOCS) {
_queue.updateTop();
} else {
_queue.pop();
}
} while (_queue.size() > 0 && _queue.top().docID() == _doc);
_posList.sort();
_freq = _posList.size();
return _doc;
}
// in lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
Override
public final int advance(int target) throws IOException {
while (_queue.top() != null && target > _queue.top().docID()) {
DocsAndPositionsEnum postings = _queue.pop();
if (postings.advance(target) != NO_MORE_DOCS) {
_queue.add(postings);
}
}
return nextDoc();
}
// in lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java
Override
public final int freq() throws IOException {
return _freq;
}
// in lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java
Override
public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException {
final BooleanQuery bq = SCORING_BOOLEAN_QUERY_REWRITE.rewrite(reader, query);
// TODO: if empty boolean query return NullQuery?
if (bq.clauses().isEmpty())
return bq;
// strip the scores off
final Query result = new ConstantScoreQuery(bq);
result.setBoost(query.getBoost());
return result;
}
// in lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java
Override
public final Q rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
final Q result = getTopLevelQuery();
final ParallelArraysTermCollector col = new ParallelArraysTermCollector();
collectTerms(reader, query, col);
final int size = col.terms.size();
if (size > 0) {
final int sort[] = col.terms.sort(col.termsEnum.getComparator());
final float[] boost = col.array.boost;
final TermContext[] termStates = col.array.termState;
for (int i = 0; i < size; i++) {
final int pos = sort[i];
final Term term = new Term(query.getField(), col.terms.get(pos, new BytesRef()));
assert reader.docFreq(term) == termStates[pos].docFreq();
addClause(result, term, termStates[pos].docFreq(), query.getBoost() * boost[pos], termStates[pos]);
}
}
return result;
}
// in lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java
Override
public void setNextEnum(TermsEnum termsEnum) throws IOException {
this.termsEnum = termsEnum;
this.boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
}
// in lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java
Override
public boolean collect(BytesRef bytes) throws IOException {
final int e = terms.add(bytes);
final TermState state = termsEnum.termState();
assert state != null;
if (e < 0 ) {
// duplicate term: update docFreq
final int pos = (-e)-1;
array.termState[pos].register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
assert array.boost[pos] == boostAtt.getBoost() : "boost should be equal in all segment TermsEnums";
} else {
// new entry: we populate the entry initially
array.boost[e] = boostAtt.getBoost();
array.termState[e] = new TermContext(topReaderContext, state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
ScoringRewrite.this.checkMaxClauseCount(terms.size());
}
return true;
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java
Override
public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
// Get the enum and start visiting terms. If we
// exhaust the enum before hitting either of the
// cutoffs, we use ConstantBooleanQueryRewrite; else,
// ConstantFilterRewrite:
final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc());
final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff);
final CutOffTermCollector col = new CutOffTermCollector(docCountCutoff, termCountLimit);
collectTerms(reader, query, col);
final int size = col.pendingTerms.size();
if (col.hasCutOff) {
return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
} else if (size == 0) {
return getTopLevelQuery();
} else {
final BooleanQuery bq = getTopLevelQuery();
final BytesRefHash pendingTerms = col.pendingTerms;
final int sort[] = pendingTerms.sort(col.termsEnum.getComparator());
for(int i = 0; i < size; i++) {
final int pos = sort[i];
// docFreq is not used for constant score here; we pass 1
// to explicitly set a fake value, so it's not calculated
addClause(bq, new Term(query.field, pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]);
}
// Strip scores
final Query result = new ConstantScoreQuery(bq);
result.setBoost(query.getBoost());
return result;
}
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java
Override
public void setNextEnum(TermsEnum termsEnum) throws IOException {
this.termsEnum = termsEnum;
}
// in lucene/core/src/java/org/apache/lucene/search/ConstantScoreAutoRewrite.java
Override
public boolean collect(BytesRef bytes) throws IOException {
int pos = pendingTerms.add(bytes);
docVisitCount += termsEnum.docFreq();
if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
hasCutOff = true;
return false;
}
final TermState termState = termsEnum.termState();
assert termState != null;
if (pos < 0) {
pos = (-pos)-1;
array.termState[pos].register(termState, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
} else {
array.termState[pos] = new TermContext(topReaderContext, termState, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
}
return true;
}
// in lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
Override
public float getValueForNormalization() throws IOException {
float max = 0.0f, sum = 0.0f;
for (Weight currentWeight : weights) {
float sub = currentWeight.getValueForNormalization();
sum += sub;
max = Math.max(max, sub);
}
float boost = getBoost();
return (((sum - max) * tieBreakerMultiplier * tieBreakerMultiplier) + max) * boost * boost;
}
// in lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
Scorer[] scorers = new Scorer[weights.size()];
int idx = 0;
for (Weight w : weights) {
// we will advance() subscorers
Scorer subScorer = w.scorer(context, true, false, acceptDocs);
if (subScorer != null && subScorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
scorers[idx++] = subScorer;
}
}
if (idx == 0) return null; // all scorers did not have documents
DisjunctionMaxScorer result = new DisjunctionMaxScorer(this, tieBreakerMultiplier, scorers, idx);
return result;
}
// in lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
if (disjuncts.size() == 1) return weights.get(0).explain(context,doc);
ComplexExplanation result = new ComplexExplanation();
float max = 0.0f, sum = 0.0f;
result.setDescription(tieBreakerMultiplier == 0.0f ? "max of:" : "max plus " + tieBreakerMultiplier + " times others of:");
for (Weight wt : weights) {
Explanation e = wt.explain(context, doc);
if (e.isMatch()) {
result.setMatch(Boolean.TRUE);
result.addDetail(e);
sum += e.getValue();
max = Math.max(max, e.getValue());
}
}
result.setValue(max + (sum - max) * tieBreakerMultiplier);
return result;
}
// in lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new DisjunctionMaxWeight(searcher);
}
// in lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
int numDisjunctions = disjuncts.size();
if (numDisjunctions == 1) {
Query singleton = disjuncts.get(0);
Query result = singleton.rewrite(reader);
if (getBoost() != 1.0f) {
if (result == singleton) result = result.clone();
result.setBoost(getBoost() * result.getBoost());
}
return result;
}
DisjunctionMaxQuery clone = null;
for (int i = 0 ; i < numDisjunctions; i++) {
Query clause = disjuncts.get(i);
Query rewrite = clause.rewrite(reader);
if (rewrite != clause) {
if (clone == null) clone = this.clone();
clone.disjuncts.set(i, rewrite);
}
}
if (clone != null) return clone;
else return this;
}
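// Illustrative DisjunctionMaxQuery matching the score()/explain() math above: the score is
// the max of the per-field scores plus 0.1 times the rest (fields and tie breaker are made up):
static Query titleOrBody() {
DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(0.1f /*tieBreakerMultiplier*/);
dmq.add(new TermQuery(new Term("title", "lucene")));
dmq.add(new TermQuery(new Term("body", "lucene")));
return dmq;
}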
// in lucene/core/src/java/org/apache/lucene/search/FieldCacheTermsFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final FieldCache.DocTermsIndex fcsi = getFieldCache().getTermsIndex(context.reader(), field);
final FixedBitSet bits = new FixedBitSet(fcsi.numOrd());
final BytesRef spare = new BytesRef();
for (int i=0;i<terms.length;i++) {
int termNumber = fcsi.binarySearchLookup(terms[i], spare);
if (termNumber > 0) {
bits.set(termNumber);
}
}
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected final boolean matchDoc(int doc) {
return bits.get(fcsi.getOrd(doc));
}
};
}
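// Illustrative usage of FieldCacheTermsFilter: restrict hits to a small, fixed set of values
// of a single-token field, resolved through the same FieldCache ords used above (field and
// values are made up; assumes a 4.x-era IndexSearcher):
static TopDocs inSelectedCountries(IndexSearcher searcher) throws IOException {
Filter countries = new FieldCacheTermsFilter("country", "DE", "FR", "NL");
return searcher.search(new FilteredQuery(new MatchAllDocsQuery(), countries), 10);
}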
// in lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
Override
public int nextDoc() throws IOException {
while(true) {
// first (rarest) term
final int doc = chunkStates[0].posEnum.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
docID = doc;
return doc;
}
// not-first terms
int i = 1;
while(i < chunkStates.length) {
final ChunkState cs = chunkStates[i];
int doc2 = cs.posEnum.docID();
if (cs.useAdvance) {
if (doc2 < doc) {
doc2 = cs.posEnum.advance(doc);
}
} else {
int iter = 0;
while(doc2 < doc) {
// safety net -- fallback to .advance if we've
// done too many .nextDocs
if (++iter == 50) {
doc2 = cs.posEnum.advance(doc);
break;
} else {
doc2 = cs.posEnum.nextDoc();
}
}
}
if (doc2 > doc) {
break;
}
i++;
}
if (i == chunkStates.length) {
// this doc has all the terms -- now test whether
// phrase occurs
docID = doc;
freq = phraseFreq();
if (freq != 0) {
return docID;
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
Override
public int advance(int target) throws IOException {
// first term
int doc = chunkStates[0].posEnum.advance(target);
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
docID = DocIdSetIterator.NO_MORE_DOCS;
return doc;
}
while(true) {
// not-first terms
int i = 1;
while(i < chunkStates.length) {
int doc2 = chunkStates[i].posEnum.docID();
if (doc2 < doc) {
doc2 = chunkStates[i].posEnum.advance(doc);
}
if (doc2 > doc) {
break;
}
i++;
}
if (i == chunkStates.length) {
// this doc has all the terms -- now test whether
// phrase occurs
docID = doc;
freq = phraseFreq();
if (freq != 0) {
return docID;
}
}
doc = chunkStates[0].posEnum.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
docID = doc;
return doc;
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
Override
public float score() throws IOException {
return docScorer.score(docID, freq);
}
// in lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
private int phraseFreq() throws IOException {
freq = 0;
// init chunks
for(int i=0;i<chunkStates.length;i++) {
final ChunkState cs = chunkStates[i];
cs.posLimit = cs.posEnum.freq();
cs.pos = cs.offset + cs.posEnum.nextPosition();
cs.posUpto = 1;
cs.lastPos = -1;
}
int chunkStart = 0;
int chunkEnd = CHUNK;
// process chunk by chunk
boolean end = false;
// TODO: we could fold in chunkStart into offset and
// save one subtract per pos incr
while(!end) {
gen++;
if (gen == 0) {
// wraparound
Arrays.fill(gens, 0);
gen++;
}
// first term
{
final ChunkState cs = chunkStates[0];
while(cs.pos < chunkEnd) {
if (cs.pos > cs.lastPos) {
cs.lastPos = cs.pos;
final int posIndex = cs.pos - chunkStart;
counts[posIndex] = 1;
assert gens[posIndex] != gen;
gens[posIndex] = gen;
}
if (cs.posUpto == cs.posLimit) {
end = true;
break;
}
cs.posUpto++;
cs.pos = cs.offset + cs.posEnum.nextPosition();
}
}
// middle terms
boolean any = true;
for(int t=1;t<endMinus1;t++) {
final ChunkState cs = chunkStates[t];
any = false;
while(cs.pos < chunkEnd) {
if (cs.pos > cs.lastPos) {
cs.lastPos = cs.pos;
final int posIndex = cs.pos - chunkStart;
if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == t) {
// viable
counts[posIndex]++;
any = true;
}
}
if (cs.posUpto == cs.posLimit) {
end = true;
break;
}
cs.posUpto++;
cs.pos = cs.offset + cs.posEnum.nextPosition();
}
if (!any) {
break;
}
}
if (!any) {
// petered out for this chunk
chunkStart += CHUNK;
chunkEnd += CHUNK;
continue;
}
// last term
{
final ChunkState cs = chunkStates[endMinus1];
while(cs.pos < chunkEnd) {
if (cs.pos > cs.lastPos) {
cs.lastPos = cs.pos;
final int posIndex = cs.pos - chunkStart;
if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == endMinus1) {
freq++;
}
}
if (cs.posUpto == cs.posLimit) {
end = true;
break;
}
cs.posUpto++;
cs.pos = cs.offset + cs.posEnum.nextPosition();
}
}
chunkStart += CHUNK;
chunkEnd += CHUNK;
}
return freq;
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void collect(int doc) throws IOException {
++totalHits;
if (queueFull) {
if ((reverseMul * comparator.compareBottom(doc)) <= 0) {
// since docs are visited in doc Id order, if compare is 0, it means
// this document is larger than anything else in the queue, and
// therefore not competitive.
return;
}
// This hit is competitive - replace bottom element in queue & adjustTop
comparator.copy(bottom.slot, doc);
updateBottom(doc);
comparator.setBottom(bottom.slot);
} else {
// Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1;
// Copy hit into queue
comparator.copy(slot, doc);
add(slot, doc, Float.NaN);
if (queueFull) {
comparator.setBottom(bottom.slot);
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void setNextReader(AtomicReaderContext context) throws IOException {
this.docBase = context.docBase;
queue.setComparator(0, comparator.setNextReader(context));
comparator = queue.firstComparator;
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
comparator.setScorer(scorer);
}
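// Hedged sketch of creating the collector whose collect() variants appear above (sort field,
// sizes and flags are illustrative; assumes the 4.x-era six-argument create() signature):
static TopDocs newestTwenty(IndexSearcher searcher, Query query) throws IOException {
Sort sort = new Sort(new SortField("date", SortField.Type.LONG, /*reverse=*/true));
TopFieldCollector tfc = TopFieldCollector.create(
sort, 20, /*fillFields=*/true, /*trackDocScores=*/false,
/*trackMaxScore=*/false, /*docsScoredInOrder=*/true);
searcher.search(query, tfc);
return tfc.topDocs();
}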
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void collect(int doc) throws IOException {
++totalHits;
if (queueFull) {
// Fastmatch: return if this hit is not competitive
final int cmp = reverseMul * comparator.compareBottom(doc);
if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.doc)) {
return;
}
// This hit is competitive - replace bottom element in queue & adjustTop
comparator.copy(bottom.slot, doc);
updateBottom(doc);
comparator.setBottom(bottom.slot);
} else {
// Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1;
// Copy hit into queue
comparator.copy(slot, doc);
add(slot, doc, Float.NaN);
if (queueFull) {
comparator.setBottom(bottom.slot);
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void collect(int doc) throws IOException {
++totalHits;
if (queueFull) {
if ((reverseMul * comparator.compareBottom(doc)) <= 0) {
// since docs are visited in doc Id order, if compare is 0, it means
// this document is larger than anything else in the queue, and
// therefore not competitive.
return;
}
// Compute the score only if the hit is competitive.
final float score = scorer.score();
// This hit is competitive - replace bottom element in queue & adjustTop
comparator.copy(bottom.slot, doc);
updateBottom(doc, score);
comparator.setBottom(bottom.slot);
} else {
// Compute the score only if the hit is competitive.
final float score = scorer.score();
// Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1;
// Copy hit into queue
comparator.copy(slot, doc);
add(slot, doc, score);
if (queueFull) {
comparator.setBottom(bottom.slot);
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
comparator.setScorer(scorer);
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void collect(int doc) throws IOException {
++totalHits;
if (queueFull) {
// Fastmatch: return if this hit is not competitive
final int cmp = reverseMul * comparator.compareBottom(doc);
if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.doc)) {
return;
}
// Compute the score only if the hit is competitive.
final float score = scorer.score();
// This hit is competitive - replace bottom element in queue & adjustTop
comparator.copy(bottom.slot, doc);
updateBottom(doc, score);
comparator.setBottom(bottom.slot);
} else {
// Compute the score only if the hit is competitive.
final float score = scorer.score();
// Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1;
// Copy hit into queue
comparator.copy(slot, doc);
add(slot, doc, score);
if (queueFull) {
comparator.setBottom(bottom.slot);
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void collect(int doc) throws IOException {
final float score = scorer.score();
if (score > maxScore) {
maxScore = score;
}
++totalHits;
if (queueFull) {
if ((reverseMul * comparator.compareBottom(doc)) <= 0) {
// since docs are visited in doc Id order, if compare is 0, it means
// this document is larger than anything else in the queue, and
// therefore not competitive.
return;
}
// This hit is competitive - replace bottom element in queue & adjustTop
comparator.copy(bottom.slot, doc);
updateBottom(doc, score);
comparator.setBottom(bottom.slot);
} else {
// Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1;
// Copy hit into queue
comparator.copy(slot, doc);
add(slot, doc, score);
if (queueFull) {
comparator.setBottom(bottom.slot);
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
super.setScorer(scorer);
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void collect(int doc) throws IOException {
final float score = scorer.score();
if (score > maxScore) {
maxScore = score;
}
++totalHits;
if (queueFull) {
// Fastmatch: return if this hit is not competitive
final int cmp = reverseMul * comparator.compareBottom(doc);
if (cmp < 0 || (cmp == 0 && doc + docBase > bottom.doc)) {
return;
}
// This hit is competitive - replace bottom element in queue & adjustTop
comparator.copy(bottom.slot, doc);
updateBottom(doc, score);
comparator.setBottom(bottom.slot);
} else {
// Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1;
// Copy hit into queue
comparator.copy(slot, doc);
add(slot, doc, score);
if (queueFull) {
comparator.setBottom(bottom.slot);
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void collect(int doc) throws IOException {
++totalHits;
if (queueFull) {
// Fastmatch: return if this hit is not competitive
for (int i = 0;; i++) {
final int c = reverseMul[i] * comparators[i].compareBottom(doc);
if (c < 0) {
// Definitely not competitive.
return;
} else if (c > 0) {
// Definitely competitive.
break;
} else if (i == comparators.length - 1) {
// Here c=0. If we're at the last comparator, this doc is not
// competitive, since docs are visited in doc Id order, which means
// this doc cannot compete with any other document in the queue.
return;
}
}
// This hit is competitive - replace bottom element in queue & adjustTop
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(bottom.slot, doc);
}
updateBottom(doc);
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
} else {
// Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1;
// Copy hit into queue
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(slot, doc);
}
add(slot, doc, Float.NaN);
if (queueFull) {
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void setNextReader(AtomicReaderContext context) throws IOException {
docBase = context.docBase;
for (int i = 0; i < comparators.length; i++) {
queue.setComparator(i, comparators[i].setNextReader(context));
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
// set the scorer on all comparators
for (int i = 0; i < comparators.length; i++) {
comparators[i].setScorer(scorer);
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void collect(int doc) throws IOException {
++totalHits;
if (queueFull) {
// Fastmatch: return if this hit is not competitive
for (int i = 0;; i++) {
final int c = reverseMul[i] * comparators[i].compareBottom(doc);
if (c < 0) {
// Definitely not competitive.
return;
} else if (c > 0) {
// Definitely competitive.
break;
} else if (i == comparators.length - 1) {
// This is the equals case.
if (doc + docBase > bottom.doc) {
// Definitely not competitive
return;
}
break;
}
}
// This hit is competitive - replace bottom element in queue & adjustTop
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(bottom.slot, doc);
}
updateBottom(doc);
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
} else {
// Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1;
// Copy hit into queue
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(slot, doc);
}
add(slot, doc, Float.NaN);
if (queueFull) {
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void collect(int doc) throws IOException {
final float score = scorer.score();
if (score > maxScore) {
maxScore = score;
}
++totalHits;
if (queueFull) {
// Fastmatch: return if this hit is not competitive
for (int i = 0;; i++) {
final int c = reverseMul[i] * comparators[i].compareBottom(doc);
if (c < 0) {
// Definitely not competitive.
return;
} else if (c > 0) {
// Definitely competitive.
break;
} else if (i == comparators.length - 1) {
// Here c=0. If we're at the last comparator, this doc is not
// competitive, since docs are visited in doc Id order, which means
// this doc cannot compete with any other document in the queue.
return;
}
}
// This hit is competitive - replace bottom element in queue & adjustTop
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(bottom.slot, doc);
}
updateBottom(doc, score);
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
} else {
// Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1;
// Copy hit into queue
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(slot, doc);
}
add(slot, doc, score);
if (queueFull) {
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
super.setScorer(scorer);
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void collect(int doc) throws IOException {
final float score = scorer.score();
if (score > maxScore) {
maxScore = score;
}
++totalHits;
if (queueFull) {
// Fastmatch: return if this hit is not competitive
for (int i = 0;; i++) {
final int c = reverseMul[i] * comparators[i].compareBottom(doc);
if (c < 0) {
// Definitely not competitive.
return;
} else if (c > 0) {
// Definitely competitive.
break;
} else if (i == comparators.length - 1) {
// This is the equals case.
if (doc + docBase > bottom.doc) {
// Definitely not competitive
return;
}
break;
}
}
// This hit is competitive - replace bottom element in queue & adjustTop
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(bottom.slot, doc);
}
updateBottom(doc, score);
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
} else {
// Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1;
// Copy hit into queue
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(slot, doc);
}
add(slot, doc, score);
if (queueFull) {
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void collect(int doc) throws IOException {
++totalHits;
if (queueFull) {
// Fastmatch: return if this hit is not competitive
for (int i = 0;; i++) {
final int c = reverseMul[i] * comparators[i].compareBottom(doc);
if (c < 0) {
// Definitely not competitive.
return;
} else if (c > 0) {
// Definitely competitive.
break;
} else if (i == comparators.length - 1) {
// Here c=0. If we're at the last comparator, this doc is not
// competitive, since docs are visited in doc Id order, which means
// this doc cannot compete with any other document in the queue.
return;
}
}
// This hit is competitive - replace bottom element in queue & adjustTop
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(bottom.slot, doc);
}
// Compute score only if it is competitive.
final float score = scorer.score();
updateBottom(doc, score);
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
} else {
// Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1;
// Copy hit into queue
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(slot, doc);
}
// Compute score only if it is competitive.
final float score = scorer.score();
add(slot, doc, score);
if (queueFull) {
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
super.setScorer(scorer);
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void collect(int doc) throws IOException {
++totalHits;
if (queueFull) {
// Fastmatch: return if this hit is not competitive
for (int i = 0;; i++) {
final int c = reverseMul[i] * comparators[i].compareBottom(doc);
if (c < 0) {
// Definitely not competitive.
return;
} else if (c > 0) {
// Definitely competitive.
break;
} else if (i == comparators.length - 1) {
// This is the equals case.
if (doc + docBase > bottom.doc) {
// Definitely not competitive
return;
}
break;
}
}
// This hit is competitive - replace bottom element in queue & adjustTop
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(bottom.slot, doc);
}
// Compute score only if it is competitive.
final float score = scorer.score();
updateBottom(doc, score);
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
} else {
// Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1;
// Copy hit into queue
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(slot, doc);
}
// Compute score only if it is competitive.
final float score = scorer.score();
add(slot, doc, score);
if (queueFull) {
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
super.setScorer(scorer);
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void collect(int doc) throws IOException {
totalHits++;
//System.out.println(" collect doc=" + doc);
// Check if this hit was already collected on a
// previous page:
boolean sameValues = true;
for(int compIDX=0;compIDX<comparators.length;compIDX++) {
final FieldComparator comp = comparators[compIDX];
final int cmp = reverseMul[compIDX] * comp.compareDocToValue(doc, after.fields[compIDX]);
if (cmp < 0) {
// Already collected on a previous page
//System.out.println(" skip: before");
return;
} else if (cmp > 0) {
// Not yet collected
sameValues = false;
//System.out.println(" keep: after");
break;
}
}
// Tie-break by docID:
if (sameValues && doc <= afterDoc) {
// Already collected on a previous page
//System.out.println(" skip: tie-break");
return;
}
collectedHits++;
float score = Float.NaN;
if (trackMaxScore) {
score = scorer.score();
if (score > maxScore) {
maxScore = score;
}
}
if (queueFull) {
// Fastmatch: return if this hit is not competitive
for (int i = 0;; i++) {
final int c = reverseMul[i] * comparators[i].compareBottom(doc);
if (c < 0) {
// Definitely not competitive.
return;
} else if (c > 0) {
// Definitely competitive.
break;
} else if (i == comparators.length - 1) {
// This is the equals case.
if (doc + docBase > bottom.doc) {
// Definitely not competitive
return;
}
break;
}
}
// This hit is competitive - replace bottom element in queue & adjustTop
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(bottom.slot, doc);
}
// Compute score only if it is competitive.
if (trackDocScores && !trackMaxScore) {
score = scorer.score();
}
updateBottom(doc, score);
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
} else {
// Startup transient: queue hasn't gathered numHits yet
final int slot = collectedHits - 1;
//System.out.println(" slot=" + slot);
// Copy hit into queue
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(slot, doc);
}
// Compute score only if it is competitive.
if (trackDocScores && !trackMaxScore) {
score = scorer.score();
}
bottom = pq.add(new Entry(slot, docBase + doc, score));
queueFull = collectedHits == numHits;
if (queueFull) {
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
for (int i = 0; i < comparators.length; i++) {
comparators[i].setScorer(scorer);
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
Override
public void setNextReader(AtomicReaderContext context) throws IOException {
docBase = context.docBase;
afterDoc = after.doc - docBase;
for (int i = 0; i < comparators.length; i++) {
queue.setComparator(i, comparators[i].setNextReader(context));
}
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
public static TopFieldCollector create(Sort sort, int numHits,
boolean fillFields, boolean trackDocScores, boolean trackMaxScore,
boolean docsScoredInOrder)
throws IOException {
return create(sort, numHits, null, fillFields, trackDocScores, trackMaxScore, docsScoredInOrder);
}
// in lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
public static TopFieldCollector create(Sort sort, int numHits, FieldDoc after,
boolean fillFields, boolean trackDocScores, boolean trackMaxScore,
boolean docsScoredInOrder)
throws IOException {
if (sort.fields.length == 0) {
throw new IllegalArgumentException("Sort must contain at least one field");
}
if (numHits <= 0) {
throw new IllegalArgumentException("numHits must be > 0; please use TotalHitCountCollector if you just need the total hit count");
}
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
if (after == null) {
if (queue.getComparators().length == 1) {
if (docsScoredInOrder) {
if (trackMaxScore) {
return new OneComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
} else if (trackDocScores) {
return new OneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
} else {
return new OneComparatorNonScoringCollector(queue, numHits, fillFields);
}
} else {
if (trackMaxScore) {
return new OutOfOrderOneComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
} else if (trackDocScores) {
return new OutOfOrderOneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
} else {
return new OutOfOrderOneComparatorNonScoringCollector(queue, numHits, fillFields);
}
}
}
// multiple comparators.
if (docsScoredInOrder) {
if (trackMaxScore) {
return new MultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
} else if (trackDocScores) {
return new MultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
} else {
return new MultiComparatorNonScoringCollector(queue, numHits, fillFields);
}
} else {
if (trackMaxScore) {
return new OutOfOrderMultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
} else if (trackDocScores) {
return new OutOfOrderMultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
} else {
return new OutOfOrderMultiComparatorNonScoringCollector(queue, numHits, fillFields);
}
}
} else {
if (after.fields == null) {
throw new IllegalArgumentException("after.fields wasn't set; you must pass fillFields=true for the previous search");
}
if (after.fields.length != sort.getSort().length) {
throw new IllegalArgumentException("after.fields has " + after.fields.length + " values but sort has " + sort.getSort().length);
}
return new PagingFieldCollector(queue, after, numHits, fillFields, trackDocScores, trackMaxScore);
}
}
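// added illustrative sketch (not from the extracted sources): a minimal, hedged example of driving the
// TopFieldCollector.create(...) factory above through IndexSearcher; the field name "title" and the helper
// name are hypothetical, and Lucene 4.x-era APIs are assumed.
static TopDocs sortedSearch(IndexSearcher searcher, Query query) throws IOException {
  Sort sort = new Sort(new SortField("title", SortField.Type.STRING));
  // fillFields=true so FieldDoc.fields is populated (needed later for searchAfter paging);
  // scores and max score are not tracked, and docs are assumed to arrive in docID order.
  TopFieldCollector collector = TopFieldCollector.create(sort, 10, true, false, false, true);
  searcher.search(query, collector);
  return collector.topDocs();
}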
// in lucene/core/src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java
Override
public int nextDoc() throws IOException {
while ((doc = _innerIter.nextDoc()) != NO_MORE_DOCS) {
if (match(doc)) {
return doc;
}
}
return doc;
}
// in lucene/core/src/java/org/apache/lucene/search/FilteredDocIdSetIterator.java
Override
public int advance(int target) throws IOException {
doc = _innerIter.advance(target);
if (doc != NO_MORE_DOCS) {
if (match(doc)) {
return doc;
} else {
while ((doc = _innerIter.nextDoc()) != NO_MORE_DOCS) {
if (match(doc)) {
return doc;
}
}
return doc;
}
}
return doc;
}
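// added illustrative sketch (not from the extracted sources): subclassing FilteredDocIdSetIterator as shown
// above only requires implementing match(int); here an anonymous subclass filters a FixedBitSet-backed
// iterator. The bit set contents are arbitrary illustration data.
static void filteredIterationDemo() throws IOException {
  FixedBitSet bits = new FixedBitSet(8);
  bits.set(2); bits.set(3); bits.set(5);
  DocIdSetIterator filtered = new FilteredDocIdSetIterator(bits.iterator()) {
    @Override
    protected boolean match(int doc) {
      return (doc & 1) == 1; // keep only odd docIDs
    }
  };
  int doc;
  while ((doc = filtered.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    System.out.println("kept doc " + doc); // prints 3 and 5
  }
}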
// in lucene/core/src/java/org/apache/lucene/search/TopDocs.java
public static TopDocs merge(Sort sort, int topN, TopDocs[] shardHits) throws IOException {
final PriorityQueue<ShardRef> queue;
if (sort == null) {
queue = new ScoreMergeSortQueue(shardHits);
} else {
queue = new MergeSortQueue(sort, shardHits);
}
int totalHitCount = 0;
int availHitCount = 0;
float maxScore = Float.MIN_VALUE;
for(int shardIDX=0;shardIDX<shardHits.length;shardIDX++) {
final TopDocs shard = shardHits[shardIDX];
// totalHits can be non-zero even if no hits were
// collected, when searchAfter was used:
totalHitCount += shard.totalHits;
if (shard.scoreDocs != null && shard.scoreDocs.length > 0) {
availHitCount += shard.scoreDocs.length;
queue.add(new ShardRef(shardIDX));
maxScore = Math.max(maxScore, shard.getMaxScore());
//System.out.println(" maxScore now " + maxScore + " vs " + shard.getMaxScore());
}
}
if (availHitCount == 0) {
maxScore = Float.NaN;
}
final ScoreDoc[] hits = new ScoreDoc[Math.min(topN, availHitCount)];
int hitUpto = 0;
while(hitUpto < hits.length) {
assert queue.size() > 0;
ShardRef ref = queue.pop();
final ScoreDoc hit = shardHits[ref.shardIndex].scoreDocs[ref.hitIndex++];
hit.shardIndex = ref.shardIndex;
hits[hitUpto] = hit;
//System.out.println(" hitUpto=" + hitUpto);
//System.out.println(" doc=" + hits[hitUpto].doc + " score=" + hits[hitUpto].score);
hitUpto++;
if (ref.hitIndex < shardHits[ref.shardIndex].scoreDocs.length) {
// Not done with these TopDocs yet:
queue.add(ref);
}
}
if (sort == null) {
return new TopDocs(totalHitCount, hits, maxScore);
} else {
return new TopFieldDocs(totalHitCount, hits, sort.getSort(), maxScore);
}
}
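// added illustrative sketch (not from the extracted sources): TopDocs.merge(...) above combines
// already-sorted per-shard results; with sort == null it merges by score. The hand-built TopDocs below
// are placeholder data, purely to show the call shape and the shardIndex bookkeeping.
static void mergeDemo() throws IOException {
  TopDocs shard0 = new TopDocs(2, new ScoreDoc[] { new ScoreDoc(4, 2.0f), new ScoreDoc(7, 1.0f) }, 2.0f);
  TopDocs shard1 = new TopDocs(1, new ScoreDoc[] { new ScoreDoc(3, 1.5f) }, 1.5f);
  TopDocs merged = TopDocs.merge(null, 3, new TopDocs[] { shard0, shard1 });
  for (ScoreDoc sd : merged.scoreDocs) {
    // doc is shard-local; shardIndex records which shard it came from
    System.out.println("shard=" + sd.shardIndex + " doc=" + sd.doc + " score=" + sd.score);
  }
}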
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
Override
public void collect(final int doc) throws IOException {
final BucketTable table = bucketTable;
final int i = doc & BucketTable.MASK;
final Bucket bucket = table.buckets[i];
if (bucket.doc != doc) { // invalid bucket
bucket.doc = doc; // set doc
bucket.score = scorer.score(); // initialize score
bucket.bits = mask; // initialize mask
bucket.coord = 1; // initialize coord
bucket.next = table.first; // push onto valid list
table.first = bucket;
} else { // valid bucket
bucket.score += scorer.score(); // increment score
bucket.bits |= mask; // add bits in mask
bucket.coord++; // increment coord
}
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
Override
public int advance(int target) throws IOException { return NO_MORE_DOCS; }
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
Override
public int nextDoc() throws IOException { return NO_MORE_DOCS; }
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
Override
public float score() throws IOException { return score; }
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
Override
public boolean score(Collector collector, int max, int firstDocID) throws IOException {
// Make sure it's only BooleanScorer that calls us:
assert firstDocID == -1;
boolean more;
Bucket tmp;
BucketScorer bs = new BucketScorer(weight);
// The internal loop will set the score and doc before calling collect.
collector.setScorer(bs);
do {
bucketTable.first = null;
while (current != null) { // more queued
// check prohibited & required
if ((current.bits & PROHIBITED_MASK) == 0) {
// TODO: re-enable this if BQ ever sends us required
// clauses
//&& (current.bits & requiredMask) == requiredMask) {
// TODO: can we remove this?
if (current.doc >= max){
tmp = current;
current = current.next;
tmp.next = bucketTable.first;
bucketTable.first = tmp;
continue;
}
if (current.coord >= minNrShouldMatch) {
bs.score = current.score * coordFactors[current.coord];
bs.doc = current.doc;
bs.freq = current.coord;
collector.collect(current.doc);
}
}
current = current.next; // pop the queue
}
if (bucketTable.first != null){
current = bucketTable.first;
bucketTable.first = current.next;
return true;
}
// refill the queue
more = false;
end += BucketTable.SIZE;
for (SubScorer sub = scorers; sub != null; sub = sub.next) {
int subScorerDocID = sub.scorer.docID();
if (subScorerDocID != NO_MORE_DOCS) {
more |= sub.scorer.score(sub.collector, end, subScorerDocID);
}
}
current = bucketTable.first;
} while (current != null || more);
return false;
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
Override
public int advance(int target) throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
Override
public int nextDoc() throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
Override
public void score(Collector collector) throws IOException {
score(collector, Integer.MAX_VALUE, -1);
}
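// added illustrative sketch (not from the extracted sources): the coord >= minNrShouldMatch check in
// BooleanScorer above corresponds to BooleanQuery.setMinimumNumberShouldMatch; a hedged example of a
// query that only matches documents containing at least two of three optional terms (field name
// "body" is hypothetical):
static BooleanQuery atLeastTwoOfThree() {
  BooleanQuery bq = new BooleanQuery();
  bq.add(new TermQuery(new Term("body", "quick")), BooleanClause.Occur.SHOULD);
  bq.add(new TermQuery(new Term("body", "brown")), BooleanClause.Occur.SHOULD);
  bq.add(new TermQuery(new Term("body", "fox")), BooleanClause.Occur.SHOULD);
  bq.setMinimumNumberShouldMatch(2);
  return bq;
}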
// in lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
Override
public int nextDoc() throws IOException {
doc++;
while(liveDocs != null && doc < maxDoc && !liveDocs.get(doc)) {
doc++;
}
if (doc == maxDoc) {
doc = NO_MORE_DOCS;
}
return doc;
}
// in lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
Override
public int advance(int target) throws IOException {
doc = target-1;
return nextDoc();
}
// in lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
return new MatchAllScorer(context.reader(), acceptDocs, this, queryWeight);
}
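// added illustrative sketch (not from the extracted sources): MatchAllDocsQuery simply walks every live
// document, as the nextDoc() loop above shows; a minimal use is fetching the first page of all
// (non-deleted) documents, e.g. for browsing an index:
static TopDocs firstPageOfEverything(IndexSearcher searcher) throws IOException {
  return searcher.search(new MatchAllDocsQuery(), 10);
}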
// in lucene/core/src/java/org/apache/lucene/search/FuzzyQuery.java
Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
if (maxEdits == 0 || prefixLength >= term.text().length()) { // can only match if it's exact
return new SingleTermsEnum(terms.iterator(null), term.bytes());
}
return new FuzzyTermsEnum(terms, atts, getTerm(), maxEdits, prefixLength, transpositions);
}
// in lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
return compiled.getTermsEnum(terms);
}
// in lucene/core/src/java/org/apache/lucene/search/SortField.java
public FieldComparator<?> getComparator(final int numHits, final int sortPos) throws IOException {
switch (type) {
case SCORE:
return new FieldComparator.RelevanceComparator(numHits);
case DOC:
return new FieldComparator.DocComparator(numHits);
case INT:
if (useIndexValues) {
return new FieldComparator.IntDocValuesComparator(numHits, field);
} else {
return new FieldComparator.IntComparator(numHits, field, parser, (Integer) missingValue);
}
case FLOAT:
if (useIndexValues) {
return new FieldComparator.FloatDocValuesComparator(numHits, field);
} else {
return new FieldComparator.FloatComparator(numHits, field, parser, (Float) missingValue);
}
case LONG:
return new FieldComparator.LongComparator(numHits, field, parser, (Long) missingValue);
case DOUBLE:
return new FieldComparator.DoubleComparator(numHits, field, parser, (Double) missingValue);
case BYTE:
return new FieldComparator.ByteComparator(numHits, field, parser, (Byte) missingValue);
case SHORT:
return new FieldComparator.ShortComparator(numHits, field, parser, (Short) missingValue);
case CUSTOM:
assert comparatorSource != null;
return comparatorSource.newComparator(field, numHits, sortPos, reverse);
case STRING:
if (useIndexValues) {
return new FieldComparator.TermOrdValDocValuesComparator(numHits, field);
} else {
return new FieldComparator.TermOrdValComparator(numHits, field);
}
case STRING_VAL:
if (useIndexValues) {
return new FieldComparator.TermValDocValuesComparator(numHits, field);
} else {
return new FieldComparator.TermValComparator(numHits, field);
}
case REWRITEABLE:
throw new IllegalStateException("SortField needs to be rewritten through Sort.rewrite(..) and SortField.rewrite(..)");
default:
throw new IllegalStateException("Illegal sort type: " + type);
}
}
// in lucene/core/src/java/org/apache/lucene/search/SortField.java
public SortField rewrite(IndexSearcher searcher) throws IOException {
return this;
}
// in lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java
private int doNext(int doc) throws IOException {
do {
if (lead.doc == DocIdSetIterator.NO_MORE_DOCS) {
return NO_MORE_DOCS;
}
advanceHead: do {
for (int i = 1; i < docsAndFreqs.length; i++) {
if (docsAndFreqs[i].doc < doc) {
docsAndFreqs[i].doc = docsAndFreqs[i].docs.advance(doc);
}
if (docsAndFreqs[i].doc > doc) {
// DocsEnum beyond the current doc - break and advance lead
break advanceHead;
}
}
// success - all DocsEnums are on the same doc
return doc;
} while (true);
// advance head for next iteration
doc = lead.doc = lead.docs.nextDoc();
} while (true);
}
// in lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java
Override
public int advance(int target) throws IOException {
lead.doc = lead.docs.advance(target);
return lastDoc = doNext(lead.doc);
}
// in lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java
Override
public int nextDoc() throws IOException {
lead.doc = lead.docs.nextDoc();
return lastDoc = doNext(lead.doc);
}
// in lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java
Override
public float score() throws IOException {
float sum = 0.0f;
for (DocsAndFreqs docs : docsAndFreqs) {
sum += docs.docScorer.score(lastDoc, docs.docs.freq());
}
return sum * coord;
}
// in lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new PayloadNearSpanWeight(this, searcher);
}
// in lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this,
similarity, similarity.sloppySimScorer(stats, context));
}
// in lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
PayloadNearSpanScorer scorer = (PayloadNearSpanScorer) scorer(context, true, false, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
Explanation expl = new Explanation();
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
expl.addDetail(scoreExplanation);
expl.setValue(scoreExplanation.getValue());
// now the payloads part
Explanation payloadExpl = function.explain(doc, scorer.payloadsSeen, scorer.payloadScore);
// combined
ComplexExplanation result = new ComplexExplanation();
result.addDetail(expl);
result.addDetail(payloadExpl);
result.setValue(expl.getValue() * payloadExpl.getValue());
result.setDescription("PayloadNearQuery, product of:");
return result;
}
}
return new ComplexExplanation(false, 0.0f, "no matching term");
}
// in lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
public void getPayloads(Spans[] subSpans) throws IOException {
for (int i = 0; i < subSpans.length; i++) {
if (subSpans[i] instanceof NearSpansOrdered) {
if (((NearSpansOrdered) subSpans[i]).isPayloadAvailable()) {
processPayloads(((NearSpansOrdered) subSpans[i]).getPayload(),
subSpans[i].start(), subSpans[i].end());
}
getPayloads(((NearSpansOrdered) subSpans[i]).getSubSpans());
} else if (subSpans[i] instanceof NearSpansUnordered) {
if (((NearSpansUnordered) subSpans[i]).isPayloadAvailable()) {
processPayloads(((NearSpansUnordered) subSpans[i]).getPayload(),
subSpans[i].start(), subSpans[i].end());
}
getPayloads(((NearSpansUnordered) subSpans[i]).getSubSpans());
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
Override
protected boolean setFreqCurrentDoc() throws IOException {
if (!more) {
return false;
}
doc = spans.doc();
freq = 0.0f;
payloadScore = 0;
payloadsSeen = 0;
do {
int matchLength = spans.end() - spans.start();
freq += docScorer.computeSlopFactor(matchLength);
Spans[] spansArr = new Spans[1];
spansArr[0] = spans;
getPayloads(spansArr);
more = spans.next();
} while (more && (doc == spans.doc()));
return true;
}
// in lucene/core/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java
Override
public float score() throws IOException {
return super.score()
* function.docScore(doc, fieldName, payloadsSeen, payloadScore);
}
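// added illustrative sketch (not from the extracted sources): building the PayloadNearQuery whose
// weight/scorer/explain methods appear above. It only pays off if payloads were indexed (e.g. via a
// payload-producing TokenFilter); the field name and terms are hypothetical, Lucene 4.x-era APIs assumed.
static PayloadNearQuery nearWithPayloads() {
  SpanQuery[] clauses = new SpanQuery[] {
      new SpanTermQuery(new Term("body", "strong")),
      new SpanTermQuery(new Term("body", "signal"))
  };
  // slop 3, in order; payload scores of the matching spans are combined by the query's PayloadFunction
  return new PayloadNearQuery(clauses, 3, true);
}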
// in lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java
public Collection<byte[]> getPayloadsForQuery(Query query) throws IOException {
Collection<byte[]> payloads = new ArrayList<byte[]>();
queryToSpanQuery(query, payloads);
return payloads;
}
// in lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java
private void queryToSpanQuery(Query query, Collection<byte[]> payloads)
throws IOException {
if (query instanceof BooleanQuery) {
BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses();
for (int i = 0; i < queryClauses.length; i++) {
if (!queryClauses[i].isProhibited()) {
queryToSpanQuery(queryClauses[i].getQuery(), payloads);
}
}
} else if (query instanceof PhraseQuery) {
Term[] phraseQueryTerms = ((PhraseQuery) query).getTerms();
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
for (int i = 0; i < phraseQueryTerms.length; i++) {
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
}
int slop = ((PhraseQuery) query).getSlop();
boolean inorder = false;
if (slop == 0) {
inorder = true;
}
SpanNearQuery sp = new SpanNearQuery(clauses, slop, inorder);
sp.setBoost(query.getBoost());
getPayloads(payloads, sp);
} else if (query instanceof TermQuery) {
SpanTermQuery stq = new SpanTermQuery(((TermQuery) query).getTerm());
stq.setBoost(query.getBoost());
getPayloads(payloads, stq);
} else if (query instanceof SpanQuery) {
getPayloads(payloads, (SpanQuery) query);
} else if (query instanceof FilteredQuery) {
queryToSpanQuery(((FilteredQuery) query).getQuery(), payloads);
} else if (query instanceof DisjunctionMaxQuery) {
for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator
.hasNext();) {
queryToSpanQuery(iterator.next(), payloads);
}
} else if (query instanceof MultiPhraseQuery) {
final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
final List<Term[]> termArrays = mpq.getTermArrays();
final int[] positions = mpq.getPositions();
if (positions.length > 0) {
int maxPosition = positions[positions.length - 1];
for (int i = 0; i < positions.length - 1; ++i) {
if (positions[i] > maxPosition) {
maxPosition = positions[i];
}
}
@SuppressWarnings({"rawtypes","unchecked"}) final List<Query>[] disjunctLists =
new List[maxPosition + 1];
int distinctPositions = 0;
for (int i = 0; i < termArrays.size(); ++i) {
final Term[] termArray = termArrays.get(i);
List<Query> disjuncts = disjunctLists[positions[i]];
if (disjuncts == null) {
disjuncts = (disjunctLists[positions[i]] = new ArrayList<Query>(
termArray.length));
++distinctPositions;
}
for (final Term term : termArray) {
disjuncts.add(new SpanTermQuery(term));
}
}
int positionGaps = 0;
int position = 0;
final SpanQuery[] clauses = new SpanQuery[distinctPositions];
for (int i = 0; i < disjunctLists.length; ++i) {
List<Query> disjuncts = disjunctLists[i];
if (disjuncts != null) {
clauses[position++] = new SpanOrQuery(disjuncts
.toArray(new SpanQuery[disjuncts.size()]));
} else {
++positionGaps;
}
}
final int slop = mpq.getSlop();
final boolean inorder = (slop == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps,
inorder);
sp.setBoost(query.getBoost());
getPayloads(payloads, sp);
}
}
// in lucene/core/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java
private void getPayloads(Collection<byte []> payloads, SpanQuery query)
throws IOException {
Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
TreeSet<Term> terms = new TreeSet<Term>();
query.extractTerms(terms);
for (Term term : terms) {
termContexts.put(term, TermContext.build(context, term, true));
}
final AtomicReaderContext[] leaves = context.leaves();
for (AtomicReaderContext atomicReaderContext : leaves) {
final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader().getLiveDocs(), termContexts);
while (spans.next()) {
if (spans.isPayloadAvailable()) {
Collection<byte[]> payload = spans.getPayload();
for (byte [] bytes : payload) {
payloads.add(bytes);
}
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new PayloadTermWeight(this, searcher);
}
// in lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
this, similarity.sloppySimScorer(stats, context));
}
// in lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
Override
protected boolean setFreqCurrentDoc() throws IOException {
if (!more) {
return false;
}
doc = spans.doc();
freq = 0.0f;
payloadScore = 0;
payloadsSeen = 0;
while (more && doc == spans.doc()) {
int matchLength = spans.end() - spans.start();
freq += docScorer.computeSlopFactor(matchLength);
processPayload(similarity);
more = spans.next(); // this moves positions to the next match in this document
}
return more || (freq != 0);
}
// in lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
protected void processPayload(Similarity similarity) throws IOException {
final DocsAndPositionsEnum postings = termSpans.getPostings();
if (postings.hasPayload()) {
payload = postings.getPayload();
if (payload != null) {
payloadScore = function.currentScore(doc, term.field(),
spans.start(), spans.end(), payloadsSeen, payloadScore,
docScorer.computePayloadFactor(doc, spans.start(), spans.end(), payload));
} else {
payloadScore = function.currentScore(doc, term.field(),
spans.start(), spans.end(), payloadsSeen, payloadScore, 1F);
}
payloadsSeen++;
} else {
// zero out the payload?
}
}
// in lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
Override
public float score() throws IOException {
return includeSpanScore ? getSpanScore() * getPayloadScore()
: getPayloadScore();
}
// in lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
protected float getSpanScore() throws IOException {
return super.score();
}
// in lucene/core/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java
Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
PayloadTermSpanScorer scorer = (PayloadTermSpanScorer) scorer(context, true, false, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
Explanation expl = new Explanation();
expl.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
expl.addDetail(scoreExplanation);
expl.setValue(scoreExplanation.getValue());
// now the payloads part
// QUESTION: Is there a way to avoid this skipTo call? We need to know
// whether to load the payload or not
// GSI: I suppose we could toString the payload, but I don't think that
// would be a good idea
Explanation payloadExpl = new Explanation(scorer.getPayloadScore(), "scorePayload(...)");
payloadExpl.setValue(scorer.getPayloadScore());
// combined
ComplexExplanation result = new ComplexExplanation();
if (includeSpanScore) {
result.addDetail(expl);
result.addDetail(payloadExpl);
result.setValue(expl.getValue() * payloadExpl.getValue());
result.setDescription("btq, product of:");
} else {
result.addDetail(payloadExpl);
result.setValue(payloadExpl.getValue());
result.setDescription("btq(includeSpanScore=false), result of:");
}
result.setMatch(true); // LUCENE-1303
return result;
}
}
return new ComplexExplanation(false, 0.0f, "no matching term");
}
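// added illustrative sketch (not from the extracted sources): constructing the PayloadTermQuery that the
// scorer/explain code above serves. AveragePayloadFunction averages the decoded payload scores;
// includeSpanScore=false keeps only the payload part, mirroring the "btq(includeSpanScore=false)" branch
// of the explanation. Field and term are hypothetical.
static PayloadTermQuery payloadOnlyQuery() {
  return new PayloadTermQuery(new Term("body", "boosted"),
      new AveragePayloadFunction(), /* includeSpanScore = */ false);
}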
// in lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
protected TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm)
throws IOException {
final List<CompiledAutomaton> runAutomata = initAutomata(editDistance);
if (editDistance < runAutomata.size()) {
//if (BlockTreeTermsWriter.DEBUG) System.out.println("FuzzyTE.getAEnum: ed=" + editDistance + " lastTerm=" + (lastTerm==null ? "null" : lastTerm.utf8ToString()));
final CompiledAutomaton compiled = runAutomata.get(editDistance);
return new AutomatonFuzzyTermsEnum(terms.intersect(compiled, lastTerm == null ? null : compiled.floor(lastTerm, new BytesRef())),
runAutomata.subList(0, editDistance + 1).toArray(new CompiledAutomaton[editDistance + 1]));
} else {
return null;
}
}
// in lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
private void bottomChanged(BytesRef lastTerm, boolean init)
throws IOException {
int oldMaxEdits = maxEdits;
// true if the last term encountered is lexicographically equal or after the bottom term in the PQ
boolean termAfter = bottomTerm == null || (lastTerm != null && termComparator.compare(lastTerm, bottomTerm) >= 0);
// as long as the max non-competitive boost is >= the max boost
// for some edit distance, keep dropping the max edit distance.
while (maxEdits > 0 && (termAfter ? bottom >= calculateMaxBoost(maxEdits) : bottom > calculateMaxBoost(maxEdits)))
maxEdits--;
if (oldMaxEdits != maxEdits || init) { // the maximum n has changed
maxEditDistanceChanged(lastTerm, maxEdits, init);
}
}
// in lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
protected void maxEditDistanceChanged(BytesRef lastTerm, int maxEdits, boolean init)
throws IOException {
TermsEnum newEnum = getAutomatonEnum(maxEdits, lastTerm);
// instead of assert, we do a hard check in case someone uses our enum directly
// assert newEnum != null;
if (newEnum == null) {
assert maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
throw new IllegalArgumentException("maxEdits cannot be > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE");
}
setEnum(newEnum);
}
// in lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
Override
public BytesRef next() throws IOException {
if (queuedBottom != null) {
bottomChanged(queuedBottom, false);
queuedBottom = null;
}
BytesRef term = actualEnum.next();
boostAtt.setBoost(actualBoostAtt.getBoost());
final float bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
final BytesRef bottomTerm = maxBoostAtt.getCompetitiveTerm();
if (term != null && (bottom != this.bottom || bottomTerm != this.bottomTerm)) {
this.bottom = bottom;
this.bottomTerm = bottomTerm;
// clone the term before potentially doing something with it
// this is a rare but wonderful occurrence anyway
queuedBottom = BytesRef.deepCopyOf(term);
}
return term;
}
// in lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
Override
public int docFreq() throws IOException {
return actualEnum.docFreq();
}
// in lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
Override
public long totalTermFreq() throws IOException {
return actualEnum.totalTermFreq();
}
// in lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
return actualEnum.docs(liveDocs, reuse, needsFreqs);
}
// in lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
return actualEnum.docsAndPositions(liveDocs, reuse, needsOffsets);
}
// in lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
Override
public void seekExact(BytesRef term, TermState state) throws IOException {
actualEnum.seekExact(term, state);
}
// in lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
Override
public TermState termState() throws IOException {
return actualEnum.termState();
}
// in lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
Override
public long ord() throws IOException {
return actualEnum.ord();
}
// in lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
Override
public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
return actualEnum.seekExact(text, useCache);
}
// in lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
Override
public SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException {
return actualEnum.seekCeil(text, useCache);
}
// in lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
Override
public void seekExact(long ord) throws IOException {
actualEnum.seekExact(ord);
}
// in lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
Override
public BytesRef term() throws IOException {
return actualEnum.term();
}
// in lucene/core/src/java/org/apache/lucene/search/FieldValueFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs)
throws IOException {
final Bits docsWithField = FieldCache.DEFAULT.getDocsWithField(
context.reader(), field);
if (negate) {
if (docsWithField instanceof MatchAllBits) {
return null;
}
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected final boolean matchDoc(int doc) {
return !docsWithField.get(doc);
}
};
} else {
if (docsWithField instanceof MatchNoBits) {
return null;
}
if (docsWithField instanceof DocIdSet) {
// UweSays: this is always the case for our current impl - but who knows
// :-)
return BitsFilteredDocIdSet.wrap((DocIdSet) docsWithField, acceptDocs);
}
return new FieldCacheDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected final boolean matchDoc(int doc) {
return docsWithField.get(doc);
}
};
}
}
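// added illustrative sketch (not from the extracted sources): FieldValueFilter above keeps documents that
// have (or, with negate=true, lack) an indexed value for a field, based on FieldCache's docs-with-field
// bits. The field name "price" is hypothetical.
static TopDocs onlyDocsWithPrice(IndexSearcher searcher, Query query) throws IOException {
  Filter hasPrice = new FieldValueFilter("price");          // docs that have a value for "price"
  // Filter noPrice = new FieldValueFilter("price", true);  // negate=true: docs missing the field
  return searcher.search(query, hasPrice, 10);
}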
// in lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
Override
protected float phraseFreq() throws IOException {
if (!initPhrasePositions()) {
return 0.0f;
}
float freq = 0.0f;
PhrasePositions pp = pq.pop();
int matchLength = end - pp.position;
int next = pq.top().position;
while (advancePP(pp)) {
if (hasRpts && !advanceRpts(pp)) {
break; // pps exhausted
}
if (pp.position > next) { // done minimizing current match-length
if (matchLength <= slop) {
freq += docScorer.computeSlopFactor(matchLength); // score match
}
pq.add(pp);
pp = pq.pop();
next = pq.top().position;
matchLength = end - pp.position;
} else {
int matchLength2 = end - pp.position;
if (matchLength2 < matchLength) {
matchLength = matchLength2;
}
}
}
if (matchLength <= slop) {
freq += docScorer.computeSlopFactor(matchLength); // score match
}
return freq;
}
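// added illustrative sketch (not from the extracted sources): SloppyPhraseScorer above is what runs for a
// PhraseQuery with a non-zero slop; a hedged example of such a query (field and terms hypothetical):
static PhraseQuery sloppyPhrase() {
  PhraseQuery pq = new PhraseQuery();
  pq.add(new Term("body", "quick"));
  pq.add(new Term("body", "fox"));
  pq.setSlop(2); // allow up to 2 position moves between the terms
  return pq;
}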
// in lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
private boolean advancePP(PhrasePositions pp) throws IOException {
if (!pp.nextPosition()) {
return false;
}
if (pp.position > end) {
end = pp.position;
}
return true;
}
// in lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
private boolean advanceRpts(PhrasePositions pp) throws IOException {
if (pp.rptGroup < 0) {
return true; // not a repeater
}
PhrasePositions[] rg = rptGroups[pp.rptGroup];
OpenBitSet bits = new OpenBitSet(rg.length); // for re-queuing after collisions are resolved
int k0 = pp.rptInd;
int k;
while((k=collide(pp)) >= 0) {
pp = lesser(pp, rg[k]); // always advance the lesser of the (only) two colliding pps
if (!advancePP(pp)) {
return false; // exhausted
}
if (k != k0) { // careful: mark only those currently in the queue
bits.set(k); // mark that pp2 needs to be re-queued
}
}
// collisions resolved, now re-queue
// empty (partially) the queue until seeing all pps advanced for resolving collisions
int n = 0;
while (bits.cardinality() > 0) {
PhrasePositions pp2 = pq.pop();
rptStack[n++] = pp2;
if (pp2.rptGroup >= 0 && bits.get(pp2.rptInd)) {
bits.clear(pp2.rptInd);
}
}
// add back to queue
for (int i=n-1; i>=0; i--) {
pq.add(rptStack[i]);
}
return true;
}
// in lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
private boolean initPhrasePositions() throws IOException {
end = Integer.MIN_VALUE;
if (!checkedRpts) {
return initFirstTime();
}
if (!hasRpts) {
initSimple();
return true; // PPs available
}
return initComplex();
}
// in lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
private void initSimple() throws IOException {
//System.err.println("initSimple: doc: "+min.doc);
pq.clear();
// position pps and build queue from list
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
pp.firstPosition();
if (pp.position > end) {
end = pp.position;
}
pq.add(pp);
}
}
// in lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
private boolean initComplex() throws IOException {
//System.err.println("initComplex: doc: "+min.doc);
placeFirstPositions();
if (!advanceRepeatGroups()) {
return false; // PPs exhausted
}
fillQueue();
return true; // PPs available
}
// in lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
private void placeFirstPositions() throws IOException {
for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max
pp.firstPosition();
}
}
// in lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
private boolean advanceRepeatGroups() throws IOException {
for (PhrasePositions[] rg: rptGroups) {
if (hasMultiTermRpts) {
// more involved, some may not collide
int incr;
for (int i=0; i<rg.length; i+=incr) {
incr = 1;
PhrasePositions pp = rg[i];
int k;
while((k=collide(pp)) >= 0) {
PhrasePositions pp2 = lesser(pp, rg[k]);
if (!advancePP(pp2)) { // at initialization always advance pp with higher offset
return false; // exhausted
}
if (pp2.rptInd < i) { // should not happen?
incr = 0;
break;
}
}
}
} else {
// simpler, we know exactly how much to advance
for (int j=1; j<rg.length; j++) {
for (int k=0; k<j; k++) {
if (!rg[j].nextPosition()) {
return false; // PPs exhausted
}
}
}
}
}
return true; // PPs available
}
// in lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
private boolean initFirstTime() throws IOException {
//System.err.println("initFirstTime: doc: "+min.doc);
checkedRpts = true;
placeFirstPositions();
LinkedHashMap<Term,Integer> rptTerms = repeatingTerms();
hasRpts = !rptTerms.isEmpty();
if (hasRpts) {
rptStack = new PhrasePositions[numPostings]; // needed with repetitions
ArrayList<ArrayList<PhrasePositions>> rgs = gatherRptGroups(rptTerms);
sortRptGroups(rgs);
if (!advanceRepeatGroups()) {
return false; // PPs exhausted
}
}
fillQueue();
return true; // PPs available
}
// in lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
private ArrayList<ArrayList<PhrasePositions>> gatherRptGroups(LinkedHashMap<Term,Integer> rptTerms) throws IOException {
PhrasePositions[] rpp = repeatingPPs(rptTerms);
ArrayList<ArrayList<PhrasePositions>> res = new ArrayList<ArrayList<PhrasePositions>>();
if (!hasMultiTermRpts) {
// simpler - no multi-terms - can base on positions in first doc
for (int i=0; i<rpp.length; i++) {
PhrasePositions pp = rpp[i];
if (pp.rptGroup >=0) continue; // already marked as a repetition
int tpPos = tpPos(pp);
for (int j=i+1; j<rpp.length; j++) {
PhrasePositions pp2 = rpp[j];
if (
pp2.rptGroup >=0 // already marked as a repetition
|| pp2.offset == pp.offset // not a repetition: two PPs are originally in same offset in the query!
|| tpPos(pp2) != tpPos) { // not a repetition
continue;
}
// a repetition
int g = pp.rptGroup;
if (g < 0) {
g = res.size();
pp.rptGroup = g;
ArrayList<PhrasePositions> rl = new ArrayList<PhrasePositions>(2);
rl.add(pp);
res.add(rl);
}
pp2.rptGroup = g;
res.get(g).add(pp2);
}
}
} else {
// more involved - has multi-terms
ArrayList<HashSet<PhrasePositions>> tmp = new ArrayList<HashSet<PhrasePositions>>();
ArrayList<OpenBitSet> bb = ppTermsBitSets(rpp, rptTerms);
unionTermGroups(bb);
HashMap<Term,Integer> tg = termGroups(rptTerms, bb);
HashSet<Integer> distinctGroupIDs = new HashSet<Integer>(tg.values());
for (int i=0; i<distinctGroupIDs.size(); i++) {
tmp.add(new HashSet<PhrasePositions>());
}
for (PhrasePositions pp : rpp) {
for (Term t: pp.terms) {
if (rptTerms.containsKey(t)) {
int g = tg.get(t);
tmp.get(g).add(pp);
assert pp.rptGroup==-1 || pp.rptGroup==g;
pp.rptGroup = g;
}
}
}
for (HashSet<PhrasePositions> hs : tmp) {
res.add(new ArrayList<PhrasePositions>(hs));
}
}
return res;
}
// in lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
private HashMap<Term,Integer> termGroups(LinkedHashMap<Term,Integer> tord, ArrayList<OpenBitSet> bb) throws IOException {
HashMap<Term,Integer> tg = new HashMap<Term,Integer>();
Term[] t = tord.keySet().toArray(new Term[0]);
for (int i=0; i<bb.size(); i++) { // i is the group no.
DocIdSetIterator bits = bb.get(i).iterator();
int ord;
while ((ord=bits.nextDoc())!=NO_MORE_DOCS) {
tg.put(t[ord],i);
}
}
return tg;
}
// in lucene/core/src/java/org/apache/lucene/search/TermQuery.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context) + ")";
final TermsEnum termsEnum = getTermsEnum(context);
if (termsEnum == null) {
return null;
}
DocsEnum docs = termsEnum.docs(acceptDocs, null, true);
if (docs != null) {
return new TermScorer(this, docs, createDocScorer(context));
} else {
// Index does not store freq info
docs = termsEnum.docs(acceptDocs, null, false);
assert docs != null;
return new MatchOnlyTermScorer(this, docs, createDocScorer(context));
}
}
// in lucene/core/src/java/org/apache/lucene/search/TermQuery.java
ExactSimScorer createDocScorer(AtomicReaderContext context)
throws IOException {
return similarity.exactSimScorer(stats, context);
}
// in lucene/core/src/java/org/apache/lucene/search/TermQuery.java
TermsEnum getTermsEnum(AtomicReaderContext context) throws IOException {
final TermState state = termStates.get(context.ord);
if (state == null) { // term is not present in that reader
assert termNotInReader(context.reader(), term.field(), term.bytes()) : "no termstate found but term exists in reader term=" + term;
return null;
}
//System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
termsEnum.seekExact(term.bytes(), state);
return termsEnum;
}
// in lucene/core/src/java/org/apache/lucene/search/TermQuery.java
private boolean termNotInReader(AtomicReader reader, String field, BytesRef bytes) throws IOException {
// only called from assert
//System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString());
return reader.docFreq(field, bytes) == 0;
}
// in lucene/core/src/java/org/apache/lucene/search/TermQuery.java
Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context, true, false, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
ExactSimScorer docScorer = similarity.exactSimScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq));
result.addDetail(scoreExplanation);
result.setValue(scoreExplanation.getValue());
result.setMatch(true);
return result;
}
}
return new ComplexExplanation(false, 0.0f, "no matching term");
}
// in lucene/core/src/java/org/apache/lucene/search/TermQuery.java
Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
final IndexReaderContext context = searcher.getTopReaderContext();
final TermContext termState;
if (perReaderTermState == null || perReaderTermState.topReaderContext != context) {
// make TermQuery single-pass if we don't have a PRTS or if the context differs!
termState = TermContext.build(context, term, true); // cache term lookups!
} else {
// PRTS was pre-built for this IndexSearcher
termState = this.perReaderTermState;
}
// we must not ignore the given docFreq - if set use the given value (lie)
if (docFreq != -1)
termState.setDocFreq(docFreq);
return new TermWeight(searcher, termState);
}
// in lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
Override
public void collect(int doc) throws IOException {
for (Collector c : collectors) {
c.collect(doc);
}
}
// in lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
Override
public void setNextReader(AtomicReaderContext context) throws IOException {
for (Collector c : collectors) {
c.setNextReader(context);
}
}
// in lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
Override
public void setScorer(Scorer s) throws IOException {
for (Collector c : collectors) {
c.setScorer(s);
}
}
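// added illustrative sketch (not from the extracted sources): MultiCollector simply fans every callback
// out to the wrapped collectors, as the three methods above show; a hedged example that gathers top hits
// and a total hit count in one pass over the index:
static void collectTwoThingsAtOnce(IndexSearcher searcher, Query query) throws IOException {
  TopScoreDocCollector top = TopScoreDocCollector.create(10, true);
  TotalHitCountCollector count = new TotalHitCountCollector();
  searcher.search(query, MultiCollector.wrap(top, count));
  System.out.println(count.getTotalHits() + " hits; best " + top.topDocs().scoreDocs.length + " returned");
}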
// in lucene/core/src/java/org/apache/lucene/search/CachingWrapperFilter.java
protected DocIdSet docIdSetToCache(DocIdSet docIdSet, AtomicReader reader) throws IOException {
if (docIdSet == null) {
// this is better than returning null, as the nonnull result can be cached
return DocIdSet.EMPTY_DOCIDSET;
} else if (docIdSet.isCacheable()) {
return docIdSet;
} else {
final DocIdSetIterator it = docIdSet.iterator();
// null is allowed to be returned by iterator(),
// in this case we wrap with the empty set,
// which is cacheable.
if (it == null) {
return DocIdSet.EMPTY_DOCIDSET;
} else {
final FixedBitSet bits = new FixedBitSet(reader.maxDoc());
bits.or(it);
return bits;
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/CachingWrapperFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
final AtomicReader reader = context.reader();
// Only cache if incoming acceptDocs is == live docs;
// if Lucene passes in more interesting acceptDocs in
// the future (@UweSays: it already does when you chain FilteredQuery) we don't want to over-cache:
final Bits liveDocs = reader.getLiveDocs();
final boolean doCacheAcceptDocs = (recacheDeletes && acceptDocs == liveDocs);
final Object key;
final Bits cacheAcceptDocs;
if (doCacheAcceptDocs) {
assert acceptDocs == liveDocs;
key = reader.getCombinedCoreAndDeletesKey();
cacheAcceptDocs = acceptDocs;
} else {
key = reader.getCoreCacheKey();
cacheAcceptDocs = null;
}
DocIdSet docIdSet = cache.get(key);
if (docIdSet != null) {
hitCount++;
} else {
missCount++;
docIdSet = docIdSetToCache(filter.getDocIdSet(context, cacheAcceptDocs), reader);
cache.put(key, docIdSet);
}
if (doCacheAcceptDocs) {
return docIdSet;
} else {
return BitsFilteredDocIdSet.wrap(docIdSet, acceptDocs);
}
}
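// illustrative sketch (not from the source tree above): CachingWrapperFilter caches the
// DocIdSet per reader core (see getDocIdSet above), so the wrapped filter is only evaluated
// once per segment. A hedged usage sketch; the field/value names are hypothetical.
public static TopDocs exampleCachedFilter(IndexSearcher searcher, Query query) throws IOException {
  Filter raw = new QueryWrapperFilter(new TermQuery(new Term("status", "published")));
  Filter cached = new CachingWrapperFilter(raw); // reuse this instance across searches
  return searcher.search(query, cached, 10);
}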
// in lucene/core/src/java/org/apache/lucene/search/FieldValueHitQueue.java
public static <T extends FieldValueHitQueue.Entry> FieldValueHitQueue<T> create(SortField[] fields, int size) throws IOException {
if (fields.length == 0) {
throw new IllegalArgumentException("Sort must contain at least one field");
}
if (fields.length == 1) {
return new OneComparatorFieldValueHitQueue<T>(fields, size);
} else {
return new MultiComparatorsFieldValueHitQueue<T>(fields, size);
}
}
// in lucene/core/src/java/org/apache/lucene/search/RegexpQuery.java
public Automaton getAutomaton(String name) throws IOException {
return null;
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public Document doc(int docID) throws CorruptIndexException, IOException {
return reader.document(docID);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public void doc(int docID, StoredFieldVisitor fieldVisitor) throws CorruptIndexException, IOException {
reader.document(docID, fieldVisitor);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public final Document document(int docID, Set<String> fieldsToLoad) throws CorruptIndexException, IOException {
return reader.document(docID, fieldsToLoad);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public TopDocs searchAfter(ScoreDoc after, Query query, int n) throws IOException {
return search(createNormalizedWeight(query), after, n);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public TopDocs searchAfter(ScoreDoc after, Query query, Filter filter, int n) throws IOException {
return search(createNormalizedWeight(wrapFilter(query, filter)), after, n);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public TopDocs search(Query query, int n)
throws IOException {
return search(query, null, n);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public TopDocs search(Query query, Filter filter, int n)
throws IOException {
return search(createNormalizedWeight(wrapFilter(query, filter)), null, n);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public void search(Query query, Filter filter, Collector results)
throws IOException {
search(leafContexts, createNormalizedWeight(wrapFilter(query, filter)), results);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public void search(Query query, Collector results)
throws IOException {
search(leafContexts, createNormalizedWeight(query), results);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public TopFieldDocs search(Query query, Filter filter, int n,
Sort sort) throws IOException {
return search(createNormalizedWeight(wrapFilter(query, filter)), n, sort, false, false);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public TopFieldDocs search(Query query, Filter filter, int n,
Sort sort, boolean doDocScores, boolean doMaxScore) throws IOException {
return search(createNormalizedWeight(wrapFilter(query, filter)), n, sort, doDocScores, doMaxScore);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public TopDocs searchAfter(ScoreDoc after, Query query, Filter filter, int n, Sort sort) throws IOException {
if (after != null && !(after instanceof FieldDoc)) {
// TODO: if we fix type safety of TopFieldDocs we can
// remove this
throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
}
return search(createNormalizedWeight(wrapFilter(query, filter)), (FieldDoc) after, n, sort, true, false, false);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public TopFieldDocs search(Query query, int n,
Sort sort) throws IOException {
return search(createNormalizedWeight(query), n, sort, false, false);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public TopDocs searchAfter(ScoreDoc after, Query query, int n, Sort sort) throws IOException {
if (after != null && !(after instanceof FieldDoc)) {
// TODO: if we fix type safety of TopFieldDocs we can
// remove this
throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
}
return search(createNormalizedWeight(query), (FieldDoc) after, n, sort, true, false, false);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public TopDocs searchAfter(ScoreDoc after, Query query, Filter filter, int n, Sort sort,
boolean doDocScores, boolean doMaxScore) throws IOException {
if (after != null && !(after instanceof FieldDoc)) {
// TODO: if we fix type safety of TopFieldDocs we can
// remove this
throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
}
return search(createNormalizedWeight(wrapFilter(query, filter)), (FieldDoc) after, n, sort, true,
doDocScores, doMaxScore);
}
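// illustrative sketch (not from the source tree above): the searchAfter(...) overloads above
// implement deep paging; the last ScoreDoc of the previous page is passed as the "after"
// anchor. A hedged sketch of fetching two consecutive pages; pageSize and query are hypothetical.
public static TopDocs examplePaging(IndexSearcher searcher, Query query) throws IOException {
  int pageSize = 20;
  TopDocs page1 = searcher.search(query, pageSize);
  if (page1.scoreDocs.length < pageSize) {
    return page1; // no further page
  }
  ScoreDoc last = page1.scoreDocs[page1.scoreDocs.length - 1];
  return searcher.searchAfter(last, query, pageSize); // hits strictly after 'last'
}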
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
protected TopDocs search(Weight weight, ScoreDoc after, int nDocs) throws IOException {
if (executor == null) {
return search(leafContexts, weight, after, nDocs);
} else {
final HitQueue hq = new HitQueue(nDocs, false);
final Lock lock = new ReentrantLock();
final ExecutionHelper<TopDocs> runner = new ExecutionHelper<TopDocs>(executor);
for (int i = 0; i < leafSlices.length; i++) { // search each sub
runner.submit(
new SearcherCallableNoSort(lock, this, leafSlices[i], weight, after, nDocs, hq));
}
int totalHits = 0;
float maxScore = Float.NEGATIVE_INFINITY;
for (final TopDocs topDocs : runner) {
if(topDocs.totalHits != 0) {
totalHits += topDocs.totalHits;
maxScore = Math.max(maxScore, topDocs.getMaxScore());
}
}
final ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
scoreDocs[i] = hq.pop();
return new TopDocs(totalHits, scoreDocs, maxScore);
}
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
protected TopDocs search(AtomicReaderContext[] leaves, Weight weight, ScoreDoc after, int nDocs) throws IOException {
// single thread
int limit = reader.maxDoc();
if (limit == 0) {
limit = 1;
}
nDocs = Math.min(nDocs, limit);
TopScoreDocCollector collector = TopScoreDocCollector.create(nDocs, after, !weight.scoresDocsOutOfOrder());
search(leaves, weight, collector);
return collector.topDocs();
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
protected TopFieldDocs search(Weight weight,
final int nDocs, Sort sort,
boolean doDocScores, boolean doMaxScore) throws IOException {
return search(weight, null, nDocs, sort, true, doDocScores, doMaxScore);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
protected TopFieldDocs search(Weight weight, FieldDoc after, int nDocs,
Sort sort, boolean fillFields,
boolean doDocScores, boolean doMaxScore)
throws IOException {
if (sort == null) throw new NullPointerException();
if (executor == null) {
// use all leaves here!
return search(leafContexts, weight, after, nDocs, sort, fillFields, doDocScores, doMaxScore);
} else {
final TopFieldCollector topCollector = TopFieldCollector.create(sort, nDocs,
after,
fillFields,
doDocScores,
doMaxScore,
false);
final Lock lock = new ReentrantLock();
final ExecutionHelper<TopFieldDocs> runner = new ExecutionHelper<TopFieldDocs>(executor);
for (int i = 0; i < leafSlices.length; i++) { // search each leaf slice
runner.submit(
new SearcherCallableWithSort(lock, this, leafSlices[i], weight, after, nDocs, topCollector, sort, doDocScores, doMaxScore));
}
int totalHits = 0;
float maxScore = Float.NEGATIVE_INFINITY;
for (final TopFieldDocs topFieldDocs : runner) {
if (topFieldDocs.totalHits != 0) {
totalHits += topFieldDocs.totalHits;
maxScore = Math.max(maxScore, topFieldDocs.getMaxScore());
}
}
final TopFieldDocs topDocs = (TopFieldDocs) topCollector.topDocs();
return new TopFieldDocs(totalHits, topDocs.scoreDocs, topDocs.fields, topDocs.getMaxScore());
}
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
protected TopFieldDocs search(AtomicReaderContext[] leaves, Weight weight, FieldDoc after, int nDocs,
Sort sort, boolean fillFields, boolean doDocScores, boolean doMaxScore) throws IOException {
// single thread
int limit = reader.maxDoc();
if (limit == 0) {
limit = 1;
}
nDocs = Math.min(nDocs, limit);
TopFieldCollector collector = TopFieldCollector.create(sort, nDocs, after,
fillFields, doDocScores,
doMaxScore, !weight.scoresDocsOutOfOrder());
search(leaves, weight, collector);
return (TopFieldDocs) collector.topDocs();
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
protected void search(AtomicReaderContext[] leaves, Weight weight, Collector collector)
throws IOException {
// TODO: should we make this
// threaded...? the Collector could be sync'd?
// always use single thread:
for (int i = 0; i < leaves.length; i++) { // search each subreader
collector.setNextReader(leaves[i]);
Scorer scorer = weight.scorer(leaves[i], !collector.acceptsDocsOutOfOrder(), true, leaves[i].reader().getLiveDocs());
if (scorer != null) {
scorer.score(collector);
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public Query rewrite(Query original) throws IOException {
Query query = original;
for (Query rewrittenQuery = query.rewrite(reader); rewrittenQuery != query;
rewrittenQuery = query.rewrite(reader)) {
query = rewrittenQuery;
}
return query;
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public Explanation explain(Query query, int doc) throws IOException {
return explain(createNormalizedWeight(query), doc);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
protected Explanation explain(Weight weight, int doc) throws IOException {
int n = ReaderUtil.subIndex(doc, leafContexts);
int deBasedDoc = doc - leafContexts[n].docBase;
return weight.explain(leafContexts[n], deBasedDoc);
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public Weight createNormalizedWeight(Query query) throws IOException {
query = rewrite(query);
Weight weight = query.createWeight(this);
float v = weight.getValueForNormalization();
float norm = getSimilarity().queryNorm(v);
if (Float.isInfinite(norm) || Float.isNaN(norm)) {
norm = 1.0f;
}
weight.normalize(norm, 1.0f);
return weight;
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public TopDocs call() throws IOException {
final TopDocs docs = searcher.search(slice.leaves, weight, after, nDocs);
final ScoreDoc[] scoreDocs = docs.scoreDocs;
//it would be so nice if we had a thread-safe insert
lock.lock();
try {
for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
final ScoreDoc scoreDoc = scoreDocs[j];
if (scoreDoc == hq.insertWithOverflow(scoreDoc)) {
break;
}
}
} finally {
lock.unlock();
}
return docs;
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public TopFieldDocs call() throws IOException {
assert slice.leaves.length == 1;
final TopFieldDocs docs = searcher.search(slice.leaves, weight, after, nDocs, sort, true, doDocScores, doMaxScore);
lock.lock();
try {
final int base = slice.leaves[0].docBase;
hq.setNextReader(slice.leaves[0]);
hq.setScorer(fakeScorer);
for(ScoreDoc scoreDoc : docs.scoreDocs) {
fakeScorer.doc = scoreDoc.doc - base;
fakeScorer.score = scoreDoc.score;
hq.collect(scoreDoc.doc-base);
}
// Carry over maxScore from sub:
if (doMaxScore && docs.getMaxScore() > hq.maxScore) {
hq.maxScore = docs.getMaxScore();
}
} finally {
lock.unlock();
}
return docs;
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public TermStatistics termStatistics(Term term, TermContext context) throws IOException {
return new TermStatistics(term.bytes(), context.docFreq(), context.totalTermFreq());
}
// in lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
public CollectionStatistics collectionStatistics(String field) throws IOException {
final int docCount;
final long sumTotalTermFreq;
final long sumDocFreq;
assert field != null;
Terms terms = MultiFields.getTerms(reader, field);
if (terms == null) {
docCount = 0;
sumTotalTermFreq = 0;
sumDocFreq = 0;
} else {
docCount = terms.getDocCount();
sumTotalTermFreq = terms.getSumTotalTermFreq();
sumDocFreq = terms.getSumDocFreq();
}
return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq);
}
// in lucene/core/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java
Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
final AtomicReader reader = context.reader();
final Fields fields = reader.fields();
if (fields == null) {
// reader has no fields
return DocIdSet.EMPTY_DOCIDSET;
}
final Terms terms = fields.terms(query.field);
if (terms == null) {
// field does not exist
return DocIdSet.EMPTY_DOCIDSET;
}
final TermsEnum termsEnum = query.getTermsEnum(terms);
assert termsEnum != null;
if (termsEnum.next() != null) {
// fill into a FixedBitSet
final FixedBitSet bitSet = new FixedBitSet(context.reader().maxDoc());
DocsEnum docsEnum = null;
do {
// System.out.println(" iter termCount=" + termCount + " term=" +
// enumerator.term().toBytesString());
docsEnum = termsEnum.docs(acceptDocs, docsEnum, false);
int docid;
while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
bitSet.set(docid);
}
} while (termsEnum.next() != null);
// System.out.println(" done termCount=" + termCount);
return bitSet;
} else {
return DocIdSet.EMPTY_DOCIDSET;
}
}
// in lucene/core/src/java/org/apache/lucene/search/TermCollectingRewrite.java
protected final void addClause(Q topLevel, Term term, int docCount, float boost) throws IOException {
addClause(topLevel, term, docCount, boost, null);
}
// in lucene/core/src/java/org/apache/lucene/search/TermCollectingRewrite.java
final void collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException {
IndexReaderContext topReaderContext = reader.getTopReaderContext();
Comparator<BytesRef> lastTermComp = null;
final AtomicReaderContext[] leaves = topReaderContext.leaves();
for (AtomicReaderContext context : leaves) {
final Fields fields = context.reader().fields();
if (fields == null) {
// reader has no fields
continue;
}
final Terms terms = fields.terms(query.field);
if (terms == null) {
// field does not exist
continue;
}
final TermsEnum termsEnum = getTermsEnum(query, terms, collector.attributes);
assert termsEnum != null;
if (termsEnum == TermsEnum.EMPTY)
continue;
// Check comparator compatibility:
final Comparator<BytesRef> newTermComp = termsEnum.getComparator();
if (lastTermComp != null && newTermComp != null && newTermComp != lastTermComp)
throw new RuntimeException("term comparator should not change between segments: "+lastTermComp+" != "+newTermComp);
lastTermComp = newTermComp;
collector.setReaderContext(topReaderContext, context);
collector.setNextEnum(termsEnum);
BytesRef bytes;
while ((bytes = termsEnum.next()) != null) {
if (!collector.collect(bytes))
return; // interrupt whole term collection, so also don't iterate other subReaders
}
}
}
// in lucene/core/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java
Override
public int nextDoc() throws IOException {
return docsEnum.nextDoc();
}
// in lucene/core/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java
Override
public int advance(int target) throws IOException {
return docsEnum.advance(target);
}
// in lucene/core/src/java/org/apache/lucene/search/NGramPhraseQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
if(getSlop() != 0) return super.rewrite(reader);
// check whether optimizable or not
if(n < 2 || // non-overlap n-gram cannot be optimized
getTerms().length < 3) // too short to optimize
return super.rewrite(reader);
// check all posIncrement is 1
// if not, cannot optimize
int[] positions = getPositions();
Term[] terms = getTerms();
int prevPosition = positions[0];
for(int i = 1; i < positions.length; i++){
int pos = positions[i];
if(prevPosition + 1 != pos) return super.rewrite(reader);
prevPosition = pos;
}
// now create the new optimized phrase query for n-gram
PhraseQuery optimized = new PhraseQuery();
int pos = 0;
final int lastPos = terms.length - 1;
for(int i = 0; i < terms.length; i++){
if(pos % n == 0 || pos >= lastPos){
optimized.add(terms[i], positions[i]);
}
pos++;
}
return optimized;
}
// in lucene/core/src/java/org/apache/lucene/search/Scorer.java
public void score(Collector collector) throws IOException {
collector.setScorer(this);
int doc;
while ((doc = nextDoc()) != NO_MORE_DOCS) {
collector.collect(doc);
}
}
// in lucene/core/src/java/org/apache/lucene/search/Scorer.java
public boolean score(Collector collector, int max, int firstDocID) throws IOException {
collector.setScorer(this);
int doc = firstDocID;
while (doc < max) {
collector.collect(doc);
doc = nextDoc();
}
return doc != NO_MORE_DOCS;
}
// in lucene/core/src/java/org/apache/lucene/search/Scorer.java
public float freq() throws IOException {
throw new UnsupportedOperationException(this + " does not implement freq()");
}
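// illustrative sketch (not from the source tree above): Scorer.score(Collector) above is the
// driving loop; it hands the scorer to the collector and then calls collect(doc) for every
// matching doc. A hedged sketch of a minimal Collector that just counts matches.
public static class CountingCollector extends Collector {
  public int count;
  @Override public void setScorer(Scorer scorer) throws IOException { /* score not needed */ }
  @Override public void collect(int doc) throws IOException { count++; }
  @Override public void setNextReader(AtomicReaderContext context) throws IOException { }
  @Override public boolean acceptsDocsOutOfOrder() { return true; } // order does not matter here
}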
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public FieldComparator<T> setNextReader(AtomicReaderContext context) throws IOException {
if (missingValue != null) {
docsWithField = FieldCache.DEFAULT.getDocsWithField(context.reader(), field);
// optimization to remove unneeded checks on the bit interface:
if (docsWithField instanceof Bits.MatchAllBits) {
docsWithField = null;
}
} else {
docsWithField = null;
}
return this;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public FieldComparator<Byte> setNextReader(AtomicReaderContext context) throws IOException {
// NOTE: must do this before calling super otherwise
// we compute the docsWithField Bits twice!
currentReaderValues = FieldCache.DEFAULT.getBytes(context.reader(), field, parser, missingValue != null);
return super.setNextReader(context);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public FieldComparator<Double> setNextReader(AtomicReaderContext context) throws IOException {
// NOTE: must do this before calling super otherwise
// we compute the docsWithField Bits twice!
currentReaderValues = FieldCache.DEFAULT.getDoubles(context.reader(), field, parser, missingValue != null);
return super.setNextReader(context);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public FieldComparator<Double> setNextReader(AtomicReaderContext context) throws IOException {
final DocValues docValues = context.reader().docValues(field);
if (docValues != null) {
currentReaderValues = docValues.getSource();
} else {
currentReaderValues = DocValues.getDefaultSource(DocValues.Type.FLOAT_64);
}
return this;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public FieldComparator<Float> setNextReader(AtomicReaderContext context) throws IOException {
// NOTE: must do this before calling super otherwise
// we compute the docsWithField Bits twice!
currentReaderValues = FieldCache.DEFAULT.getFloats(context.reader(), field, parser, missingValue != null);
return super.setNextReader(context);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public FieldComparator<Short> setNextReader(AtomicReaderContext context) throws IOException {
// NOTE: must do this before calling super otherwise
// we compute the docsWithField Bits twice!
currentReaderValues = FieldCache.DEFAULT.getShorts(context.reader(), field, parser, missingValue != null);
return super.setNextReader(context);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public FieldComparator<Integer> setNextReader(AtomicReaderContext context) throws IOException {
// NOTE: must do this before calling super otherwise
// we compute the docsWithField Bits twice!
currentReaderValues = FieldCache.DEFAULT.getInts(context.reader(), field, parser, missingValue != null);
return super.setNextReader(context);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public FieldComparator<Long> setNextReader(AtomicReaderContext context) throws IOException {
DocValues docValues = context.reader().docValues(field);
if (docValues != null) {
currentReaderValues = docValues.getSource();
} else {
currentReaderValues = DocValues.getDefaultSource(DocValues.Type.FIXED_INTS_64);
}
return this;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public FieldComparator<Long> setNextReader(AtomicReaderContext context) throws IOException {
// NOTE: must do this before calling super otherwise
// we compute the docsWithField Bits twice!
currentReaderValues = FieldCache.DEFAULT.getLongs(context.reader(), field, parser, missingValue != null);
return super.setNextReader(context);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public int compareBottom(int doc) throws IOException {
float score = scorer.score();
assert !Float.isNaN(score);
return bottom > score ? -1 : (bottom < score ? 1 : 0);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public void copy(int slot, int doc) throws IOException {
scores[slot] = scorer.score();
assert !Float.isNaN(scores[slot]);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public int compareDocToValue(int doc, Float valueObj) throws IOException {
final float value = valueObj.floatValue();
float docValue = scorer.score();
assert !Float.isNaN(docValue);
if (docValue < value) {
// reverse of FloatComparator
return 1;
} else if (docValue > value) {
// reverse of FloatComparator
return -1;
} else {
return 0;
}
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
return TermOrdValComparator.this.setNextReader(context);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
final int docBase = context.docBase;
termsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader(), field);
final PackedInts.Reader docToOrd = termsIndex.getDocToOrd();
FieldComparator<BytesRef> perSegComp = null;
if (docToOrd.hasArray()) {
final Object arr = docToOrd.getArray();
if (arr instanceof byte[]) {
perSegComp = new ByteOrdComparator((byte[]) arr, termsIndex, docBase);
} else if (arr instanceof short[]) {
perSegComp = new ShortOrdComparator((short[]) arr, termsIndex, docBase);
} else if (arr instanceof int[]) {
perSegComp = new IntOrdComparator((int[]) arr, termsIndex, docBase);
}
// Don't specialize the long[] case since it's not
// possible, ie, worst case is MAX_INT-1 docs with
// every one having a unique value.
}
if (perSegComp == null) {
perSegComp = new AnyOrdComparator(docToOrd, termsIndex, docBase);
}
currentReaderGen++;
if (bottomSlot != -1) {
perSegComp.setBottom(bottomSlot);
}
return perSegComp;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
return TermOrdValDocValuesComparator.this.setNextReader(context);
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
final int docBase = context.docBase;
final DocValues dv = context.reader().docValues(field);
if (dv == null) {
// This may mean entire segment had no docs with
// this DV field; use default field value (empty
// byte[]) in this case:
termsIndex = DocValues.getDefaultSortedSource(DocValues.Type.BYTES_VAR_SORTED, context.reader().maxDoc());
} else {
termsIndex = dv.getSource().asSortedSource();
if (termsIndex == null) {
// This means segment has doc values, but they are
// not able to provide a sorted source; consider
// this a hard error:
throw new IllegalStateException("DocValues exist for field \"" + field + "\", but not as a sorted source: type=" + dv.getSource().getType() + " reader=" + context.reader());
}
}
comp = termsIndex.getComparator();
FieldComparator<BytesRef> perSegComp = null;
if (termsIndex.hasPackedDocToOrd()) {
final PackedInts.Reader docToOrd = termsIndex.getDocToOrd();
if (docToOrd.hasArray()) {
final Object arr = docToOrd.getArray();
assert arr != null;
if (arr instanceof byte[]) {
// 8 bit packed
perSegComp = new ByteOrdComparator((byte[]) arr, termsIndex, docBase);
} else if (arr instanceof short[]) {
// 16 bit packed
perSegComp = new ShortOrdComparator((short[]) arr, termsIndex, docBase);
} else if (arr instanceof int[]) {
// 32 bit packed
perSegComp = new IntOrdComparator((int[]) arr, termsIndex, docBase);
}
}
if (perSegComp == null) {
perSegComp = new AnyPackedDocToOrdComparator(docToOrd, docBase);
}
} else {
if (perSegComp == null) {
perSegComp = new AnyOrdComparator(docBase);
}
}
currentReaderGen++;
if (bottomSlot != -1) {
perSegComp.setBottom(bottomSlot);
}
return perSegComp;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
docTerms = FieldCache.DEFAULT.getTerms(context.reader(), field);
return this;
}
// in lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
Override
public FieldComparator<BytesRef> setNextReader(AtomicReaderContext context) throws IOException {
final DocValues dv = context.reader().docValues(field);
if (dv != null) {
docTerms = dv.getSource();
} else {
docTerms = DocValues.getDefaultSource(DocValues.Type.BYTES_VAR_DEREF);
}
return this;
}
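// illustrative sketch (not from the source tree above): the setNextReader(...) implementations
// above are how each FieldComparator pulls per-segment values (FieldCache or DocValues) when
// sorting. A hedged sketch of the Sort API that selects those comparators; the field name and
// type are hypothetical.
public static TopDocs exampleSortedSearch(IndexSearcher searcher, Query query) throws IOException {
  Sort sort = new Sort(new SortField("price", SortField.Type.FLOAT),
                       SortField.FIELD_SCORE); // tie-break by relevance
  return searcher.search(query, null, 10, sort);
}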
// in lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
Override
public int nextDoc() throws IOException {
if (reqScorer == null) {
return doc;
}
doc = reqScorer.nextDoc();
if (doc == NO_MORE_DOCS) {
reqScorer = null; // exhausted, nothing left
return doc;
}
if (exclDisi == null) {
return doc;
}
return doc = toNonExcluded();
}
// in lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
private int toNonExcluded() throws IOException {
int exclDoc = exclDisi.docID();
int reqDoc = reqScorer.docID(); // may be excluded
do {
if (reqDoc < exclDoc) {
return reqDoc; // reqScorer advanced to before exclScorer, ie. not excluded
} else if (reqDoc > exclDoc) {
exclDoc = exclDisi.advance(reqDoc);
if (exclDoc == NO_MORE_DOCS) {
exclDisi = null; // exhausted, no more exclusions
return reqDoc;
}
if (exclDoc > reqDoc) {
return reqDoc; // not excluded
}
}
} while ((reqDoc = reqScorer.nextDoc()) != NO_MORE_DOCS);
reqScorer = null; // exhausted, nothing left
return NO_MORE_DOCS;
}
// in lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
Override
public float score() throws IOException {
return reqScorer.score(); // reqScorer may be null when next() or skipTo() already return false
}
// in lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
Override
public int advance(int target) throws IOException {
if (reqScorer == null) {
return doc = NO_MORE_DOCS;
}
if (exclDisi == null) {
return doc = reqScorer.advance(target);
}
if (reqScorer.advance(target) == NO_MORE_DOCS) {
reqScorer = null;
return doc = NO_MORE_DOCS;
}
return doc = toNonExcluded();
}
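// illustrative sketch (not from the source tree above): ReqExclScorer is the scorer used when a
// required clause is combined with a prohibited one; it advances the required scorer and skips
// any doc the excluded iterator also matches. A hedged sketch of a BooleanQuery shape that
// exercises it; the terms are hypothetical.
public static Query exampleRequiredExcluded() {
  BooleanQuery bq = new BooleanQuery();
  bq.add(new TermQuery(new Term("body", "lucene")), BooleanClause.Occur.MUST);
  bq.add(new TermQuery(new Term("body", "deprecated")), BooleanClause.Occur.MUST_NOT);
  return bq;
}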
// in lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java
Override
public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
// get a private context that is used to rewrite, createWeight and score eventually
final AtomicReaderContext privateContext = context.reader().getTopReaderContext();
final Weight weight = new IndexSearcher(privateContext).createNormalizedWeight(query);
return new DocIdSet() {
@Override
public DocIdSetIterator iterator() throws IOException {
return weight.scorer(privateContext, true, false, acceptDocs);
}
@Override
public boolean isCacheable() { return false; }
};
}
// in lucene/core/src/java/org/apache/lucene/search/QueryWrapperFilter.java
Override
public DocIdSetIterator iterator() throws IOException {
return weight.scorer(privateContext, true, false, acceptDocs);
}
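// illustrative sketch (not from the source tree above): QueryWrapperFilter turns any Query into
// a Filter by scoring it against a private per-segment searcher (see getDocIdSet above) and
// exposing the matches as a DocIdSet. A hedged usage sketch; the field/value names are hypothetical.
public static TopDocs exampleQueryAsFilter(IndexSearcher searcher, Query userQuery) throws IOException {
  Filter onlyRecent = new QueryWrapperFilter(new TermQuery(new Term("year", "2012")));
  return searcher.search(userQuery, onlyRecent, 10);
}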
// in lucene/core/src/java/org/apache/lucene/search/NumericRangeQuery.java
Override
protected final BytesRef nextSeekTerm(BytesRef term) throws IOException {
while (rangeBounds.size() >= 2) {
nextRange();
// if the new upper bound is before the term parameter, the sub-range is never a hit
if (term != null && termComp.compare(term, currentUpperBound) > 0)
continue;
// never seek backwards, so use current term if lower bound is smaller
return (term != null && termComp.compare(term, currentLowerBound) > 0) ?
term : currentLowerBound;
}
// no more sub-range enums available
assert rangeBounds.isEmpty();
currentLowerBound = currentUpperBound = null;
return null;
}
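// illustrative sketch (not from the source tree above): nextSeekTerm(...) above walks the
// precomputed trie sub-ranges of a NumericRangeQuery, only ever seeking the TermsEnum forward.
// A hedged sketch of the public factory that produces such a query; the field must have been
// indexed as a numeric (trie) field, and the name/bounds are hypothetical.
public static Query exampleNumericRange() {
  // inclusive range 10 <= price <= 100 on an int field
  return NumericRangeQuery.newIntRange("price", 10, 100, true, true);
}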
// in lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java
Override
public final Q rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
final int maxSize = Math.min(size, getMaxSize());
final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
collectTerms(reader, query, new TermCollector() {
private final MaxNonCompetitiveBoostAttribute maxBoostAtt =
attributes.addAttribute(MaxNonCompetitiveBoostAttribute.class);
private final Map<BytesRef,ScoreTerm> visitedTerms = new HashMap<BytesRef,ScoreTerm>();
private TermsEnum termsEnum;
private Comparator<BytesRef> termComp;
private BoostAttribute boostAtt;
private ScoreTerm st;
@Override
public void setNextEnum(TermsEnum termsEnum) throws IOException {
this.termsEnum = termsEnum;
this.termComp = termsEnum.getComparator();
assert compareToLastTerm(null);
// lazy init the initial ScoreTerm because comparator is not known on ctor:
if (st == null)
st = new ScoreTerm(this.termComp, new TermContext(topReaderContext));
boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
}
// for assert:
private BytesRef lastTerm;
private boolean compareToLastTerm(BytesRef t) throws IOException {
if (lastTerm == null && t != null) {
lastTerm = BytesRef.deepCopyOf(t);
} else if (t == null) {
lastTerm = null;
} else {
assert termsEnum.getComparator().compare(lastTerm, t) < 0: "lastTerm=" + lastTerm + " t=" + t;
lastTerm.copyBytes(t);
}
return true;
}
@Override
public boolean collect(BytesRef bytes) throws IOException {
final float boost = boostAtt.getBoost();
// make sure within a single seg we always collect
// terms in order
assert compareToLastTerm(bytes);
//System.out.println("TTR.collect term=" + bytes.utf8ToString() + " boost=" + boost + " ord=" + readerContext.ord);
// ignore uncompetitive hits
if (stQueue.size() == maxSize) {
final ScoreTerm t = stQueue.peek();
if (boost < t.boost)
return true;
if (boost == t.boost && termComp.compare(bytes, t.bytes) > 0)
return true;
}
ScoreTerm t = visitedTerms.get(bytes);
final TermState state = termsEnum.termState();
assert state != null;
if (t != null) {
// if the term is already in the PQ, only update docFreq of term in PQ
assert t.boost == boost : "boost should be equal in all segment TermsEnums";
t.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
} else {
// add new entry in PQ, we must clone the term, else it may get overwritten!
st.bytes.copyBytes(bytes);
st.boost = boost;
visitedTerms.put(st.bytes, st);
assert st.termState.docFreq() == 0;
st.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
stQueue.offer(st);
// possibly drop entries from queue
if (stQueue.size() > maxSize) {
st = stQueue.poll();
visitedTerms.remove(st.bytes);
st.termState.clear(); // reset the termstate!
} else {
st = new ScoreTerm(termComp, new TermContext(topReaderContext));
}
assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";
// set maxBoostAtt with values to help FuzzyTermsEnum to optimize
if (stQueue.size() == maxSize) {
t = stQueue.peek();
maxBoostAtt.setMaxNonCompetitiveBoost(t.boost);
maxBoostAtt.setCompetitiveTerm(t.bytes);
}
}
return true;
}
});
final Q q = getTopLevelQuery();
final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
ArrayUtil.mergeSort(scoreTerms, scoreTermSortByTermComp);
for (final ScoreTerm st : scoreTerms) {
final Term term = new Term(query.field, st.bytes);
assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq() + " term=" + term;
addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query
}
return q;
}
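// illustrative sketch (not from the source tree above): TopTermsRewrite keeps only the
// highest-boosted terms a MultiTermQuery expands to, bounded by the priority queue in the
// rewrite above. A hedged sketch of selecting such a rewrite method on a FuzzyQuery; the
// field/term and the size limit are hypothetical.
public static Query exampleTopTermsRewrite() {
  FuzzyQuery fq = new FuzzyQuery(new Term("title", "lucene"));
  fq.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50)); // keep best 50 terms
  return fq;
}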
// in lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java
Override
public void setNextEnum(TermsEnum termsEnum) throws IOException {
this.termsEnum = termsEnum;
this.termComp = termsEnum.getComparator();
assert compareToLastTerm(null);
// lazy init the initial ScoreTerm because comparator is not known on ctor:
if (st == null)
st = new ScoreTerm(this.termComp, new TermContext(topReaderContext));
boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
}
// in lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java
private boolean compareToLastTerm(BytesRef t) throws IOException {
if (lastTerm == null && t != null) {
lastTerm = BytesRef.deepCopyOf(t);
} else if (t == null) {
lastTerm = null;
} else {
assert termsEnum.getComparator().compare(lastTerm, t) < 0: "lastTerm=" + lastTerm + " t=" + t;
lastTerm.copyBytes(t);
}
return true;
}
// in lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java
Override
public boolean collect(BytesRef bytes) throws IOException {
final float boost = boostAtt.getBoost();
// make sure within a single seg we always collect
// terms in order
assert compareToLastTerm(bytes);
//System.out.println("TTR.collect term=" + bytes.utf8ToString() + " boost=" + boost + " ord=" + readerContext.ord);
// ignore uncompetitive hits
if (stQueue.size() == maxSize) {
final ScoreTerm t = stQueue.peek();
if (boost < t.boost)
return true;
if (boost == t.boost && termComp.compare(bytes, t.bytes) > 0)
return true;
}
ScoreTerm t = visitedTerms.get(bytes);
final TermState state = termsEnum.termState();
assert state != null;
if (t != null) {
// if the term is already in the PQ, only update docFreq of term in PQ
assert t.boost == boost : "boost should be equal in all segment TermsEnums";
t.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
} else {
// add new entry in PQ, we must clone the term, else it may get overwritten!
st.bytes.copyBytes(bytes);
st.boost = boost;
visitedTerms.put(st.bytes, st);
assert st.termState.docFreq() == 0;
st.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
stQueue.offer(st);
// possibly drop entries from queue
if (stQueue.size() > maxSize) {
st = stQueue.poll();
visitedTerms.remove(st.bytes);
st.termState.clear(); // reset the termstate!
} else {
st = new ScoreTerm(termComp, new TermContext(topReaderContext));
}
assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";
// set maxBoostAtt with values to help FuzzyTermsEnum to optimize
if (stQueue.size() == maxSize) {
t = stQueue.peek();
maxBoostAtt.setMaxNonCompetitiveBoost(t.boost);
maxBoostAtt.setCompetitiveTerm(t.bytes);
}
}
return true;
}
// in lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
Override
public Query rewrite(IndexReader reader) throws IOException {
if (terms.isEmpty()) {
BooleanQuery bq = new BooleanQuery();
bq.setBoost(getBoost());
return bq;
} else if (terms.size() == 1) {
TermQuery tq = new TermQuery(terms.get(0));
tq.setBoost(getBoost());
return tq;
} else
return super.rewrite(reader);
}
// in lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
assert !terms.isEmpty();
final AtomicReader reader = context.reader();
final Bits liveDocs = acceptDocs;
PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()];
final Terms fieldTerms = reader.terms(field);
if (fieldTerms == null) {
return null;
}
// Reuse single TermsEnum below:
final TermsEnum te = fieldTerms.iterator(null);
for (int i = 0; i < terms.size(); i++) {
final Term t = terms.get(i);
final TermState state = states[i].get(context.ord);
if (state == null) { /* term doesn't exist in this segment */
assert termNotInReader(reader, field, t.bytes()): "no termstate found but term exists in reader";
return null;
}
te.seekExact(t.bytes(), state);
DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null, false);
// PhraseQuery on a field that did not index
// positions.
if (postingsEnum == null) {
assert te.seekExact(t.bytes(), false) : "termstate found but no term exists in reader";
// term does exist, but has no positions
throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
}
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t);
}
// sort by increasing docFreq order
if (slop == 0) {
ArrayUtil.mergeSort(postingsFreqs);
}
if (slop == 0) { // optimize exact case
ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.exactSimScorer(stats, context));
if (s.noDocs) {
return null;
} else {
return s;
}
} else {
return
new SloppyPhraseScorer(this, postingsFreqs, slop, similarity.sloppySimScorer(stats, context));
}
}
// in lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
private boolean termNotInReader(AtomicReader reader, String field, BytesRef bytes) throws IOException {
return reader.docFreq(field, bytes) == 0;
}
// in lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context, true, false, context.reader().getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = scorer.freq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
result.addDetail(scoreExplanation);
result.setValue(scoreExplanation.getValue());
result.setMatch(true);
return result;
}
}
return new ComplexExplanation(false, 0.0f, "no matching term");
}
// in lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java
Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
return new PhraseWeight(searcher);
}
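// illustrative sketch (not from the source tree above): a hedged sketch of building the
// PhraseQuery whose rewrite/scorer/explain methods appear above; the field, terms and slop are
// hypothetical. The field must have been indexed with positions, otherwise scorer(...) throws
// IllegalStateException as shown above.
public static Query examplePhraseQuery() {
  PhraseQuery pq = new PhraseQuery();
  pq.add(new Term("body", "quick"));
  pq.add(new Term("body", "fox"));
  pq.setSlop(1); // allow one position of slop between the terms
  return pq;
}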
// in lucene/core/src/java/org/apache/lucene/search/SearcherManager.java
Override
protected void decRef(IndexSearcher reference) throws IOException {
reference.getIndexReader().decRef();
}
// in lucene/core/src/java/org/apache/lucene/search/SearcherManager.java
Override
protected IndexSearcher refreshIfNeeded(IndexSearcher referenceToRefresh) throws IOException {
final IndexReader r = referenceToRefresh.getIndexReader();
assert r instanceof DirectoryReader: "searcher's IndexReader should be a DirectoryReader, but got " + r;
final IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) r);
if (newReader == null) {
return null;
} else {
return getSearcher(searcherFactory, newReader);
}
}
// in lucene/core/src/java/org/apache/lucene/search/SearcherManager.java
public boolean isSearcherCurrent() throws IOException {
final IndexSearcher searcher = acquire();
try {
final IndexReader r = searcher.getIndexReader();
assert r instanceof DirectoryReader: "searcher's IndexReader should be a DirectoryReader, but got " + r;
return ((DirectoryReader) r).isCurrent();
} finally {
release(searcher);
}
}
// in lucene/core/src/java/org/apache/lucene/search/SearcherManager.java
static IndexSearcher getSearcher(SearcherFactory searcherFactory, IndexReader reader) throws IOException {
boolean success = false;
final IndexSearcher searcher;
try {
searcher = searcherFactory.newSearcher(reader);
if (searcher.getIndexReader() != reader) {
throw new IllegalStateException("SearcherFactory must wrap exactly the provided reader (got " + searcher.getIndexReader() + " but expected " + reader + ")");
}
success = true;
} finally {
if (!success) {
reader.decRef();
}
}
return searcher;
}
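// illustrative sketch (not from the source tree above): a hedged sketch of the acquire/release
// discipline SearcherManager expects (decRef/refreshIfNeeded above are its internals). In real
// code the manager is long-lived and closed on shutdown; the directory and query are hypothetical.
public static int exampleSearcherManager(Directory dir, Query query) throws IOException {
  SearcherManager mgr = new SearcherManager(dir, new SearcherFactory());
  mgr.maybeRefresh();                      // pick up index changes, if any
  IndexSearcher searcher = mgr.acquire();  // ref-counted; must be released
  try {
    return searcher.search(query, 10).totalHits;
  } finally {
    mgr.release(searcher);
  }
}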
// in lucene/core/src/java/org/apache/lucene/search/MatchOnlyConjunctionTermsScorer.java
Override
public float score() throws IOException {
float sum = 0.0f;
for (DocsAndFreqs docs : docsAndFreqs) {
sum += docs.docScorer.score(lastDoc, 1);
}
return sum * coord;
}
// in lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java
public void writeTo(IndexOutput out) throws IOException {
flush();
final long end = file.length;
long pos = 0;
int buffer = 0;
while (pos < end) {
int length = BUFFER_SIZE;
long nextPos = pos + length;
if (nextPos > end) { // at the last buffer
length = (int)(end - pos);
}
out.writeBytes(file.getBuffer(buffer++), length);
pos = nextPos;
}
}
// in lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java
public void writeTo(byte[] bytes, int offset) throws IOException {
flush();
final long end = file.length;
long pos = 0;
int buffer = 0;
int bytesUpto = offset;
while (pos < end) {
int length = BUFFER_SIZE;
long nextPos = pos + length;
if (nextPos > end) { // at the last buffer
length = (int)(end - pos);
}
System.arraycopy(file.getBuffer(buffer++), 0, bytes, bytesUpto, length);
bytesUpto += length;
pos = nextPos;
}
}
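// illustrative sketch (not from the source tree above): RAMOutputStream buffers writes in
// BUFFER_SIZE chunks; the two writeTo(...) overloads above copy the buffered bytes out. A
// hedged round-trip sketch using the in-memory stream; the written values are hypothetical.
public static byte[] exampleRamOutput() throws IOException {
  RAMOutputStream out = new RAMOutputStream();
  out.writeVInt(300);
  out.writeString("hello");
  out.flush();
  byte[] bytes = new byte[(int) out.getFilePointer()]; // number of bytes written so far
  out.writeTo(bytes, 0);
  out.close();
  return bytes;
}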
// in lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java
Override
public void close() throws IOException {
flush();
}
// in lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java
Override
public void seek(long pos) throws IOException {
// set the file length in case we seek back
// and flush() has not been called yet
setFileLength();
if (pos < bufferStart || pos >= bufferStart + bufferLength) {
currentBufferIndex = (int) (pos / BUFFER_SIZE);
switchCurrentBuffer();
}
bufferPosition = (int) (pos % BUFFER_SIZE);
}
// in lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java
Override
public void writeByte(byte b) throws IOException {
if (bufferPosition == bufferLength) {
currentBufferIndex++;
switchCurrentBuffer();
}
currentBuffer[bufferPosition++] = b;
}
// in lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java
Override
public void writeBytes(byte[] b, int offset, int len) throws IOException {
assert b != null;
while (len > 0) {
if (bufferPosition == bufferLength) {
currentBufferIndex++;
switchCurrentBuffer();
}
int remainInBuffer = currentBuffer.length - bufferPosition;
int bytesToCopy = len < remainInBuffer ? len : remainInBuffer;
System.arraycopy(b, offset, currentBuffer, bufferPosition, bytesToCopy);
offset += bytesToCopy;
len -= bytesToCopy;
bufferPosition += bytesToCopy;
}
}
// in lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java
private final void switchCurrentBuffer() throws IOException {
if (currentBufferIndex == file.numBuffers()) {
currentBuffer = file.addBuffer(BUFFER_SIZE);
} else {
currentBuffer = file.getBuffer(currentBufferIndex);
}
bufferPosition = 0;
bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex;
bufferLength = currentBuffer.length;
}
// in lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java
Override
public void flush() throws IOException {
setFileLength();
}
// in lucene/core/src/java/org/apache/lucene/store/RAMOutputStream.java
Override
public void copyBytes(DataInput input, long numBytes) throws IOException {
assert numBytes >= 0: "numBytes=" + numBytes;
while (numBytes > 0) {
if (bufferPosition == bufferLength) {
currentBufferIndex++;
switchCurrentBuffer();
}
int toCopy = currentBuffer.length - bufferPosition;
if (numBytes < toCopy) {
toCopy = (int) numBytes;
}
input.readBytes(currentBuffer, bufferPosition, toCopy, false);
numBytes -= toCopy;
bufferPosition += toCopy;
}
}
// in lucene/core/src/java/org/apache/lucene/store/ChecksumIndexOutput.java
Override
public void writeByte(byte b) throws IOException {
digest.update(b);
main.writeByte(b);
}
// in lucene/core/src/java/org/apache/lucene/store/ChecksumIndexOutput.java
Override
public void writeBytes(byte[] b, int offset, int length) throws IOException {
digest.update(b, offset, length);
main.writeBytes(b, offset, length);
}
// in lucene/core/src/java/org/apache/lucene/store/ChecksumIndexOutput.java
Override
public void flush() throws IOException {
main.flush();
}
// in lucene/core/src/java/org/apache/lucene/store/ChecksumIndexOutput.java
Override
public void close() throws IOException {
main.close();
}
// in lucene/core/src/java/org/apache/lucene/store/ChecksumIndexOutput.java
public void finishCommit() throws IOException {
main.writeLong(getChecksum());
}
// in lucene/core/src/java/org/apache/lucene/store/ChecksumIndexOutput.java
Override
public long length() throws IOException {
return main.length();
}
// in lucene/core/src/java/org/apache/lucene/store/NoLockFactory.java
Override
public boolean obtain() throws IOException {
return true;
}
// in lucene/core/src/java/org/apache/lucene/store/NIOFSDirectory.java
Override
public IndexInput openInput(String name, IOContext context) throws IOException {
ensureOpen();
return new NIOFSIndexInput(new File(getDirectory(), name), context, getReadChunkSize());
}
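// illustrative sketch (not from the source tree above): a hedged sketch of opening an
// NIOFSDirectory and reading one of its files through openInput(...) above; the path and file
// name are hypothetical.
public static long exampleNioRead(File indexDir, String fileName) throws IOException {
  Directory dir = new NIOFSDirectory(indexDir);
  try {
    IndexInput in = dir.openInput(fileName, IOContext.READONCE);
    try {
      return in.readLong(); // read the first 8 bytes as a long
    } finally {
      in.close();
    }
  } finally {
    dir.close();
  }
}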
// in lucene/core/src/java/org/apache/lucene/store/NIOFSDirectory.java
public IndexInputSlicer createSlicer(final String name,
final IOContext context) throws IOException {
ensureOpen();
final File path = new File(getDirectory(), name);
final Descriptor descriptor = new Descriptor(path, "r");
return new Directory.IndexInputSlicer() {
@Override
public void close() throws IOException {
descriptor.close();
}
@Override
public IndexInput openSlice(String sliceDescription, long offset, long length) throws IOException {
return new NIOFSIndexInput(sliceDescription, path, descriptor, descriptor.getChannel(), offset,
length, BufferedIndexInput.bufferSize(context), getReadChunkSize());
}
@Override
public IndexInput openFullSlice() throws IOException {
return openSlice("full-slice", 0, descriptor.length);
}
};
}
// in lucene/core/src/java/org/apache/lucene/store/NIOFSDirectory.java
Override
public void close() throws IOException {
descriptor.close();
}
// in lucene/core/src/java/org/apache/lucene/store/NIOFSDirectory.java
Override
public IndexInput openSlice(String sliceDescription, long offset, long length) throws IOException {
return new NIOFSIndexInput(sliceDescription, path, descriptor, descriptor.getChannel(), offset,
length, BufferedIndexInput.bufferSize(context), getReadChunkSize());
}
// in lucene/core/src/java/org/apache/lucene/store/NIOFSDirectory.java
Override
public IndexInput openFullSlice() throws IOException {
return openSlice("full-slice", 0, descriptor.length);
}
// in lucene/core/src/java/org/apache/lucene/store/NIOFSDirectory.java
Override
public void close() throws IOException {
if (!isClone && file.isOpen) {
// Close the channel & file
try {
channel.close();
} finally {
file.close();
}
}
}
// in lucene/core/src/java/org/apache/lucene/store/NIOFSDirectory.java
Override
protected void readInternal(byte[] b, int offset, int len) throws IOException {
final ByteBuffer bb;
// Determine the ByteBuffer we should use
if (b == buffer && 0 == offset) {
// Use our own pre-wrapped byteBuf:
assert byteBuf != null;
byteBuf.clear();
byteBuf.limit(len);
bb = byteBuf;
} else {
bb = ByteBuffer.wrap(b, offset, len);
}
int readOffset = bb.position();
int readLength = bb.limit() - readOffset;
assert readLength == len;
long pos = getFilePointer() + off;
if (pos + len > end) {
throw new EOFException("read past EOF: " + this);
}
try {
while (readLength > 0) {
final int limit;
if (readLength > chunkSize) {
// LUCENE-1566 - work around JVM Bug by breaking
// very large reads into chunks
limit = readOffset + chunkSize;
} else {
limit = readOffset + readLength;
}
bb.limit(limit);
int i = channel.read(bb, pos);
pos += i;
readOffset += i;
readLength -= i;
}
} catch (OutOfMemoryError e) {
// propagate OOM up and add a hint for 32bit VM Users hitting the bug
// with a large chunk size in the fast path.
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
"OutOfMemoryError likely caused by the Sun VM Bug described in "
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
+ "with a value smaller than the current chunk size (" + chunkSize + ")");
outOfMemoryError.initCause(e);
throw outOfMemoryError;
} catch (IOException ioe) {
throw new IOException(ioe.getMessage() + ": " + this, ioe);
}
}
// in lucene/core/src/java/org/apache/lucene/store/Lock.java
public boolean obtain(long lockWaitTimeout) throws LockObtainFailedException, IOException {
failureReason = null;
boolean locked = obtain();
if (lockWaitTimeout < 0 && lockWaitTimeout != LOCK_OBTAIN_WAIT_FOREVER)
throw new IllegalArgumentException("lockWaitTimeout should be LOCK_OBTAIN_WAIT_FOREVER or a non-negative number (got " + lockWaitTimeout + ")");
long maxSleepCount = lockWaitTimeout / LOCK_POLL_INTERVAL;
long sleepCount = 0;
while (!locked) {
if (lockWaitTimeout != LOCK_OBTAIN_WAIT_FOREVER && sleepCount++ >= maxSleepCount) {
String reason = "Lock obtain timed out: " + this.toString();
if (failureReason != null) {
reason += ": " + failureReason;
}
LockObtainFailedException e = new LockObtainFailedException(reason);
if (failureReason != null) {
e.initCause(failureReason);
}
throw e;
}
try {
Thread.sleep(LOCK_POLL_INTERVAL);
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
}
locked = obtain();
}
return locked;
}
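// illustrative sketch (not from the source tree above): Lock.obtain(timeout) above polls
// obtain() every LOCK_POLL_INTERVAL ms and throws LockObtainFailedException once the timeout
// expires. A hedged sketch of taking and releasing a lock through a Directory; the lock name
// and timeout are hypothetical.
public static void exampleLock(Directory dir) throws IOException {
  Lock lock = dir.makeLock("my.lock");
  lock.obtain(1000); // waits up to ~1 second, then throws LockObtainFailedException
  try {
    // ... critical section guarded by the lock ...
  } finally {
    lock.release();
  }
}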
// in lucene/core/src/java/org/apache/lucene/store/Lock.java
public Object run() throws LockObtainFailedException, IOException {
boolean locked = false;
try {
locked = lock.obtain(lockWaitTimeout);
return doBody();
} finally {
if (locked)
lock.release();
}
}
// in lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java
Override
public byte readByte() throws IOException {
final byte b = main.readByte();
digest.update(b);
return b;
}
// in lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java
Override
public void readBytes(byte[] b, int offset, int len)
throws IOException {
main.readBytes(b, offset, len);
digest.update(b, offset, len);
}
// in lucene/core/src/java/org/apache/lucene/store/ChecksumIndexInput.java
Override
public void close() throws IOException {
main.close();
}
// in lucene/core/src/java/org/apache/lucene/store/NativeFSLockFactory.java
Override
public void clearLock(String lockName) throws IOException {
// Note that this isn't strictly required anymore
// because the existence of these files does not mean
// they are locked, but, still do this in case people
// really want to see the files go away:
if (lockDir.exists()) {
// Try to release the lock first - if it's held by another process, this
// method should not silently fail.
// NOTE: makeLock fixes the lock name by prefixing it w/ lockPrefix.
// Therefore it should be called before the code block next which prefixes
// the given name.
makeLock(lockName).release();
if (lockPrefix != null) {
lockName = lockPrefix + "-" + lockName;
}
// As mentioned above, we don't care if the deletion of the file failed.
new File(lockDir, lockName).delete();
}
}
// in lucene/core/src/java/org/apache/lucene/store/NativeFSLockFactory.java
Override
public synchronized boolean obtain() throws IOException {
if (lockExists()) {
// Our instance is already locked:
return false;
}
// Ensure that lockDir exists and is a directory.
if (!lockDir.exists()) {
if (!lockDir.mkdirs())
throw new IOException("Cannot create directory: " +
lockDir.getAbsolutePath());
} else if (!lockDir.isDirectory()) {
// TODO: NoSuchDirectoryException instead?
throw new IOException("Found regular file where directory expected: " +
lockDir.getAbsolutePath());
}
String canonicalPath = path.getCanonicalPath();
boolean markedHeld = false;
try {
// Make sure nobody else in-process has this lock held
// already, and, mark it held if not:
synchronized(LOCK_HELD) {
if (LOCK_HELD.contains(canonicalPath)) {
// Someone else in this JVM already has the lock:
return false;
} else {
// This "reserves" the fact that we are the one
// thread trying to obtain this lock, so we own
// the only instance of a channel against this
// file:
LOCK_HELD.add(canonicalPath);
markedHeld = true;
}
}
try {
f = new RandomAccessFile(path, "rw");
} catch (IOException e) {
// On Windows, we can get intermittent "Access
// Denied" here. So, we treat this as failure to
// acquire the lock, but, store the reason in case
// there is in fact a real error case.
failureReason = e;
f = null;
}
if (f != null) {
try {
channel = f.getChannel();
try {
lock = channel.tryLock();
} catch (IOException e) {
// At least on OS X, we will sometimes get an
// intermittent "Permission Denied" IOException,
// which seems to simply mean "you failed to get
// the lock". But other IOExceptions could be
// "permanent" (eg, locking is not supported via
// the filesystem). So, we record the failure
// reason here; the timeout obtain (usually the
// one calling us) will use this as "root cause"
// if it fails to get the lock.
failureReason = e;
} finally {
if (lock == null) {
try {
channel.close();
} finally {
channel = null;
}
}
}
} finally {
if (channel == null) {
try {
f.close();
} finally {
f = null;
}
}
}
}
} finally {
if (markedHeld && !lockExists()) {
synchronized(LOCK_HELD) {
if (LOCK_HELD.contains(canonicalPath)) {
LOCK_HELD.remove(canonicalPath);
}
}
}
}
return lockExists();
}
// in lucene/core/src/java/org/apache/lucene/store/NativeFSLockFactory.java
Override
public synchronized void release() throws IOException {
if (lockExists()) {
try {
lock.release();
} finally {
lock = null;
try {
channel.close();
} finally {
channel = null;
try {
f.close();
} finally {
f = null;
synchronized(LOCK_HELD) {
LOCK_HELD.remove(path.getCanonicalPath());
}
}
}
}
// LUCENE-2421: we don't care anymore if the file cannot be deleted
// because it's held up by another process (e.g. AntiVirus). NativeFSLock
// does not depend on the existence/absence of the lock file
path.delete();
} else {
// if we don't hold the lock, and somebody still called release(), for
// example as a result of calling IndexWriter.unlock(), we should attempt
// to obtain the lock and release it. If the obtain fails, it means the
// lock cannot be released, and we should throw a proper exception rather
// than silently failing/not doing anything.
boolean obtained = false;
try {
if (!(obtained = obtain())) {
throw new LockReleaseFailedException(
"Cannot forcefully unlock a NativeFSLock which is held by another indexer component: "
+ path);
}
} finally {
if (obtained) {
release();
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/store/DataInput.java
public void readBytes(byte[] b, int offset, int len, boolean useBuffer)
throws IOException
{
// Default to ignoring useBuffer entirely
readBytes(b, offset, len);
}
// in lucene/core/src/java/org/apache/lucene/store/DataInput.java
public short readShort() throws IOException {
return (short) (((readByte() & 0xFF) << 8) | (readByte() & 0xFF));
}
// in lucene/core/src/java/org/apache/lucene/store/DataInput.java
public int readInt() throws IOException {
return ((readByte() & 0xFF) << 24) | ((readByte() & 0xFF) << 16)
| ((readByte() & 0xFF) << 8) | (readByte() & 0xFF);
}
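// [Worked example, not part of the extracted sources] readShort()/readInt() above are
// big-endian: the byte sequence 0x00 0x01 0x02 0x03 decodes to
// (0x00 << 24) | (0x01 << 16) | (0x02 << 8) | 0x03 = 66051 (0x00010203).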
// in lucene/core/src/java/org/apache/lucene/store/DataInput.java
public int readVInt() throws IOException {
/* This is the original code of this method,
* but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
* readByte() is inlined. So the loop was unrolled!
byte b = readByte();
int i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
b = readByte();
i |= (b & 0x7F) << shift;
}
return i;
*/
byte b = readByte();
if (b >= 0) return b;
int i = b & 0x7F;
b = readByte();
i |= (b & 0x7F) << 7;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7F) << 14;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7F) << 21;
if (b >= 0) return i;
b = readByte();
// Warning: the next ands use 0x0F / 0xF0 - beware copy/paste errors:
i |= (b & 0x0F) << 28;
if ((b & 0xF0) == 0) return i;
throw new IOException("Invalid vInt detected (too many bits)");
}
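// [Illustrative sketch, not part of the extracted sources] The vInt format stores seven payload
// bits per byte, least-significant group first, with the high bit set on every byte except the
// last; readVLong() below extends the same scheme to up to nine bytes. A minimal stand-alone
// encoder/decoder mirroring writeVInt() and the loop form of readVInt() shown in the
// commented-out original above:
public class VIntExample {
  static int writeVInt(int value, byte[] dest) {      // returns number of bytes written
    int pos = 0;
    while ((value & ~0x7F) != 0) {
      dest[pos++] = (byte) ((value & 0x7F) | 0x80);   // 7 payload bits + continuation bit
      value >>>= 7;
    }
    dest[pos++] = (byte) value;                        // final byte, high bit clear
    return pos;
  }

  static int readVInt(byte[] src) {
    byte b = src[0];
    if (b >= 0) return b;                              // single-byte value
    int i = b & 0x7F;
    int shift = 7, pos = 1;
    do {
      b = src[pos++];
      i |= (b & 0x7F) << shift;
      shift += 7;
    } while (b < 0);                                   // continuation bit still set
    return i;
  }

  public static void main(String[] args) {
    byte[] buf = new byte[5];
    int len = writeVInt(300, buf);                     // encodes as 0xAC 0x02
    System.out.println(len + " bytes, decoded=" + readVInt(buf)); // "2 bytes, decoded=300"
  }
}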
// in lucene/core/src/java/org/apache/lucene/store/DataInput.java
public long readLong() throws IOException {
return (((long)readInt()) << 32) | (readInt() & 0xFFFFFFFFL);
}
// in lucene/core/src/java/org/apache/lucene/store/DataInput.java
public long readVLong() throws IOException {
/* This is the original code of this method,
* but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
* readByte() is inlined. So the loop was unrolled!
byte b = readByte();
long i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
b = readByte();
i |= (b & 0x7FL) << shift;
}
return i;
*/
byte b = readByte();
if (b >= 0) return b;
long i = b & 0x7FL;
b = readByte();
i |= (b & 0x7FL) << 7;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7FL) << 14;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7FL) << 21;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7FL) << 28;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7FL) << 35;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7FL) << 42;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7FL) << 49;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7FL) << 56;
if (b >= 0) return i;
throw new IOException("Invalid vLong detected (negative values disallowed)");
}
// in lucene/core/src/java/org/apache/lucene/store/DataInput.java
public String readString() throws IOException {
int length = readVInt();
final byte[] bytes = new byte[length];
readBytes(bytes, 0, length);
return new String(bytes, 0, length, IOUtils.CHARSET_UTF_8);
}
// in lucene/core/src/java/org/apache/lucene/store/DataInput.java
public Map<String,String> readStringStringMap() throws IOException {
final Map<String,String> map = new HashMap<String,String>();
final int count = readInt();
for(int i=0;i<count;i++) {
final String key = readString();
final String val = readString();
map.put(key, val);
}
return map;
}
// in lucene/core/src/java/org/apache/lucene/store/DataInput.java
public Set<String> readStringSet() throws IOException {
final Set<String> set = new HashSet<String>();
final int count = readInt();
for(int i=0;i<count;i++) {
set.add(readString());
}
return set;
}
// in lucene/core/src/java/org/apache/lucene/store/SimpleFSLockFactory.java
Override
public void clearLock(String lockName) throws IOException {
if (lockDir.exists()) {
if (lockPrefix != null) {
lockName = lockPrefix + "-" + lockName;
}
File lockFile = new File(lockDir, lockName);
if (lockFile.exists() && !lockFile.delete()) {
throw new IOException("Cannot delete " + lockFile);
}
}
}
// in lucene/core/src/java/org/apache/lucene/store/SimpleFSLockFactory.java
Override
public boolean obtain() throws IOException {
// Ensure that lockDir exists and is a directory:
if (!lockDir.exists()) {
if (!lockDir.mkdirs())
throw new IOException("Cannot create directory: " +
lockDir.getAbsolutePath());
} else if (!lockDir.isDirectory()) {
// TODO: NoSuchDirectoryException instead?
throw new IOException("Found regular file where directory expected: " +
lockDir.getAbsolutePath());
}
return lockFile.createNewFile();
}
// in lucene/core/src/java/org/apache/lucene/store/LockVerifyServer.java
public static void main(String[] args) throws IOException {
if (args.length != 1) {
System.out.println("\nUsage: java org.apache.lucene.store.LockVerifyServer port\n");
System.exit(1);
}
final int port = Integer.parseInt(args[0]);
ServerSocket s = new ServerSocket(port);
s.setReuseAddress(true);
System.out.println("\nReady on port " + port + "...");
int lockedID = 0;
long startTime = System.currentTimeMillis();
while(true) {
Socket cs = s.accept();
OutputStream out = cs.getOutputStream();
InputStream in = cs.getInputStream();
int id = in.read();
int command = in.read();
boolean err = false;
if (command == 1) {
// Locked
if (lockedID != 0) {
err = true;
System.out.println(getTime(startTime) + " ERROR: id " + id + " got lock, but " + lockedID + " already holds the lock");
}
lockedID = id;
} else if (command == 0) {
if (lockedID != id) {
err = true;
System.out.println(getTime(startTime) + " ERROR: id " + id + " released the lock, but " + lockedID + " is the one holding the lock");
}
lockedID = 0;
} else
throw new RuntimeException("unrecognized command " + command);
System.out.print(".");
if (err)
out.write(1);
else
out.write(0);
out.close();
in.close();
cs.close();
}
}
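// [Illustrative sketch, not part of the extracted sources; host and port are hypothetical.]
// A minimal client for the protocol the server above speaks: send one id byte and one command
// byte (1 = "just obtained the lock", 0 = "just released it"), then read a one-byte verdict
// (0 = consistent, 1 = the server saw overlapping lock ownership). In Lucene this client role
// is played by VerifyingLockFactory.
import java.io.InputStream;
import java.io.OutputStream;
import java.net.Socket;

class LockVerifyClientSketch {
  static boolean report(String host, int port, int id, boolean obtained) throws Exception {
    Socket s = new Socket(host, port);
    try {
      OutputStream out = s.getOutputStream();
      InputStream in = s.getInputStream();
      out.write(id);
      out.write(obtained ? 1 : 0);
      out.flush();
      return in.read() == 0;   // false if the server flagged an inconsistency
    } finally {
      s.close();
    }
  }
}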
// in lucene/core/src/java/org/apache/lucene/store/SingleInstanceLockFactory.java
Override
public void clearLock(String lockName) throws IOException {
synchronized(locks) {
if (locks.contains(lockName)) {
locks.remove(lockName);
}
}
}
// in lucene/core/src/java/org/apache/lucene/store/SingleInstanceLockFactory.java
Override
public boolean obtain() throws IOException {
synchronized(locks) {
return locks.add(lockName);
}
}
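// [Illustrative usage sketch, not part of the extracted sources] SingleInstanceLockFactory keeps
// locks purely in memory, so it only guards writers that share the same Directory instance; it is
// the default LockFactory of RAMDirectory.
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.RAMDirectory;

class SingleInstanceLockSketch {
  static void demo() throws Exception {
    RAMDirectory dir = new RAMDirectory();        // uses SingleInstanceLockFactory by default
    Lock first = dir.makeLock("write.lock");
    Lock second = dir.makeLock("write.lock");
    System.out.println(first.obtain());           // true: name added to the in-memory set
    System.out.println(second.obtain());          // false: the set already contains the name
    first.release();
  }
}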
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java
private static final Map<String, FileEntry> readEntries(
IndexInputSlicer handle, Directory dir, String name) throws IOException {
// read the first VInt. If it is negative, it's the version number
// otherwise it's the count (pre-3.1 indexes)
final IndexInput stream = handle.openFullSlice();
final Map<String, FileEntry> mapping;
boolean success = false;
try {
final int firstInt = stream.readVInt();
if (firstInt == CompoundFileWriter.FORMAT_CURRENT) {
IndexInput input = null;
try {
final String entriesFileName = IndexFileNames.segmentFileName(
IndexFileNames.stripExtension(name), "",
IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
input = dir.openInput(entriesFileName, IOContext.READONCE);
final int readInt = input.readInt(); // unused right now
assert readInt == CompoundFileWriter.ENTRY_FORMAT_CURRENT;
final int numEntries = input.readVInt();
mapping = new HashMap<String, CompoundFileDirectory.FileEntry>(
numEntries);
for (int i = 0; i < numEntries; i++) {
final FileEntry fileEntry = new FileEntry();
final String id = input.readString();
assert !mapping.containsKey(id): "id=" + id + " was written multiple times in the CFS";
mapping.put(id, fileEntry);
fileEntry.offset = input.readLong();
fileEntry.length = input.readLong();
}
return mapping;
} finally {
IOUtils.close(input);
}
} else {
// TODO remove once 3.x is not supported anymore
mapping = readLegacyEntries(stream, firstInt);
}
success = true;
return mapping;
} finally {
if (success) {
IOUtils.close(stream);
} else {
IOUtils.closeWhileHandlingException(stream);
}
}
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java
private static Map<String, FileEntry> readLegacyEntries(IndexInput stream,
int firstInt) throws CorruptIndexException, IOException {
final Map<String,FileEntry> entries = new HashMap<String,FileEntry>();
final int count;
final boolean stripSegmentName;
if (firstInt < CompoundFileWriter.FORMAT_PRE_VERSION) {
if (firstInt < CompoundFileWriter.FORMAT_CURRENT) {
throw new CorruptIndexException("Incompatible format version: "
+ firstInt + " expected " + CompoundFileWriter.FORMAT_CURRENT + " (resource: " + stream + ")");
}
// It's a post-3.1 index, read the count.
count = stream.readVInt();
stripSegmentName = false;
} else {
count = firstInt;
stripSegmentName = true;
}
// read the directory and init files
long streamLength = stream.length();
FileEntry entry = null;
for (int i=0; i<count; i++) {
long offset = stream.readLong();
if (offset < 0 || offset > streamLength) {
throw new CorruptIndexException("Invalid CFS entry offset: " + offset + " (resource: " + stream + ")");
}
String id = stream.readString();
if (stripSegmentName) {
// Fix the id to not include the segment names. This is relevant for
// pre-3.1 indexes.
id = IndexFileNames.stripSegmentName(id);
}
if (entry != null) {
// set length of the previous entry
entry.length = offset - entry.offset;
}
entry = new FileEntry();
entry.offset = offset;
assert !entries.containsKey(id);
entries.put(id, entry);
}
// set the length of the final entry
if (entry != null) {
entry.length = streamLength - entry.offset;
}
return entries;
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java
Override
public synchronized void close() throws IOException {
if (!isOpen) {
// allow double close - usually to be consistent with other closeables
return; // already closed
}
isOpen = false;
if (writer != null) {
assert openForWrite;
writer.close();
} else {
IOUtils.close(handle);
}
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java
Override
public synchronized IndexInput openInput(String name, IOContext context) throws IOException {
ensureOpen();
assert !openForWrite;
final String id = IndexFileNames.stripSegmentName(name);
final FileEntry entry = entries.get(id);
if (entry == null) {
throw new FileNotFoundException("No sub-file with id " + id + " found (fileName=" + name + " files: " + entries.keySet() + ")");
}
return handle.openSlice(name, entry.offset, entry.length);
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java
Override
public long fileLength(String name) throws IOException {
ensureOpen();
if (this.writer != null) {
return writer.fileLength(name);
}
FileEntry e = entries.get(IndexFileNames.stripSegmentName(name));
if (e == null)
throw new FileNotFoundException(name);
return e.length;
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java
Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
ensureOpen();
return writer.createOutput(name, context);
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java
Override
public void sync(Collection<String> names) throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java
Override
public IndexInputSlicer createSlicer(final String name, IOContext context)
throws IOException {
ensureOpen();
assert !openForWrite;
final String id = IndexFileNames.stripSegmentName(name);
final FileEntry entry = entries.get(id);
if (entry == null) {
throw new FileNotFoundException("No sub-file with id " + id + " found (fileName=" + name + " files: " + entries.keySet() + ")");
}
return new IndexInputSlicer() {
@Override
public void close() throws IOException {
}
@Override
public IndexInput openSlice(String sliceDescription, long offset, long length) throws IOException {
return handle.openSlice(sliceDescription, entry.offset + offset, length);
}
@Override
public IndexInput openFullSlice() throws IOException {
return openSlice("full-slice", 0, entry.length);
}
};
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java
Override
public void close() throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java
Override
public IndexInput openSlice(String sliceDescription, long offset, long length) throws IOException {
return handle.openSlice(sliceDescription, entry.offset + offset, length);
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java
Override
public IndexInput openFullSlice() throws IOException {
return openSlice("full-slice", 0, entry.length);
}
// in lucene/core/src/java/org/apache/lucene/store/SimpleFSDirectory.java
Override
public IndexInput openInput(String name, IOContext context) throws IOException {
ensureOpen();
final File path = new File(directory, name);
return new SimpleFSIndexInput("SimpleFSIndexInput(path=\"" + path.getPath() + "\")", path, context, getReadChunkSize());
}
// in lucene/core/src/java/org/apache/lucene/store/SimpleFSDirectory.java
public IndexInputSlicer createSlicer(final String name,
final IOContext context) throws IOException {
ensureOpen();
final File file = new File(getDirectory(), name);
final Descriptor descriptor = new Descriptor(file, "r");
return new IndexInputSlicer() {
@Override
public void close() throws IOException {
descriptor.close();
}
@Override
public IndexInput openSlice(String sliceDescription, long offset, long length) throws IOException {
return new SimpleFSIndexInput("SimpleFSIndexInput(" + sliceDescription + " in path=\"" + file.getPath() + "\" slice=" + offset + ":" + (offset+length) + ")", descriptor, offset,
length, BufferedIndexInput.bufferSize(context), getReadChunkSize());
}
@Override
public IndexInput openFullSlice() throws IOException {
return openSlice("full-slice", 0, descriptor.length);
}
};
}
// in lucene/core/src/java/org/apache/lucene/store/SimpleFSDirectory.java
Override
public void close() throws IOException {
descriptor.close();
}
// in lucene/core/src/java/org/apache/lucene/store/SimpleFSDirectory.java
Override
public IndexInput openSlice(String sliceDescription, long offset, long length) throws IOException {
return new SimpleFSIndexInput("SimpleFSIndexInput(" + sliceDescription + " in path=\"" + file.getPath() + "\" slice=" + offset + ":" + (offset+length) + ")", descriptor, offset,
length, BufferedIndexInput.bufferSize(context), getReadChunkSize());
}
// in lucene/core/src/java/org/apache/lucene/store/SimpleFSDirectory.java
Override
public IndexInput openFullSlice() throws IOException {
return openSlice("full-slice", 0, descriptor.length);
}
// in lucene/core/src/java/org/apache/lucene/store/SimpleFSDirectory.java
Override
public void close() throws IOException {
if (isOpen) {
isOpen=false;
super.close();
}
}
// in lucene/core/src/java/org/apache/lucene/store/SimpleFSDirectory.java
Override
protected void readInternal(byte[] b, int offset, int len)
throws IOException {
synchronized (file) {
long position = off + getFilePointer();
if (position != file.position) {
file.seek(position);
file.position = position;
}
int total = 0;
if (position + len > end) {
throw new EOFException("read past EOF: " + this);
}
try {
do {
final int readLength;
if (total + chunkSize > len) {
readLength = len - total;
} else {
// LUCENE-1566 - work around JVM Bug by breaking very large reads into chunks
readLength = chunkSize;
}
final int i = file.read(b, offset + total, readLength);
file.position += i;
total += i;
} while (total < len);
} catch (OutOfMemoryError e) {
// propagate OOM up and add a hint for 32bit VM Users hitting the bug
// with a large chunk size in the fast path.
final OutOfMemoryError outOfMemoryError = new OutOfMemoryError(
"OutOfMemoryError likely caused by the Sun VM Bug described in "
+ "https://issues.apache.org/jira/browse/LUCENE-1566; try calling FSDirectory.setReadChunkSize "
+ "with a value smaller than the current chunk size (" + chunkSize + ")");
outOfMemoryError.initCause(e);
throw outOfMemoryError;
} catch (IOException ioe) {
throw new IOException(ioe.getMessage() + ": " + this, ioe);
}
}
}
// in lucene/core/src/java/org/apache/lucene/store/SimpleFSDirectory.java
Override
public void close() throws IOException {
// only close the file if this is not a clone
if (!isClone) file.close();
}
// in lucene/core/src/java/org/apache/lucene/store/SimpleFSDirectory.java
boolean isFDValid() throws IOException {
return file.getFD().valid();
}
// in lucene/core/src/java/org/apache/lucene/store/SimpleFSDirectory.java
Override
public void copyBytes(IndexOutput out, long numBytes) throws IOException {
numBytes -= flushBuffer(out, numBytes);
// If out is FSIndexOutput, the copy will be optimized
out.copyBytes(this, numBytes);
}
// in lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java
Override
public String[] listAll() throws IOException {
return other.listAll();
}
// in lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java
Override
public boolean fileExists(String name) throws IOException {
return other.fileExists(name);
}
// in lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java
Override
public void deleteFile(String name) throws IOException {
createdFileNames.remove(name);
other.deleteFile(name);
}
// in lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java
Override
public long fileLength(String name) throws IOException {
return other.fileLength(name);
}
// in lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java
Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
createdFileNames.add(name);
return other.createOutput(name, context);
}
// in lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java
Override
public void sync(Collection<String> names) throws IOException {
other.sync(names);
}
// in lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java
Override
public IndexInput openInput(String name, IOContext context) throws IOException {
return other.openInput(name, context);
}
// in lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java
Override
public void clearLock(String name) throws IOException {
other.clearLock(name);
}
// in lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java
Override
public void close() throws IOException {
other.close();
}
// in lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java
Override
public void setLockFactory(LockFactory lockFactory) throws IOException {
other.setLockFactory(lockFactory);
}
// in lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java
Override
public void copy(Directory to, String src, String dest, IOContext context) throws IOException {
createdFileNames.add(dest);
other.copy(to, src, dest, context);
}
// in lucene/core/src/java/org/apache/lucene/store/TrackingDirectoryWrapper.java
Override
public Directory.IndexInputSlicer createSlicer(final String name, final IOContext context) throws IOException {
return other.createSlicer(name, context);
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java
private synchronized IndexOutput getOutput() throws IOException {
if (dataOut == null) {
boolean success = false;
try {
dataOut = directory.createOutput(dataFileName, IOContext.DEFAULT);
dataOut.writeVInt(FORMAT_CURRENT);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(dataOut);
}
}
}
return dataOut;
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java
public void close() throws IOException {
if (closed) {
return;
}
IOException priorException = null;
IndexOutput entryTableOut = null;
try {
if (!pendingEntries.isEmpty() || outputTaken.get()) {
throw new IllegalStateException("CFS has pending open files");
}
closed = true;
// open the compound stream
getOutput();
assert dataOut != null;
long finalLength = dataOut.getFilePointer();
assert assertFileLength(finalLength, dataOut);
} catch (IOException e) {
priorException = e;
} finally {
IOUtils.closeWhileHandlingException(priorException, dataOut);
}
try {
entryTableOut = directory.createOutput(entryTableName, IOContext.DEFAULT);
writeEntryTable(entries.values(), entryTableOut);
} catch (IOException e) {
priorException = e;
} finally {
IOUtils.closeWhileHandlingException(priorException, entryTableOut);
}
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java
private static boolean assertFileLength(long expected, IndexOutput out)
throws IOException {
out.flush();
assert expected == out.length() : "expected: " + expected + " was "
+ out.length();
return true;
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java
private final long copyFileEntry(IndexOutput dataOut, FileEntry fileEntry)
throws IOException, MergeAbortedException {
final IndexInput is = fileEntry.dir.openInput(fileEntry.file, IOContext.READONCE);
boolean success = false;
try {
final long startPtr = dataOut.getFilePointer();
final long length = fileEntry.length;
dataOut.copyBytes(is, length);
// Verify that the output length diff is equal to original file
long endPtr = dataOut.getFilePointer();
long diff = endPtr - startPtr;
if (diff != length)
throw new IOException("Difference in the output file offsets " + diff
+ " does not match the original file length " + length);
fileEntry.offset = startPtr;
success = true;
return length;
} finally {
if (success) {
IOUtils.close(is);
// copy successful - delete file
fileEntry.dir.deleteFile(fileEntry.file);
} else {
IOUtils.closeWhileHandlingException(is);
}
}
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java
protected void writeEntryTable(Collection<FileEntry> entries,
IndexOutput entryOut) throws IOException {
entryOut.writeInt(ENTRY_FORMAT_CURRENT);
entryOut.writeVInt(entries.size());
for (FileEntry fe : entries) {
entryOut.writeString(IndexFileNames.stripSegmentName(fe.file));
entryOut.writeLong(fe.offset);
entryOut.writeLong(fe.length);
}
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java
IndexOutput createOutput(String name, IOContext context) throws IOException {
ensureOpen();
boolean success = false;
boolean outputLocked = false;
try {
assert name != null : "name must not be null";
if (entries.containsKey(name)) {
throw new IllegalArgumentException("File " + name + " already exists");
}
final FileEntry entry = new FileEntry();
entry.file = name;
entries.put(name, entry);
final String id = IndexFileNames.stripSegmentName(name);
assert !seenIDs.contains(id): "file=\"" + name + "\" maps to id=\"" + id + "\", which was already written";
seenIDs.add(id);
final DirectCFSIndexOutput out;
if ((outputLocked = outputTaken.compareAndSet(false, true))) {
out = new DirectCFSIndexOutput(getOutput(), entry, false);
} else {
entry.dir = this.directory;
if (directory.fileExists(name)) {
throw new IllegalArgumentException("File " + name + " already exists");
}
out = new DirectCFSIndexOutput(directory.createOutput(name, context), entry,
true);
}
success = true;
return out;
} finally {
if (!success) {
entries.remove(name);
if (outputLocked) { // release the output lock if not successful
assert outputTaken.get();
releaseOutputLock();
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java
private final void prunePendingEntries() throws IOException {
// claim the output and copy all pending files in
if (outputTaken.compareAndSet(false, true)) {
try {
while (!pendingEntries.isEmpty()) {
FileEntry entry = pendingEntries.poll();
copyFileEntry(getOutput(), entry);
entries.put(entry.file, entry);
}
} finally {
final boolean compareAndSet = outputTaken.compareAndSet(true, false);
assert compareAndSet;
}
}
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java
long fileLength(String name) throws IOException {
FileEntry fileEntry = entries.get(name);
if (fileEntry == null) {
throw new FileNotFoundException(name + " does not exist");
}
return fileEntry.length;
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java
Override
public void flush() throws IOException {
delegate.flush();
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java
Override
public void close() throws IOException {
if (!closed) {
closed = true;
entry.length = writtenBytes;
if (isSeparate) {
delegate.close();
// we are a separate file - push into the pending entries
pendingEntries.add(entry);
} else {
// we have been written into the CFS directly - release the lock
releaseOutputLock();
}
// now prune all pending entries and push them into the CFS
prunePendingEntries();
}
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java
Override
public void seek(long pos) throws IOException {
assert !closed;
delegate.seek(offset + pos);
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java
Override
public long length() throws IOException {
assert !closed;
return delegate.length() - offset;
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java
Override
public void writeByte(byte b) throws IOException {
assert !closed;
writtenBytes++;
delegate.writeByte(b);
}
// in lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java
Override
public void writeBytes(byte[] b, int offset, int length) throws IOException {
assert !closed;
writtenBytes += length;
delegate.writeBytes(b, offset, length);
}
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
private static File getCanonicalPath(File file) throws IOException {
return new File(file.getCanonicalPath());
}
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
public static FSDirectory open(File path) throws IOException {
return open(path, null);
}
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
public static FSDirectory open(File path, LockFactory lockFactory) throws IOException {
if ((Constants.WINDOWS || Constants.SUN_OS || Constants.LINUX)
&& Constants.JRE_IS_64BIT && MMapDirectory.UNMAP_SUPPORTED) {
return new MMapDirectory(path, lockFactory);
} else if (Constants.WINDOWS) {
return new SimpleFSDirectory(path, lockFactory);
} else {
return new NIOFSDirectory(path, lockFactory);
}
}
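// [Illustrative usage sketch, not part of the extracted sources; the path is hypothetical.]
// open() picks an FSDirectory subclass for the platform, exactly as implemented above:
// MMapDirectory on 64-bit JREs that support unmapping, SimpleFSDirectory on other Windows
// setups, and NIOFSDirectory everywhere else.
import java.io.File;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

class OpenDirectorySketch {
  static Directory openIndex() throws Exception {
    Directory dir = FSDirectory.open(new File("/tmp/my-index"));
    System.out.println(dir.getClass().getSimpleName()); // e.g. "MMapDirectory" on 64-bit Linux
    return dir;
  }
}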
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
Override
public void setLockFactory(LockFactory lockFactory) throws IOException {
super.setLockFactory(lockFactory);
// for a filesystem-based LockFactory, clear the lockPrefix if the locks are placed
// in the index dir; if no lock dir is given, use this directory
if (lockFactory instanceof FSLockFactory) {
final FSLockFactory lf = (FSLockFactory) lockFactory;
final File dir = lf.getLockDir();
// if the lock factory has no lockDir set, use this directory as lockDir
if (dir == null) {
lf.setLockDir(directory);
lf.setLockPrefix(null);
} else if (dir.getCanonicalPath().equals(directory.getCanonicalPath())) {
lf.setLockPrefix(null);
}
}
}
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
public static String[] listAll(File dir) throws IOException {
if (!dir.exists())
throw new NoSuchDirectoryException("directory '" + dir + "' does not exist");
else if (!dir.isDirectory())
throw new NoSuchDirectoryException("file '" + dir + "' exists but is not a directory");
// Exclude subdirs
String[] result = dir.list(new FilenameFilter() {
public boolean accept(File dir, String file) {
return !new File(dir, file).isDirectory();
}
});
if (result == null)
throw new IOException("directory '" + dir + "' exists and is a directory, but cannot be listed: list() returned null");
return result;
}
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
Override
public String[] listAll() throws IOException {
ensureOpen();
return listAll(directory);
}
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
Override
public long fileLength(String name) throws IOException {
ensureOpen();
File file = new File(directory, name);
final long len = file.length();
if (len == 0 && !file.exists()) {
throw new FileNotFoundException(name);
} else {
return len;
}
}
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
Override
public void deleteFile(String name) throws IOException {
ensureOpen();
File file = new File(directory, name);
if (!file.delete())
throw new IOException("Cannot delete " + file);
staleFiles.remove(name);
}
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
ensureOpen();
ensureCanWrite(name);
return new FSIndexOutput(this, name, context.context == IOContext.Context.MERGE ? mergeWriteRateLimiter : null);
}
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
protected void ensureCanWrite(String name) throws IOException {
if (!directory.exists())
if (!directory.mkdirs())
throw new IOException("Cannot create directory: " + directory);
File file = new File(directory, name);
if (file.exists() && !file.delete()) // delete existing, if any
throw new IOException("Cannot overwrite: " + file);
}
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
Override
public void sync(Collection<String> names) throws IOException {
ensureOpen();
Set<String> toSync = new HashSet<String>(names);
toSync.retainAll(staleFiles);
for (String name : toSync)
fsync(name);
staleFiles.removeAll(toSync);
}
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
Override
public void flushBuffer(byte[] b, int offset, int size) throws IOException {
assert isOpen;
if (rateLimiter != null) {
rateLimiter.pause(size);
}
file.write(b, offset, size);
}
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
Override
public void close() throws IOException {
parent.onIndexOutputClosed(this);
// only close the file if it has not been closed yet
if (isOpen) {
boolean success = false;
try {
super.close();
success = true;
} finally {
isOpen = false;
if (!success) {
try {
file.close();
} catch (Throwable t) {
// Suppress so we don't mask original exception
}
} else {
file.close();
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
Override
public void seek(long pos) throws IOException {
super.seek(pos);
file.seek(pos);
}
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
Override
public long length() throws IOException {
return file.length();
}
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
Override
public void setLength(long length) throws IOException {
file.setLength(length);
}
// in lucene/core/src/java/org/apache/lucene/store/FSDirectory.java
protected void fsync(String name) throws IOException {
File fullFile = new File(directory, name);
boolean success = false;
int retryCount = 0;
IOException exc = null;
while (!success && retryCount < 5) {
retryCount++;
RandomAccessFile file = null;
try {
try {
file = new RandomAccessFile(fullFile, "rw");
file.getFD().sync();
success = true;
} finally {
if (file != null)
file.close();
}
} catch (IOException ioe) {
if (exc == null)
exc = ioe;
try {
// Pause 5 msec
Thread.sleep(5);
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
}
}
}
if (!success)
// Throw original exception
throw exc;
}
// in lucene/core/src/java/org/apache/lucene/store/RAMDirectory.java
Override
public final long fileLength(String name) throws IOException {
ensureOpen();
RAMFile file = fileMap.get(name);
if (file == null) {
throw new FileNotFoundException(name);
}
return file.getLength();
}
// in lucene/core/src/java/org/apache/lucene/store/RAMDirectory.java
Override
public void deleteFile(String name) throws IOException {
ensureOpen();
RAMFile file = fileMap.remove(name);
if (file != null) {
file.directory = null;
sizeInBytes.addAndGet(-file.sizeInBytes);
} else {
throw new FileNotFoundException(name);
}
}
// in lucene/core/src/java/org/apache/lucene/store/RAMDirectory.java
Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
ensureOpen();
RAMFile file = newRAMFile();
RAMFile existing = fileMap.remove(name);
if (existing != null) {
sizeInBytes.addAndGet(-existing.sizeInBytes);
existing.directory = null;
}
fileMap.put(name, file);
return new RAMOutputStream(file);
}
// in lucene/core/src/java/org/apache/lucene/store/RAMDirectory.java
Override
public void sync(Collection<String> names) throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/store/RAMDirectory.java
Override
public IndexInput openInput(String name, IOContext context) throws IOException {
ensureOpen();
RAMFile file = fileMap.get(name);
if (file == null) {
throw new FileNotFoundException(name);
}
return new RAMInputStream(name, file);
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java
Override
public final byte readByte() throws IOException {
if (bufferPosition >= bufferLength)
refill();
return buffer[bufferPosition++];
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java
Override
public final void readBytes(byte[] b, int offset, int len) throws IOException {
readBytes(b, offset, len, true);
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java
Override
public final void readBytes(byte[] b, int offset, int len, boolean useBuffer) throws IOException {
if(len <= (bufferLength-bufferPosition)){
// the buffer contains enough data to satisfy this request
if(len>0) // to allow b to be null if len is 0...
System.arraycopy(buffer, bufferPosition, b, offset, len);
bufferPosition+=len;
} else {
// the buffer does not have enough data. First serve all we've got.
int available = bufferLength - bufferPosition;
if(available > 0){
System.arraycopy(buffer, bufferPosition, b, offset, available);
offset += available;
len -= available;
bufferPosition += available;
}
// and now, read the remaining 'len' bytes:
if (useBuffer && len<bufferSize){
// If the amount left to read is small enough, and
// we are allowed to use our buffer, do it in the usual
// buffered way: fill the buffer and copy from it:
refill();
if(bufferLength<len){
// Throw an exception when refill() could not read len bytes:
System.arraycopy(buffer, 0, b, offset, bufferLength);
throw new EOFException("read past EOF: " + this);
} else {
System.arraycopy(buffer, 0, b, offset, len);
bufferPosition=len;
}
} else {
// The amount left to read is larger than the buffer
// or we've been asked to not use our buffer -
// there's no performance reason not to read it all
// at once. Note that unlike the previous code of
// this function, there is no need to do a seek
// here, because there's no need to reread what we
// had in the buffer.
long after = bufferStart+bufferPosition+len;
if(after > length())
throw new EOFException("read past EOF: " + this);
readInternal(b, offset, len);
bufferStart = after;
bufferPosition = 0;
bufferLength = 0; // trigger refill() on read
}
}
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java
Override
public final short readShort() throws IOException {
if (2 <= (bufferLength-bufferPosition)) {
return (short) (((buffer[bufferPosition++] & 0xFF) << 8) | (buffer[bufferPosition++] & 0xFF));
} else {
return super.readShort();
}
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java
Override
public final int readInt() throws IOException {
if (4 <= (bufferLength-bufferPosition)) {
return ((buffer[bufferPosition++] & 0xFF) << 24) | ((buffer[bufferPosition++] & 0xFF) << 16)
| ((buffer[bufferPosition++] & 0xFF) << 8) | (buffer[bufferPosition++] & 0xFF);
} else {
return super.readInt();
}
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java
Override
public final long readLong() throws IOException {
if (8 <= (bufferLength-bufferPosition)) {
final int i1 = ((buffer[bufferPosition++] & 0xff) << 24) | ((buffer[bufferPosition++] & 0xff) << 16) |
((buffer[bufferPosition++] & 0xff) << 8) | (buffer[bufferPosition++] & 0xff);
final int i2 = ((buffer[bufferPosition++] & 0xff) << 24) | ((buffer[bufferPosition++] & 0xff) << 16) |
((buffer[bufferPosition++] & 0xff) << 8) | (buffer[bufferPosition++] & 0xff);
return (((long)i1) << 32) | (i2 & 0xFFFFFFFFL);
} else {
return super.readLong();
}
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java
Override
public final int readVInt() throws IOException {
if (5 <= (bufferLength-bufferPosition)) {
byte b = buffer[bufferPosition++];
if (b >= 0) return b;
int i = b & 0x7F;
b = buffer[bufferPosition++];
i |= (b & 0x7F) << 7;
if (b >= 0) return i;
b = buffer[bufferPosition++];
i |= (b & 0x7F) << 14;
if (b >= 0) return i;
b = buffer[bufferPosition++];
i |= (b & 0x7F) << 21;
if (b >= 0) return i;
b = buffer[bufferPosition++];
// Warning: the next ands use 0x0F / 0xF0 - beware copy/paste errors:
i |= (b & 0x0F) << 28;
if ((b & 0xF0) == 0) return i;
throw new IOException("Invalid vInt detected (too many bits)");
} else {
return super.readVInt();
}
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java
Override
public final long readVLong() throws IOException {
if (9 <= bufferLength-bufferPosition) {
byte b = buffer[bufferPosition++];
if (b >= 0) return b;
long i = b & 0x7FL;
b = buffer[bufferPosition++];
i |= (b & 0x7FL) << 7;
if (b >= 0) return i;
b = buffer[bufferPosition++];
i |= (b & 0x7FL) << 14;
if (b >= 0) return i;
b = buffer[bufferPosition++];
i |= (b & 0x7FL) << 21;
if (b >= 0) return i;
b = buffer[bufferPosition++];
i |= (b & 0x7FL) << 28;
if (b >= 0) return i;
b = buffer[bufferPosition++];
i |= (b & 0x7FL) << 35;
if (b >= 0) return i;
b = buffer[bufferPosition++];
i |= (b & 0x7FL) << 42;
if (b >= 0) return i;
b = buffer[bufferPosition++];
i |= (b & 0x7FL) << 49;
if (b >= 0) return i;
b = buffer[bufferPosition++];
i |= (b & 0x7FL) << 56;
if (b >= 0) return i;
throw new IOException("Invalid vLong detected (negative values disallowed)");
} else {
return super.readVLong();
}
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java
private void refill() throws IOException {
long start = bufferStart + bufferPosition;
long end = start + bufferSize;
if (end > length()) // don't read past EOF
end = length();
int newLength = (int)(end - start);
if (newLength <= 0)
throw new EOFException("read past EOF: " + this);
if (buffer == null) {
newBuffer(new byte[bufferSize]); // allocate buffer lazily
seekInternal(bufferStart);
}
readInternal(buffer, 0, newLength);
bufferLength = newLength;
bufferStart = start;
bufferPosition = 0;
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java
Override
public final void seek(long pos) throws IOException {
if (pos >= bufferStart && pos < (bufferStart + bufferLength))
bufferPosition = (int)(pos - bufferStart); // seek within buffer
else {
bufferStart = pos;
bufferPosition = 0;
bufferLength = 0; // trigger refill() on read()
seekInternal(pos);
}
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java
protected final int flushBuffer(IndexOutput out, long numBytes) throws IOException {
int toCopy = bufferLength - bufferPosition;
if (toCopy > numBytes) {
toCopy = (int) numBytes;
}
if (toCopy > 0) {
out.writeBytes(buffer, bufferPosition, toCopy);
bufferPosition += toCopy;
}
return toCopy;
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java
Override
public void copyBytes(IndexOutput out, long numBytes) throws IOException {
assert numBytes >= 0: "numBytes=" + numBytes;
while (numBytes > 0) {
if (bufferLength == bufferPosition) {
refill();
}
numBytes -= flushBuffer(out, numBytes);
}
}
// in lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
final void cleanMapping(final ByteBuffer buffer) throws IOException {
if (useUnmapHack) {
try {
AccessController.doPrivileged(new PrivilegedExceptionAction<Object>() {
public Object run() throws Exception {
final Method getCleanerMethod = buffer.getClass()
.getMethod("cleaner");
getCleanerMethod.setAccessible(true);
final Object cleaner = getCleanerMethod.invoke(buffer);
if (cleaner != null) {
cleaner.getClass().getMethod("clean")
.invoke(cleaner);
}
return null;
}
});
} catch (PrivilegedActionException e) {
final IOException ioe = new IOException("unable to unmap the mapped buffer");
ioe.initCause(e.getCause());
throw ioe;
}
}
}
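// [Illustrative usage sketch, not part of the extracted sources; the path is hypothetical.]
// The reflective "cleaner" call above is what setUseUnmap() turns on; it should only be
// enabled when the platform supports it (MMapDirectory.UNMAP_SUPPORTED).
import java.io.File;
import org.apache.lucene.store.MMapDirectory;

class UnmapSketch {
  static MMapDirectory openMapped(File path) throws Exception {
    MMapDirectory dir = new MMapDirectory(path);
    if (MMapDirectory.UNMAP_SUPPORTED) {
      dir.setUseUnmap(true);   // unmap buffers eagerly on close via cleanMapping()
    }
    return dir;
  }
}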
// in lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
Override
public IndexInput openInput(String name, IOContext context) throws IOException {
ensureOpen();
File f = new File(getDirectory(), name);
RandomAccessFile raf = new RandomAccessFile(f, "r");
try {
return new MMapIndexInput("MMapIndexInput(path=\"" + f + "\")", raf, 0, raf.length(), chunkSizePower);
} finally {
raf.close();
}
}
// in lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
public IndexInputSlicer createSlicer(final String name, final IOContext context) throws IOException {
ensureOpen();
final File f = new File(getDirectory(), name);
final RandomAccessFile raf = new RandomAccessFile(f, "r");
return new IndexInputSlicer() {
@Override
public void close() throws IOException {
raf.close();
}
@Override
public IndexInput openSlice(String sliceDescription, long offset, long length) throws IOException {
return new MMapIndexInput("MMapIndexInput(" + sliceDescription + " in path=\"" + f + "\" slice=" + offset + ":" + (offset+length) + ")", raf, offset, length, chunkSizePower);
}
@Override
public IndexInput openFullSlice() throws IOException {
return openSlice("full-slice", 0, raf.length());
}
};
}
// in lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
Override
public void close() throws IOException {
raf.close();
}
// in lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
Override
public IndexInput openSlice(String sliceDescription, long offset, long length) throws IOException {
return new MMapIndexInput("MMapIndexInput(" + sliceDescription + " in path=\"" + f + "\" slice=" + offset + ":" + (offset+length) + ")", raf, offset, length, chunkSizePower);
}
// in lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
Override
public IndexInput openFullSlice() throws IOException {
return openSlice("full-slice", 0, raf.length());
}
// in lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
Override
public byte readByte() throws IOException {
try {
return curBuf.get();
} catch (BufferUnderflowException e) {
do {
curBufIndex++;
if (curBufIndex >= buffers.length) {
throw new EOFException("read past EOF: " + this);
}
curBuf = buffers[curBufIndex];
curBuf.position(0);
} while (!curBuf.hasRemaining());
return curBuf.get();
} catch (NullPointerException npe) {
throw new AlreadyClosedException("MMapIndexInput already closed: " + this);
}
}
// in lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
Override
public void readBytes(byte[] b, int offset, int len) throws IOException {
try {
curBuf.get(b, offset, len);
} catch (BufferUnderflowException e) {
int curAvail = curBuf.remaining();
while (len > curAvail) {
curBuf.get(b, offset, curAvail);
len -= curAvail;
offset += curAvail;
curBufIndex++;
if (curBufIndex >= buffers.length) {
throw new EOFException("read past EOF: " + this);
}
curBuf = buffers[curBufIndex];
curBuf.position(0);
curAvail = curBuf.remaining();
}
curBuf.get(b, offset, len);
} catch (NullPointerException npe) {
throw new AlreadyClosedException("MMapIndexInput already closed: " + this);
}
}
// in lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
Override
public short readShort() throws IOException {
try {
return curBuf.getShort();
} catch (BufferUnderflowException e) {
return super.readShort();
} catch (NullPointerException npe) {
throw new AlreadyClosedException("MMapIndexInput already closed: " + this);
}
}
// in lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
Override
public int readInt() throws IOException {
try {
return curBuf.getInt();
} catch (BufferUnderflowException e) {
return super.readInt();
} catch (NullPointerException npe) {
throw new AlreadyClosedException("MMapIndexInput already closed: " + this);
}
}
// in lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
Override
public long readLong() throws IOException {
try {
return curBuf.getLong();
} catch (BufferUnderflowException e) {
return super.readLong();
} catch (NullPointerException npe) {
throw new AlreadyClosedException("MMapIndexInput already closed: " + this);
}
}
// in lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
Override
public void seek(long pos) throws IOException {
// we use >> here to preserve negative, so we will catch AIOOBE:
final int bi = (int) (pos >> chunkSizePower);
try {
final ByteBuffer b = buffers[bi];
b.position((int) (pos & chunkSizeMask));
// write values, on exception all is unchanged
this.curBufIndex = bi;
this.curBuf = b;
} catch (ArrayIndexOutOfBoundsException aioobe) {
if (pos < 0L) {
throw new IllegalArgumentException("Seeking to negative position: " + this);
}
throw new EOFException("seek past EOF: " + this);
} catch (IllegalArgumentException iae) {
if (pos < 0L) {
throw new IllegalArgumentException("Seeking to negative position: " + this);
}
throw new EOFException("seek past EOF: " + this);
} catch (NullPointerException npe) {
throw new AlreadyClosedException("MMapIndexInput already closed: " + this);
}
}
// in lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
Override
public void close() throws IOException {
try {
if (isClone || buffers == null) return;
// for extra safety, also unset all clones' buffers:
synchronized(this.clones) {
for (final MMapIndexInput clone : this.clones) {
assert clone.isClone;
clone.unsetBuffers();
}
this.clones.clear();
}
curBuf = null; curBufIndex = 0; // nuke curr pointer early
for (int bufNr = 0; bufNr < buffers.length; bufNr++) {
cleanMapping(buffers[bufNr]);
}
} finally {
unsetBuffers();
}
}
// in lucene/core/src/java/org/apache/lucene/store/DataOutput.java
public void writeBytes(byte[] b, int length) throws IOException {
writeBytes(b, 0, length);
}
// in lucene/core/src/java/org/apache/lucene/store/DataOutput.java
public void writeInt(int i) throws IOException {
writeByte((byte)(i >> 24));
writeByte((byte)(i >> 16));
writeByte((byte)(i >> 8));
writeByte((byte) i);
}
// in lucene/core/src/java/org/apache/lucene/store/DataOutput.java
public void writeShort(short i) throws IOException {
writeByte((byte)(i >> 8));
writeByte((byte) i);
}
// in lucene/core/src/java/org/apache/lucene/store/DataOutput.java
public final void writeVInt(int i) throws IOException {
while ((i & ~0x7F) != 0) {
writeByte((byte)((i & 0x7F) | 0x80));
i >>>= 7;
}
writeByte((byte)i);
}
// in lucene/core/src/java/org/apache/lucene/store/DataOutput.java
public void writeLong(long i) throws IOException {
writeInt((int) (i >> 32));
writeInt((int) i);
}
// in lucene/core/src/java/org/apache/lucene/store/DataOutput.java
public final void writeVLong(long i) throws IOException {
assert i >= 0L;
while ((i & ~0x7FL) != 0L) {
writeByte((byte)((i & 0x7FL) | 0x80L));
i >>>= 7;
}
writeByte((byte)i);
}
// in lucene/core/src/java/org/apache/lucene/store/DataOutput.java
public void writeString(String s) throws IOException {
final BytesRef utf8Result = new BytesRef(10);
UnicodeUtil.UTF16toUTF8(s, 0, s.length(), utf8Result);
writeVInt(utf8Result.length);
writeBytes(utf8Result.bytes, 0, utf8Result.length);
}
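// [Illustrative round-trip sketch, not part of the extracted sources; file name and directory
// choice are arbitrary.] writeString() above emits a vInt byte length followed by the UTF-8
// bytes, which is exactly what DataInput.readString() consumes.
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

class StringRoundTripSketch {
  static String roundTrip(String value) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexOutput out = dir.createOutput("strings", IOContext.DEFAULT);
    out.writeString(value);
    out.close();
    IndexInput in = dir.openInput("strings", IOContext.DEFAULT);
    String read = in.readString();   // decodes vInt length + UTF-8 bytes
    in.close();
    return read;                     // equal to value
  }
}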
// in lucene/core/src/java/org/apache/lucene/store/DataOutput.java
public void copyBytes(DataInput input, long numBytes) throws IOException {
assert numBytes >= 0: "numBytes=" + numBytes;
long left = numBytes;
if (copyBuffer == null)
copyBuffer = new byte[COPY_BUFFER_SIZE];
while(left > 0) {
final int toCopy;
if (left > COPY_BUFFER_SIZE)
toCopy = COPY_BUFFER_SIZE;
else
toCopy = (int) left;
input.readBytes(copyBuffer, 0, toCopy);
writeBytes(copyBuffer, 0, toCopy);
left -= toCopy;
}
}
// in lucene/core/src/java/org/apache/lucene/store/DataOutput.java
public void writeStringStringMap(Map<String,String> map) throws IOException {
if (map == null) {
writeInt(0);
} else {
writeInt(map.size());
for(final Map.Entry<String, String> entry: map.entrySet()) {
writeString(entry.getKey());
writeString(entry.getValue());
}
}
}
// in lucene/core/src/java/org/apache/lucene/store/DataOutput.java
public void writeStringSet(Set<String> set) throws IOException {
if (set == null) {
writeInt(0);
} else {
writeInt(set.size());
for(String value : set) {
writeString(value);
}
}
}
// in lucene/core/src/java/org/apache/lucene/store/InputStreamDataInput.java
Override
public byte readByte() throws IOException {
int v = is.read();
if (v == -1) throw new EOFException();
return (byte) v;
}
// in lucene/core/src/java/org/apache/lucene/store/InputStreamDataInput.java
Override
public void readBytes(byte[] b, int offset, int len) throws IOException {
while (len > 0) {
final int cnt = is.read(b, offset, len);
if (cnt < 0) {
// Partially read the input, but no more data available in the stream.
throw new EOFException();
}
len -= cnt;
offset += cnt;
}
}
// in lucene/core/src/java/org/apache/lucene/store/InputStreamDataInput.java
Override
public void close() throws IOException {
is.close();
}
// in lucene/core/src/java/org/apache/lucene/store/RAMInputStream.java
Override
public byte readByte() throws IOException {
if (bufferPosition >= bufferLength) {
currentBufferIndex++;
switchCurrentBuffer(true);
}
return currentBuffer[bufferPosition++];
}
// in lucene/core/src/java/org/apache/lucene/store/RAMInputStream.java
Override
public void readBytes(byte[] b, int offset, int len) throws IOException {
while (len > 0) {
if (bufferPosition >= bufferLength) {
currentBufferIndex++;
switchCurrentBuffer(true);
}
int remainInBuffer = bufferLength - bufferPosition;
int bytesToCopy = len < remainInBuffer ? len : remainInBuffer;
System.arraycopy(currentBuffer, bufferPosition, b, offset, bytesToCopy);
offset += bytesToCopy;
len -= bytesToCopy;
bufferPosition += bytesToCopy;
}
}
// in lucene/core/src/java/org/apache/lucene/store/RAMInputStream.java
private final void switchCurrentBuffer(boolean enforceEOF) throws IOException {
bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex;
if (currentBufferIndex >= file.numBuffers()) {
// end of file reached, no more buffers left
if (enforceEOF) {
throw new EOFException("read past EOF: " + this);
} else {
// Force EOF if a read takes place at this position
currentBufferIndex--;
bufferPosition = BUFFER_SIZE;
}
} else {
currentBuffer = file.getBuffer(currentBufferIndex);
bufferPosition = 0;
long buflen = length - bufferStart;
bufferLength = buflen > BUFFER_SIZE ? BUFFER_SIZE : (int) buflen;
}
}
// in lucene/core/src/java/org/apache/lucene/store/RAMInputStream.java
Override
public void copyBytes(IndexOutput out, long numBytes) throws IOException {
assert numBytes >= 0: "numBytes=" + numBytes;
long left = numBytes;
while (left > 0) {
if (bufferPosition == bufferLength) {
++currentBufferIndex;
switchCurrentBuffer(true);
}
final int bytesInBuffer = bufferLength - bufferPosition;
final int toCopy = (int) (bytesInBuffer < left ? bytesInBuffer : left);
out.writeBytes(currentBuffer, bufferPosition, toCopy);
bufferPosition += toCopy;
left -= toCopy;
}
assert left == 0: "Insufficient bytes to copy: numBytes=" + numBytes + " copied=" + (numBytes - left);
}
// in lucene/core/src/java/org/apache/lucene/store/RAMInputStream.java
Override
public void seek(long pos) throws IOException {
if (currentBuffer==null || pos < bufferStart || pos >= bufferStart + BUFFER_SIZE) {
currentBufferIndex = (int) (pos / BUFFER_SIZE);
switchCurrentBuffer(false);
}
bufferPosition = (int) (pos % BUFFER_SIZE);
}
// in lucene/core/src/java/org/apache/lucene/store/OutputStreamDataOutput.java
Override
public void writeByte(byte b) throws IOException {
os.write(b);
}
// in lucene/core/src/java/org/apache/lucene/store/OutputStreamDataOutput.java
Override
public void writeBytes(byte[] b, int offset, int length) throws IOException {
os.write(b, offset, length);
}
// in lucene/core/src/java/org/apache/lucene/store/OutputStreamDataOutput.java
Override
public void close() throws IOException {
os.close();
}
// in lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java
Override
public void close() throws IOException {
if (doClose) {
try {
secondaryDir.close();
} finally {
primaryDir.close();
}
doClose = false;
}
}
// in lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java
Override
public String[] listAll() throws IOException {
Set<String> files = new HashSet<String>();
// LUCENE-3380: either or both of our dirs could be FSDirs.
// If one underlying delegate is an FSDir whose mkdirs() has not
// been called yet (because so far everything was written to the
// other), we don't want to throw a NoSuchDirectoryException.
NoSuchDirectoryException exc = null;
try {
for(String f : primaryDir.listAll()) {
files.add(f);
}
} catch (NoSuchDirectoryException e) {
exc = e;
}
try {
for(String f : secondaryDir.listAll()) {
files.add(f);
}
} catch (NoSuchDirectoryException e) {
// we got NoSuchDirectoryException from both dirs
// rethrow the first.
if (exc != null) {
throw exc;
}
// we got NoSuchDirectoryException from the secondary,
// and the primary is empty.
if (files.isEmpty()) {
throw e;
}
}
// we got NoSuchDirectoryException from the primary,
// and the secondary is empty.
if (exc != null && files.isEmpty()) {
throw exc;
}
return files.toArray(new String[files.size()]);
}
// in lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java
Override
public boolean fileExists(String name) throws IOException {
return getDirectory(name).fileExists(name);
}
// in lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java
Override
public void deleteFile(String name) throws IOException {
getDirectory(name).deleteFile(name);
}
// in lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java
Override
public long fileLength(String name) throws IOException {
return getDirectory(name).fileLength(name);
}
// in lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java
Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
return getDirectory(name).createOutput(name, context);
}
// in lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java
Override
public void sync(Collection<String> names) throws IOException {
List<String> primaryNames = new ArrayList<String>();
List<String> secondaryNames = new ArrayList<String>();
for (String name : names)
if (primaryExtensions.contains(getExtension(name)))
primaryNames.add(name);
else
secondaryNames.add(name);
primaryDir.sync(primaryNames);
secondaryDir.sync(secondaryNames);
}
// in lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java
Override
public IndexInput openInput(String name, IOContext context) throws IOException {
return getDirectory(name).openInput(name, context);
}
// in lucene/core/src/java/org/apache/lucene/store/FileSwitchDirectory.java
Override
public IndexInputSlicer createSlicer(String name, IOContext context)
throws IOException {
return getDirectory(name).createSlicer(name, context);
}
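// [Illustrative usage sketch, not part of the extracted sources; the extension routing shown is
// hypothetical.] FileSwitchDirectory sends files whose extension is in the primary set to one
// delegate and everything else to the other; getDirectory(name) above does the routing per call.
import java.io.File;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.FileSwitchDirectory;
import org.apache.lucene.store.RAMDirectory;

class FileSwitchSketch {
  static FileSwitchDirectory split(File onDiskPath) throws Exception {
    Set<String> primaryExtensions = new HashSet<String>();
    primaryExtensions.add("tim");                    // e.g. keep term dictionaries in RAM
    return new FileSwitchDirectory(primaryExtensions,
        new RAMDirectory(),                          // primary: extensions listed above
        FSDirectory.open(onDiskPath),                // secondary: everything else
        true);                                       // close both delegates on close()
  }
}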
// in lucene/core/src/java/org/apache/lucene/store/IndexOutput.java
public void setLength(long length) throws IOException {}
// in lucene/core/src/java/org/apache/lucene/store/VerifyingLockFactory.java
Override
public synchronized boolean obtain(long lockWaitTimeout)
throws LockObtainFailedException, IOException {
boolean obtained = lock.obtain(lockWaitTimeout);
if (obtained)
verify((byte) 1);
return obtained;
}
// in lucene/core/src/java/org/apache/lucene/store/VerifyingLockFactory.java
Override
public synchronized boolean obtain()
throws LockObtainFailedException, IOException {
return lock.obtain();
}
// in lucene/core/src/java/org/apache/lucene/store/VerifyingLockFactory.java
Override
public synchronized boolean isLocked() throws IOException {
return lock.isLocked();
}
// in lucene/core/src/java/org/apache/lucene/store/VerifyingLockFactory.java
Override
public synchronized void release() throws IOException {
if (isLocked()) {
verify((byte) 0);
lock.release();
}
}
// in lucene/core/src/java/org/apache/lucene/store/VerifyingLockFactory.java
Override
public synchronized void clearLock(String lockName)
throws IOException {
lf.clearLock(lockName);
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java
Override
public void writeByte(byte b) throws IOException {
if (bufferPosition >= BUFFER_SIZE)
flush();
buffer[bufferPosition++] = b;
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java
Override
public void writeBytes(byte[] b, int offset, int length) throws IOException {
int bytesLeft = BUFFER_SIZE - bufferPosition;
// is there enough space in the buffer?
if (bytesLeft >= length) {
// we add the data to the end of the buffer
System.arraycopy(b, offset, buffer, bufferPosition, length);
bufferPosition += length;
// if the buffer is full, flush it
if (BUFFER_SIZE - bufferPosition == 0)
flush();
} else {
// is the data larger than the buffer?
if (length > BUFFER_SIZE) {
// we flush the buffer
if (bufferPosition > 0)
flush();
// and write data at once
flushBuffer(b, offset, length);
bufferStart += length;
} else {
// we fill/flush the buffer (until the input is written)
int pos = 0; // position in the input data
int pieceLength;
while (pos < length) {
pieceLength = (length - pos < bytesLeft) ? length - pos : bytesLeft;
System.arraycopy(b, pos + offset, buffer, bufferPosition, pieceLength);
pos += pieceLength;
bufferPosition += pieceLength;
// if the buffer is full, flush it
bytesLeft = BUFFER_SIZE - bufferPosition;
if (bytesLeft == 0) {
flush();
bytesLeft = BUFFER_SIZE;
}
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java
Override
public void flush() throws IOException {
flushBuffer(buffer, bufferPosition);
bufferStart += bufferPosition;
bufferPosition = 0;
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java
private void flushBuffer(byte[] b, int len) throws IOException {
flushBuffer(b, 0, len);
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java
Override
public void close() throws IOException {
flush();
}
// in lucene/core/src/java/org/apache/lucene/store/BufferedIndexOutput.java
Override
public void seek(long pos) throws IOException {
flush();
bufferStart = pos;
}
// in lucene/core/src/java/org/apache/lucene/store/IndexInput.java
public void copyBytes(IndexOutput out, long numBytes) throws IOException {
assert numBytes >= 0: "numBytes=" + numBytes;
byte copyBuf[] = new byte[BufferedIndexInput.BUFFER_SIZE];
while (numBytes > 0) {
final int toCopy = (int) (numBytes > copyBuf.length ? copyBuf.length : numBytes);
readBytes(copyBuf, 0, toCopy);
out.writeBytes(copyBuf, 0, toCopy);
numBytes -= toCopy;
}
}
// in lucene/core/src/java/org/apache/lucene/store/Directory.java
public void clearLock(String name) throws IOException {
if (lockFactory != null) {
lockFactory.clearLock(name);
}
}
// in lucene/core/src/java/org/apache/lucene/store/Directory.java
public void setLockFactory(LockFactory lockFactory) throws IOException {
assert lockFactory != null;
this.lockFactory = lockFactory;
lockFactory.setLockPrefix(this.getLockID());
}
// in lucene/core/src/java/org/apache/lucene/store/Directory.java
public void copy(Directory to, String src, String dest, IOContext context) throws IOException {
IndexOutput os = null;
IndexInput is = null;
IOException priorException = null;
try {
os = to.createOutput(dest, context);
is = openInput(src, context);
is.copyBytes(os, is.length());
} catch (IOException ioe) {
priorException = ioe;
} finally {
IOUtils.closeWhileHandlingException(priorException, os, is);
}
}
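// --- Editor's sketch (not from the source above): a hedged usage example of
// --- Directory.copy(...), which (as shown) streams src from this directory into dest of
// --- the target directory via IndexInput.copyBytes. The paths and the RAM-loading use
// --- case are hypothetical.
import java.io.File;
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.RAMDirectory;

class DirectoryCopyExample {
  // Copies every file of an on-disk index into an in-memory directory.
  static Directory loadIntoRam(File indexPath) throws IOException {
    Directory source = FSDirectory.open(indexPath);
    Directory target = new RAMDirectory();
    for (String fileName : source.listAll()) {
      source.copy(target, fileName, fileName, IOContext.DEFAULT);
    }
    source.close();
    return target;
  }
}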
// in lucene/core/src/java/org/apache/lucene/store/Directory.java
public IndexInputSlicer createSlicer(final String name, final IOContext context) throws IOException {
ensureOpen();
return new IndexInputSlicer() {
private final IndexInput base = Directory.this.openInput(name, context);
@Override
public IndexInput openSlice(String sliceDescription, long offset, long length) {
return new SlicedIndexInput("SlicedIndexInput(" + sliceDescription + " in " + base + ")", base, offset, length);
}
@Override
public void close() throws IOException {
base.close();
}
@Override
public IndexInput openFullSlice() throws IOException {
return (IndexInput) base.clone();
}
};
}
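// --- Editor's sketch (not from the source above): a hedged example of reading a byte
// --- range through the IndexInputSlicer returned by createSlicer(...); the file name,
// --- offset and length are hypothetical.
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;

class SlicerExample {
  static byte[] readSlice(Directory dir, String fileName, long offset, int length) throws IOException {
    Directory.IndexInputSlicer slicer = dir.createSlicer(fileName, IOContext.READ);
    try {
      IndexInput slice = slicer.openSlice("example slice", offset, length);
      try {
        byte[] bytes = new byte[length];
        slice.readBytes(bytes, 0, length);  // the slice starts at 'offset' of the underlying file
        return bytes;
      } finally {
        slice.close();
      }
    } finally {
      slicer.close();
    }
  }
}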
// in lucene/core/src/java/org/apache/lucene/store/Directory.java
Override
public void close() throws IOException {
base.close();
}
// in lucene/core/src/java/org/apache/lucene/store/Directory.java
Override
public IndexInput openFullSlice() throws IOException {
return (IndexInput) base.clone();
}
// in lucene/core/src/java/org/apache/lucene/store/Directory.java
Override
protected void readInternal(byte[] b, int offset, int len) throws IOException {
long start = getFilePointer();
if(start + len > length)
throw new EOFException("read past EOF: " + this);
base.seek(fileOffset + start);
base.readBytes(b, offset, len, false);
}
// in lucene/core/src/java/org/apache/lucene/store/Directory.java
Override
public void close() throws IOException {
base.close();
}
// in lucene/core/src/java/org/apache/lucene/store/Directory.java
Override
public void copyBytes(IndexOutput out, long numBytes) throws IOException {
// Copy first whatever is in the buffer
numBytes -= flushBuffer(out, numBytes);
// If there are more bytes left to copy, delegate the copy task to the
// base IndexInput, in case it can do an optimized copy.
if (numBytes > 0) {
long start = getFilePointer();
if (start + numBytes > length) {
throw new EOFException("read past EOF: " + this);
}
base.seek(fileOffset + start);
base.copyBytes(out, numBytes);
}
}
// in lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java
Override
public void setLockFactory(LockFactory lf) throws IOException {
delegate.setLockFactory(lf);
}
// in lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java
Override
public void clearLock(String name) throws IOException {
delegate.clearLock(name);
}
// in lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java
Override
public synchronized String[] listAll() throws IOException {
final Set<String> files = new HashSet<String>();
for(String f : cache.listAll()) {
files.add(f);
}
// LUCENE-1468: this NRTCachingDirectory always "exists" (its cache is a RAMDir!),
// but if the underlying delegate is an FSDir whose mkdirs() has not
// yet been called (because so far every write has been cached),
// we don't want to throw a NoSuchDirectoryException
try {
for(String f : delegate.listAll()) {
// Cannot do this -- if lucene calls createOutput but
// file already exists then this falsely trips:
//assert !files.contains(f): "file \"" + f + "\" is in both dirs";
files.add(f);
}
} catch (NoSuchDirectoryException ex) {
// however, if there are no cached files, then the directory truly
// does not "exist"
if (files.isEmpty()) {
throw ex;
}
}
return files.toArray(new String[files.size()]);
}
// in lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java
Override
public synchronized boolean fileExists(String name) throws IOException {
return cache.fileExists(name) || delegate.fileExists(name);
}
// in lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java
Override
public synchronized void deleteFile(String name) throws IOException {
if (VERBOSE) {
System.out.println("nrtdir.deleteFile name=" + name);
}
if (cache.fileExists(name)) {
assert !delegate.fileExists(name): "name=" + name;
cache.deleteFile(name);
} else {
delegate.deleteFile(name);
}
}
// in lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java
Override
public synchronized long fileLength(String name) throws IOException {
if (cache.fileExists(name)) {
return cache.fileLength(name);
} else {
return delegate.fileLength(name);
}
}
// in lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java
Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
if (VERBOSE) {
System.out.println("nrtdir.createOutput name=" + name);
}
if (doCacheWrite(name, context)) {
if (VERBOSE) {
System.out.println(" to cache");
}
try {
delegate.deleteFile(name);
} catch (IOException ioe) {
// This is fine: file may not exist
}
return cache.createOutput(name, context);
} else {
try {
cache.deleteFile(name);
} catch (IOException ioe) {
// This is fine: file may not exist
}
return delegate.createOutput(name, context);
}
}
// in lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java
Override
public void sync(Collection<String> fileNames) throws IOException {
if (VERBOSE) {
System.out.println("nrtdir.sync files=" + fileNames);
}
for(String fileName : fileNames) {
unCache(fileName);
}
delegate.sync(fileNames);
}
// in lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java
Override
public synchronized IndexInput openInput(String name, IOContext context) throws IOException {
if (VERBOSE) {
System.out.println("nrtdir.openInput name=" + name);
}
if (cache.fileExists(name)) {
if (VERBOSE) {
System.out.println(" from cache");
}
return cache.openInput(name, context);
} else {
return delegate.openInput(name, context);
}
}
// in lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java
public synchronized IndexInputSlicer createSlicer(final String name, final IOContext context) throws IOException {
ensureOpen();
if (VERBOSE) {
System.out.println("nrtdir.openInput name=" + name);
}
if (cache.fileExists(name)) {
if (VERBOSE) {
System.out.println(" from cache");
}
return cache.createSlicer(name, context);
} else {
return delegate.createSlicer(name, context);
}
}
// in lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java
Override
public void close() throws IOException {
// NOTE: technically we shouldn't have to do this, ie,
// IndexWriter should have sync'd all files, but we do
// it for defensive reasons... or in case the app is
// doing something custom (creating outputs directly w/o
// using IndexWriter):
for(String fileName : cache.listAll()) {
unCache(fileName);
}
cache.close();
delegate.close();
}
// in lucene/core/src/java/org/apache/lucene/store/NRTCachingDirectory.java
private void unCache(String fileName) throws IOException {
// Only let one thread uncache at a time; this only
// happens during commit() or close():
synchronized(uncacheLock) {
if (VERBOSE) {
System.out.println("nrtdir.unCache name=" + fileName);
}
if (!cache.fileExists(fileName)) {
// Another thread beat us...
return;
}
if (delegate.fileExists(fileName)) {
throw new IOException("cannot uncache file=\"" + fileName + "\": it was separately also created in the delegate directory");
}
final IOContext context = IOContext.DEFAULT;
final IndexOutput out = delegate.createOutput(fileName, context);
IndexInput in = null;
try {
in = cache.openInput(fileName, context);
in.copyBytes(out, in.length());
} finally {
IOUtils.close(in, out);
}
// Lock order: uncacheLock -> this
synchronized(this) {
// Must sync here because other sync methods have
// if (cache.fileExists(name)) { ... } else { ... }:
cache.deleteFile(fileName);
}
}
}
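// --- Editor's sketch (not from the source above): a hedged example of wrapping an
// --- on-disk directory in an NRTCachingDirectory so that small, newly flushed files stay
// --- in RAM until they are sync'd or uncached. The (delegate, maxMergeSizeMB, maxCachedMB)
// --- constructor and the 5.0/60.0 values are assumptions for illustration.
import java.io.File;
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NRTCachingDirectory;

class NRTCachingDirectoryExample {
  static Directory openNrtDirectory(File path) throws IOException {
    Directory fsDir = FSDirectory.open(path);
    // Cache writes below ~5 MB per merged segment, up to ~60 MB total, before spilling to disk.
    return new NRTCachingDirectory(fsDir, 5.0, 60.0);
  }
}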
// in lucene/core/src/java/org/apache/lucene/index/NoMergeScheduler.java
Override
public void merge(IndexWriter writer) throws CorruptIndexException, IOException {}
// in lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
final MergeState merge() throws CorruptIndexException, IOException {
// NOTE: it's important to add calls to
// checkAbort.work(...) if you make any changes to this
// method that will spend a lot of time. The frequency
// of this check impacts how long
// IndexWriter.close(false) takes to actually stop the
// threads.
mergeState.segmentInfo.setDocCount(setDocMaps());
mergeDocValuesAndNormsFieldInfos();
setMatchingSegmentReaders();
int numMerged = mergeFields();
assert numMerged == mergeState.segmentInfo.getDocCount();
final SegmentWriteState segmentWriteState = new SegmentWriteState(mergeState.infoStream, directory, mergeState.segmentInfo,
mergeState.fieldInfos, termIndexInterval, null, context);
mergeTerms(segmentWriteState);
mergePerDoc(segmentWriteState);
if (mergeState.fieldInfos.hasNorms()) {
mergeNorms(segmentWriteState);
}
if (mergeState.fieldInfos.hasVectors()) {
numMerged = mergeVectors();
assert numMerged == mergeState.segmentInfo.getDocCount();
}
// write the merged infos
FieldInfosWriter fieldInfosWriter = codec.fieldInfosFormat().getFieldInfosWriter();
fieldInfosWriter.write(directory, mergeState.segmentInfo.name, mergeState.fieldInfos, context);
return mergeState;
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
public void mergeDocValuesAndNormsFieldInfos() throws IOException {
// mapping from all docvalues fields found to their promoted types
// this is because FieldInfos does not store the
// valueSize
Map<FieldInfo,TypePromoter> docValuesTypes = new HashMap<FieldInfo,TypePromoter>();
Map<FieldInfo,TypePromoter> normValuesTypes = new HashMap<FieldInfo,TypePromoter>();
for (MergeState.IndexReaderAndLiveDocs readerAndLiveDocs : mergeState.readers) {
final AtomicReader reader = readerAndLiveDocs.reader;
FieldInfos readerFieldInfos = reader.getFieldInfos();
for (FieldInfo fi : readerFieldInfos) {
FieldInfo merged = fieldInfosBuilder.add(fi);
// update the type promotion mapping for this reader
if (fi.hasDocValues()) {
TypePromoter previous = docValuesTypes.get(merged);
docValuesTypes.put(merged, mergeDocValuesType(previous, reader.docValues(fi.name)));
}
if (fi.hasNorms()) {
TypePromoter previous = normValuesTypes.get(merged);
normValuesTypes.put(merged, mergeDocValuesType(previous, reader.normValues(fi.name)));
}
}
}
updatePromoted(normValuesTypes, true);
updatePromoted(docValuesTypes, false);
mergeState.fieldInfos = fieldInfosBuilder.finish();
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
private int mergeFields() throws CorruptIndexException, IOException {
final StoredFieldsWriter fieldsWriter = codec.storedFieldsFormat().fieldsWriter(directory, mergeState.segmentInfo, context);
try {
return fieldsWriter.merge(mergeState);
} finally {
fieldsWriter.close();
}
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
private final int mergeVectors() throws IOException {
final TermVectorsWriter termVectorsWriter = codec.termVectorsFormat().vectorsWriter(directory, mergeState.segmentInfo, context);
try {
return termVectorsWriter.merge(mergeState);
} finally {
termVectorsWriter.close();
}
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
private int setDocMaps() throws IOException {
final int numReaders = mergeState.readers.size();
// Remap docIDs
mergeState.docMaps = new MergeState.DocMap[numReaders];
mergeState.docBase = new int[numReaders];
mergeState.readerPayloadProcessor = new PayloadProcessorProvider.ReaderPayloadProcessor[numReaders];
mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[numReaders];
int docBase = 0;
int i = 0;
while(i < mergeState.readers.size()) {
final MergeState.IndexReaderAndLiveDocs reader = mergeState.readers.get(i);
mergeState.docBase[i] = docBase;
final MergeState.DocMap docMap = MergeState.DocMap.build(reader);
mergeState.docMaps[i] = docMap;
docBase += docMap.numDocs();
if (mergeState.payloadProcessorProvider != null) {
mergeState.readerPayloadProcessor[i] = mergeState.payloadProcessorProvider.getReaderProcessor(reader.reader);
}
i++;
}
return docBase;
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
private final void mergeTerms(SegmentWriteState segmentWriteState) throws CorruptIndexException, IOException {
final List<Fields> fields = new ArrayList<Fields>();
final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
int docBase = 0;
for(int readerIndex=0;readerIndex<mergeState.readers.size();readerIndex++) {
final MergeState.IndexReaderAndLiveDocs r = mergeState.readers.get(readerIndex);
final Fields f = r.reader.fields();
final int maxDoc = r.reader.maxDoc();
if (f != null) {
slices.add(new ReaderUtil.Slice(docBase, maxDoc, readerIndex));
fields.add(f);
}
docBase += maxDoc;
}
final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(segmentWriteState);
boolean success = false;
try {
consumer.merge(mergeState,
new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY)));
success = true;
} finally {
if (success) {
IOUtils.close(consumer);
} else {
IOUtils.closeWhileHandlingException(consumer);
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
private void mergePerDoc(SegmentWriteState segmentWriteState) throws IOException {
final PerDocConsumer docsConsumer = codec.docValuesFormat()
.docsConsumer(new PerDocWriteState(segmentWriteState));
// TODO: remove this check when 3.x indexes are no longer supported
// (3.x indexes don't have docvalues)
if (docsConsumer == null) {
return;
}
boolean success = false;
try {
docsConsumer.merge(mergeState);
success = true;
} finally {
if (success) {
IOUtils.close(docsConsumer);
} else {
IOUtils.closeWhileHandlingException(docsConsumer);
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
private void mergeNorms(SegmentWriteState segmentWriteState) throws IOException {
final PerDocConsumer docsConsumer = codec.normsFormat()
.docsConsumer(new PerDocWriteState(segmentWriteState));
// TODO: remove this check when 3.x indexes are no longer supported
// (3.x indexes don't have docvalues)
if (docsConsumer == null) {
return;
}
boolean success = false;
try {
docsConsumer.merge(mergeState);
success = true;
} finally {
if (success) {
IOUtils.close(docsConsumer);
} else {
IOUtils.closeWhileHandlingException(docsConsumer);
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
private void deleteCommits() throws IOException {
int size = commitsToDelete.size();
if (size > 0) {
// First decref all files that had been referred to by
// the now-deleted commits:
for(int i=0;i<size;i++) {
CommitPoint commit = commitsToDelete.get(i);
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "deleteCommits: now decRef commit \"" + commit.getSegmentsFileName() + "\"");
}
for (final String file : commit.files) {
decRef(file);
}
}
commitsToDelete.clear();
// Now compact commits to remove deleted ones (preserving the sort):
size = commits.size();
int readFrom = 0;
int writeTo = 0;
while(readFrom < size) {
CommitPoint commit = commits.get(readFrom);
if (!commit.deleted) {
if (writeTo != readFrom) {
commits.set(writeTo, commits.get(readFrom));
}
writeTo++;
}
readFrom++;
}
while(size > writeTo) {
commits.remove(size-1);
size--;
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
public void refresh(String segmentName) throws IOException {
assert locked();
String[] files = directory.listAll();
String segmentPrefix1;
String segmentPrefix2;
if (segmentName != null) {
segmentPrefix1 = segmentName + ".";
segmentPrefix2 = segmentName + "_";
} else {
segmentPrefix1 = null;
segmentPrefix2 = null;
}
for(int i=0;i<files.length;i++) {
String fileName = files[i];
if ((segmentName == null || fileName.startsWith(segmentPrefix1) || fileName.startsWith(segmentPrefix2)) &&
!fileName.endsWith("write.lock") &&
!refCounts.containsKey(fileName) &&
!fileName.equals(IndexFileNames.SEGMENTS_GEN)) {
// Unreferenced file, so remove it
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "refresh [prefix=" + segmentName + "]: removing newly created unreferenced file \"" + fileName + "\"");
}
deleteFile(fileName);
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
public void refresh() throws IOException {
// Set to null so that we regenerate the list of pending
// files; otherwise we could accumulate the same file more
// than once
assert locked();
deletable = null;
refresh(null);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
public void close() throws IOException {
// DecRef old files from the last checkpoint, if any:
assert locked();
int size = lastFiles.size();
if (size > 0) {
for(int i=0;i<size;i++) {
decRef(lastFiles.get(i));
}
lastFiles.clear();
}
deletePendingFiles();
}
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
void revisitPolicy() throws IOException {
assert locked();
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "now revisitPolicy");
}
if (commits.size() > 0) {
policy.onCommit(commits);
deleteCommits();
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
public void deletePendingFiles() throws IOException {
assert locked();
if (deletable != null) {
List<String> oldDeletable = deletable;
deletable = null;
int size = oldDeletable.size();
for(int i=0;i<size;i++) {
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "delete pending file " + oldDeletable.get(i));
}
deleteFile(oldDeletable.get(i));
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
public void checkpoint(SegmentInfos segmentInfos, boolean isCommit) throws IOException {
assert locked();
assert Thread.holdsLock(writer);
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "now checkpoint \"" + writer.segString(writer.toLiveInfos(segmentInfos)) + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]");
}
// Try again now to delete any previously un-deletable
// files (because they were in use, on Windows):
deletePendingFiles();
// Incref the files:
incRef(segmentInfos, isCommit);
if (isCommit) {
// Append to our commits list:
commits.add(new CommitPoint(commitsToDelete, directory, segmentInfos));
// Tell policy so it can remove commits:
policy.onCommit(commits);
// Decref files for commits that were deleted by the policy:
deleteCommits();
} else {
// DecRef old files from the last checkpoint, if any:
for (Collection<String> lastFile : lastFiles) {
decRef(lastFile);
}
lastFiles.clear();
// Save files so we can decr on next checkpoint/commit:
lastFiles.add(segmentInfos.files(directory, false));
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
void incRef(SegmentInfos segmentInfos, boolean isCommit) throws IOException {
assert locked();
// If this is a commit point, also incRef the
// segments_N file:
for(final String fileName: segmentInfos.files(directory, isCommit)) {
incRef(fileName);
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
void incRef(Collection<String> files) throws IOException {
assert locked();
for(final String file : files) {
incRef(file);
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
void incRef(String fileName) throws IOException {
assert locked();
RefCount rc = getRefCount(fileName);
if (infoStream.isEnabled("IFD")) {
if (VERBOSE_REF_COUNTS) {
infoStream.message("IFD", " IncRef \"" + fileName + "\": pre-incr count is " + rc.count);
}
}
rc.IncRef();
}
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
void decRef(Collection<String> files) throws IOException {
assert locked();
for(final String file : files) {
decRef(file);
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
void decRef(String fileName) throws IOException {
assert locked();
RefCount rc = getRefCount(fileName);
if (infoStream.isEnabled("IFD")) {
if (VERBOSE_REF_COUNTS) {
infoStream.message("IFD", " DecRef \"" + fileName + "\": pre-decr count is " + rc.count);
}
}
if (0 == rc.DecRef()) {
// This file is no longer referenced by any past
// commit points nor by the in-memory SegmentInfos:
deleteFile(fileName);
refCounts.remove(fileName);
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
void decRef(SegmentInfos segmentInfos) throws IOException {
assert locked();
for (final String file : segmentInfos.files(directory, false)) {
decRef(file);
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
void deleteFiles(List<String> files) throws IOException {
assert locked();
for(final String file: files) {
deleteFile(file);
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
void deleteNewFiles(Collection<String> files) throws IOException {
assert locked();
for (final String fileName: files) {
if (!refCounts.containsKey(fileName)) {
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "delete new file \"" + fileName + "\"");
}
deleteFile(fileName);
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
void deleteFile(String fileName)
throws IOException {
assert locked();
try {
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "delete \"" + fileName + "\"");
}
directory.deleteFile(fileName);
} catch (IOException e) { // if delete fails
if (directory.fileExists(fileName)) {
// Some operating systems (e.g. Windows) don't
// permit a file to be deleted while it is opened
// for read (e.g. by another process or thread). So
// we assume that when a delete fails it is because
// the file is open in another process, and queue
// the file for subsequent deletion.
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "unable to remove file \"" + fileName + "\": " + e.toString() + "; Will re-try later.");
}
if (deletable == null) {
deletable = new ArrayList<String>();
}
deletable.add(fileName); // add to deletable
}
}
}
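// --- Editor's sketch (not Lucene API): a minimal, generic version of the reference-
// --- counting scheme the IndexFileDeleter methods above implement -- each commit or
// --- checkpoint incRefs the files it uses, each retired commit decRefs them, and a file
// --- becomes deletable only when its count reaches zero. Class and method names here are
// --- hypothetical.
import java.util.HashMap;
import java.util.Map;

class FileRefCounter {
  private final Map<String, Integer> refCounts = new HashMap<String, Integer>();

  void incRef(String fileName) {
    Integer count = refCounts.get(fileName);
    refCounts.put(fileName, count == null ? 1 : count + 1);
  }

  // Returns true when the file is no longer referenced and may be physically deleted.
  // Assumes incRef(fileName) was called at least once before.
  boolean decRef(String fileName) {
    int count = refCounts.get(fileName) - 1;
    if (count == 0) {
      refCounts.remove(fileName);
      return true;
    }
    refCounts.put(fileName, count);
    return false;
  }
}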
// in lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
Override
public Collection<String> getFileNames() throws IOException {
return files;
}
// in lucene/core/src/java/org/apache/lucene/index/TermsHash.java
Override
void flush(Map<String,InvertedDocConsumerPerField> fieldsToFlush, final SegmentWriteState state) throws IOException {
Map<String,TermsHashConsumerPerField> childFields = new HashMap<String,TermsHashConsumerPerField>();
Map<String,InvertedDocConsumerPerField> nextChildFields;
if (nextTermsHash != null) {
nextChildFields = new HashMap<String,InvertedDocConsumerPerField>();
} else {
nextChildFields = null;
}
for (final Map.Entry<String,InvertedDocConsumerPerField> entry : fieldsToFlush.entrySet()) {
TermsHashPerField perField = (TermsHashPerField) entry.getValue();
childFields.put(entry.getKey(), perField.consumer);
if (nextTermsHash != null) {
nextChildFields.put(entry.getKey(), perField.nextPerField);
}
}
consumer.flush(childFields, state);
if (nextTermsHash != null) {
nextTermsHash.flush(nextChildFields, state);
}
}
// in lucene/core/src/java/org/apache/lucene/index/TermsHash.java
Override
void finishDocument() throws IOException {
consumer.finishDocument(this);
if (nextTermsHash != null) {
nextTermsHash.consumer.finishDocument(nextTermsHash);
}
}
// in lucene/core/src/java/org/apache/lucene/index/TermsHash.java
Override
void startDocument() throws IOException {
consumer.startDocument();
if (nextTermsHash != null) {
nextTermsHash.consumer.startDocument();
}
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
Override
protected void doClose() throws IOException {
//System.out.println("SR.close seg=" + si);
core.decRef();
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
Override
public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
if (docID < 0 || docID >= maxDoc()) {
throw new IllegalArgumentException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + docID + ")");
}
getFieldsReader().visitDocument(docID, visitor);
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
Override
public Fields fields() throws IOException {
ensureOpen();
return core.fields;
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
Override
public Fields getTermVectors(int docID) throws IOException {
TermVectorsReader termVectorsReader = getTermVectorsReader();
if (termVectorsReader == null) {
return null;
}
return termVectorsReader.get(docID);
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
Override
public DocValues docValues(String field) throws IOException {
ensureOpen();
final PerDocProducer perDoc = core.perDocProducer;
if (perDoc == null) {
return null;
}
return perDoc.docValues(field);
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
Override
public DocValues normValues(String field) throws IOException {
ensureOpen();
final PerDocProducer perDoc = core.norms;
if (perDoc == null) {
return null;
}
return perDoc.docValues(field);
}
// in lucene/core/src/java/org/apache/lucene/index/Fields.java
public long getUniqueTermCount() throws IOException {
long numTerms = 0;
FieldsEnum it = iterator();
while(true) {
String field = it.next();
if (field == null) {
break;
}
Terms terms = terms(field);
if (terms != null) {
final long termCount = terms.size();
if (termCount == -1) {
return -1;
}
numTerms += termCount;
}
}
return numTerms;
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
public static long getLastCommitGeneration(Directory directory) throws IOException {
try {
return getLastCommitGeneration(directory.listAll());
} catch (NoSuchDirectoryException nsde) {
return -1;
}
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
public static String getLastCommitSegmentsFileName(String[] files) throws IOException {
return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
"",
getLastCommitGeneration(files));
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
public static String getLastCommitSegmentsFileName(Directory directory) throws IOException {
return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
"",
getLastCommitGeneration(directory));
}
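// --- Editor's sketch (not from the source above): a hedged example of using the static
// --- helpers above to locate the most recent commit in a Directory. The index path is
// --- hypothetical, and the null-return behaviour is inferred from the gen == -1 handling
// --- shown above.
import java.io.File;
import java.io.IOException;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

class LastCommitExample {
  static String lastSegmentsFile(File indexPath) throws IOException {
    Directory dir = FSDirectory.open(indexPath);
    try {
      // e.g. "segments_5"; presumably null when no commit generation can be found.
      return SegmentInfos.getLastCommitSegmentsFileName(dir);
    } finally {
      dir.close();
    }
  }
}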
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
public final void read(Directory directory, String segmentFileName) throws CorruptIndexException, IOException {
boolean success = false;
// Clear any previous segments:
this.clear();
generation = generationFromSegmentsFileName(segmentFileName);
lastGeneration = generation;
ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName, IOContext.READ));
try {
// NOTE: as long as we want to throw indexformattooold (vs corruptindexexception), we need
// to read the magic ourselves.
int magic = input.readInt();
if (magic != CodecUtil.CODEC_MAGIC) {
throw new IndexFormatTooOldException(input, magic, CodecUtil.CODEC_MAGIC, CodecUtil.CODEC_MAGIC);
}
// 4.0+
CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_40);
version = input.readLong();
counter = input.readInt();
int numSegments = input.readInt();
for(int seg=0;seg<numSegments;seg++) {
String segName = input.readString();
Codec codec = Codec.forName(input.readString());
//System.out.println("SIS.read seg=" + seg + " codec=" + codec);
SegmentInfo info = codec.segmentInfoFormat().getSegmentInfosReader().read(directory, segName, IOContext.READ);
info.setCodec(codec);
long delGen = input.readLong();
int delCount = input.readInt();
assert delCount <= info.getDocCount();
add(new SegmentInfoPerCommit(info, delCount, delGen));
}
userData = input.readStringStringMap();
final long checksumNow = input.getChecksum();
final long checksumThen = input.readLong();
if (checksumNow != checksumThen) {
throw new CorruptIndexException("checksum mismatch in segments file (resource: " + input + ")");
}
success = true;
} finally {
if (!success) {
// Clear any segment infos we had loaded so we
// have a clean slate on retry:
this.clear();
IOUtils.closeWhileHandlingException(input);
} else {
input.close();
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
public final void read(Directory directory) throws CorruptIndexException, IOException {
generation = lastGeneration = -1;
new FindSegmentsFile(directory) {
@Override
protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException {
read(directory, segmentFileName);
return null;
}
}.run();
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
Override
protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException {
read(directory, segmentFileName);
return null;
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
private void write(Directory directory) throws IOException {
String segmentFileName = getNextSegmentFileName();
// Always advance the generation on write:
if (generation == -1) {
generation = 1;
} else {
generation++;
}
ChecksumIndexOutput segnOutput = null;
boolean success = false;
try {
segnOutput = new ChecksumIndexOutput(directory.createOutput(segmentFileName, IOContext.DEFAULT));
CodecUtil.writeHeader(segnOutput, "segments", VERSION_40);
segnOutput.writeLong(version);
segnOutput.writeInt(counter); // write counter
segnOutput.writeInt(size()); // write infos
for (SegmentInfoPerCommit siPerCommit : this) {
SegmentInfo si = siPerCommit.info;
segnOutput.writeString(si.name);
segnOutput.writeString(si.getCodec().getName());
segnOutput.writeLong(siPerCommit.getDelGen());
segnOutput.writeInt(siPerCommit.getDelCount());
assert si.dir == directory;
assert siPerCommit.getDelCount() <= si.getDocCount();
}
segnOutput.writeStringStringMap(userData);
pendingSegnOutput = segnOutput;
success = true;
} finally {
if (!success) {
// We hit an exception above; try to close the file
// but suppress any exception:
IOUtils.closeWhileHandlingException(segnOutput);
try {
// Try not to leave a truncated segments_N file in
// the index:
directory.deleteFile(segmentFileName);
} catch (Throwable t) {
// Suppress so we keep throwing the original exception
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
public Object run() throws CorruptIndexException, IOException {
return run(null);
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
public Object run(IndexCommit commit) throws CorruptIndexException, IOException {
if (commit != null) {
if (directory != commit.getDirectory())
throw new IOException("the specified commit does not match the specified Directory");
return doBody(commit.getSegmentsFileName());
}
String segmentFileName = null;
long lastGen = -1;
long gen = 0;
int genLookaheadCount = 0;
IOException exc = null;
int retryCount = 0;
boolean useFirstMethod = true;
// Loop until we succeed in calling doBody() without
// hitting an IOException. An IOException most likely
// means a commit was in process and has finished, in
// the time it took us to load the now-old infos files
// (and segments files). It's also possible it's a
// true error (corrupt index). To distinguish these,
// on each retry we must see "forward progress" on
// which generation we are trying to load. If we
// don't, then the original error is real and we throw
// it.
// We have three methods for determining the current
// generation. We try the first two in parallel (when
// useFirstMethod is true), and fall back to the third
// when necessary.
while(true) {
if (useFirstMethod) {
// List the directory and use the highest
// segments_N file. This method works well as long
// as there is no stale caching on the directory
// contents (NOTE: NFS clients often have such stale
// caching):
String[] files = null;
long genA = -1;
files = directory.listAll();
if (files != null) {
genA = getLastCommitGeneration(files);
}
if (infoStream != null) {
message("directory listing genA=" + genA);
}
// Also open segments.gen and read its
// contents. Then we take the larger of the two
// gens. This way, if either approach is hitting
// a stale cache (NFS) we have a better chance of
// getting the right generation.
long genB = -1;
IndexInput genInput = null;
try {
genInput = directory.openInput(IndexFileNames.SEGMENTS_GEN, IOContext.READONCE);
} catch (FileNotFoundException e) {
if (infoStream != null) {
message("segments.gen open: FileNotFoundException " + e);
}
} catch (IOException e) {
if (infoStream != null) {
message("segments.gen open: IOException " + e);
}
}
if (genInput != null) {
try {
int version = genInput.readInt();
if (version == FORMAT_SEGMENTS_GEN_CURRENT) {
long gen0 = genInput.readLong();
long gen1 = genInput.readLong();
if (infoStream != null) {
message("fallback check: " + gen0 + "; " + gen1);
}
if (gen0 == gen1) {
// The file is consistent.
genB = gen0;
}
} else {
throw new IndexFormatTooNewException(genInput, version, FORMAT_SEGMENTS_GEN_CURRENT, FORMAT_SEGMENTS_GEN_CURRENT);
}
} catch (IOException err2) {
// rethrow any format exception
if (err2 instanceof CorruptIndexException) throw err2;
} finally {
genInput.close();
}
}
if (infoStream != null) {
message(IndexFileNames.SEGMENTS_GEN + " check: genB=" + genB);
}
// Pick the larger of the two gen's:
gen = Math.max(genA, genB);
if (gen == -1) {
// Neither approach found a generation
throw new IndexNotFoundException("no segments* file found in " + directory + ": files: " + Arrays.toString(files));
}
}
if (useFirstMethod && lastGen == gen && retryCount >= 2) {
// Give up on first method -- this is 3rd cycle on
// listing directory and checking gen file to
// attempt to locate the segments file.
useFirstMethod = false;
}
// Second method: since both directory cache and
// file contents cache seem to be stale, just
// advance the generation.
if (!useFirstMethod) {
if (genLookaheadCount < defaultGenLookaheadCount) {
gen++;
genLookaheadCount++;
if (infoStream != null) {
message("look ahead increment gen to " + gen);
}
} else {
// All attempts have failed -- throw first exc:
throw exc;
}
} else if (lastGen == gen) {
// This means we're about to retry the same
// segments_N we last tried.
retryCount++;
} else {
// Segment file has advanced since our last loop
// (we made "progress"), so reset retryCount:
retryCount = 0;
}
lastGen = gen;
segmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
"",
gen);
try {
Object v = doBody(segmentFileName);
if (infoStream != null) {
message("success on " + segmentFileName);
}
return v;
} catch (IOException err) {
// Save the original root cause:
if (exc == null) {
exc = err;
}
if (infoStream != null) {
message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retryCount=" + retryCount + "; gen = " + gen);
}
if (gen > 1 && useFirstMethod && retryCount == 1) {
// This is our second time trying this same segments
// file (because retryCount is 1), and, there is
// possibly a segments_(N-1) (because gen > 1).
// So, check if the segments_(N-1) exists and
// try it if so:
String prevSegmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
"",
gen-1);
final boolean prevExists;
prevExists = directory.fileExists(prevSegmentFileName);
if (prevExists) {
if (infoStream != null) {
message("fallback to prior segment file '" + prevSegmentFileName + "'");
}
try {
Object v = doBody(prevSegmentFileName);
if (infoStream != null) {
message("success on fallback " + prevSegmentFileName);
}
return v;
} catch (IOException err2) {
if (infoStream != null) {
message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry");
}
}
}
}
}
}
}
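// --- Editor's sketch (not Lucene API): the "retry only while making forward progress"
// --- idea behind FindSegmentsFile.run() above, stripped of the segments.gen fallback and
// --- lookahead. findLatestGeneration() and loadCommit() are hypothetical helpers.
abstract class ForwardProgressRetry<T> {
  abstract long findLatestGeneration() throws java.io.IOException;   // e.g. highest segments_N listed
  abstract T loadCommit(long generation) throws java.io.IOException; // may fail if a commit raced us

  T run() throws java.io.IOException {
    long lastGen = -1;
    int retryCount = 0;
    java.io.IOException firstFailure = null;
    while (true) {
      long gen = findLatestGeneration();
      // Only count a retry if the generation did not advance since the last attempt.
      retryCount = (gen == lastGen) ? retryCount + 1 : 0;
      lastGen = gen;
      try {
        return loadCommit(gen);
      } catch (java.io.IOException e) {
        if (firstFailure == null) {
          firstFailure = e;
        }
        if (retryCount >= 2) {
          // No forward progress across several attempts: the original error is real.
          throw firstFailure;
        }
      }
    }
  }
}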
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
final void rollbackCommit(Directory dir) throws IOException {
if (pendingSegnOutput != null) {
try {
pendingSegnOutput.close();
} catch (Throwable t) {
// Suppress so we keep throwing the original exception
// in our caller
}
// Must carefully compute fileName from "generation"
// since lastGeneration isn't incremented:
try {
final String segmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
"",
generation);
dir.deleteFile(segmentFileName);
} catch (Throwable t) {
// Suppress so we keep throwing the original exception
// in our caller
}
pendingSegnOutput = null;
}
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
final void prepareCommit(Directory dir) throws IOException {
if (pendingSegnOutput != null) {
throw new IllegalStateException("prepareCommit was already called");
}
write(dir);
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
public Collection<String> files(Directory dir, boolean includeSegmentsFile) throws IOException {
HashSet<String> files = new HashSet<String>();
if (includeSegmentsFile) {
final String segmentFileName = getSegmentsFileName();
if (segmentFileName != null) {
/*
* TODO: if lastGen == -1 we might get null here; it seems wrong to
* add null to the files set
*/
files.add(segmentFileName);
}
}
final int size = size();
for(int i=0;i<size;i++) {
final SegmentInfoPerCommit info = info(i);
assert info.info.dir == dir;
if (info.info.dir == dir) {
files.addAll(info.files());
}
}
return files;
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
final void finishCommit(Directory dir) throws IOException {
if (pendingSegnOutput == null) {
throw new IllegalStateException("prepareCommit was not called");
}
boolean success = false;
try {
pendingSegnOutput.finishCommit();
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(pendingSegnOutput);
rollbackCommit(dir);
} else {
pendingSegnOutput.close();
pendingSegnOutput = null;
}
}
// NOTE: if we crash here, we have left a segments_N
// file in the directory in a possibly corrupt state (if
// some bytes made it to stable storage and others
// didn't). But, the segments_N file includes checksum
// at the end, which should catch this case. So when a
// reader tries to read it, it will throw a
// CorruptIndexException, which should cause the retry
// logic in SegmentInfos to kick in and load the last
// good (previous) segments_N-1 file.
final String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);
success = false;
try {
dir.sync(Collections.singleton(fileName));
success = true;
} finally {
if (!success) {
try {
dir.deleteFile(fileName);
} catch (Throwable t) {
// Suppress so we keep throwing the original exception
}
}
}
lastGeneration = generation;
try {
IndexOutput genOutput = dir.createOutput(IndexFileNames.SEGMENTS_GEN, IOContext.READONCE);
try {
genOutput.writeInt(FORMAT_SEGMENTS_GEN_CURRENT);
genOutput.writeLong(generation);
genOutput.writeLong(generation);
} finally {
genOutput.close();
dir.sync(Collections.singleton(IndexFileNames.SEGMENTS_GEN));
}
} catch (Throwable t) {
// It's OK if we fail to write this file since it's
// used only as one of the retry fallbacks.
try {
dir.deleteFile(IndexFileNames.SEGMENTS_GEN);
} catch (Throwable t2) {
// Ignore; this file is only used in a retry
// fallback on init.
}
if (t instanceof ThreadInterruptedException) {
throw (ThreadInterruptedException) t;
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
final void commit(Directory dir) throws IOException {
prepareCommit(dir);
finishCommit(dir);
}
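// --- Editor's sketch (not the Lucene API): the split that prepareCommit/finishCommit/
// --- rollbackCommit above implement -- stage the new segments_N, then either make it
// --- durable or discard it. The interface and method names here are hypothetical.
interface PendingCommit {
  void prepare() throws java.io.IOException;  // write the pending segments_N, keep it open
  void finish() throws java.io.IOException;   // finish the checksum, close, sync, publish segments.gen
  void rollback() throws java.io.IOException; // close and delete the pending segments_N
}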
// in lucene/core/src/java/org/apache/lucene/index/MultiFieldsEnum.java
Override
public String next() throws IOException {
// restore queue
for(int i=0;i<numTop;i++) {
top[i].current = top[i].fields.next();
if (top[i].current != null) {
queue.add(top[i]);
} else {
// no more fields in this sub-reader
}
}
numTop = 0;
// gather equal top fields
if (queue.size() > 0) {
while(true) {
top[numTop++] = queue.pop();
if (queue.size() == 0 || !(queue.top()).current.equals(top[0].current)) {
break;
}
}
currentField = top[0].current;
} else {
currentField = null;
}
return currentField;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiFieldsEnum.java
Override
public Terms terms() throws IOException {
// Ask our parent MultiFields:
return fields.terms(currentField);
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
public TermsEnum reset(TermsEnumIndex[] termsEnumsIndex) throws IOException {
assert termsEnumsIndex.length <= top.length;
numSubs = 0;
numTop = 0;
termComp = null;
queue.clear();
for(int i=0;i<termsEnumsIndex.length;i++) {
final TermsEnumIndex termsEnumIndex = termsEnumsIndex[i];
assert termsEnumIndex != null;
// init our term comp
if (termComp == null) {
queue.termComp = termComp = termsEnumIndex.termsEnum.getComparator();
} else {
// We cannot merge sub-readers that have
// different TermComps
final Comparator<BytesRef> subTermComp = termsEnumIndex.termsEnum.getComparator();
if (subTermComp != null && !subTermComp.equals(termComp)) {
throw new IllegalStateException("sub-readers have different BytesRef.Comparators: " + subTermComp + " vs " + termComp + "; cannot merge");
}
}
final BytesRef term = termsEnumIndex.termsEnum.next();
if (term != null) {
final TermsEnumWithSlice entry = subs[termsEnumIndex.subIndex];
entry.reset(termsEnumIndex.termsEnum, term);
queue.add(entry);
currentSubs[numSubs++] = entry;
} else {
// field has no terms
}
}
if (queue.size() == 0) {
return TermsEnum.EMPTY;
} else {
return this;
}
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
Override
public boolean seekExact(BytesRef term, boolean useCache) throws IOException {
queue.clear();
numTop = 0;
boolean seekOpt = false;
if (lastSeek != null && termComp.compare(lastSeek, term) <= 0) {
seekOpt = true;
}
lastSeek = null;
lastSeekExact = true;
for(int i=0;i<numSubs;i++) {
final boolean status;
// LUCENE-2130: if we had just seek'd already, prior
// to this seek, and the new seek term is after the
// previous one, don't try to re-seek this sub if its
// current term is already beyond this new seek term.
// Doing so is a waste because this sub will simply
// seek to the same spot.
if (seekOpt) {
final BytesRef curTerm = currentSubs[i].current;
if (curTerm != null) {
final int cmp = termComp.compare(term, curTerm);
if (cmp == 0) {
status = true;
} else if (cmp < 0) {
status = false;
} else {
status = currentSubs[i].terms.seekExact(term, useCache);
}
} else {
status = false;
}
} else {
status = currentSubs[i].terms.seekExact(term, useCache);
}
if (status) {
top[numTop++] = currentSubs[i];
current = currentSubs[i].current = currentSubs[i].terms.term();
assert term.equals(currentSubs[i].current);
}
}
// if at least one sub had exact match to the requested
// term then we found match
return numTop > 0;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
Override
public SeekStatus seekCeil(BytesRef term, boolean useCache) throws IOException {
queue.clear();
numTop = 0;
lastSeekExact = false;
boolean seekOpt = false;
if (lastSeek != null && termComp.compare(lastSeek, term) <= 0) {
seekOpt = true;
}
lastSeekScratch.copyBytes(term);
lastSeek = lastSeekScratch;
for(int i=0;i<numSubs;i++) {
final SeekStatus status;
// LUCENE-2130: if we had just seek'd already, prior
// to this seek, and the new seek term is after the
// previous one, don't try to re-seek this sub if its
// current term is already beyond this new seek term.
// Doing so is a waste because this sub will simply
// seek to the same spot.
if (seekOpt) {
final BytesRef curTerm = currentSubs[i].current;
if (curTerm != null) {
final int cmp = termComp.compare(term, curTerm);
if (cmp == 0) {
status = SeekStatus.FOUND;
} else if (cmp < 0) {
status = SeekStatus.NOT_FOUND;
} else {
status = currentSubs[i].terms.seekCeil(term, useCache);
}
} else {
status = SeekStatus.END;
}
} else {
status = currentSubs[i].terms.seekCeil(term, useCache);
}
if (status == SeekStatus.FOUND) {
top[numTop++] = currentSubs[i];
current = currentSubs[i].current = currentSubs[i].terms.term();
} else {
if (status == SeekStatus.NOT_FOUND) {
currentSubs[i].current = currentSubs[i].terms.term();
assert currentSubs[i].current != null;
queue.add(currentSubs[i]);
} else {
// enum exhausted
currentSubs[i].current = null;
}
}
}
if (numTop > 0) {
// at least one sub had exact match to the requested term
return SeekStatus.FOUND;
} else if (queue.size() > 0) {
// no sub had exact match, but at least one sub found
// a term after the requested term -- advance to that
// next term:
pullTop();
return SeekStatus.NOT_FOUND;
} else {
return SeekStatus.END;
}
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
Override
public void seekExact(long ord) throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
Override
public long ord() throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
private void pushTop() throws IOException {
// call next() on each top, and put back into queue
for(int i=0;i<numTop;i++) {
top[i].current = top[i].terms.next();
if (top[i].current != null) {
queue.add(top[i]);
} else {
// no more fields in this reader
}
}
numTop = 0;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
Override
public BytesRef next() throws IOException {
if (lastSeekExact) {
// Must seekCeil at this point, so those subs that
// didn't have the term can find the following term.
// NOTE: we could save some CPU by only seekCeil'ing the
// subs that didn't match the last exact seek... but
// most impls short-circuit if you seekCeil to the term
// they are already on.
final SeekStatus status = seekCeil(current);
assert status == SeekStatus.FOUND;
lastSeekExact = false;
}
lastSeek = null;
// restore queue
pushTop();
// gather equal top fields
if (queue.size() > 0) {
pullTop();
} else {
current = null;
}
return current;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
Override
public int docFreq() throws IOException {
int sum = 0;
for(int i=0;i<numTop;i++) {
sum += top[i].terms.docFreq();
}
return sum;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
Override
public long totalTermFreq() throws IOException {
long sum = 0;
for(int i=0;i<numTop;i++) {
final long v = top[i].terms.totalTermFreq();
if (v == -1) {
return v;
}
sum += v;
}
return sum;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
MultiDocsEnum docsEnum;
// Can only reuse if incoming enum is also a MultiDocsEnum
if (reuse != null && reuse instanceof MultiDocsEnum) {
docsEnum = (MultiDocsEnum) reuse;
// ... and was previously created w/ this MultiTermsEnum:
if (!docsEnum.canReuse(this)) {
docsEnum = new MultiDocsEnum(this, subs.length);
}
} else {
docsEnum = new MultiDocsEnum(this, subs.length);
}
final MultiBits multiLiveDocs;
if (liveDocs instanceof MultiBits) {
multiLiveDocs = (MultiBits) liveDocs;
} else {
multiLiveDocs = null;
}
int upto = 0;
for(int i=0;i<numTop;i++) {
final TermsEnumWithSlice entry = top[i];
final Bits b;
if (multiLiveDocs != null) {
// optimize for common case: requested skip docs is a
// congruent sub-slice of MultiBits: in this case, we
// just pull the liveDocs from the sub reader, rather
// than making the inefficient
// Slice(Multi(sub-readers)):
final MultiBits.SubResult sub = multiLiveDocs.getMatchingSub(entry.subSlice);
if (sub.matches) {
b = sub.result;
} else {
// custom case: requested skip docs is foreign:
// must slice it on every access
b = new BitsSlice(liveDocs, entry.subSlice);
}
} else if (liveDocs != null) {
b = new BitsSlice(liveDocs, entry.subSlice);
} else {
// no deletions
b = null;
}
assert entry.index < docsEnum.subDocsEnum.length: entry.index + " vs " + docsEnum.subDocsEnum.length + "; " + subs.length;
final DocsEnum subDocsEnum = entry.terms.docs(b, docsEnum.subDocsEnum[entry.index], needsFreqs);
if (subDocsEnum != null) {
docsEnum.subDocsEnum[entry.index] = subDocsEnum;
subDocs[upto].docsEnum = subDocsEnum;
subDocs[upto].slice = entry.subSlice;
upto++;
} else {
// One of our subs cannot provide freqs:
assert needsFreqs;
return null;
}
}
if (upto == 0) {
return null;
} else {
return docsEnum.reset(subDocs, upto);
}
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
MultiDocsAndPositionsEnum docsAndPositionsEnum;
// Can only reuse if incoming enum is also a MultiDocsAndPositionsEnum
if (reuse != null && reuse instanceof MultiDocsAndPositionsEnum) {
docsAndPositionsEnum = (MultiDocsAndPositionsEnum) reuse;
// ... and was previously created w/ this MultiTermsEnum:
if (!docsAndPositionsEnum.canReuse(this)) {
docsAndPositionsEnum = new MultiDocsAndPositionsEnum(this, subs.length);
}
} else {
docsAndPositionsEnum = new MultiDocsAndPositionsEnum(this, subs.length);
}
final MultiBits multiLiveDocs;
if (liveDocs instanceof MultiBits) {
multiLiveDocs = (MultiBits) liveDocs;
} else {
multiLiveDocs = null;
}
int upto = 0;
for(int i=0;i<numTop;i++) {
final TermsEnumWithSlice entry = top[i];
final Bits b;
if (multiLiveDocs != null) {
// Optimize for common case: requested skip docs is a
// congruent sub-slice of MultiBits: in this case, we
// just pull the liveDocs from the sub reader, rather
// than making the inefficient
// Slice(Multi(sub-readers)):
final MultiBits.SubResult sub = multiLiveDocs.getMatchingSub(top[i].subSlice);
if (sub.matches) {
b = sub.result;
} else {
// custom case: requested skip docs is foreign:
// must slice it on every access (very
// inefficient)
b = new BitsSlice(liveDocs, top[i].subSlice);
}
} else if (liveDocs != null) {
b = new BitsSlice(liveDocs, top[i].subSlice);
} else {
// no deletions
b = null;
}
assert entry.index < docsAndPositionsEnum.subDocsAndPositionsEnum.length: entry.index + " vs " + docsAndPositionsEnum.subDocsAndPositionsEnum.length + "; " + subs.length;
final DocsAndPositionsEnum subPostings = entry.terms.docsAndPositions(b, docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index], needsOffsets);
if (subPostings != null) {
docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index] = subPostings;
subDocsAndPositions[upto].docsAndPositionsEnum = subPostings;
subDocsAndPositions[upto].slice = entry.subSlice;
upto++;
} else {
if (entry.terms.docs(b, null, false) != null) {
// At least one of our subs does not store
// offsets or positions -- we can't correctly
// produce a MultiDocsAndPositions enum
return null;
}
}
}
if (upto == 0) {
return null;
} else {
return docsAndPositionsEnum.reset(subDocsAndPositions, upto);
}
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
public DocValues pull(AtomicReader reader, String field) throws IOException {
return reader.normValues(field);
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
public boolean stopLoadingOnNull(AtomicReader reader, String field) throws IOException {
// for norms we drop all norms if one leaf reader has no norms and the field is present
FieldInfos fieldInfos = reader.getFieldInfos();
FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
return fieldInfo != null && fieldInfo.omitsNorms();
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
public DocValues pull(AtomicReader reader, String field) throws IOException {
return reader.docValues(field);
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
public boolean stopLoadingOnNull(AtomicReader reader, String field) throws IOException {
return false;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
public static DocValues getDocValues(IndexReader r, final String field) throws IOException {
return getDocValues(r, field, DEFAULT_PULLER);
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
public static DocValues getNormDocValues(IndexReader r, final String field) throws IOException {
return getDocValues(r, field, NORMS_PULLER);
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
private static DocValues getDocValues(IndexReader r, final String field, final DocValuesPuller puller) throws IOException {
if (r instanceof AtomicReader) {
// already an atomic reader
return puller.pull((AtomicReader) r, field);
}
assert r instanceof CompositeReader;
final IndexReader[] subs = ((CompositeReader) r).getSequentialSubReaders();
if (subs.length == 0) {
// no fields
return null;
} else if (subs.length == 1) {
return getDocValues(subs[0], field, puller);
} else {
final List<DocValuesSlice> slices = new ArrayList<DocValuesSlice>();
final TypePromoter promotedType[] = new TypePromoter[1];
promotedType[0] = TypePromoter.getIdentityPromoter();
// gather all docvalues fields, accumulating a promoted type across
// potentially incompatible types
new ReaderUtil.Gather(r) {
boolean stop = false;
@Override
protected void add(int base, AtomicReader r) throws IOException {
if (stop) {
return;
}
final DocValues d = puller.pull(r, field);
if (d != null) {
TypePromoter incoming = TypePromoter.create(d.getType(), d.getValueSize());
promotedType[0] = promotedType[0].promote(incoming);
} else if (puller.stopLoadingOnNull(r, field)){
promotedType[0] = TypePromoter.getIdentityPromoter(); // set to identity to return null
stop = true;
}
slices.add(new DocValuesSlice(d, base, r.maxDoc()));
}
}.run();
// return null if no docvalues encountered anywhere
if (promotedType[0] == TypePromoter.getIdentityPromoter()) {
return null;
}
// populate starts and fill gaps with empty docvalues
int starts[] = new int[slices.size()];
for (int i = 0; i < slices.size(); i++) {
DocValuesSlice slice = slices.get(i);
starts[i] = slice.start;
if (slice.docValues == null) {
Type promoted = promotedType[0].type();
switch(promoted) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_STRAIGHT:
case BYTES_FIXED_SORTED:
assert promotedType[0].getValueSize() >= 0;
slice.docValues = new EmptyFixedDocValues(slice.length, promoted, promotedType[0].getValueSize());
break;
default:
slice.docValues = new EmptyDocValues(slice.length, promoted);
}
}
}
return new MultiDocValues(slices.toArray(new DocValuesSlice[slices.size()]), starts, promotedType[0]);
}
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
Override
protected void add(int base, AtomicReader r) throws IOException {
if (stop) {
return;
}
final DocValues d = puller.pull(r, field);
if (d != null) {
TypePromoter incoming = TypePromoter.create(d.getType(), d.getValueSize());
promotedType[0] = promotedType[0].promote(incoming);
} else if (puller.stopLoadingOnNull(r, field)){
promotedType[0] = TypePromoter.getIdentityPromoter(); // set to identity to return null
stop = true;
}
slices.add(new DocValuesSlice(d, base, r.maxDoc()));
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
Override
public Source load() throws IOException {
return new MultiSource(slices, starts, false, type);
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
Override
public Source load() throws IOException {
return emptySource;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
@Override
public Source getDirectSource() throws IOException {
return emptySource;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
@Override
public Source load() throws IOException {
return emptyFixedSource;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
@Override
public Source getDirectSource() throws IOException {
return emptyFixedSource;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
@Override
public void consume(BytesRef ref, int ord, long offset) throws IOException {
pagedBytes.copy(ref);
if (ordToOffset != null) {
if (ord+1 >= ordToOffset.length) {
ordToOffset = ArrayUtil.grow(ordToOffset, ord + 2);
}
ordToOffset[ord+1] = offset;
}
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
@Override
public Source getDirectSource() throws IOException {
return new MultiSource(slices, starts, true, type);
}
// in lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
public synchronized SegmentReader getReader(IOContext context) throws IOException {
//System.out.println(" livedocs=" + rld.liveDocs);
if (reader == null) {
// We steal returned ref:
reader = new SegmentReader(info, writer.getConfig().getReaderTermsIndexDivisor(), context);
if (liveDocs == null) {
liveDocs = reader.getLiveDocs();
}
//System.out.println("ADD seg=" + rld.info + " isMerge=" + isMerge + " " + readerMap.size() + " in pool");
//System.out.println(Thread.currentThread().getName() + ": getReader seg=" + info.name);
}
// Ref for caller
reader.incRef();
return reader;
}
// in lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
public synchronized SegmentReader getMergeReader(IOContext context) throws IOException {
//System.out.println(" livedocs=" + rld.liveDocs);
if (mergeReader == null) {
if (reader != null) {
// Just use the already opened non-merge reader
// for merging. In the NRT case this saves us
// pointless double-open:
//System.out.println("PROMOTE non-merge reader seg=" + rld.info);
// Ref for us:
reader.incRef();
mergeReader = reader;
//System.out.println(Thread.currentThread().getName() + ": getMergeReader share seg=" + info.name);
} else {
//System.out.println(Thread.currentThread().getName() + ": getMergeReader seg=" + info.name);
// We steal returned ref:
mergeReader = new SegmentReader(info, -1, context);
if (liveDocs == null) {
liveDocs = mergeReader.getLiveDocs();
}
}
}
// Ref for caller
mergeReader.incRef();
return mergeReader;
}
// in lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
public synchronized void release(SegmentReader sr) throws IOException {
assert info == sr.getSegmentInfo();
sr.decRef();
}
// in lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
public synchronized void dropReaders() throws IOException {
if (reader != null) {
//System.out.println(" pool.drop info=" + info + " rc=" + reader.getRefCount());
reader.decRef();
reader = null;
}
if (mergeReader != null) {
//System.out.println(" pool.drop info=" + info + " merge rc=" + mergeReader.getRefCount());
mergeReader.decRef();
mergeReader = null;
}
decRef();
}
// in lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
public synchronized SegmentReader getReadOnlyClone(IOContext context) throws IOException {
if (reader == null) {
getReader(context).decRef();
assert reader != null;
}
shared = true;
if (liveDocs != null) {
return new SegmentReader(reader.getSegmentInfo(), reader.core, liveDocs, info.info.getDocCount() - info.getDelCount() - pendingDeleteCount);
} else {
assert reader.getLiveDocs() == liveDocs;
reader.incRef();
return reader;
}
}
// in lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
public synchronized void initWritableLiveDocs() throws IOException {
assert Thread.holdsLock(writer);
assert info.info.getDocCount() > 0;
//System.out.println("initWritableLivedocs seg=" + info + " liveDocs=" + liveDocs + " shared=" + shared);
if (shared) {
// Copy on write: this means we've cloned a
// SegmentReader sharing the current liveDocs
// instance; must now make a private clone so we can
// change it:
LiveDocsFormat liveDocsFormat = info.info.getCodec().liveDocsFormat();
if (liveDocs == null) {
//System.out.println("create BV seg=" + info);
liveDocs = liveDocsFormat.newLiveDocs(info.info.getDocCount());
} else {
liveDocs = liveDocsFormat.newLiveDocs(liveDocs);
}
shared = false;
} else {
assert liveDocs != null;
}
}
// in lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
public synchronized boolean writeLiveDocs(Directory dir) throws IOException {
//System.out.println("rld.writeLiveDocs seg=" + info + " pendingDelCount=" + pendingDeleteCount);
if (pendingDeleteCount != 0) {
// We have new deletes
assert liveDocs.length() == info.info.getDocCount();
// We can write directly to the actual name (vs to a
// .tmp & renaming it) because the file is not live
// until segments file is written:
info.info.getCodec().liveDocsFormat().writeLiveDocs((MutableBits)liveDocs, dir, info, pendingDeleteCount, IOContext.DEFAULT);
// If we hit an exc in the line above (eg disk full)
// then info remains pointing to the previous
// (successfully written) del docs:
info.advanceDelGen();
info.setDelCount(info.getDelCount() + pendingDeleteCount);
pendingDeleteCount = 0;
return true;
} else {
return false;
}
}
// in lucene/core/src/java/org/apache/lucene/index/Terms.java
public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException {
// TODO: eventually we could support seekCeil/Exact on
// the returned enum, instead of only being able to seek
// at the start
if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
}
if (startTerm == null) {
return new AutomatonTermsEnum(iterator(null), compiled);
} else {
return new AutomatonTermsEnum(iterator(null), compiled) {
@Override
protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
if (term == null) {
term = startTerm;
}
return super.nextSeekTerm(term);
}
};
}
}
// in lucene/core/src/java/org/apache/lucene/index/Terms.java
@Override
protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
if (term == null) {
term = startTerm;
}
return super.nextSeekTerm(term);
}
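// Illustrative sketch (editor's addition): iterating only the terms accepted by
// a CompiledAutomaton via Terms.intersect. Field name, regular expression and
// index location are made up; note the method above rejects automata whose
// compiled type is not NORMAL (e.g. a pure prefix).
import java.io.File;
import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RegExp;

public class IntersectSketch {
  public static void main(String[] args) throws IOException {
    DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File("/tmp/index")));
    try {
      Terms terms = MultiFields.getTerms(reader, "body"); // assumed field
      if (terms != null) {
        // "se[a-z]+ment" is not a plain prefix, so its compiled type is NORMAL:
        CompiledAutomaton ca = new CompiledAutomaton(new RegExp("se[a-z]+ment").toAutomaton());
        TermsEnum te = terms.intersect(ca, null); // null startTerm: start at the first match
        BytesRef term;
        while ((term = te.next()) != null) {
          System.out.println(term.utf8ToString() + " docFreq=" + te.docFreq());
        }
      }
    } finally {
      reader.close();
    }
  }
}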
// in lucene/core/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java
@Override
public MergeSpecification findMerges(SegmentInfos segmentInfos) throws CorruptIndexException, IOException {
return base.findMerges(segmentInfos);
}
// in lucene/core/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java
@Override
public MergeSpecification findForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount, Map<SegmentInfoPerCommit,Boolean> segmentsToMerge) throws CorruptIndexException, IOException {
// first find all old segments
final Map<SegmentInfoPerCommit,Boolean> oldSegments = new HashMap<SegmentInfoPerCommit,Boolean>();
for (final SegmentInfoPerCommit si : segmentInfos) {
final Boolean v = segmentsToMerge.get(si);
if (v != null && shouldUpgradeSegment(si)) {
oldSegments.put(si, v);
}
}
if (verbose()) {
message("findForcedMerges: segmentsToUpgrade=" + oldSegments);
}
if (oldSegments.isEmpty())
return null;
MergeSpecification spec = base.findForcedMerges(segmentInfos, maxSegmentCount, oldSegments);
if (spec != null) {
// remove all segments that are in merge specification from oldSegments,
// the resulting set contains all segments that are left over
// and will be merged to one additional segment:
for (final OneMerge om : spec.merges) {
oldSegments.keySet().removeAll(om.segments);
}
}
if (!oldSegments.isEmpty()) {
if (verbose()) {
message("findForcedMerges: " + base.getClass().getSimpleName() +
" does not want to merge all old segments, merge remaining ones into new segment: " + oldSegments);
}
final List<SegmentInfoPerCommit> newInfos = new ArrayList<SegmentInfoPerCommit>();
for (final SegmentInfoPerCommit si : segmentInfos) {
if (oldSegments.containsKey(si)) {
newInfos.add(si);
}
}
// add the final merge
if (spec == null) {
spec = new MergeSpecification();
}
spec.add(new OneMerge(newInfos));
}
return spec;
}
// in lucene/core/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java
@Override
public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos) throws CorruptIndexException, IOException {
return base.findForcedDeletesMerges(segmentInfos);
}
// in lucene/core/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java
@Override
public boolean useCompoundFile(SegmentInfos segments, SegmentInfoPerCommit newSegment) throws IOException {
return base.useCompoundFile(segments, newSegment);
}
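// Illustrative wiring (editor's addition): UpgradeIndexMergePolicy wraps a base
// policy and forces segments flagged by shouldUpgradeSegment() into merges, much
// as the IndexUpgrader tool does. Analyzer, version constant and path are assumed.
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.UpgradeIndexMergePolicy;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class UpgradeSketch {
  public static void main(String[] args) throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40,
        new StandardAnalyzer(Version.LUCENE_40));
    // Delegate normal merging to TieredMergePolicy; old segments get upgraded.
    iwc.setMergePolicy(new UpgradeIndexMergePolicy(new TieredMergePolicy()));
    IndexWriter writer = new IndexWriter(FSDirectory.open(new File("/tmp/index")), iwc);
    try {
      writer.forceMerge(1); // drives findForcedMerges above and rewrites old segments
    } finally {
      writer.close();
    }
  }
}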
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
synchronized void deleteQueries(final Query... queries) throws IOException {
deleteQueue.addDelete(queries);
flushControl.doOnDelete();
if (flushControl.doApplyAllDeletes()) {
applyAllDeletes(deleteQueue);
}
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
synchronized void deleteTerms(final Term... terms) throws IOException {
final DocumentsWriterDeleteQueue deleteQueue = this.deleteQueue;
deleteQueue.addDelete(terms);
flushControl.doOnDelete();
if (flushControl.doApplyAllDeletes()) {
applyAllDeletes(deleteQueue);
}
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
private void applyAllDeletes(DocumentsWriterDeleteQueue deleteQueue) throws IOException {
if (deleteQueue != null && !flushControl.isFullFlush()) {
ticketQueue.addDeletesAndPurge(this, deleteQueue);
}
indexWriter.applyAllDeletes();
indexWriter.flushCount.incrementAndGet();
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
synchronized void abort() throws IOException {
boolean success = false;
synchronized (this) {
deleteQueue.clear();
}
try {
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", "abort");
}
final int limit = perThreadPool.getActiveThreadState();
for (int i = 0; i < limit; i++) {
final ThreadState perThread = perThreadPool.getThreadState(i);
perThread.lock();
try {
if (perThread.isActive()) { // we might be closed
try {
perThread.dwpt.abort();
} catch (IOException ex) {
// continue
} finally {
perThread.dwpt.checkAndResetHasAborted();
flushControl.doOnAbort(perThread);
}
} else {
assert closed;
}
} finally {
perThread.unlock();
}
}
success = true;
} finally {
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", "done abort; abortedFiles=" + abortedFiles + " success=" + success);
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
private boolean preUpdate() throws CorruptIndexException, IOException {
ensureOpen();
boolean maybeMerge = false;
if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) {
// Help out flushing any queued DWPTs so we can un-stall:
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", "DocumentsWriter has queued dwpt; will hijack this thread to flush pending segment(s)");
}
do {
// Try pick up pending threads here if possible
DocumentsWriterPerThread flushingDWPT;
while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
// Don't push the delete here since the update could fail!
maybeMerge |= doFlush(flushingDWPT);
}
if (infoStream.isEnabled("DW")) {
if (flushControl.anyStalledThreads()) {
infoStream.message("DW", "WARNING DocumentsWriter has stalled threads; waiting");
}
}
flushControl.waitIfStalled(); // block if stalled
} while (flushControl.numQueuedFlushes() != 0); // still queued DWPTs try help flushing
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", "continue indexing after helping out flushing DocumentsWriter is healthy");
}
}
return maybeMerge;
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
private boolean postUpdate(DocumentsWriterPerThread flushingDWPT, boolean maybeMerge) throws IOException {
if (flushControl.doApplyAllDeletes()) {
applyAllDeletes(deleteQueue);
}
if (flushingDWPT != null) {
maybeMerge |= doFlush(flushingDWPT);
} else {
final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush();
if (nextPendingFlush != null) {
maybeMerge |= doFlush(nextPendingFlush);
}
}
return maybeMerge;
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
boolean updateDocuments(final Iterable<? extends Iterable<? extends IndexableField>> docs, final Analyzer analyzer,
final Term delTerm) throws CorruptIndexException, IOException {
boolean maybeMerge = preUpdate();
final ThreadState perThread = flushControl.obtainAndLock();
final DocumentsWriterPerThread flushingDWPT;
try {
if (!perThread.isActive()) {
ensureOpen();
assert false: "perThread is not active but we are still open";
}
final DocumentsWriterPerThread dwpt = perThread.dwpt;
try {
final int docCount = dwpt.updateDocuments(docs, analyzer, delTerm);
numDocsInRAM.addAndGet(docCount);
} finally {
if (dwpt.checkAndResetHasAborted()) {
flushControl.doOnAbort(perThread);
}
}
final boolean isUpdate = delTerm != null;
flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate);
} finally {
perThread.unlock();
}
return postUpdate(flushingDWPT, maybeMerge);
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
boolean updateDocument(final Iterable<? extends IndexableField> doc, final Analyzer analyzer,
final Term delTerm) throws CorruptIndexException, IOException {
boolean maybeMerge = preUpdate();
final ThreadState perThread = flushControl.obtainAndLock();
final DocumentsWriterPerThread flushingDWPT;
try {
if (!perThread.isActive()) {
ensureOpen();
assert false: "perThread is not active but we are still open";
}
final DocumentsWriterPerThread dwpt = perThread.dwpt;
try {
dwpt.updateDocument(doc, analyzer, delTerm);
numDocsInRAM.incrementAndGet();
} finally {
if (dwpt.checkAndResetHasAborted()) {
flushControl.doOnAbort(perThread);
}
}
final boolean isUpdate = delTerm != null;
flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate);
} finally {
perThread.unlock();
}
return postUpdate(flushingDWPT, maybeMerge);
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
private boolean doFlush(DocumentsWriterPerThread flushingDWPT) throws IOException {
boolean maybeMerge = false;
while (flushingDWPT != null) {
maybeMerge = true;
boolean success = false;
SegmentFlushTicket ticket = null;
try {
assert currentFullFlushDelQueue == null
|| flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: "
+ currentFullFlushDelQueue + "but was: " + flushingDWPT.deleteQueue
+ " " + flushControl.isFullFlush();
/*
* Since the flush process with DWPTs is concurrent, several DWPTs
* could flush at the same time, so we must maintain the order of the
* flushes before we can apply a flushed segment and the frozen global
* deletes it is buffering. The reason is that the global deletes mark
* a certain point in time at which we took a DWPT out of rotation and
* froze the global deletes.
*
* Example: flush 'A' starts and freezes the global deletes, then flush
* 'B' starts and freezes all deletes that occurred since 'A' started.
* If 'B' finishes before 'A', we need to wait until 'A' is done,
* otherwise the deletes frozen by 'B' are not applied to 'A' and we
* might miss deleting documents in 'A'.
*/
try {
// Each flush is assigned a ticket in the order they acquire the ticketQueue lock
ticket = ticketQueue.addFlushTicket(flushingDWPT);
// flush concurrently without locking
final FlushedSegment newSegment = flushingDWPT.flush();
ticketQueue.addSegment(ticket, newSegment);
// flush was successful once we reached this point - new seg. has been assigned to the ticket!
success = true;
} finally {
if (!success && ticket != null) {
// In the case of a failure make sure we are making progress and
// apply all the deletes, since the segment flush failed and the flush
// ticket could hold global deletes; see FlushTicket#canPublish()
ticketQueue.markTicketFailed(ticket);
}
}
/*
* Now we are done and try to flush the ticket queue if the head of the
* queue has already finished the flush.
*/
ticketQueue.tryPurge(this);
} finally {
flushControl.doAfterFlush(flushingDWPT);
flushingDWPT.checkAndResetHasAborted();
indexWriter.flushCount.incrementAndGet();
indexWriter.doAfterFlush();
}
flushingDWPT = flushControl.nextPendingFlush();
}
// If deletes alone are consuming > 1/2 our RAM
// buffer, force them all to apply now. This is to
// prevent too-frequent flushing of a long tail of
// tiny segments:
final double ramBufferSizeMB = indexWriter.getConfig().getRAMBufferSizeMB();
if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
flushControl.getDeleteBytesUsed() > (1024*1024*ramBufferSizeMB/2)) {
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", "force apply deletes bytesUsed=" + flushControl.getDeleteBytesUsed() + " vs ramBuffer=" + (1024*1024*ramBufferSizeMB));
}
applyAllDeletes(deleteQueue);
}
return maybeMerge;
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
void finishFlush(FlushedSegment newSegment, FrozenBufferedDeletes bufferedDeletes)
throws IOException {
// Finish the flushed segment and publish it to IndexWriter
if (newSegment == null) {
assert bufferedDeletes != null;
if (bufferedDeletes != null && bufferedDeletes.any()) {
indexWriter.publishFrozenDeletes(bufferedDeletes);
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", "flush: push buffered deletes: " + bufferedDeletes);
}
}
} else {
publishFlushedSegment(newSegment, bufferedDeletes);
}
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
private void publishFlushedSegment(FlushedSegment newSegment, FrozenBufferedDeletes globalPacket)
throws IOException {
assert newSegment != null;
assert newSegment.segmentInfo != null;
final SegmentInfoPerCommit segInfo = indexWriter.prepareFlushedSegment(newSegment);
final BufferedDeletes deletes = newSegment.segmentDeletes;
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", Thread.currentThread().getName() + ": publishFlushedSegment seg-private deletes=" + deletes);
}
FrozenBufferedDeletes packet = null;
if (deletes != null && deletes.any()) {
// Segment private delete
packet = new FrozenBufferedDeletes(deletes, true);
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", "flush: push buffered seg private deletes: " + packet);
}
}
// now publish!
indexWriter.publishFlushedSegment(segInfo, packet, globalPacket);
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
final boolean flushAllThreads()
throws IOException {
final DocumentsWriterDeleteQueue flushingDeleteQueue;
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", Thread.currentThread().getName() + " startFullFlush");
}
synchronized (this) {
pendingChangesInCurrentFullFlush = anyChanges();
flushingDeleteQueue = deleteQueue;
/* Cutover to a new delete queue. This must be synced on the flush control
* otherwise a new DWPT could sneak into the loop with an already flushing
* delete queue */
flushControl.markForFullFlush(); // swaps the delQueue synced on FlushControl
assert setFlushingDeleteQueue(flushingDeleteQueue);
}
assert currentFullFlushDelQueue != null;
assert currentFullFlushDelQueue != deleteQueue;
boolean anythingFlushed = false;
try {
DocumentsWriterPerThread flushingDWPT;
// Help out with flushing:
while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
anythingFlushed |= doFlush(flushingDWPT);
}
// If a concurrent flush is still in flight wait for it
flushControl.waitForFlush();
if (!anythingFlushed && flushingDeleteQueue.anyChanges()) { // apply deletes if we did not flush any document
if (infoStream.isEnabled("DW")) {
infoStream.message("DW", Thread.currentThread().getName() + ": flush naked frozen global deletes");
}
ticketQueue.addDeletesAndPurge(this, flushingDeleteQueue);
} else {
ticketQueue.forcePurge(this);
}
assert !flushingDeleteQueue.anyChanges() && !ticketQueue.hasTickets();
} finally {
assert flushingDeleteQueue == currentFullFlushDelQueue;
}
return anythingFlushed;
}
// in lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
@Override
public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
final boolean doCFS;
if (!useCompoundFile) {
doCFS = false;
} else if (noCFSRatio == 1.0) {
doCFS = true;
} else {
long totalSize = 0;
for (SegmentInfoPerCommit info : infos) {
totalSize += size(info);
}
doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
}
return doCFS;
}
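// Worked example of the ratio check above (editor's addition, illustrative byte
// sizes): with useCompoundFile=true and noCFSRatio=0.1, a 50 MB merged segment
// in an index whose segments total 1000 MB gets a compound file (50 <= 0.1 *
// 1000), while a 200 MB merged segment would not (200 > 100).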
// in lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
protected long sizeDocs(SegmentInfoPerCommit info) throws IOException {
if (calibrateSizeByDeletes) {
int delCount = writer.get().numDeletedDocs(info);
assert delCount <= info.info.getDocCount();
return (info.info.getDocCount() - (long)delCount);
} else {
return info.info.getDocCount();
}
}
// in lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
protected long sizeBytes(SegmentInfoPerCommit info) throws IOException {
long byteSize = info.sizeInBytes();
if (calibrateSizeByDeletes) {
int delCount = writer.get().numDeletedDocs(info);
double delRatio = (info.info.getDocCount() <= 0 ? 0.0f : ((float)delCount / (float)info.info.getDocCount()));
assert delRatio <= 1.0;
return (info.info.getDocCount() <= 0 ? byteSize : (long)(byteSize * (1.0 - delRatio)));
} else {
return byteSize;
}
}
// in lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
protected boolean isMerged(SegmentInfos infos, int maxNumSegments, Map<SegmentInfoPerCommit,Boolean> segmentsToMerge) throws IOException {
final int numSegments = infos.size();
int numToMerge = 0;
SegmentInfoPerCommit mergeInfo = null;
boolean segmentIsOriginal = false;
for(int i=0;i<numSegments && numToMerge <= maxNumSegments;i++) {
final SegmentInfoPerCommit info = infos.info(i);
final Boolean isOriginal = segmentsToMerge.get(info);
if (isOriginal != null) {
segmentIsOriginal = isOriginal;
numToMerge++;
mergeInfo = info;
}
}
return numToMerge <= maxNumSegments &&
(numToMerge != 1 || !segmentIsOriginal || isMerged(mergeInfo));
}
// in lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
protected boolean isMerged(SegmentInfoPerCommit info)
throws IOException {
IndexWriter w = writer.get();
assert w != null;
boolean hasDeletions = w.numDeletedDocs(info) > 0;
return !hasDeletions &&
info.info.dir == w.getDirectory() &&
(info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0);
}
// in lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
private MergeSpecification findForcedMergesSizeLimit(
SegmentInfos infos, int maxNumSegments, int last) throws IOException {
MergeSpecification spec = new MergeSpecification();
final List<SegmentInfoPerCommit> segments = infos.asList();
int start = last - 1;
while (start >= 0) {
SegmentInfoPerCommit info = infos.info(start);
if (size(info) > maxMergeSizeForForcedMerge || sizeDocs(info) > maxMergeDocs) {
if (verbose()) {
message("findForcedMergesSizeLimit: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSizeForForcedMerge + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")");
}
// need to skip that segment + add a merge for the 'right' segments,
// unless there is only 1 which is merged.
if (last - start - 1 > 1 || (start != last - 1 && !isMerged(infos.info(start + 1)))) {
// there is more than 1 segment to the right of
// this one, or a mergeable single segment.
spec.add(new OneMerge(segments.subList(start + 1, last)));
}
last = start;
} else if (last - start == mergeFactor) {
// mergeFactor eligible segments were found, add them as a merge.
spec.add(new OneMerge(segments.subList(start, last)));
last = start;
}
--start;
}
// Add any left-over segments, unless there is just 1
// already fully merged
if (last > 0 && (++start + 1 < last || !isMerged(infos.info(start)))) {
spec.add(new OneMerge(segments.subList(start, last)));
}
return spec.merges.size() == 0 ? null : spec;
}
// in lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
private MergeSpecification findForcedMergesMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException {
MergeSpecification spec = new MergeSpecification();
final List<SegmentInfoPerCommit> segments = infos.asList();
// First, enroll all "full" merges (size
// mergeFactor) to potentially be run concurrently:
while (last - maxNumSegments + 1 >= mergeFactor) {
spec.add(new OneMerge(segments.subList(last - mergeFactor, last)));
last -= mergeFactor;
}
// Only if there are no full merges pending do we
// add a final partial (< mergeFactor segments) merge:
if (0 == spec.merges.size()) {
if (maxNumSegments == 1) {
// Since we must merge down to 1 segment, the
// choice is simple:
if (last > 1 || !isMerged(infos.info(0))) {
spec.add(new OneMerge(segments.subList(0, last)));
}
} else if (last > maxNumSegments) {
// Take care to pick a partial merge that is
// least cost, but does not make the index too
// lopsided. If we always just picked the
// partial tail then we could produce a highly
// lopsided index over time:
// We must merge this many segments to leave
// maxNumSegments in the index (from when
// forceMerge was first kicked off):
final int finalMergeSize = last - maxNumSegments + 1;
// Consider all possible starting points:
long bestSize = 0;
int bestStart = 0;
for(int i=0;i<last-finalMergeSize+1;i++) {
long sumSize = 0;
for(int j=0;j<finalMergeSize;j++) {
sumSize += size(infos.info(j+i));
}
if (i == 0 || (sumSize < 2*size(infos.info(i-1)) && sumSize < bestSize)) {
bestStart = i;
bestSize = sumSize;
}
}
spec.add(new OneMerge(segments.subList(bestStart, bestStart + finalMergeSize)));
}
}
return spec.merges.size() == 0 ? null : spec;
}
// in lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
@Override
public MergeSpecification findForcedMerges(SegmentInfos infos,
int maxNumSegments, Map<SegmentInfoPerCommit,Boolean> segmentsToMerge) throws IOException {
assert maxNumSegments > 0;
if (verbose()) {
message("findForcedMerges: maxNumSegs=" + maxNumSegments + " segsToMerge="+ segmentsToMerge);
}
// If the segments are already merged (e.g. there's only 1 segment), or
// there are <= maxNumSegments segments to merge, there is nothing to do:
if (isMerged(infos, maxNumSegments, segmentsToMerge)) {
if (verbose()) {
message("already merged; skip");
}
return null;
}
// Find the newest (rightmost) segment that needs to
// be merged (other segments may have been flushed
// since merging started):
int last = infos.size();
while (last > 0) {
final SegmentInfoPerCommit info = infos.info(--last);
if (segmentsToMerge.get(info) != null) {
last++;
break;
}
}
if (last == 0) {
if (verbose()) {
message("last == 0; skip");
}
return null;
}
// There is only one segment already, and it is merged
if (maxNumSegments == 1 && last == 1 && isMerged(infos.info(0))) {
if (verbose()) {
message("already 1 seg; skip");
}
return null;
}
// Check if there are any segments above the threshold
boolean anyTooLarge = false;
for (int i = 0; i < last; i++) {
SegmentInfoPerCommit info = infos.info(i);
if (size(info) > maxMergeSizeForForcedMerge || sizeDocs(info) > maxMergeDocs) {
anyTooLarge = true;
break;
}
}
if (anyTooLarge) {
return findForcedMergesSizeLimit(infos, maxNumSegments, last);
} else {
return findForcedMergesMaxNumSegments(infos, maxNumSegments, last);
}
}
// in lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
@Override
public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos)
throws CorruptIndexException, IOException {
final List<SegmentInfoPerCommit> segments = segmentInfos.asList();
final int numSegments = segments.size();
if (verbose()) {
message("findForcedDeleteMerges: " + numSegments + " segments");
}
MergeSpecification spec = new MergeSpecification();
int firstSegmentWithDeletions = -1;
IndexWriter w = writer.get();
assert w != null;
for(int i=0;i<numSegments;i++) {
final SegmentInfoPerCommit info = segmentInfos.info(i);
int delCount = w.numDeletedDocs(info);
if (delCount > 0) {
if (verbose()) {
message(" segment " + info.info.name + " has deletions");
}
if (firstSegmentWithDeletions == -1)
firstSegmentWithDeletions = i;
else if (i - firstSegmentWithDeletions == mergeFactor) {
// We've seen mergeFactor segments in a row with
// deletions, so force a merge now:
if (verbose()) {
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
}
spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
firstSegmentWithDeletions = i;
}
} else if (firstSegmentWithDeletions != -1) {
// End of a sequence of segments with deletions, so,
// merge those past segments even if it's fewer than
// mergeFactor segments
if (verbose()) {
message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive");
}
spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i)));
firstSegmentWithDeletions = -1;
}
}
if (firstSegmentWithDeletions != -1) {
if (verbose()) {
message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive");
}
spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, numSegments)));
}
return spec;
}
// in lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
@Override
public MergeSpecification findMerges(SegmentInfos infos) throws IOException {
final int numSegments = infos.size();
if (verbose()) {
message("findMerges: " + numSegments + " segments");
}
// Compute levels, which is just log (base mergeFactor)
// of the size of each segment
final List<SegmentInfoAndLevel> levels = new ArrayList<SegmentInfoAndLevel>();
final float norm = (float) Math.log(mergeFactor);
final Collection<SegmentInfoPerCommit> mergingSegments = writer.get().getMergingSegments();
for(int i=0;i<numSegments;i++) {
final SegmentInfoPerCommit info = infos.info(i);
long size = size(info);
// Floor tiny segments
if (size < 1) {
size = 1;
}
final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i);
levels.add(infoLevel);
if (verbose()) {
final long segBytes = sizeBytes(info);
String extra = mergingSegments.contains(info) ? " [merging]" : "";
if (size >= maxMergeSize) {
extra += " [skip: too large]";
}
message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format("%.3f MB", segBytes/1024/1024.) + extra);
}
}
final float levelFloor;
if (minMergeSize <= 0)
levelFloor = (float) 0.0;
else
levelFloor = (float) (Math.log(minMergeSize)/norm);
// Now, we quantize the log values into levels. The
// first level is any segment whose log size is within
// LEVEL_LOG_SPAN of the max size, or which has such a
// segment "to the right". Then, we find the max of all
// other segments and use that to define the next level
// segment, etc.
MergeSpecification spec = null;
final int numMergeableSegments = levels.size();
int start = 0;
while(start < numMergeableSegments) {
// Find max level of all segments not already
// quantized.
float maxLevel = levels.get(start).level;
for(int i=1+start;i<numMergeableSegments;i++) {
final float level = levels.get(i).level;
if (level > maxLevel) {
maxLevel = level;
}
}
// Now search backwards for the rightmost segment that
// falls into this level:
float levelBottom;
if (maxLevel <= levelFloor) {
// All remaining segments fall into the min level
levelBottom = -1.0F;
} else {
levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN);
// Force a boundary at the level floor
if (levelBottom < levelFloor && maxLevel >= levelFloor) {
levelBottom = levelFloor;
}
}
int upto = numMergeableSegments-1;
while(upto >= start) {
if (levels.get(upto).level >= levelBottom) {
break;
}
upto--;
}
if (verbose()) {
message(" level " + levelBottom + " to " + maxLevel + ": " + (1+upto-start) + " segments");
}
// Finally, record all merges that are viable at this level:
int end = start + mergeFactor;
while(end <= 1+upto) {
boolean anyTooLarge = false;
boolean anyMerging = false;
for(int i=start;i<end;i++) {
final SegmentInfoPerCommit info = levels.get(i).info;
anyTooLarge |= (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs);
if (mergingSegments.contains(info)) {
anyMerging = true;
break;
}
}
if (anyMerging) {
// skip
} else if (!anyTooLarge) {
if (spec == null)
spec = new MergeSpecification();
final List<SegmentInfoPerCommit> mergeInfos = new ArrayList<SegmentInfoPerCommit>();
for(int i=start;i<end;i++) {
mergeInfos.add(levels.get(i).info);
assert infos.contains(levels.get(i).info);
}
if (verbose()) {
message(" add merge=" + writer.get().segString(mergeInfos) + " start=" + start + " end=" + end);
}
spec.add(new OneMerge(mergeInfos));
} else if (verbose()) {
message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping");
}
start = end;
end = start + mergeFactor;
}
start = 1+upto;
}
return spec;
}
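// Rough worked example of the level quantization above (editor's addition,
// illustrative sizes): with mergeFactor = 10, a segment of size 1,000,000 gets
// level log(1e6)/log(10) = 6.0 and a 100,000-sized segment gets 5.0. Since
// LEVEL_LOG_SPAN is 0.75, the level-6.0 window only reaches down to 5.25, so
// the smaller segment is left out of that window and is handled again in a
// later pass at its own, lower level.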
// in lucene/core/src/java/org/apache/lucene/index/MultiReader.java
@Override
protected synchronized void doClose() throws IOException {
IOException ioe = null;
for (int i = 0; i < subReaders.length; i++) {
try {
if (closeSubReaders) {
subReaders[i].close();
} else {
subReaders[i].decRef();
}
} catch (IOException e) {
if (ioe == null) ioe = e;
}
}
// throw the first exception
if (ioe != null) throw ioe;
}
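// Illustrative sketch (editor's addition): composing two readers with
// closeSubReaders=false, the variant whose doClose() above only decRefs the
// sub-readers, so the caller keeps ownership. Paths are assumptions.
import java.io.File;
import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.store.FSDirectory;

public class MultiReaderSketch {
  public static void main(String[] args) throws IOException {
    DirectoryReader a = DirectoryReader.open(FSDirectory.open(new File("/tmp/index-a")));
    DirectoryReader b = DirectoryReader.open(FSDirectory.open(new File("/tmp/index-b")));
    IndexReader both = new MultiReader(new IndexReader[] {a, b}, false); // don't own subs
    both.close(); // decRefs a and b without closing them
    a.close();    // caller still closes its own references
    b.close();
  }
}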
// in lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
public static DirectoryReader open(final Directory directory) throws CorruptIndexException, IOException {
return StandardDirectoryReader.open(directory, null, DEFAULT_TERMS_INDEX_DIVISOR);
}
// in lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
public static DirectoryReader open(final Directory directory, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
return StandardDirectoryReader.open(directory, null, termInfosIndexDivisor);
}
// in lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
public static DirectoryReader open(final IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
return writer.getReader(applyAllDeletes);
}
// in lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
public static DirectoryReader open(final IndexCommit commit) throws CorruptIndexException, IOException {
return StandardDirectoryReader.open(commit.getDirectory(), commit, DEFAULT_TERMS_INDEX_DIVISOR);
}
// in lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
public static DirectoryReader open(final IndexCommit commit, int termInfosIndexDivisor) throws CorruptIndexException, IOException {
return StandardDirectoryReader.open(commit.getDirectory(), commit, termInfosIndexDivisor);
}
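// Illustrative sketch (editor's addition): the two common ways to obtain a
// DirectoryReader from the factory methods above. Index path, analyzer and
// version constant are assumptions.
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class OpenSketch {
  public static void main(String[] args) throws IOException {
    Directory dir = FSDirectory.open(new File("/tmp/index"));
    // Point-in-time reader over the most recent commit:
    DirectoryReader committed = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(committed); // query this snapshot
    committed.close();
    // Near-real-time reader that also sees a writer's uncommitted changes:
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)));
    DirectoryReader nrt = DirectoryReader.open(writer, true); // applyAllDeletes=true
    nrt.close();
    writer.close();
    dir.close();
  }
}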
// in lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
public static DirectoryReader openIfChanged(DirectoryReader oldReader) throws IOException {
final DirectoryReader newReader = oldReader.doOpenIfChanged();
assert newReader != oldReader;
return newReader;
}
// in lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
public static DirectoryReader openIfChanged(DirectoryReader oldReader, IndexCommit commit) throws IOException {
final DirectoryReader newReader = oldReader.doOpenIfChanged(commit);
assert newReader != oldReader;
return newReader;
}
// in lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
public static DirectoryReader openIfChanged(DirectoryReader oldReader, IndexWriter writer, boolean applyAllDeletes) throws IOException {
final DirectoryReader newReader = oldReader.doOpenIfChanged(writer, applyAllDeletes);
assert newReader != oldReader;
return newReader;
}
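// Illustrative sketch (editor's addition): the usual refresh loop built on
// openIfChanged(), which by contract returns null when the index is unchanged.
import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;

public class ReopenSketch {
  static DirectoryReader maybeRefresh(DirectoryReader current) throws IOException {
    DirectoryReader newer = DirectoryReader.openIfChanged(current);
    if (newer == null) {
      return current; // nothing changed, keep the old reader
    }
    current.close();  // release the old point-in-time view
    return newer;     // route new searches to the fresh reader
  }
}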
// in lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
public static List<IndexCommit> listCommits(Directory dir) throws IOException {
final String[] files = dir.listAll();
List<IndexCommit> commits = new ArrayList<IndexCommit>();
SegmentInfos latest = new SegmentInfos();
latest.read(dir);
final long currentGen = latest.getGeneration();
commits.add(new StandardDirectoryReader.ReaderCommit(latest, dir));
for(int i=0;i<files.length;i++) {
final String fileName = files[i];
if (fileName.startsWith(IndexFileNames.SEGMENTS) &&
!fileName.equals(IndexFileNames.SEGMENTS_GEN) &&
SegmentInfos.generationFromSegmentsFileName(fileName) < currentGen) {
SegmentInfos sis = new SegmentInfos();
try {
// IOException is allowed to be thrown here, in case
// segments_N is corrupt
sis.read(dir, fileName);
} catch (FileNotFoundException fnfe) {
// LUCENE-948: on NFS (and maybe others), if
// you have writers switching back and forth
// between machines, it's very likely that the
// dir listing will be stale and will claim a
// file segments_X exists when in fact it
// doesn't. So, we catch this and handle it
// as if the file does not exist
sis = null;
}
if (sis != null)
commits.add(new StandardDirectoryReader.ReaderCommit(sis, dir));
}
}
// Ensure that the commit points are sorted in ascending order.
Collections.sort(commits);
return commits;
}
// in lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
public static boolean indexExists(Directory directory) throws IOException {
try {
new SegmentInfos().read(directory);
return true;
} catch (IOException ioe) {
return false;
}
}
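// Illustrative sketch (editor's addition): combining indexExists() and
// listCommits(). Only commits that an IndexDeletionPolicy actually kept are
// visible; the path is an assumption.
import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CommitsSketch {
  public static void main(String[] args) throws IOException {
    Directory dir = FSDirectory.open(new File("/tmp/index"));
    if (DirectoryReader.indexExists(dir)) {
      List<IndexCommit> commits = DirectoryReader.listCommits(dir); // ascending by generation
      for (IndexCommit commit : commits) {
        System.out.println(commit.getSegmentsFileName() + " gen=" + commit.getGeneration());
      }
      // Any listed commit can be opened directly, e.g. the newest one:
      DirectoryReader reader = DirectoryReader.open(commits.get(commits.size() - 1));
      reader.close();
    }
    dir.close();
  }
}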
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
DirectoryReader getReader() throws IOException {
return getReader(true);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
DirectoryReader getReader(boolean applyAllDeletes) throws IOException {
ensureOpen();
final long tStart = System.currentTimeMillis();
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "flush at getReader");
}
// Do this up front before flushing so that the readers
// obtained during this flush are pooled, the first time
// this method is called:
poolReaders = true;
final DirectoryReader r;
doBeforeFlush();
boolean anySegmentFlushed = false;
/*
* for releasing a NRT reader we must ensure that
* DW doesn't add any segments or deletes until we are
* done with creating the NRT DirectoryReader.
* We release the two stage full flush after we are done opening the
* directory reader!
*/
synchronized (fullFlushLock) {
boolean success = false;
try {
anySegmentFlushed = docWriter.flushAllThreads();
if (!anySegmentFlushed) {
// prevent double increment since docWriter#doFlush increments the flushcount
// if we flushed anything.
flushCount.incrementAndGet();
}
success = true;
// Prevent segmentInfos from changing while opening the
// reader; in theory we could do similar retry logic,
// just like we do when loading segments_N
synchronized(this) {
maybeApplyDeletes(applyAllDeletes);
r = StandardDirectoryReader.open(this, segmentInfos, applyAllDeletes);
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "return reader version=" + r.getVersion() + " reader=" + r);
}
}
} catch (OutOfMemoryError oom) {
handleOOM(oom, "getReader");
// never reached but javac disagrees:
return null;
} finally {
if (!success) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit exception during NRT reader");
}
}
// Done: finish the full flush!
docWriter.finishFullFlush(success);
doAfterFlush();
}
}
if (anySegmentFlushed) {
maybeMerge();
}
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "getReader took " + (System.currentTimeMillis() - tStart) + " msec");
}
return r;
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public synchronized void drop(SegmentInfoPerCommit info) throws IOException {
final ReadersAndLiveDocs rld = readerMap.get(info);
if (rld != null) {
assert info == rld.info;
readerMap.remove(info);
rld.dropReaders();
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public synchronized void release(ReadersAndLiveDocs rld) throws IOException {
// Matches incRef in get:
rld.decRef();
// Pool still holds a ref:
assert rld.refCount() >= 1;
if (!poolReaders && rld.refCount() == 1) {
// This is the last ref to this RLD, and we're not
// pooling, so remove it:
if (rld.writeLiveDocs(directory)) {
// Make sure we only write del docs for a live segment:
assert infoIsLive(rld.info);
// Must checkpoint w/ deleter, because we just
// created a new _X_N.del file.
deleter.checkpoint(segmentInfos, false);
}
rld.dropReaders();
readerMap.remove(rld.info);
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
synchronized void dropAll(boolean doSave) throws IOException {
final Iterator<Map.Entry<SegmentInfoPerCommit,ReadersAndLiveDocs>> it = readerMap.entrySet().iterator();
while(it.hasNext()) {
final ReadersAndLiveDocs rld = it.next().getValue();
if (doSave && rld.writeLiveDocs(directory)) {
// Make sure we only write del docs for a live segment:
assert infoIsLive(rld.info);
// Must checkpoint w/ deleter, because we just
// created a new _X_N.del file.
deleter.checkpoint(segmentInfos, false);
}
// Important to remove as-we-go, not with .clear()
// in the end, in case we hit an exception;
// otherwise we could over-decref if close() is
// called again:
it.remove();
// NOTE: it is allowed that these decRefs do not
// actually close the SRs; this happens when a
// near real-time reader is kept open after the
// IndexWriter instance is closed:
rld.dropReaders();
}
assert readerMap.size() == 0;
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public synchronized void commit(SegmentInfos infos) throws IOException {
for (SegmentInfoPerCommit info : infos) {
final ReadersAndLiveDocs rld = readerMap.get(info);
if (rld != null) {
assert rld.info == info;
if (rld.writeLiveDocs(directory)) {
// Make sure we only write del docs for a live segment:
assert infoIsLive(info);
// Must checkpoint w/ deleter, because we just
// created a new _X_N.del file.
deleter.checkpoint(segmentInfos, false);
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public int numDeletedDocs(SegmentInfoPerCommit info) throws IOException {
ensureOpen(false);
int delCount = info.getDelCount();
final ReadersAndLiveDocs rld = readerPool.get(info, false);
if (rld != null) {
delCount += rld.getPendingDeleteCount();
}
return delCount;
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private FieldInfos getFieldInfos(SegmentInfo info) throws IOException {
Directory cfsDir = null;
try {
if (info.getUseCompoundFile()) {
cfsDir = new CompoundFileDirectory(info.dir,
IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION),
IOContext.READONCE,
false);
} else {
cfsDir = info.dir;
}
return info.getCodec().fieldInfosFormat().getFieldInfosReader().read(cfsDir,
info.name,
IOContext.READONCE);
} finally {
if (info.getUseCompoundFile() && cfsDir != null) {
cfsDir.close();
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private FieldNumbers getFieldNumberMap() throws IOException {
final FieldNumbers map = new FieldNumbers();
SegmentInfoPerCommit biggest = null;
for(SegmentInfoPerCommit info : segmentInfos) {
if (biggest == null || (info.info.getDocCount()-info.getDelCount()) > (biggest.info.getDocCount()-biggest.getDelCount())) {
biggest = info;
}
}
if (biggest != null) {
for(FieldInfo fi : getFieldInfos(biggest.info)) {
map.addOrGet(fi.name, fi.number);
}
}
// TODO: we could also pull DV type of each field here,
// and use that to make sure new segment(s) don't change
// the type...
return map;
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private void messageState() throws IOException {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "\ndir=" + directory + "\n" +
"index=" + segString() + "\n" +
"version=" + Constants.LUCENE_VERSION + "\n" +
config.toString());
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void close() throws CorruptIndexException, IOException {
close(true);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void close(boolean waitForMerges) throws CorruptIndexException, IOException {
// Ensure that only one thread actually gets to do the closing:
if (shouldClose()) {
// If any methods have hit OutOfMemoryError, then abort
// on close, in case the internal state of IndexWriter
// or DocumentsWriter is corrupt
if (hitOOM)
rollbackInternal();
else
closeInternal(waitForMerges);
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private void closeInternal(boolean waitForMerges) throws CorruptIndexException, IOException {
try {
if (pendingCommit != null) {
throw new IllegalStateException("cannot close: prepareCommit was already called with no corresponding call to commit");
}
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "now flush at close waitForMerges=" + waitForMerges);
}
docWriter.close();
// Only allow a new merge to be triggered if we are
// going to wait for merges:
if (!hitOOM) {
flush(waitForMerges, true);
}
if (waitForMerges)
// Give merge scheduler last chance to run, in case
// any pending merges are waiting:
mergeScheduler.merge(this);
mergePolicy.close();
synchronized(this) {
finishMerges(waitForMerges);
stopMerges = true;
}
mergeScheduler.close();
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "now call final commit()");
}
if (!hitOOM) {
commitInternal(null);
}
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "at close: " + segString());
}
// used by assert below
final DocumentsWriter oldWriter = docWriter;
synchronized(this) {
readerPool.dropAll(true);
docWriter = null;
deleter.close();
}
if (writeLock != null) {
writeLock.release(); // release write lock
writeLock = null;
}
synchronized(this) {
closed = true;
}
assert oldWriter.perThreadPool.numDeactivatedThreadStates() == oldWriter.perThreadPool.getMaxThreadStates();
} catch (OutOfMemoryError oom) {
handleOOM(oom, "closeInternal");
} finally {
synchronized(this) {
closing = false;
notifyAll();
if (!closed) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit exception while closing");
}
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public synchronized int numDocs() throws IOException {
ensureOpen();
int count;
if (docWriter != null)
count = docWriter.getNumDocs();
else
count = 0;
for (final SegmentInfoPerCommit info : segmentInfos) {
count += info.info.getDocCount() - numDeletedDocs(info);
}
return count;
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public synchronized boolean hasDeletions() throws IOException {
ensureOpen();
if (bufferedDeletesStream.any()) {
return true;
}
if (docWriter.anyDeletions()) {
return true;
}
for (final SegmentInfoPerCommit info : segmentInfos) {
if (info.hasDeletions()) {
return true;
}
}
return false;
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void addDocument(Iterable<? extends IndexableField> doc) throws CorruptIndexException, IOException {
addDocument(doc, analyzer);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void addDocument(Iterable<? extends IndexableField> doc, Analyzer analyzer) throws CorruptIndexException, IOException {
updateDocument(null, doc, analyzer);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void addDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs) throws CorruptIndexException, IOException {
addDocuments(docs, analyzer);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void addDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
updateDocuments(null, docs, analyzer);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void updateDocuments(Term delTerm, Iterable<? extends Iterable<? extends IndexableField>> docs) throws CorruptIndexException, IOException {
updateDocuments(delTerm, docs, analyzer);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void updateDocuments(Term delTerm, Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
ensureOpen();
try {
boolean success = false;
boolean anySegmentFlushed = false;
try {
anySegmentFlushed = docWriter.updateDocuments(docs, analyzer, delTerm);
success = true;
} finally {
if (!success) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit exception updating document");
}
}
}
if (anySegmentFlushed) {
maybeMerge();
}
} catch (OutOfMemoryError oom) {
handleOOM(oom, "updateDocuments");
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
ensureOpen();
try {
docWriter.deleteTerms(term);
} catch (OutOfMemoryError oom) {
handleOOM(oom, "deleteDocuments(Term)");
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void deleteDocuments(Term... terms) throws CorruptIndexException, IOException {
ensureOpen();
try {
docWriter.deleteTerms(terms);
} catch (OutOfMemoryError oom) {
handleOOM(oom, "deleteDocuments(Term..)");
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void deleteDocuments(Query query) throws CorruptIndexException, IOException {
ensureOpen();
try {
docWriter.deleteQueries(query);
} catch (OutOfMemoryError oom) {
handleOOM(oom, "deleteDocuments(Query)");
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void deleteDocuments(Query... queries) throws CorruptIndexException, IOException {
ensureOpen();
try {
docWriter.deleteQueries(queries);
} catch (OutOfMemoryError oom) {
handleOOM(oom, "deleteDocuments(Query..)");
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void updateDocument(Term term, Iterable<? extends IndexableField> doc) throws CorruptIndexException, IOException {
ensureOpen();
updateDocument(term, doc, getAnalyzer());
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void updateDocument(Term term, Iterable<? extends IndexableField> doc, Analyzer analyzer)
throws CorruptIndexException, IOException {
ensureOpen();
try {
boolean success = false;
boolean anySegmentFlushed = false;
try {
anySegmentFlushed = docWriter.updateDocument(doc, analyzer, term);
success = true;
} finally {
if (!success) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit exception updating document");
}
}
}
if (anySegmentFlushed) {
maybeMerge();
}
} catch (OutOfMemoryError oom) {
handleOOM(oom, "updateDocument");
}
}
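// Illustrative sketch (editor's addition) of the add/update/delete entry points
// above. Field names, values, analyzer and the Field.Store-taking StringField/
// TextField constructors of the 4.x document API are assumptions.
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class UpdateSketch {
  public static void main(String[] args) throws IOException {
    IndexWriter writer = new IndexWriter(FSDirectory.open(new File("/tmp/index")),
        new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)));
    try {
      Document doc = new Document();
      doc.add(new StringField("id", "42", Field.Store.YES));
      doc.add(new TextField("body", "hello merge policies", Field.Store.NO));
      writer.addDocument(doc);                                    // plain add
      writer.updateDocument(new Term("id", "42"), doc);           // atomic delete-then-add
      writer.deleteDocuments(new Term("id", "7"));                // delete by term
      writer.deleteDocuments(new TermQuery(new Term("tag", "stale"))); // delete by query
      writer.commit();
    } finally {
      writer.close();
    }
  }
}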
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
final synchronized Collection<String> getIndexFileNames() throws IOException {
return segmentInfos.files(directory, true);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void forceMerge(int maxNumSegments) throws CorruptIndexException, IOException {
forceMerge(maxNumSegments, true);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void forceMerge(int maxNumSegments, boolean doWait) throws CorruptIndexException, IOException {
ensureOpen();
if (maxNumSegments < 1)
throw new IllegalArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments);
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "forceMerge: index now " + segString());
infoStream.message("IW", "now flush at forceMerge");
}
flush(true, true);
synchronized(this) {
resetMergeExceptions();
segmentsToMerge.clear();
for(SegmentInfoPerCommit info : segmentInfos) {
segmentsToMerge.put(info, Boolean.TRUE);
}
mergeMaxNumSegments = maxNumSegments;
// Now mark all pending & running merges for forced
// merge:
for(final MergePolicy.OneMerge merge : pendingMerges) {
merge.maxNumSegments = maxNumSegments;
segmentsToMerge.put(merge.info, Boolean.TRUE);
}
for (final MergePolicy.OneMerge merge: runningMerges) {
merge.maxNumSegments = maxNumSegments;
segmentsToMerge.put(merge.info, Boolean.TRUE);
}
}
maybeMerge(maxNumSegments);
if (doWait) {
synchronized(this) {
while(true) {
if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMerge");
}
if (mergeExceptions.size() > 0) {
// Forward any exceptions in background merge
// threads to the current thread:
final int size = mergeExceptions.size();
for(int i=0;i<size;i++) {
final MergePolicy.OneMerge merge = mergeExceptions.get(i);
if (merge.maxNumSegments != -1) {
IOException err = new IOException("background merge hit exception: " + merge.segString(directory));
final Throwable t = merge.getException();
if (t != null)
err.initCause(t);
throw err;
}
}
}
if (maxNumSegmentsMergesPending())
doWait();
else
break;
}
}
// If close is called while we are still
// running, throw an exception so the calling
// thread will know merging did not
// complete
ensureOpen();
}
// NOTE: in the ConcurrentMergeScheduler case, when
// doWait is false, we can return immediately while
// background threads accomplish the merging
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void forceMergeDeletes(boolean doWait)
throws CorruptIndexException, IOException {
ensureOpen();
flush(true, true);
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "forceMergeDeletes: index now " + segString());
}
MergePolicy.MergeSpecification spec;
synchronized(this) {
spec = mergePolicy.findForcedDeletesMerges(segmentInfos);
if (spec != null) {
final int numMerges = spec.merges.size();
for(int i=0;i<numMerges;i++)
registerMerge(spec.merges.get(i));
}
}
mergeScheduler.merge(this);
if (spec != null && doWait) {
final int numMerges = spec.merges.size();
synchronized(this) {
boolean running = true;
while(running) {
if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete forceMergeDeletes");
}
// Check each merge that MergePolicy asked us to
// do, to see if any of them are still running and
// if any of them have hit an exception.
running = false;
for(int i=0;i<numMerges;i++) {
final MergePolicy.OneMerge merge = spec.merges.get(i);
if (pendingMerges.contains(merge) || runningMerges.contains(merge)) {
running = true;
}
Throwable t = merge.getException();
if (t != null) {
IOException ioe = new IOException("background merge hit exception: " + merge.segString(directory));
ioe.initCause(t);
throw ioe;
}
}
// If any of our merges are still running, wait:
if (running)
doWait();
}
}
}
// NOTE: in the ConcurrentMergeScheduler case, when
// doWait is false, we can return immediately while
// background threads accomplish the merging
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void forceMergeDeletes() throws CorruptIndexException, IOException {
forceMergeDeletes(true);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public final void maybeMerge() throws CorruptIndexException, IOException {
maybeMerge(-1);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private final void maybeMerge(int maxNumSegments) throws CorruptIndexException, IOException {
ensureOpen(false);
updatePendingMerges(maxNumSegments);
mergeScheduler.merge(this);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private synchronized void updatePendingMerges(int maxNumSegments)
throws CorruptIndexException, IOException {
assert maxNumSegments == -1 || maxNumSegments > 0;
if (stopMerges) {
return;
}
// Do not start new merges if we've hit OOME
if (hitOOM) {
return;
}
final MergePolicy.MergeSpecification spec;
if (maxNumSegments != -1) {
spec = mergePolicy.findForcedMerges(segmentInfos, maxNumSegments, Collections.unmodifiableMap(segmentsToMerge));
if (spec != null) {
final int numMerges = spec.merges.size();
for(int i=0;i<numMerges;i++) {
final MergePolicy.OneMerge merge = spec.merges.get(i);
merge.maxNumSegments = maxNumSegments;
}
}
} else {
spec = mergePolicy.findMerges(segmentInfos);
}
if (spec != null) {
final int numMerges = spec.merges.size();
for(int i=0;i<numMerges;i++) {
registerMerge(spec.merges.get(i));
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void rollback() throws IOException {
ensureOpen();
// Ensure that only one thread actually gets to do the closing:
if (shouldClose())
rollbackInternal();
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private void rollbackInternal() throws IOException {
boolean success = false;
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "rollback");
}
try {
synchronized(this) {
finishMerges(false);
stopMerges = true;
}
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "rollback: done finish merges");
}
// Must pre-close these two, in case they increment
// changeCount so that we can then set it to false
// before calling closeInternal
mergePolicy.close();
mergeScheduler.close();
bufferedDeletesStream.clear();
synchronized(this) {
if (pendingCommit != null) {
pendingCommit.rollbackCommit(directory);
deleter.decRef(pendingCommit);
pendingCommit = null;
notifyAll();
}
// Don't bother saving any changes in our segmentInfos
readerPool.dropAll(false);
// Keep the same segmentInfos instance but replace all
// of its SegmentInfo instances. This is so the next
// attempt to commit using this instance of IndexWriter
// will always write to a new generation ("write
// once").
segmentInfos.rollbackSegmentInfos(rollbackSegments);
if (infoStream.isEnabled("IW") ) {
infoStream.message("IW", "rollback: infos=" + segString(segmentInfos));
}
docWriter.abort();
assert testPoint("rollback before checkpoint");
// Ask deleter to locate unreferenced files & remove
// them:
deleter.checkpoint(segmentInfos, false);
deleter.refresh();
}
lastCommitChangeCount = changeCount;
success = true;
} catch (OutOfMemoryError oom) {
handleOOM(oom, "rollbackInternal");
} finally {
synchronized(this) {
if (!success) {
closing = false;
notifyAll();
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit exception during rollback");
}
}
}
}
closeInternal(false);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public synchronized void deleteAll() throws IOException {
ensureOpen();
boolean success = false;
try {
// Abort any running merges
finishMerges(false);
// Remove any buffered docs
docWriter.abort();
// Remove all segments
segmentInfos.clear();
// Ask deleter to locate unreferenced files & remove them:
deleter.checkpoint(segmentInfos, false);
deleter.refresh();
// Don't bother saving any changes in our segmentInfos
readerPool.dropAll(false);
// Mark that the index has changed
++changeCount;
segmentInfos.changed();
success = true;
} catch (OutOfMemoryError oom) {
handleOOM(oom, "deleteAll");
} finally {
if (!success) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit exception during deleteAll");
}
}
}
}
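// --- Illustrative usage sketch (not from the sources above): deleteAll() removes all
// segments and any buffered documents but keeps the writer open; like any other change,
// it only becomes visible to new readers after commit(), and until then it can still be
// undone by rollback(). The directory is hypothetical.
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class DeleteAllSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)));
    // ... documents would normally be added here ...
    writer.deleteAll();   // drop every segment and all buffered docs
    writer.commit();      // make the now-empty index visible to readers
    writer.close();
  }
}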
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private synchronized void finishMerges(boolean waitForMerges) throws IOException {
if (!waitForMerges) {
stopMerges = true;
// Abort all pending & running merges:
for (final MergePolicy.OneMerge merge : pendingMerges) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "now abort pending merge " + segString(merge.segments));
}
merge.abort();
mergeFinish(merge);
}
pendingMerges.clear();
for (final MergePolicy.OneMerge merge : runningMerges) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "now abort running merge " + segString(merge.segments));
}
merge.abort();
}
// These merges periodically check whether they have
// been aborted, and stop if so. We wait here to make
// sure they all stop. It should not take very long
// because the merge threads periodically check if
// they are aborted.
while(runningMerges.size() > 0) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "now wait for " + runningMerges.size() + " running merge to abort");
}
doWait();
}
stopMerges = false;
notifyAll();
assert 0 == mergingSegments.size();
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "all running merges have aborted");
}
} else {
// waitForMerges() will ensure any running addIndexes finishes.
// It's fine if a new merge attempts to start, because our caller
// will see that we are in the process of closing and will throw
// an AlreadyClosedException.
waitForMerges();
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
synchronized void checkpoint() throws IOException {
changeCount++;
segmentInfos.changed();
deleter.checkpoint(segmentInfos, false);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
SegmentInfoPerCommit prepareFlushedSegment(FlushedSegment flushedSegment) throws IOException {
assert flushedSegment != null;
SegmentInfoPerCommit newSegment = flushedSegment.segmentInfo;
setDiagnostics(newSegment.info, "flush");
IOContext context = new IOContext(new FlushInfo(newSegment.info.getDocCount(), newSegment.info.sizeInBytes()));
boolean success = false;
try {
if (useCompoundFile(newSegment)) {
// Now build compound file
Collection<String> oldFiles = createCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, newSegment.info, context);
newSegment.info.setUseCompoundFile(true);
synchronized(this) {
deleter.deleteNewFiles(oldFiles);
}
}
// Have codec write SegmentInfo. Must do this after
// creating CFS so that 1) .si isn't slurped into CFS,
// and 2) .si reflects useCompoundFile=true change
// above:
codec.segmentInfoFormat().getSegmentInfosWriter().write(directory, newSegment.info, flushedSegment.fieldInfos, context);
// TODO: ideally we would freeze newSegment here!!
// because any changes after writing the .si will be
// lost...
// Must write deleted docs after the CFS so we don't
// slurp the del file into CFS:
if (flushedSegment.liveDocs != null) {
final int delCount = flushedSegment.delCount;
assert delCount > 0;
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "flush: write " + delCount + " deletes gen=" + flushedSegment.segmentInfo.getDelGen());
}
// TODO: in the NRT case it'd be better to hand
// this del vector over to the
// shortly-to-be-opened SegmentReader and let it
// carry the changes; there's no reason to use
// filesystem as intermediary here.
SegmentInfoPerCommit info = flushedSegment.segmentInfo;
Codec codec = info.info.getCodec();
codec.liveDocsFormat().writeLiveDocs(flushedSegment.liveDocs, directory, info, delCount, context);
newSegment.setDelCount(delCount);
newSegment.advanceDelGen();
}
success = true;
} finally {
if (!success) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit exception " +
"reating compound file for newly flushed segment " + newSegment.info.name);
}
synchronized(this) {
deleter.refresh(newSegment.info.name);
}
}
}
return newSegment;
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
synchronized void publishFrozenDeletes(FrozenBufferedDeletes packet) throws IOException {
assert packet != null && packet.any();
synchronized (bufferedDeletesStream) {
bufferedDeletesStream.push(packet);
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
synchronized void publishFlushedSegment(SegmentInfoPerCommit newSegment,
FrozenBufferedDeletes packet, FrozenBufferedDeletes globalPacket) throws IOException {
// Lock order IW -> BDS
synchronized (bufferedDeletesStream) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "publishFlushedSegment");
}
if (globalPacket != null && globalPacket.any()) {
bufferedDeletesStream.push(globalPacket);
}
// Publishing the segment must be synched on IW -> BDS to make sure
// that no merge prunes away the segment-private delete packet
final long nextGen;
if (packet != null && packet.any()) {
nextGen = bufferedDeletesStream.push(packet);
} else {
// Since we don't have a delete packet to apply we can get a new
// generation right away
nextGen = bufferedDeletesStream.getNextGen();
}
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "publish sets newSegment delGen=" + nextGen + " seg=" + newSegment);
}
newSegment.setBufferedDeletesGen(nextGen);
segmentInfos.add(newSegment);
checkpoint();
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
synchronized boolean useCompoundFile(SegmentInfoPerCommit segmentInfo) throws IOException {
return mergePolicy.useCompoundFile(segmentInfos, segmentInfo);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void addIndexes(Directory... dirs) throws CorruptIndexException, IOException {
ensureOpen();
noDupDirs(dirs);
try {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "flush at addIndexes(Directory...)");
}
flush(false, true);
List<SegmentInfoPerCommit> infos = new ArrayList<SegmentInfoPerCommit>();
for (Directory dir : dirs) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "addIndexes: process directory " + dir);
}
SegmentInfos sis = new SegmentInfos(); // read infos from dir
sis.read(dir);
final Set<String> dsFilesCopied = new HashSet<String>();
final Map<String, String> dsNames = new HashMap<String, String>();
final Set<String> copiedFiles = new HashSet<String>();
for (SegmentInfoPerCommit info : sis) {
assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name;
String newSegName = newSegmentName();
String dsName = info.info.name;
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "addIndexes: process segment origName=" + info.info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
}
IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.info.sizeInBytes(), true, -1));
infos.add(copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied, context, copiedFiles));
}
}
synchronized (this) {
ensureOpen();
segmentInfos.addAll(infos);
checkpoint();
}
} catch (OutOfMemoryError oom) {
handleOOM(oom, "addIndexes(Directory...)");
}
}
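// --- Illustrative usage sketch (not from the sources above): merging two existing
// indexes into a third with addIndexes(Directory...). All three paths are hypothetical,
// and the source directories should not be modified while the copy runs. The segments
// are copied as-is; no merging or delete-resolution happens at this point.
import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class AddIndexesSketch {
  public static void main(String[] args) throws Exception {
    Directory target = FSDirectory.open(new File("/tmp/merged-index"));
    Directory partA = FSDirectory.open(new File("/tmp/index-a"));
    Directory partB = FSDirectory.open(new File("/tmp/index-b"));
    IndexWriter writer = new IndexWriter(target,
        new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)));
    try {
      // Copy the segments of partA and partB into the target index.
      writer.addIndexes(partA, partB);
      writer.commit();
    } finally {
      writer.close();
    }
  }
}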
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void addIndexes(IndexReader... readers) throws CorruptIndexException, IOException {
ensureOpen();
int numDocs = 0;
try {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "flush at addIndexes(IndexReader...)");
}
flush(false, true);
String mergedName = newSegmentName();
for (IndexReader indexReader : readers) {
numDocs += indexReader.numDocs();
}
final IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1));
// TODO: somehow we should fix this merge so it's
// abortable so that IW.close(false) is able to stop it
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
SegmentInfo info = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergedName, -1,
false, codec, null, null);
SegmentMerger merger = new SegmentMerger(info, infoStream, trackingDir, config.getTermIndexInterval(),
MergeState.CheckAbort.NONE, payloadProcessorProvider,
globalFieldNumberMap, context);
for (IndexReader reader : readers) { // add new indexes
merger.add(reader);
}
MergeState mergeState = merger.merge(); // merge 'em
SegmentInfoPerCommit infoPerCommit = new SegmentInfoPerCommit(info, 0, -1L);
info.setFiles(new HashSet<String>(trackingDir.getCreatedFiles()));
trackingDir.getCreatedFiles().clear();
setDiagnostics(info, "addIndexes(IndexReader...)");
boolean useCompoundFile;
synchronized(this) { // Guard segmentInfos
if (stopMerges) {
deleter.deleteNewFiles(infoPerCommit.files());
return;
}
ensureOpen();
useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, infoPerCommit);
}
// Now create the compound file if needed
if (useCompoundFile) {
Collection<String> filesToDelete = infoPerCommit.files();
createCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, info, context);
// delete new non cfs files directly: they were never
// registered with IFD
synchronized(this) {
deleter.deleteNewFiles(filesToDelete);
}
info.setUseCompoundFile(true);
}
// Have codec write SegmentInfo. Must do this after
// creating CFS so that 1) .si isn't slurped into CFS,
// and 2) .si reflects useCompoundFile=true change
// above:
codec.segmentInfoFormat().getSegmentInfosWriter().write(trackingDir, info, mergeState.fieldInfos, context);
info.addFiles(trackingDir.getCreatedFiles());
// Register the new segment
synchronized(this) {
if (stopMerges) {
deleter.deleteNewFiles(info.files());
return;
}
ensureOpen();
segmentInfos.add(infoPerCommit);
checkpoint();
}
} catch (OutOfMemoryError oom) {
handleOOM(oom, "addIndexes(IndexReader...)");
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private SegmentInfoPerCommit copySegmentAsIs(SegmentInfoPerCommit info, String segName,
Map<String, String> dsNames, Set<String> dsFilesCopied, IOContext context,
Set<String> copiedFiles)
throws IOException {
// Determine if the doc store of this segment needs to be copied. It's
// only relevant for segments that share doc store with others,
// because the DS might have been copied already, in which case we
// just want to update the DS name of this SegmentInfo.
final String dsName = info.info.name;
assert dsName != null;
final String newDsName;
if (dsNames.containsKey(dsName)) {
newDsName = dsNames.get(dsName);
} else {
dsNames.put(dsName, segName);
newDsName = segName;
}
// note: we don't really need this fis (it's copied), but we load it up
// so we don't pass a null value to the si writer
FieldInfos fis = getFieldInfos(info.info);
final Map<String,String> attributes;
// copy the attributes map; we might modify it below.
// also we need to ensure it's read-write, since we will invoke the SI writer (which might want to set something).
if (info.info.attributes() == null) {
attributes = new HashMap<String,String>();
} else {
attributes = new HashMap<String,String>(info.info.attributes());
}
//System.out.println("copy seg=" + info.info.name + " version=" + info.info.getVersion());
// Same SI as before but we change directory, name and docStoreSegment:
SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(),
info.info.getUseCompoundFile(),
info.info.getCodec(), info.info.getDiagnostics(), attributes);
SegmentInfoPerCommit newInfoPerCommit = new SegmentInfoPerCommit(newInfo, info.getDelCount(), info.getDelGen());
Set<String> segFiles = new HashSet<String>();
// Build up new segment's file names. Must do this
// before writing SegmentInfo:
for (String file: info.files()) {
final String newFileName;
newFileName = segName + IndexFileNames.stripSegmentName(file);
segFiles.add(newFileName);
}
newInfo.setFiles(segFiles);
// We must rewrite the SI file because it references
// segment name (its own name, if it's 3.x, and doc
// store segment name):
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
try {
newInfo.getCodec().segmentInfoFormat().getSegmentInfosWriter().write(trackingDir, newInfo, fis, context);
} catch (UnsupportedOperationException uoe) {
// OK: 3x codec cannot write a new SI file;
// SegmentInfos will write this on commit
}
final Collection<String> siFiles = trackingDir.getCreatedFiles();
// Copy the segment's files
for (String file: info.files()) {
final String newFileName = segName + IndexFileNames.stripSegmentName(file);
if (siFiles.contains(newFileName)) {
// We already rewrote this above
continue;
}
assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists; siFiles=" + siFiles;
assert !copiedFiles.contains(file): "file \"" + file + "\" is being copied more than once";
copiedFiles.add(file);
info.info.dir.copy(directory, file, newFileName, context);
}
return newInfoPerCommit;
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
protected void doAfterFlush() throws IOException {}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
protected void doBeforeFlush() throws IOException {}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public final void prepareCommit() throws CorruptIndexException, IOException {
ensureOpen();
prepareCommit(null);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public final void prepareCommit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
ensureOpen(false);
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "prepareCommit: flush");
infoStream.message("IW", " index before flush " + segString());
}
if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
}
if (pendingCommit != null) {
throw new IllegalStateException("prepareCommit was already called with no corresponding call to commit");
}
doBeforeFlush();
assert testPoint("startDoFlush");
SegmentInfos toCommit = null;
boolean anySegmentsFlushed = false;
// This is copied from doFlush, except it's modified to
// clone & incRef the flushed SegmentInfos inside the
// sync block:
try {
synchronized (fullFlushLock) {
boolean flushSuccess = false;
boolean success = false;
try {
anySegmentsFlushed = docWriter.flushAllThreads();
if (!anySegmentsFlushed) {
// prevent double increment since docWriter#doFlush increments the flushcount
// if we flushed anything.
flushCount.incrementAndGet();
}
flushSuccess = true;
synchronized(this) {
maybeApplyDeletes(true);
readerPool.commit(segmentInfos);
// Must clone the segmentInfos while we still
// hold fullFlushLock and while sync'd so that
// no partial changes (eg a delete w/o
// corresponding add from an updateDocument) can
// sneak into the commit point:
toCommit = segmentInfos.clone();
pendingCommitChangeCount = changeCount;
// This protects the segmentInfos we are now going
// to commit. This is important in case, eg, while
// we are trying to sync all referenced files, a
// merge completes which would otherwise have
// removed the files we are now syncing.
filesToCommit = toCommit.files(directory, false);
deleter.incRef(filesToCommit);
}
success = true;
} finally {
if (!success) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit exception during prepareCommit");
}
}
// Done: finish the full flush!
docWriter.finishFullFlush(flushSuccess);
doAfterFlush();
}
}
} catch (OutOfMemoryError oom) {
handleOOM(oom, "prepareCommit");
}
boolean success = false;
try {
if (anySegmentsFlushed) {
maybeMerge();
}
success = true;
} finally {
if (!success) {
synchronized (this) {
deleter.decRef(filesToCommit);
filesToCommit = null;
}
}
}
startCommit(toCommit, commitUserData);
}
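// --- Illustrative usage sketch (not from the sources above): two-phase commit using
// the prepareCommit(Map)/commit() overloads shown above, with per-commit user data.
// Useful when the index commit must be coordinated with another resource (e.g. a
// database transaction); the key/value and the coordination step are hypothetical.
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class TwoPhaseCommitSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)));
    Map<String,String> userData = new HashMap<String,String>();
    userData.put("lastSeenId", "12345"); // hypothetical application bookkeeping
    try {
      // Phase 1: flush, apply deletes, sync the new files and write a pending
      // segments file, but do not make it visible yet.
      writer.prepareCommit(userData);
      // ... commit the coordinated resource here; if that fails, rollback() below ...
      // Phase 2: finish the prepared commit, making it visible to new readers.
      writer.commit();
    } catch (Exception e) {
      writer.rollback(); // discards the pending commit and closes the writer
      throw e;
    }
    writer.close();
  }
}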
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public final void commit() throws CorruptIndexException, IOException {
commit(null);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public final void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
ensureOpen();
commitInternal(commitUserData);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private final void commitInternal(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commit: start");
}
synchronized(commitLock) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commit: enter lock");
}
if (pendingCommit == null) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commit: now prepare");
}
prepareCommit(commitUserData);
} else {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commit: already prepared");
}
}
finishCommit();
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private synchronized final void finishCommit() throws CorruptIndexException, IOException {
if (pendingCommit != null) {
try {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commit: pendingCommit != null");
}
pendingCommit.finishCommit(directory);
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commit: wrote segments file \"" + pendingCommit.getSegmentsFileName() + "\"");
}
lastCommitChangeCount = pendingCommitChangeCount;
segmentInfos.updateGeneration(pendingCommit);
segmentInfos.setUserData(pendingCommit.getUserData());
rollbackSegments = pendingCommit.createBackupSegmentInfos();
deleter.checkpoint(pendingCommit, true);
} finally {
// Matches the incRef done in prepareCommit:
deleter.decRef(filesToCommit);
filesToCommit = null;
pendingCommit = null;
notifyAll();
}
} else {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commit: pendingCommit == null; skip");
}
}
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commit: done");
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws CorruptIndexException, IOException {
// NOTE: this method cannot be sync'd because
// maybeMerge() in turn calls mergeScheduler.merge which
// in turn can take a long time to run and we don't want
// to hold the lock for that. In the case of
// ConcurrentMergeScheduler this can lead to deadlock
// when it stalls due to too many running merges.
// We can be called during close, when closing==true, so we must pass false to ensureOpen:
ensureOpen(false);
if (doFlush(applyAllDeletes) && triggerMerge) {
maybeMerge();
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private boolean doFlush(boolean applyAllDeletes) throws CorruptIndexException, IOException {
if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush");
}
doBeforeFlush();
assert testPoint("startDoFlush");
boolean success = false;
try {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", " start flush: applyAllDeletes=" + applyAllDeletes);
infoStream.message("IW", " index before flush " + segString());
}
final boolean anySegmentFlushed;
synchronized (fullFlushLock) {
boolean flushSuccess = false;
try {
anySegmentFlushed = docWriter.flushAllThreads();
flushSuccess = true;
} finally {
docWriter.finishFullFlush(flushSuccess);
}
}
synchronized(this) {
maybeApplyDeletes(applyAllDeletes);
doAfterFlush();
if (!anySegmentFlushed) {
// if any segment flushed, flushAllThreads already incremented flushCount
flushCount.incrementAndGet();
}
success = true;
return anySegmentFlushed;
}
} catch (OutOfMemoryError oom) {
handleOOM(oom, "doFlush");
// never hit
return false;
} finally {
if (!success) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit exception during flush");
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
final synchronized void maybeApplyDeletes(boolean applyAllDeletes) throws IOException {
if (applyAllDeletes) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "apply all deletes during flush");
}
applyAllDeletes();
} else if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed());
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
final synchronized void applyAllDeletes() throws IOException {
flushDeletesCount.incrementAndGet();
final BufferedDeletesStream.ApplyDeletesResult result;
result = bufferedDeletesStream.applyDeletes(readerPool, segmentInfos.asList());
if (result.anyDeletes) {
checkpoint();
}
if (!keepFullyDeletedSegments && result.allDeleted != null) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "drop 100% deleted segments: " + segString(result.allDeleted));
}
for (SegmentInfoPerCommit info : result.allDeleted) {
// If a merge has already registered for this
// segment, we leave it in the readerPool; the
// merge will skip merging it and will then drop
// it once it's done:
if (!mergingSegments.contains(info)) {
segmentInfos.remove(info);
readerPool.drop(info);
}
}
checkpoint();
}
bufferedDeletesStream.prune(segmentInfos);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private synchronized void ensureValidMerge(MergePolicy.OneMerge merge) throws IOException {
for(SegmentInfoPerCommit info : merge.segments) {
if (!segmentInfos.contains(info)) {
throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.info.name + ") that is not in the current index " + segString(), directory);
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
synchronized private ReadersAndLiveDocs commitMergedDeletes(MergePolicy.OneMerge merge) throws IOException {
assert testPoint("startCommitMergeDeletes");
final List<SegmentInfoPerCommit> sourceSegments = merge.segments;
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commitMergeDeletes " + segString(merge.segments));
}
// Carefully merge deletes that occurred after we
// started merging:
int docUpto = 0;
long minGen = Long.MAX_VALUE;
// Lazy init (only when we find a delete to carry over):
ReadersAndLiveDocs mergedDeletes = null;
for(int i=0; i < sourceSegments.size(); i++) {
SegmentInfoPerCommit info = sourceSegments.get(i);
minGen = Math.min(info.getBufferedDeletesGen(), minGen);
final int docCount = info.info.getDocCount();
final Bits prevLiveDocs = merge.readerLiveDocs.get(i);
final Bits currentLiveDocs;
final ReadersAndLiveDocs rld = readerPool.get(info, false);
// We hold a ref so it should still be in the pool:
assert rld != null: "seg=" + info.info.name;
currentLiveDocs = rld.getLiveDocs();
if (prevLiveDocs != null) {
// If we had deletions on starting the merge we must
// still have deletions now:
assert currentLiveDocs != null;
assert prevLiveDocs.length() == docCount;
assert currentLiveDocs.length() == docCount;
// There were deletes on this segment when the merge
// started. The merge has collapsed away those
// deletes, but, if new deletes were flushed since
// the merge started, we must now carefully keep any
// newly flushed deletes but mapping them to the new
// docIDs.
// Since we copy-on-write, if any new deletes were
// applied after merging has started, we can just
// check if the before/after liveDocs have changed.
// If so, we must carefully merge the liveDocs one
// doc at a time:
if (currentLiveDocs != prevLiveDocs) {
// This means this segment received new deletes
// since we started the merge, so we
// must merge them:
for(int j=0;j<docCount;j++) {
if (!prevLiveDocs.get(j)) {
assert !currentLiveDocs.get(j);
} else {
if (!currentLiveDocs.get(j)) {
if (mergedDeletes == null) {
mergedDeletes = readerPool.get(merge.info, true);
mergedDeletes.initWritableLiveDocs();
}
mergedDeletes.delete(docUpto);
}
docUpto++;
}
}
} else {
docUpto += info.info.getDocCount() - info.getDelCount() - rld.getPendingDeleteCount();
}
} else if (currentLiveDocs != null) {
assert currentLiveDocs.length() == docCount;
// This segment had no deletes before but now it
// does:
for(int j=0; j<docCount; j++) {
if (!currentLiveDocs.get(j)) {
if (mergedDeletes == null) {
mergedDeletes = readerPool.get(merge.info, true);
mergedDeletes.initWritableLiveDocs();
}
mergedDeletes.delete(docUpto);
}
docUpto++;
}
} else {
// No deletes before or after
docUpto += info.info.getDocCount();
}
}
assert docUpto == merge.info.info.getDocCount();
if (infoStream.isEnabled("IW")) {
if (mergedDeletes == null) {
infoStream.message("IW", "no new deletes since merge started");
} else {
infoStream.message("IW", mergedDeletes.getPendingDeleteCount() + " new deletes since merge started");
}
}
// If new deletes were applied while we were merging
// (which happens if eg commit() or getReader() is
// called during our merge), then it better be the case
// that the delGen has increased for all our merged
// segments:
assert mergedDeletes == null || minGen > merge.info.getBufferedDeletesGen();
merge.info.setBufferedDeletesGen(minGen);
return mergedDeletes;
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
synchronized private boolean commitMerge(MergePolicy.OneMerge merge) throws IOException {
assert testPoint("startCommitMerge");
if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot complete merge");
}
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commitMerge: " + segString(merge.segments) + " index=" + segString());
}
assert merge.registerDone;
// If merge was explicitly aborted, or, if rollback() or
// rollbackTransaction() had been called since our merge
// started (which results in an unqualified
// deleter.refresh() call that will remove any index
// file that current segments does not reference), we
// abort this merge
if (merge.isAborted()) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "commitMerge: skip: it was aborted");
}
return false;
}
final ReadersAndLiveDocs mergedDeletes = merge.info.info.getDocCount() == 0 ? null : commitMergedDeletes(merge);
assert mergedDeletes == null || mergedDeletes.getPendingDeleteCount() != 0;
// If the doc store we are using has been closed and
// is now in compound format (but wasn't when we
// started), then we will switch to the compound
// format as well:
assert !segmentInfos.contains(merge.info);
final boolean allDeleted = merge.segments.size() == 0 ||
merge.info.info.getDocCount() == 0 ||
(mergedDeletes != null &&
mergedDeletes.getPendingDeleteCount() == merge.info.info.getDocCount());
if (infoStream.isEnabled("IW")) {
if (allDeleted) {
infoStream.message("IW", "merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert"));
}
}
final boolean dropSegment = allDeleted && !keepFullyDeletedSegments;
// If we merged no segments then we better be dropping
// the new segment:
assert merge.segments.size() > 0 || dropSegment;
assert merge.info.info.getDocCount() != 0 || keepFullyDeletedSegments || dropSegment;
segmentInfos.applyMergeChanges(merge, dropSegment);
if (mergedDeletes != null) {
if (dropSegment) {
mergedDeletes.dropChanges();
}
readerPool.release(mergedDeletes);
if (dropSegment) {
readerPool.drop(mergedDeletes.info);
}
}
// Must close before checkpoint, otherwise IFD won't be
// able to delete the held-open files from the merge
// readers:
closeMergeReaders(merge, false);
// Must note the change to segmentInfos so any commits
// in-flight don't lose it:
checkpoint();
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "after commit: " + segString());
}
if (merge.maxNumSegments != -1 && !dropSegment) {
// cascade the forceMerge:
if (!segmentsToMerge.containsKey(merge.info)) {
segmentsToMerge.put(merge.info, Boolean.FALSE);
}
}
return true;
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "handleMergeException: merge=" + segString(merge.segments) + " exc=" + t);
}
// Set the exception on the merge, so if
// forceMerge is waiting on us it sees the root
// cause exception:
merge.setException(t);
addMergeException(merge);
if (t instanceof MergePolicy.MergeAbortedException) {
// We can ignore this exception (it happens when
// close(false) or rollback is called), unless the
// merge involves segments from external directories,
// in which case we must throw it so, for example, the
// rollbackTransaction code in addIndexes* is
// executed.
if (merge.isExternal)
throw (MergePolicy.MergeAbortedException) t;
} else if (t instanceof IOException)
throw (IOException) t;
else if (t instanceof RuntimeException)
throw (RuntimeException) t;
else if (t instanceof Error)
throw (Error) t;
else
// Should not get here
throw new RuntimeException(t);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public void merge(MergePolicy.OneMerge merge)
throws CorruptIndexException, IOException {
boolean success = false;
final long t0 = System.currentTimeMillis();
try {
try {
try {
mergeInit(merge);
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "now merge\n merge=" + segString(merge.segments) + "\n index=" + segString());
}
mergeMiddle(merge);
mergeSuccess(merge);
success = true;
} catch (Throwable t) {
handleMergeException(t, merge);
}
} finally {
synchronized(this) {
mergeFinish(merge);
if (!success) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit exception during merge");
}
if (merge.info != null && !segmentInfos.contains(merge.info)) {
deleter.refresh(merge.info.info.name);
}
}
// This merge (and, generally, any change to the
// segments) may now enable new merges, so we call
// merge policy & update pending merges.
if (success && !merge.isAborted() && (merge.maxNumSegments != -1 || (!closed && !closing))) {
updatePendingMerges(merge.maxNumSegments);
}
}
}
} catch (OutOfMemoryError oom) {
handleOOM(oom, "merge");
}
if (merge.info != null && !merge.isAborted()) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "merge time " + (System.currentTimeMillis()-t0) + " msec for " + merge.info.info.getDocCount() + " docs");
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
final synchronized boolean registerMerge(MergePolicy.OneMerge merge) throws MergePolicy.MergeAbortedException, IOException {
if (merge.registerDone) {
return true;
}
assert merge.segments.size() > 0;
if (stopMerges) {
merge.abort();
throw new MergePolicy.MergeAbortedException("merge is aborted: " + segString(merge.segments));
}
boolean isExternal = false;
for(SegmentInfoPerCommit info : merge.segments) {
if (mergingSegments.contains(info)) {
return false;
}
if (!segmentInfos.contains(info)) {
return false;
}
if (info.info.dir != directory) {
isExternal = true;
}
if (segmentsToMerge.containsKey(info)) {
merge.maxNumSegments = mergeMaxNumSegments;
}
}
ensureValidMerge(merge);
pendingMerges.add(merge);
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "add merge to pendingMerges: " + segString(merge.segments) + " [total " + pendingMerges.size() + " pending]");
}
merge.mergeGen = mergeGen;
merge.isExternal = isExternal;
// OK it does not conflict; now record that this merge
// is running (while synchronized) to avoid a race
// condition where two conflicting merges from different
// threads start
if (infoStream.isEnabled("IW")) {
StringBuilder builder = new StringBuilder("registerMerge merging= [");
for (SegmentInfoPerCommit info : mergingSegments) {
builder.append(info.info.name).append(", ");
}
builder.append("]");
// don't call mergingSegments.toString() here: it could lead to a ConcurrentModificationException,
// since a merge updates the segments' FieldInfos
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", builder.toString());
}
}
for(SegmentInfoPerCommit info : merge.segments) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "registerMerge info=" + info);
}
mergingSegments.add(info);
}
// Merge is now registered
merge.registerDone = true;
return true;
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
final synchronized void mergeInit(MergePolicy.OneMerge merge) throws IOException {
boolean success = false;
try {
_mergeInit(merge);
success = true;
} finally {
if (!success) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit exception in mergeInit");
}
mergeFinish(merge);
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
synchronized private void _mergeInit(MergePolicy.OneMerge merge) throws IOException {
assert testPoint("startMergeInit");
assert merge.registerDone;
assert merge.maxNumSegments == -1 || merge.maxNumSegments > 0;
if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot merge");
}
if (merge.info != null) {
// mergeInit already done
return;
}
if (merge.isAborted()) {
return;
}
// TODO: in the non-pool'd case this is somewhat
// wasteful, because we open these readers, close them,
// and then open them again for merging. Maybe we
// could pre-pool them somehow in that case...
// Lock order: IW -> BD
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
if (result.anyDeletes) {
checkpoint();
}
if (!keepFullyDeletedSegments && result.allDeleted != null) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "drop 100% deleted segments: " + result.allDeleted);
}
for(SegmentInfoPerCommit info : result.allDeleted) {
segmentInfos.remove(info);
if (merge.segments.contains(info)) {
mergingSegments.remove(info);
merge.segments.remove(info);
}
readerPool.drop(info);
}
checkpoint();
}
Map<String,String> details = new HashMap<String,String>();
details.put("mergeMaxNumSegments", ""+merge.maxNumSegments);
details.put("mergeFactor", Integer.toString(merge.segments.size()));
// Bind a new segment name here so even with
// ConcurrentMergePolicy we keep deterministic segment
// names.
final String mergeSegmentName = newSegmentName();
SegmentInfo si = new SegmentInfo(directory, Constants.LUCENE_MAIN_VERSION, mergeSegmentName, -1, false, codec, details, null);
merge.info = new SegmentInfoPerCommit(si, 0, -1L);
// Lock order: IW -> BD
bufferedDeletesStream.prune(segmentInfos);
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "merge seg=" + merge.info.info.name);
}
assert merge.estimatedMergeBytes == 0;
for(SegmentInfoPerCommit info : merge.segments) {
if (info.info.getDocCount() > 0) {
final int delCount = numDeletedDocs(info);
assert delCount <= info.info.getDocCount();
final double delRatio = ((double) delCount)/info.info.getDocCount();
merge.estimatedMergeBytes += info.info.sizeInBytes() * (1.0 - delRatio);
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
final synchronized void mergeFinish(MergePolicy.OneMerge merge) throws IOException {
// forceMerge, addIndexes or finishMerges may be waiting
// on merges to finish.
notifyAll();
// It's possible we are called twice, eg if there was an
// exception inside mergeInit
if (merge.registerDone) {
final List<SegmentInfoPerCommit> sourceSegments = merge.segments;
for(SegmentInfoPerCommit info : sourceSegments) {
mergingSegments.remove(info);
}
merge.registerDone = false;
}
runningMerges.remove(merge);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private final synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean suppressExceptions) throws IOException {
final int numSegments = merge.readers.size();
Throwable th = null;
boolean drop = !suppressExceptions;
for (int i = 0; i < numSegments; i++) {
final SegmentReader sr = merge.readers.get(i);
if (sr != null) {
try {
final ReadersAndLiveDocs rld = readerPool.get(sr.getSegmentInfo(), false);
// We still hold a ref so it should not have been removed:
assert rld != null;
if (drop) {
rld.dropChanges();
}
rld.release(sr);
readerPool.release(rld);
if (drop) {
readerPool.drop(rld.info);
}
} catch (Throwable t) {
if (th == null) {
th = t;
}
}
merge.readers.set(i, null);
}
}
// If any error occurred, throw it.
if (!suppressExceptions && th != null) {
if (th instanceof IOException) throw (IOException) th;
if (th instanceof RuntimeException) throw (RuntimeException) th;
if (th instanceof Error) throw (Error) th;
throw new RuntimeException(th);
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private int mergeMiddle(MergePolicy.OneMerge merge)
throws CorruptIndexException, IOException {
merge.checkAborted(directory);
final String mergedName = merge.info.info.name;
List<SegmentInfoPerCommit> sourceSegments = merge.segments;
IOContext context = new IOContext(merge.getMergeInfo());
final MergeState.CheckAbort checkAbort = new MergeState.CheckAbort(merge, directory);
final TrackingDirectoryWrapper dirWrapper = new TrackingDirectoryWrapper(directory);
SegmentMerger merger = new SegmentMerger(merge.info.info, infoStream, dirWrapper, config.getTermIndexInterval(), checkAbort,
payloadProcessorProvider, globalFieldNumberMap, context);
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "merging " + segString(merge.segments));
}
merge.readers = new ArrayList<SegmentReader>();
merge.readerLiveDocs = new ArrayList<Bits>();
// This is try/finally to make sure merger's readers are
// closed:
boolean success = false;
try {
int segUpto = 0;
while(segUpto < sourceSegments.size()) {
final SegmentInfoPerCommit info = sourceSegments.get(segUpto);
// Hold onto the "live" reader; we will use this to
// commit merged deletes
final ReadersAndLiveDocs rld = readerPool.get(info, true);
final SegmentReader reader = rld.getMergeReader(context);
assert reader != null;
// Carefully pull the most recent live docs:
final Bits liveDocs;
final int delCount;
synchronized(this) {
// Must sync to ensure BufferedDeletesStream
// cannot change liveDocs/pendingDeleteCount while
// we pull a copy:
liveDocs = rld.getReadOnlyLiveDocs();
delCount = rld.getPendingDeleteCount() + info.getDelCount();
assert rld.verifyDocCounts();
if (infoStream.isEnabled("IW")) {
if (rld.getPendingDeleteCount() != 0) {
infoStream.message("IW", "seg=" + info + " delCount=" + info.getDelCount() + " pendingDelCount=" + rld.getPendingDeleteCount());
} else if (info.getDelCount() != 0) {
infoStream.message("IW", "seg=" + info + " delCount=" + info.getDelCount());
} else {
infoStream.message("IW", "seg=" + info + " no deletes");
}
}
}
merge.readerLiveDocs.add(liveDocs);
merge.readers.add(reader);
assert delCount <= info.info.getDocCount(): "delCount=" + delCount + " info.docCount=" + info.info.getDocCount() + " rld.pendingDeleteCount=" + rld.getPendingDeleteCount() + " info.getDelCount()=" + info.getDelCount();
if (delCount < info.info.getDocCount()) {
merger.add(reader, liveDocs, delCount);
}
segUpto++;
}
merge.checkAborted(directory);
// This is where all the work happens:
MergeState mergeState = merger.merge();
assert mergeState.segmentInfo == merge.info.info;
merge.info.info.setFiles(new HashSet<String>(dirWrapper.getCreatedFiles()));
// Record which codec was used to write the segment
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "merge codec=" + codec + " docCount=" + merge.info.info.getDocCount() + "; merged segment has " +
(mergeState.fieldInfos.hasVectors() ? "vectors" : "no vectors") + "; " +
(mergeState.fieldInfos.hasNorms() ? "norms" : "no norms") + "; " +
(mergeState.fieldInfos.hasDocValues() ? "docValues" : "no docValues") + "; " +
(mergeState.fieldInfos.hasProx() ? "prox" : "no prox") + "; " +
(mergeState.fieldInfos.hasProx() ? "freqs" : "no freqs"));
}
// Very important to do this before opening the reader
// because codec must know if prox was written for
// this segment:
//System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name);
boolean useCompoundFile;
synchronized (this) { // Guard segmentInfos
useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info);
}
if (useCompoundFile) {
success = false;
Collection<String> filesToRemove = merge.info.files();
try {
filesToRemove = createCompoundFile(infoStream, directory, checkAbort, merge.info.info, context);
success = true;
} catch (IOException ioe) {
synchronized(this) {
if (merge.isAborted()) {
// This can happen if rollback or close(false)
// is called -- fall through to logic below to
// remove the partially created CFS:
} else {
handleMergeException(ioe, merge);
}
}
} catch (Throwable t) {
handleMergeException(t, merge);
} finally {
if (!success) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit exception creating compound file during merge");
}
synchronized(this) {
deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
deleter.deleteNewFiles(merge.info.files());
}
}
}
// So that, if we hit exc in deleteNewFiles (next)
// or in commitMerge (later), we close the
// per-segment readers in the finally clause below:
success = false;
synchronized(this) {
// delete new non cfs files directly: they were never
// registered with IFD
deleter.deleteNewFiles(filesToRemove);
if (merge.isAborted()) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "abort merge after building CFS");
}
deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
return 0;
}
}
merge.info.info.setUseCompoundFile(true);
} else {
// So that, if we hit exc in commitMerge (later),
// we close the per-segment readers in the finally
// clause below:
success = false;
}
// Have codec write SegmentInfo. Must do this after
// creating CFS so that 1) .si isn't slurped into CFS,
// and 2) .si reflects useCompoundFile=true change
// above:
boolean success2 = false;
try {
codec.segmentInfoFormat().getSegmentInfosWriter().write(directory, merge.info.info, mergeState.fieldInfos, context);
success2 = true;
} finally {
if (!success2) {
synchronized(this) {
deleter.deleteNewFiles(merge.info.files());
}
}
}
// TODO: ideally we would freeze merge.info here!!
// because any changes after writing the .si will be
// lost...
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", String.format("merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.));
}
final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();
if (poolReaders && mergedSegmentWarmer != null) {
final ReadersAndLiveDocs rld = readerPool.get(merge.info, true);
final SegmentReader sr = rld.getReader(IOContext.READ);
try {
mergedSegmentWarmer.warm(sr);
} finally {
synchronized(this) {
rld.release(sr);
readerPool.release(rld);
}
}
}
// Force READ context because we merge deletes onto
// this reader:
if (!commitMerge(merge)) {
// commitMerge will return false if this merge was aborted
return 0;
}
success = true;
} finally {
// Readers are already closed in commitMerge if we didn't hit
// an exc:
if (!success) {
closeMergeReaders(merge, true);
}
}
return merge.info.info.getDocCount();
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public synchronized String segString() throws IOException {
return segString(segmentInfos);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public synchronized String segString(Iterable<SegmentInfoPerCommit> infos) throws IOException {
final StringBuilder buffer = new StringBuilder();
for(final SegmentInfoPerCommit info : infos) {
if (buffer.length() > 0) {
buffer.append(' ');
}
buffer.append(segString(info));
}
return buffer.toString();
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public synchronized String segString(SegmentInfoPerCommit info) throws IOException {
return info.toString(info.info.dir, numDeletedDocs(info) - info.getDelCount());
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private boolean filesExist(SegmentInfos toSync) throws IOException {
Collection<String> files = toSync.files(directory, false);
for(final String fileName: files) {
assert directory.fileExists(fileName): "file " + fileName + " does not exist";
// If this trips it means we are missing a call to
// .checkpoint somewhere, because by the time we
// are called, deleter should know about every
// file referenced by the current head
// segmentInfos:
assert deleter.exists(fileName): "IndexFileDeleter doesn't know about file " + fileName;
}
return true;
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
private void startCommit(final SegmentInfos toSync, final Map<String,String> commitUserData) throws IOException {
assert testPoint("startStartCommit");
assert pendingCommit == null;
if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit");
}
try {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "startCommit(): start");
}
synchronized(this) {
assert lastCommitChangeCount <= changeCount;
if (pendingCommitChangeCount == lastCommitChangeCount) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", " skip startCommit(): no changes pending");
}
deleter.decRef(filesToCommit);
filesToCommit = null;
return;
}
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "startCommit index=" + segString(toLiveInfos(toSync)) + " changeCount=" + changeCount);
}
assert filesExist(toSync);
if (commitUserData != null) {
toSync.setUserData(commitUserData);
}
}
assert testPoint("midStartCommit");
boolean pendingCommitSet = false;
try {
assert testPoint("midStartCommit2");
synchronized(this) {
assert pendingCommit == null;
assert segmentInfos.getGeneration() == toSync.getGeneration();
// Exception here means nothing is prepared
// (this method unwinds everything it did on
// an exception)
toSync.prepareCommit(directory);
//System.out.println("DONE prepareCommit");
pendingCommitSet = true;
pendingCommit = toSync;
}
// This call can take a long time -- 10s of seconds
// or more. We do it without sync:
boolean success = false;
final Collection<String> filesToSync = toSync.files(directory, false);
try {
directory.sync(filesToSync);
success = true;
} finally {
if (!success) {
pendingCommitSet = false;
pendingCommit = null;
toSync.rollbackCommit(directory);
}
}
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "done all syncs: " + filesToSync);
}
assert testPoint("midStartCommitSuccess");
} finally {
synchronized(this) {
// Have our master segmentInfos record the
// generations we just prepared. We do this
// on error or success so we don't
// double-write a segments_N file.
segmentInfos.updateGeneration(toSync);
if (!pendingCommitSet) {
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "hit exception committing segments file");
}
// Hit exception
deleter.decRef(filesToCommit);
filesToCommit = null;
}
}
}
} catch (OutOfMemoryError oom) {
handleOOM(oom, "startCommit");
}
assert testPoint("finishStartCommit");
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public static boolean isLocked(Directory directory) throws IOException {
return directory.makeLock(WRITE_LOCK_NAME).isLocked();
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public static void unlock(Directory directory) throws IOException {
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
}
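// --- Illustrative usage sketch (not from the sources above): clearing a stale write
// lock left behind by a crashed process before opening a new IndexWriter. This is only
// safe when no other IndexWriter can actually be open on the directory; the path is
// hypothetical.
import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class UnlockSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("/tmp/example-index"));
    if (IndexWriter.isLocked(dir)) {
      // Forcibly release the write lock; dangerous if another writer is live.
      IndexWriter.unlock(dir);
    }
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)));
    writer.close();
  }
}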
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
public synchronized void deleteUnusedFiles() throws IOException {
ensureOpen(false);
deleter.deletePendingFiles();
deleter.revisitPolicy();
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
synchronized void deletePendingFiles() throws IOException {
deleter.deletePendingFiles();
}
// in lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
static final Collection<String> createCompoundFile(InfoStream infoStream, Directory directory, CheckAbort checkAbort, final SegmentInfo info, IOContext context)
throws IOException {
final String fileName = IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", "create compound file " + fileName);
}
// Now merge all added files
Collection<String> files = info.files();
CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, fileName, context, true);
IOException prior = null;
try {
for (String file : files) {
directory.copy(cfsDir, file, file, context);
checkAbort.work(directory.fileLength(file));
}
} catch(IOException ex) {
prior = ex;
} finally {
IOUtils.closeWhileHandlingException(prior, cfsDir);
}
// Replace all previous files with the CFS/CFE files:
Set<String> siFiles = new HashSet<String>();
siFiles.add(fileName);
siFiles.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
info.setFiles(siFiles);
return files;
}
// in lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
public TermsEnum getOrdTermsEnum(AtomicReader reader) throws IOException {
if (indexedTermsArray == null) {
//System.out.println("GET normal enum");
final Fields fields = reader.fields();
if (fields == null) {
return null;
}
final Terms terms = fields.terms(field);
if (terms == null) {
return null;
} else {
return terms.iterator(null);
}
} else {
//System.out.println("GET wrapped enum ordBase=" + ordBase);
return new OrdWrappedTermsEnum(reader);
}
}
// in lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
protected void visitTerm(TermsEnum te, int termNum) throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
protected void setActualDocFreq(int termNum, int df) throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
protected void uninvert(final AtomicReader reader, final BytesRef termPrefix) throws IOException {
//System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
final long startTime = System.currentTimeMillis();
prefix = termPrefix == null ? null : BytesRef.deepCopyOf(termPrefix);
final int maxDoc = reader.maxDoc();
final int[] index = new int[maxDoc]; // immediate term numbers, or the index into the byte[] representing the last number
final int[] lastTerm = new int[maxDoc]; // last term we saw for this document
final byte[][] bytes = new byte[maxDoc][]; // list of term numbers for the doc (delta encoded vInts)
final Fields fields = reader.fields();
if (fields == null) {
// No terms
return;
}
final Terms terms = fields.terms(field);
if (terms == null) {
// No terms
return;
}
final TermsEnum te = terms.iterator(null);
final BytesRef seekStart = termPrefix != null ? termPrefix : new BytesRef();
//System.out.println("seekStart=" + seekStart.utf8ToString());
if (te.seekCeil(seekStart) == TermsEnum.SeekStatus.END) {
// No terms match
return;
}
// If we need our "term index wrapper", these will be
// init'd below:
List<BytesRef> indexedTerms = null;
PagedBytes indexedTermsBytes = null;
boolean testedOrd = false;
final Bits liveDocs = reader.getLiveDocs();
// we need a minimum of 9 bytes, but round up to 12 since the space would
// be wasted with most allocators anyway.
byte[] tempArr = new byte[12];
//
// enumerate all terms, and build an intermediate form of the un-inverted field.
//
// During this intermediate form, every document has a (potential) byte[]
// and the int[maxDoc()] array either contains the termNumber list directly
// or the *end* offset of the termNumber list in its byte array (for faster
// appending and faster creation of the final form).
//
// idea... if things are too large while building, we could do a range of docs
// at a time (but it would be a fair amount slower to build)
// could also do ranges in parallel to take advantage of multiple CPUs
// OPTIONAL: remap the largest df terms to the lowest 128 (single byte)
// values. This requires going over the field first to find the most
// frequent terms ahead of time.
int termNum = 0;
docsEnum = null;
// Loop begins with te positioned to first term (we call
// seek above):
for (;;) {
final BytesRef t = te.term();
if (t == null || (termPrefix != null && !StringHelper.startsWith(t, termPrefix))) {
break;
}
//System.out.println("visit term=" + t.utf8ToString() + " " + t + " termNum=" + termNum);
if (!testedOrd) {
try {
ordBase = (int) te.ord();
//System.out.println("got ordBase=" + ordBase);
} catch (UnsupportedOperationException uoe) {
// Reader cannot provide ord support, so we wrap
// our own support by creating our own terms index:
indexedTerms = new ArrayList<BytesRef>();
indexedTermsBytes = new PagedBytes(15);
//System.out.println("NO ORDS");
}
testedOrd = true;
}
visitTerm(te, termNum);
if (indexedTerms != null && (termNum & indexIntervalMask) == 0) {
// Index this term
sizeOfIndexedStrings += t.length;
BytesRef indexedTerm = new BytesRef();
indexedTermsBytes.copy(t, indexedTerm);
// TODO: really should 1) strip off useless suffix,
// and 2) use FST not array/PagedBytes
indexedTerms.add(indexedTerm);
}
final int df = te.docFreq();
if (df <= maxTermDocFreq) {
docsEnum = te.docs(liveDocs, docsEnum, false);
// dF, but takes deletions into account
int actualDF = 0;
for (;;) {
int doc = docsEnum.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
//System.out.println(" chunk=" + chunk + " docs");
actualDF ++;
termInstances++;
//System.out.println(" docID=" + doc);
// add TNUM_OFFSET to the term number to make room for special reserved values:
// 0 (end term) and 1 (index into byte array follows)
int delta = termNum - lastTerm[doc] + TNUM_OFFSET;
lastTerm[doc] = termNum;
int val = index[doc];
if ((val & 0xff)==1) {
// index into byte array (actually the end of
// the doc-specific byte[] when building)
int pos = val >>> 8;
int ilen = vIntSize(delta);
byte[] arr = bytes[doc];
int newend = pos+ilen;
if (newend > arr.length) {
// We avoid a doubling strategy to lower memory usage.
// this faceting method isn't for docs with many terms.
// In hotspot, objects have 2 words of overhead, then fields, rounded up to a 64-bit boundary.
// TODO: figure out what array lengths we can round up to w/o actually using more memory
// (how much space does a byte[] take up? Is data preceded by a 32 bit length only?)
// It should be safe to round up to the nearest 32 bits in any case.
int newLen = (newend + 3) & 0xfffffffc; // 4 byte alignment
byte[] newarr = new byte[newLen];
System.arraycopy(arr, 0, newarr, 0, pos);
arr = newarr;
bytes[doc] = newarr;
}
pos = writeInt(delta, arr, pos);
index[doc] = (pos<<8) | 1; // update pointer to end index in byte[]
} else {
// OK, this int has data in it... find the end (a zero starting byte - not
// part of another number, hence not following a byte with the high bit set).
int ipos;
if (val==0) {
ipos=0;
} else if ((val & 0x0000ff80)==0) {
ipos=1;
} else if ((val & 0x00ff8000)==0) {
ipos=2;
} else if ((val & 0xff800000)==0) {
ipos=3;
} else {
ipos=4;
}
//System.out.println(" ipos=" + ipos);
int endPos = writeInt(delta, tempArr, ipos);
//System.out.println(" endpos=" + endPos);
if (endPos <= 4) {
//System.out.println(" fits!");
// value will fit in the integer... move bytes back
for (int j=ipos; j<endPos; j++) {
val |= (tempArr[j] & 0xff) << (j<<3);
}
index[doc] = val;
} else {
// value won't fit... move integer into byte[]
for (int j=0; j<ipos; j++) {
tempArr[j] = (byte)val;
val >>>=8;
}
// point at the end index in the byte[]
index[doc] = (endPos<<8) | 1;
bytes[doc] = tempArr;
tempArr = new byte[12];
}
}
}
setActualDocFreq(termNum, actualDF);
}
termNum++;
if (te.next() == null) {
break;
}
}
numTermsInField = termNum;
long midPoint = System.currentTimeMillis();
if (termInstances == 0) {
// we didn't invert anything
// lower memory consumption.
tnums = null;
} else {
this.index = index;
//
// transform intermediate form into the final form, building a single byte[]
// at a time, and releasing the intermediate byte[]s as we go to avoid
// increasing the memory footprint.
//
for (int pass = 0; pass<256; pass++) {
byte[] target = tnums[pass];
int pos=0; // end in target;
if (target != null) {
pos = target.length;
} else {
target = new byte[4096];
}
// loop over documents, 0x00ppxxxx, 0x01ppxxxx, 0x02ppxxxx
// where pp is the pass (which array we are building), and xx is all values.
// each pass shares the same byte[] for termNumber lists.
for (int docbase = pass<<16; docbase<maxDoc; docbase+=(1<<24)) {
int lim = Math.min(docbase + (1<<16), maxDoc);
for (int doc=docbase; doc<lim; doc++) {
//System.out.println(" pass=" + pass + " process docID=" + doc);
int val = index[doc];
if ((val&0xff) == 1) {
int len = val >>> 8;
//System.out.println(" ptr pos=" + pos);
index[doc] = (pos<<8)|1; // change index to point to start of array
if ((pos & 0xff000000) != 0) {
// we only have 24 bits for the array index
throw new IllegalStateException("Too many values for UnInvertedField faceting on field "+field);
}
byte[] arr = bytes[doc];
/*
for(byte b : arr) {
//System.out.println(" b=" + Integer.toHexString((int) b));
}
*/
bytes[doc] = null; // IMPORTANT: allow GC to avoid OOM
if (target.length <= pos + len) {
int newlen = target.length;
/*** we don't have to worry about the array getting too large
* since the "pos" param will overflow first (only 24 bits available)
if ((newlen<<1) <= 0) {
// overflow...
newlen = Integer.MAX_VALUE;
if (newlen <= pos + len) {
throw new SolrException(400,"Too many terms to uninvert field!");
}
} else {
while (newlen <= pos + len) newlen<<=1; // doubling strategy
}
****/
while (newlen <= pos + len) newlen<<=1; // doubling strategy
byte[] newtarget = new byte[newlen];
System.arraycopy(target, 0, newtarget, 0, pos);
target = newtarget;
}
System.arraycopy(arr, 0, target, pos, len);
pos += len + 1; // skip single byte at end and leave it 0 for terminator
}
}
}
// shrink array
if (pos < target.length) {
byte[] newtarget = new byte[pos];
System.arraycopy(target, 0, newtarget, 0, pos);
target = newtarget;
}
tnums[pass] = target;
if ((pass << 16) > maxDoc)
break;
}
}
if (indexedTerms != null) {
indexedTermsArray = indexedTerms.toArray(new BytesRef[indexedTerms.size()]);
}
long endTime = System.currentTimeMillis();
total_time = (int)(endTime-startTime);
phase1_time = (int)(midPoint-startTime);
}
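// Illustrative sketch (not part of the sources above): the uninvert loop encodes
// each term number for a document as a delta from the previous term number seen
// for that document, offset by TNUM_OFFSET, and written as a variable-length int
// (7 payload bits per byte, a set high bit meaning "more bytes follow"). The exact
// byte order used by writeInt() above may differ; the helpers below, with made-up
// names, just show the general scheme:
static int vByteSize(int v) {
  int size = 1;
  while ((v & ~0x7F) != 0) {  // more than 7 significant bits remain
    v >>>= 7;
    size++;
  }
  return size;
}
static int writeVByte(int v, byte[] arr, int pos) {
  while ((v & ~0x7F) != 0) {
    arr[pos++] = (byte) ((v & 0x7F) | 0x80);  // continuation bit set
    v >>>= 7;
  }
  arr[pos++] = (byte) v;  // final byte has the high bit clear, so a 0 byte can act as a terminator
  return pos;             // return the new end position
}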
// in lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
return termsEnum.docs(liveDocs, reuse, needsFreqs);
}
// in lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
return termsEnum.docsAndPositions(liveDocs, reuse, needsOffsets);
}
// in lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
Override
public BytesRef next() throws IOException {
ord++;
if (termsEnum.next() == null) {
term = null;
return null;
}
return setTerm(); // this is extra work if we know we are in bounds...
}
// in lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
Override
public int docFreq() throws IOException {
return termsEnum.docFreq();
}
// in lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
Override
public long totalTermFreq() throws IOException {
return termsEnum.totalTermFreq();
}
// in lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
Override
public long ord() throws IOException {
return ordBase + ord;
}
// in lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
Override
public SeekStatus seekCeil(BytesRef target, boolean useCache) throws IOException {
// already here
if (term != null && term.equals(target)) {
return SeekStatus.FOUND;
}
int startIdx = Arrays.binarySearch(indexedTermsArray, target);
if (startIdx >= 0) {
// we hit the term exactly... lucky us!
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
assert seekStatus == TermsEnum.SeekStatus.FOUND;
ord = startIdx << indexIntervalBits;
setTerm();
assert term != null;
return SeekStatus.FOUND;
}
// we didn't hit the term exactly
startIdx = -startIdx-1;
if (startIdx == 0) {
// our target occurs *before* the first term
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(target);
assert seekStatus == TermsEnum.SeekStatus.NOT_FOUND;
ord = 0;
setTerm();
assert term != null;
return SeekStatus.NOT_FOUND;
}
// back up to the start of the block
startIdx--;
if ((ord >> indexIntervalBits) == startIdx && term != null && term.compareTo(target) <= 0) {
// we are already in the right block and the current term is before the term we want,
// so we don't need to seek.
} else {
// seek to the right block
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(indexedTermsArray[startIdx]);
assert seekStatus == TermsEnum.SeekStatus.FOUND;
ord = startIdx << indexIntervalBits;
setTerm();
assert term != null; // should be non-null since it's in the index
}
while (term != null && term.compareTo(target) < 0) {
next();
}
if (term == null) {
return SeekStatus.END;
} else if (term.compareTo(target) == 0) {
return SeekStatus.FOUND;
} else {
return SeekStatus.NOT_FOUND;
}
}
// in lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
Override
public void seekExact(long targetOrd) throws IOException {
int delta = (int) (targetOrd - ordBase - ord);
//System.out.println(" seek(ord) targetOrd=" + targetOrd + " delta=" + delta + " ord=" + ord + " ii=" + indexInterval);
if (delta < 0 || delta > indexInterval) {
final int idx = (int) (targetOrd >>> indexIntervalBits);
final BytesRef base = indexedTermsArray[idx];
//System.out.println(" do seek term=" + base.utf8ToString());
ord = idx << indexIntervalBits;
delta = (int) (targetOrd - ord);
final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(base, true);
assert seekStatus == TermsEnum.SeekStatus.FOUND;
} else {
//System.out.println("seek w/in block");
}
while (--delta >= 0) {
BytesRef br = termsEnum.next();
if (br == null) {
assert false;
return;
}
ord++;
}
setTerm();
assert term != null;
}
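// Illustrative sketch: seekCeil() and seekExact() above rely on the uninvert pass
// having saved every (1 << indexIntervalBits)-th term in indexedTermsArray, so
// locating a target ord is a shift plus a short forward scan. The helper and the
// example numbers below are made up purely for illustration:
static long[] locateOrd(long targetOrd, int indexIntervalBits) {
  int blockIdx = (int) (targetOrd >>> indexIntervalBits);     // which indexed term to seek to
  long blockStartOrd = (long) blockIdx << indexIntervalBits;  // ord of that indexed term
  long forwardSteps = targetOrd - blockStartOrd;              // next() calls after the seek
  return new long[] { blockIdx, forwardSteps };
}
// e.g. with indexIntervalBits = 7 (every 128th term indexed), targetOrd = 1000
// resolves to block 7 (ord 896) followed by 104 forward next() calls.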
// in lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
private BytesRef setTerm() throws IOException {
term = termsEnum.term();
//System.out.println(" setTerm() term=" + term.utf8ToString() + " vs prefix=" + (prefix == null ? "null" : prefix.utf8ToString()));
if (prefix != null && !StringHelper.startsWith(term, prefix)) {
term = null;
}
return term;
}
// in lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java
public BytesRef lookupTerm(TermsEnum termsEnum, int ord) throws IOException {
termsEnum.seekExact(ord);
return termsEnum.term();
}
// in lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java
Override
boolean start(IndexableField[] fields, int count) throws IOException {
doCall = consumer.start(fields, count);
bytesHash.reinit();
if (nextPerField != null) {
doNextCall = nextPerField.start(fields, count);
}
return doCall || doNextCall;
}
// in lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java
public void add(int textStart) throws IOException {
int termID = bytesHash.addByPoolOffset(textStart);
if (termID >= 0) { // New posting
// First time we are seeing this token since we last
// flushed the hash.
// Init stream slices
if (numPostingInt + intPool.intUpto > DocumentsWriterPerThread.INT_BLOCK_SIZE)
intPool.nextBuffer();
if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) {
bytePool.nextBuffer();
}
intUptos = intPool.buffer;
intUptoStart = intPool.intUpto;
intPool.intUpto += streamCount;
postingsArray.intStarts[termID] = intUptoStart + intPool.intOffset;
for(int i=0;i<streamCount;i++) {
final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
intUptos[intUptoStart+i] = upto + bytePool.byteOffset;
}
postingsArray.byteStarts[termID] = intUptos[intUptoStart];
consumer.newTerm(termID);
} else {
termID = (-termID)-1;
int intStart = postingsArray.intStarts[termID];
intUptos = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT];
intUptoStart = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK;
consumer.addTerm(termID);
}
}
// in lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java
Override
void add() throws IOException {
// We are first in the chain so we must "intern" the
// term text into textStart address
// Get the text & hash of this term.
int termID;
try {
termID = bytesHash.add(termBytesRef, termAtt.fillBytesRef());
} catch (MaxBytesLengthExceededException e) {
// Not enough room in current block
// Just skip this term, to remain as robust as
// possible during indexing. A TokenFilter
// can be inserted into the analyzer chain if
// other behavior is wanted (pruning the term
// to a prefix, throwing an exception, etc).
if (docState.maxTermPrefix == null) {
final int saved = termBytesRef.length;
try {
termBytesRef.length = Math.min(30, DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8);
docState.maxTermPrefix = termBytesRef.toString();
} finally {
termBytesRef.length = saved;
}
}
consumer.skippingLongTerm();
return;
}
if (termID >= 0) {// New posting
bytesHash.byteStart(termID);
// Init stream slices
if (numPostingInt + intPool.intUpto > DocumentsWriterPerThread.INT_BLOCK_SIZE) {
intPool.nextBuffer();
}
if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) {
bytePool.nextBuffer();
}
intUptos = intPool.buffer;
intUptoStart = intPool.intUpto;
intPool.intUpto += streamCount;
postingsArray.intStarts[termID] = intUptoStart + intPool.intOffset;
for(int i=0;i<streamCount;i++) {
final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE);
intUptos[intUptoStart+i] = upto + bytePool.byteOffset;
}
postingsArray.byteStarts[termID] = intUptos[intUptoStart];
consumer.newTerm(termID);
} else {
termID = (-termID)-1;
final int intStart = postingsArray.intStarts[termID];
intUptos = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT];
intUptoStart = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK;
consumer.addTerm(termID);
}
if (doNextCall)
nextPerField.add(postingsArray.textStarts[termID]);
}
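// Illustrative sketch: both add() methods above resolve a term's global int start
// into (buffer, offset-in-buffer) with one shift and one mask against the int block
// pool. The block size below (8192 ints, i.e. a shift of 13) is assumed only for
// the example; the arithmetic is the point:
static int[] resolveIntStart(int intStart, int blockShift) {
  int blockMask = (1 << blockShift) - 1;
  int bufferIndex = intStart >> blockShift;   // which int[] buffer in the pool
  int offsetInBuf = intStart & blockMask;     // position inside that buffer
  return new int[] { bufferIndex, offsetInBuf };
}
// e.g. resolveIntStart(20000, 13) -> buffer 2, offset 3616.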
// in lucene/core/src/java/org/apache/lucene/index/TermsHashPerField.java
Override
void finish() throws IOException {
consumer.finish();
if (nextPerField != null)
nextPerField.finish();
}
// in lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java
public synchronized ApplyDeletesResult applyDeletes(IndexWriter.ReaderPool readerPool, List<SegmentInfoPerCommit> infos) throws IOException {
final long t0 = System.currentTimeMillis();
if (infos.size() == 0) {
return new ApplyDeletesResult(false, nextGen++, null);
}
assert checkDeleteStats();
if (!any()) {
if (infoStream.isEnabled("BD")) {
infoStream.message("BD", "applyDeletes: no deletes; skipping");
}
return new ApplyDeletesResult(false, nextGen++, null);
}
if (infoStream.isEnabled("BD")) {
infoStream.message("BD", "applyDeletes: infos=" + infos + " packetCount=" + deletes.size());
}
List<SegmentInfoPerCommit> infos2 = new ArrayList<SegmentInfoPerCommit>();
infos2.addAll(infos);
Collections.sort(infos2, sortSegInfoByDelGen);
CoalescedDeletes coalescedDeletes = null;
boolean anyNewDeletes = false;
int infosIDX = infos2.size()-1;
int delIDX = deletes.size()-1;
List<SegmentInfoPerCommit> allDeleted = null;
while (infosIDX >= 0) {
//System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX);
final FrozenBufferedDeletes packet = delIDX >= 0 ? deletes.get(delIDX) : null;
final SegmentInfoPerCommit info = infos2.get(infosIDX);
final long segGen = info.getBufferedDeletesGen();
if (packet != null && segGen < packet.delGen()) {
//System.out.println(" coalesce");
if (coalescedDeletes == null) {
coalescedDeletes = new CoalescedDeletes();
}
if (!packet.isSegmentPrivate) {
/*
* Only coalesce if we are NOT on a segment private del packet: the segment private del packet
* must only be applied to segments with the same delGen. Yet, if a segment is already deleted
* from the SI since it had no more documents remaining after some del packets younger than
* its segPrivate packet (higher delGen) have been applied, the segPrivate packet has not been
* removed.
*/
coalescedDeletes.update(packet);
}
delIDX--;
} else if (packet != null && segGen == packet.delGen()) {
assert packet.isSegmentPrivate : "Packet and Segments deletegen can only match on a segment private del packet gen=" + segGen;
//System.out.println(" eq");
// Lock order: IW -> BD -> RP
assert readerPool.infoIsLive(info);
final ReadersAndLiveDocs rld = readerPool.get(info, true);
final SegmentReader reader = rld.getReader(IOContext.READ);
int delCount = 0;
final boolean segAllDeletes;
try {
if (coalescedDeletes != null) {
//System.out.println(" del coalesced");
delCount += applyTermDeletes(coalescedDeletes.termsIterable(), rld, reader);
delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), rld, reader);
}
//System.out.println(" del exact");
// Don't delete by Term here; DocumentsWriterPerThread
// already did that on flush:
delCount += applyQueryDeletes(packet.queriesIterable(), rld, reader);
final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
assert fullDelCount <= rld.info.info.getDocCount();
segAllDeletes = fullDelCount == rld.info.info.getDocCount();
} finally {
rld.release(reader);
readerPool.release(rld);
}
anyNewDeletes |= delCount > 0;
if (segAllDeletes) {
if (allDeleted == null) {
allDeleted = new ArrayList<SegmentInfoPerCommit>();
}
allDeleted.add(info);
}
if (infoStream.isEnabled("BD")) {
infoStream.message("BD", "seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
}
if (coalescedDeletes == null) {
coalescedDeletes = new CoalescedDeletes();
}
/*
* Since we are on a segment private del packet we must not
* update the coalescedDeletes here! We can simply advance to the
* next packet and seginfo.
*/
delIDX--;
infosIDX--;
info.setBufferedDeletesGen(nextGen);
} else {
//System.out.println(" gt");
if (coalescedDeletes != null) {
// Lock order: IW -> BD -> RP
assert readerPool.infoIsLive(info);
final ReadersAndLiveDocs rld = readerPool.get(info, true);
final SegmentReader reader = rld.getReader(IOContext.READ);
int delCount = 0;
final boolean segAllDeletes;
try {
delCount += applyTermDeletes(coalescedDeletes.termsIterable(), rld, reader);
delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), rld, reader);
final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
assert fullDelCount <= rld.info.info.getDocCount();
segAllDeletes = fullDelCount == rld.info.info.getDocCount();
} finally {
rld.release(reader);
readerPool.release(rld);
}
anyNewDeletes |= delCount > 0;
if (segAllDeletes) {
if (allDeleted == null) {
allDeleted = new ArrayList<SegmentInfoPerCommit>();
}
allDeleted.add(info);
}
if (infoStream.isEnabled("BD")) {
infoStream.message("BD", "seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] newDelCount=" + delCount + (segAllDeletes ? " 100% deleted" : ""));
}
}
info.setBufferedDeletesGen(nextGen);
infosIDX--;
}
}
assert checkDeleteStats();
if (infoStream.isEnabled("BD")) {
infoStream.message("BD", "applyDeletes took " + (System.currentTimeMillis()-t0) + " msec");
}
// assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any;
return new ApplyDeletesResult(anyNewDeletes, nextGen++, allDeleted);
}
// in lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java
private synchronized long applyTermDeletes(Iterable<Term> termsIter, ReadersAndLiveDocs rld, SegmentReader reader) throws IOException {
long delCount = 0;
Fields fields = reader.fields();
if (fields == null) {
// This reader has no postings
return 0;
}
TermsEnum termsEnum = null;
String currentField = null;
DocsEnum docs = null;
assert checkDeleteTerm(null);
boolean any = false;
//System.out.println(Thread.currentThread().getName() + " del terms reader=" + reader);
for (Term term : termsIter) {
// Since we visit terms sorted, we gain performance
// by re-using the same TermsEnum and seeking only
// forwards
if (!term.field().equals(currentField)) {
assert currentField == null || currentField.compareTo(term.field()) < 0;
currentField = term.field();
Terms terms = fields.terms(currentField);
if (terms != null) {
termsEnum = terms.iterator(null);
} else {
termsEnum = null;
}
}
if (termsEnum == null) {
continue;
}
assert checkDeleteTerm(term);
// System.out.println(" term=" + term);
if (termsEnum.seekExact(term.bytes(), false)) {
DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, false);
//System.out.println("BDS: got docsEnum=" + docsEnum);
if (docsEnum != null) {
while (true) {
final int docID = docsEnum.nextDoc();
//System.out.println(Thread.currentThread().getName() + " del term=" + term + " doc=" + docID);
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
// NOTE: there is no limit check on the docID
// when deleting by Term (unlike by Query)
// because on flush we apply all Term deletes to
// each segment. So all Term deleting here is
// against prior segments:
if (!any) {
rld.initWritableLiveDocs();
any = true;
}
if (rld.delete(docID)) {
delCount++;
}
}
}
}
}
return delCount;
}
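// Illustrative sketch: applyTermDeletes() above assumes the delete terms arrive
// sorted by field and then by term bytes, which is what lets it reuse one TermsEnum
// per field and only ever seek forward. Stripped of the live-docs bookkeeping, the
// same pattern looks roughly like this (countMatches and its arguments are
// hypothetical; the TermsEnum/DocsEnum calls are the ones used above):
private static long countMatches(Fields fields, Iterable<Term> sortedTerms, Bits liveDocs) throws IOException {
  long hits = 0;
  String currentField = null;
  TermsEnum termsEnum = null;
  DocsEnum docs = null;
  for (Term term : sortedTerms) {
    if (!term.field().equals(currentField)) {   // field changed: switch enums
      currentField = term.field();
      Terms terms = fields.terms(currentField);
      termsEnum = terms == null ? null : terms.iterator(null);
    }
    if (termsEnum == null || !termsEnum.seekExact(term.bytes(), false)) {
      continue;                                  // field or term not present
    }
    docs = termsEnum.docs(liveDocs, docs, false);
    if (docs == null) {
      continue;
    }
    while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      hits++;                                    // a doc this term would delete
    }
  }
  return hits;
}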
// in lucene/core/src/java/org/apache/lucene/index/BufferedDeletesStream.java
private static long applyQueryDeletes(Iterable<QueryAndLimit> queriesIter, ReadersAndLiveDocs rld, final SegmentReader reader) throws IOException {
long delCount = 0;
final AtomicReaderContext readerContext = reader.getTopReaderContext();
boolean any = false;
for (QueryAndLimit ent : queriesIter) {
Query query = ent.query;
int limit = ent.limit;
final DocIdSet docs = new QueryWrapperFilter(query).getDocIdSet(readerContext, reader.getLiveDocs());
if (docs != null) {
final DocIdSetIterator it = docs.iterator();
if (it != null) {
while(true) {
int doc = it.nextDoc();
if (doc >= limit) {
break;
}
if (!any) {
rld.initWritableLiveDocs();
any = true;
}
if (rld.delete(doc)) {
delCount++;
}
}
}
}
}
return delCount;
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
public long sizeInBytes() throws IOException {
if (sizeInBytes == -1) {
long sum = 0;
for (final String fileName : files()) {
sum += dir.fileLength(fileName);
}
sizeInBytes = sum;
}
return sizeInBytes;
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java
public Set<String> files() throws IOException {
if (setFiles == null) {
throw new IllegalStateException("files were not computed yet");
}
return Collections.unmodifiableSet(setFiles);
}
// in lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java
Override
public final Fields getTermVectors(int docID) throws IOException {
ensureOpen();
final int i = readerIndex(docID); // find subreader num
return subReaders[i].getTermVectors(docID - starts[i]); // dispatch to subreader
}
// in lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java
Override
public final void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
ensureOpen();
final int i = readerIndex(docID); // find subreader num
subReaders[i].document(docID - starts[i], visitor); // dispatch to subreader
}
// in lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java
Override
public final int docFreq(String field, BytesRef t) throws IOException {
ensureOpen();
int total = 0; // sum freqs in subreaders
for (int i = 0; i < subReaders.length; i++) {
total += subReaders[i].docFreq(field, t);
}
return total;
}
// in lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java
Override
void finish() throws IOException {
if (!doVectors || termsHashPerField.bytesHash.size() == 0) {
return;
}
termsWriter.addFieldToFlush(this);
}
// in lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumerPerField.java
void finishDocument() throws IOException {
assert docState.testPoint("TermVectorsTermsWriterPerField.finish start");
final int numPostings = termsHashPerField.bytesHash.size();
final BytesRef flushTerm = termsWriter.flushTerm;
assert numPostings >= 0;
if (numPostings > maxNumPostings)
maxNumPostings = numPostings;
// This is called once, after inverting all occurrences
// of a given field in the doc. At this point we flush
// our hash into the DocWriter.
assert termsWriter.vectorFieldsInOrder(fieldInfo);
TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;
final TermVectorsWriter tv = termsWriter.writer;
final int[] termIDs = termsHashPerField.sortPostings(tv.getComparator());
tv.startField(fieldInfo, numPostings, doVectorPositions, doVectorOffsets);
final ByteSliceReader posReader = doVectorPositions ? termsWriter.vectorSliceReaderPos : null;
final ByteSliceReader offReader = doVectorOffsets ? termsWriter.vectorSliceReaderOff : null;
final ByteBlockPool termBytePool = termsHashPerField.termBytePool;
for(int j=0;j<numPostings;j++) {
final int termID = termIDs[j];
final int freq = postings.freqs[termID];
// Get BytesRef
termBytePool.setBytesRef(flushTerm, postings.textStarts[termID]);
tv.startTerm(flushTerm, freq);
if (doVectorPositions || doVectorOffsets) {
if (posReader != null) {
termsHashPerField.initReader(posReader, termID, 0);
}
if (offReader != null) {
termsHashPerField.initReader(offReader, termID, 1);
}
tv.addProx(freq, posReader, offReader);
}
}
termsHashPerField.reset();
// commit the termVectors once successful - FI will otherwise reset them
fieldInfo.setStoreTermVectors();
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTerms.java
Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<MultiTermsEnum.TermsEnumIndex>();
for(int i=0;i<subs.length;i++) {
final TermsEnum termsEnum = subs[i].intersect(compiled, startTerm);
if (termsEnum != null) {
termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i));
}
}
if (termsEnums.size() > 0) {
return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
} else {
return TermsEnum.EMPTY;
}
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTerms.java
Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
final List<MultiTermsEnum.TermsEnumIndex> termsEnums = new ArrayList<MultiTermsEnum.TermsEnumIndex>();
for(int i=0;i<subs.length;i++) {
final TermsEnum termsEnum = subs[i].iterator(null);
if (termsEnum != null) {
termsEnums.add(new MultiTermsEnum.TermsEnumIndex(termsEnum, i));
}
}
if (termsEnums.size() > 0) {
return new MultiTermsEnum(subSlices).reset(termsEnums.toArray(MultiTermsEnum.TermsEnumIndex.EMPTY_ARRAY));
} else {
return TermsEnum.EMPTY;
}
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTerms.java
Override
public long size() throws IOException {
return -1;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTerms.java
Override
public long getSumTotalTermFreq() throws IOException {
long sum = 0;
for(Terms terms : subs) {
final long v = terms.getSumTotalTermFreq();
if (v == -1) {
return -1;
}
sum += v;
}
return sum;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTerms.java
Override
public long getSumDocFreq() throws IOException {
long sum = 0;
for(Terms terms : subs) {
final long v = terms.getSumDocFreq();
if (v == -1) {
return -1;
}
sum += v;
}
return sum;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiTerms.java
Override
public int getDocCount() throws IOException {
int sum = 0;
for(Terms terms : subs) {
final int v = terms.getDocCount();
if (v == -1) {
return -1;
}
sum += v;
}
return sum;
}
// in lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java
public void flush(SegmentWriteState state) throws IOException {
int numDocs = state.segmentInfo.getDocCount();
if (numDocs > 0) {
// It's possible that all documents seen in this segment
// hit non-aborting exceptions, in which case we will
// not have yet init'd the FieldsWriter:
initFieldsWriter(state.context);
fill(numDocs);
}
if (fieldsWriter != null) {
try {
fieldsWriter.finish(state.fieldInfos, numDocs);
} finally {
fieldsWriter.close();
fieldsWriter = null;
lastDocID = 0;
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java
private synchronized void initFieldsWriter(IOContext context) throws IOException {
if (fieldsWriter == null) {
fieldsWriter = codec.storedFieldsFormat().fieldsWriter(docWriter.directory, docWriter.getSegmentInfo(), context);
lastDocID = 0;
}
}
// in lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java
void fill(int docID) throws IOException {
// We must "catch up" for all docs before us
// that had no stored fields:
while(lastDocID < docID) {
fieldsWriter.startDocument(0);
lastDocID++;
}
}
// in lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java
void finishDocument() throws IOException {
assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument start");
initFieldsWriter(IOContext.DEFAULT);
fill(docState.docID);
if (fieldsWriter != null && numStoredFields > 0) {
fieldsWriter.startDocument(numStoredFields);
for (int i = 0; i < numStoredFields; i++) {
fieldsWriter.writeField(fieldInfos[i], storedFields[i]);
}
lastDocID++;
}
reset();
assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument end");
}
// in lucene/core/src/java/org/apache/lucene/index/StoredFieldsConsumer.java
public void addField(IndexableField field, FieldInfo fieldInfo) throws IOException {
if (numStoredFields == storedFields.length) {
int newSize = ArrayUtil.oversize(numStoredFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
IndexableField[] newArray = new IndexableField[newSize];
System.arraycopy(storedFields, 0, newArray, 0, numStoredFields);
storedFields = newArray;
FieldInfo[] newInfoArray = new FieldInfo[newSize];
System.arraycopy(fieldInfos, 0, newInfoArray, 0, numStoredFields);
fieldInfos = newInfoArray;
}
storedFields[numStoredFields] = field;
fieldInfos[numStoredFields] = fieldInfo;
numStoredFields++;
assert docState.testPoint("StoredFieldsWriterPerThread.processFields.writeField");
}
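// Illustrative sketch: addField() above grows its two parallel arrays only when they
// are full, asking ArrayUtil.oversize for a padded new capacity so repeated adds stay
// amortized. The same pattern in isolation (append() and its convention of returning
// the possibly-reallocated array are made up for the example):
static IndexableField[] append(IndexableField[] arr, int used, IndexableField f) {
  if (used == arr.length) {
    int newSize = ArrayUtil.oversize(used + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
    IndexableField[] grown = new IndexableField[newSize];
    System.arraycopy(arr, 0, grown, 0, used);
    arr = grown;
  }
  arr[used] = f;   // caller is responsible for tracking used + 1
  return arr;
}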
// in lucene/core/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java
public static Map<String, String> readSnapshotsInfo(Directory dir) throws IOException {
IndexReader r = DirectoryReader.open(dir);
Map<String, String> snapshots = new HashMap<String, String>();
try {
int numDocs = r.numDocs();
// the index is allowed to have exactly one document, or none.
if (numDocs == 1) {
Document doc = r.document(r.maxDoc() - 1);
if (doc.getField(SNAPSHOTS_ID) == null) {
throw new IllegalStateException("directory is not a valid snapshots store!");
}
doc.removeField(SNAPSHOTS_ID);
for (IndexableField f : doc) {
snapshots.put(f.name(), f.stringValue());
}
} else if (numDocs != 0) {
throw new IllegalStateException(
"should be at most 1 document in the snapshots directory: " + numDocs);
}
} finally {
r.close();
}
return snapshots;
}
// in lucene/core/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java
Override
public synchronized void onInit(List<? extends IndexCommit> commits)
throws IOException {
// super.onInit() needs to be called first to ensure that initialization
// behaves as expected. The superclass, SnapshotDeletionPolicy, ensures
// that any snapshot IDs with empty IndexCommits are released. Since this
// happens, this class needs to persist these changes.
super.onInit(commits);
persistSnapshotInfos(null, null);
}
// in lucene/core/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java
Override
public synchronized IndexCommit snapshot(String id) throws IOException {
checkSnapshotted(id);
if (SNAPSHOTS_ID.equals(id)) {
throw new IllegalArgumentException(id + " is reserved and cannot be used as a snapshot id");
}
persistSnapshotInfos(id, lastCommit.getSegmentsFileName());
return super.snapshot(id);
}
// in lucene/core/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java
Override
public synchronized void release(String id) throws IOException {
super.release(id);
persistSnapshotInfos(null, null);
}
// in lucene/core/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java
public void close() throws CorruptIndexException, IOException {
writer.close();
}
// in lucene/core/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java
private void persistSnapshotInfos(String id, String segment) throws IOException {
writer.deleteAll();
Document d = new Document();
FieldType ft = new FieldType();
ft.setStored(true);
d.add(new Field(SNAPSHOTS_ID, "", ft));
for (Entry<String, String> e : super.getSnapshots().entrySet()) {
d.add(new Field(e.getKey(), e.getValue(), ft));
}
if (id != null) {
d.add(new Field(id, segment, ft));
}
writer.addDocument(d);
writer.commit();
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java
void decRef() throws IOException {
//System.out.println("core.decRef seg=" + owner.getSegmentInfo() + " rc=" + ref);
if (ref.decrementAndGet() == 0) {
IOUtils.close(termVectorsLocal, fieldsReaderLocal, fields, perDocProducer,
termVectorsReaderOrig, fieldsReaderOrig, cfsReader, norms);
notifyCoreClosedListeners();
}
}
// in lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
Override
void flush(Map<String, TermsHashConsumerPerField> fieldsToFlush, final SegmentWriteState state) throws IOException {
if (writer != null) {
int numDocs = state.segmentInfo.getDocCount();
// At least one doc in this run had term vectors enabled
try {
fill(numDocs);
assert state.segmentInfo != null;
writer.finish(state.fieldInfos, numDocs);
} finally {
IOUtils.close(writer);
writer = null;
lastDocID = 0;
hasVectors = false;
}
}
for (final TermsHashConsumerPerField field : fieldsToFlush.values() ) {
TermVectorsConsumerPerField perField = (TermVectorsConsumerPerField) field;
perField.termsHashPerField.reset();
perField.shrinkHash();
}
}
// in lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
void fill(int docID) throws IOException {
while(lastDocID < docID) {
writer.startDocument(0);
lastDocID++;
}
}
// in lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
private final void initTermVectorsWriter() throws IOException {
if (writer == null) {
IOContext context = new IOContext(new FlushInfo(docWriter.getNumDocsInRAM(), docWriter.bytesUsed()));
writer = docWriter.codec.termVectorsFormat().vectorsWriter(docWriter.directory, docWriter.getSegmentInfo(), context);
lastDocID = 0;
}
}
// in lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
Override
void finishDocument(TermsHash termsHash) throws IOException {
assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument start");
if (!hasVectors) {
return;
}
initTermVectorsWriter();
fill(docState.docID);
// Append term vectors to the real outputs:
writer.startDocument(numVectorFields);
for (int i = 0; i < numVectorFields; i++) {
perFields[i].finishDocument();
}
assert lastDocID == docState.docID: "lastDocID=" + lastDocID + " docState.docID=" + docState.docID;
lastDocID++;
termsHash.reset();
reset();
assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument end");
}
// in lucene/core/src/java/org/apache/lucene/index/TermVectorsConsumer.java
Override
void startDocument() throws IOException {
assert clearLastVectorFieldName();
reset();
}
// in lucene/core/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java
Override
public Collection<String> getFileNames() throws IOException {
return cp.getFileNames();
}
// in lucene/core/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java
Override
public Map<String, String> getUserData() throws IOException {
return cp.getUserData();
}
// in lucene/core/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java
public synchronized void onCommit(List<? extends IndexCommit> commits)
throws IOException {
primary.onCommit(wrapCommits(commits));
lastCommit = commits.get(commits.size() - 1);
}
// in lucene/core/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java
public synchronized void onInit(List<? extends IndexCommit> commits)
throws IOException {
primary.onInit(wrapCommits(commits));
lastCommit = commits.get(commits.size() - 1);
/*
* Assign snapshotted IndexCommits to their correct snapshot IDs as
* specified in the constructor.
*/
for (IndexCommit commit : commits) {
Set<String> ids = segmentsFileToIDs.get(commit.getSegmentsFileName());
if (ids != null) {
for (String id : ids) {
idToSnapshot.get(id).commit = commit;
}
}
}
/*
* Second, see if there are any instances where a snapshot ID was specified
* in the constructor but an IndexCommit doesn't exist. In this case, the ID
* should be removed.
*
* Note: This code is protective for extreme cases where IDs point to
* non-existent segments. As the constructor should have received its
* information via a call to getSnapshots(), the data should be well-formed.
*/
// Find lost snapshots
ArrayList<String> idsToRemove = null;
for (Entry<String, SnapshotInfo> e : idToSnapshot.entrySet()) {
if (e.getValue().commit == null) {
if (idsToRemove == null) {
idsToRemove = new ArrayList<String>();
}
idsToRemove.add(e.getKey());
}
}
// Finally, remove those 'lost' snapshots.
if (idsToRemove != null) {
for (String id : idsToRemove) {
SnapshotInfo info = idToSnapshot.remove(id);
segmentsFileToIDs.remove(info.segmentsFileName);
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java
public synchronized void release(String id) throws IOException {
SnapshotInfo info = idToSnapshot.remove(id);
if (info == null) {
throw new IllegalStateException("Snapshot doesn't exist: " + id);
}
Set<String> ids = segmentsFileToIDs.get(info.segmentsFileName);
if (ids != null) {
ids.remove(id);
if (ids.size() == 0) {
segmentsFileToIDs.remove(info.segmentsFileName);
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java
public synchronized IndexCommit snapshot(String id) throws IOException {
if (lastCommit == null) {
// no commit exists. Really shouldn't happen, but might happen if SDP is
// accessed before onInit or onCommit was called.
throw new IllegalStateException("No index commit to snapshot");
}
// Can't use the same snapshot ID twice...
checkSnapshotted(id);
registerSnapshotInfo(id, lastCommit.getSegmentsFileName(), lastCommit);
return lastCommit;
}
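// Illustrative sketch: the snapshot()/release() pair above is how a caller pins a
// commit point (e.g. for a hot backup) while its files are copied. A minimal usage
// sketch, assuming "sdp" is the SnapshotDeletionPolicy configured on the IndexWriter
// and "backup" is an arbitrary snapshot id:
IndexCommit commit = sdp.snapshot("backup");
try {
  for (String fileName : commit.getFileNames()) {
    // copy fileName out of the index directory here
  }
} finally {
  sdp.release("backup");   // allow the commit point to be deleted again
}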
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
void abort() throws IOException {
hasAborted = aborting = true;
try {
if (infoStream.isEnabled("DWPT")) {
infoStream.message("DWPT", "now abort");
}
try {
consumer.abort();
} catch (Throwable t) {
}
pendingDeletes.clear();
deleteSlice = deleteQueue.newSlice();
// Reset all postings data
doAfterFlush();
} finally {
aborting = false;
if (infoStream.isEnabled("DWPT")) {
infoStream.message("DWPT", "done abort");
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
public void updateDocument(Iterable<? extends IndexableField> doc, Analyzer analyzer, Term delTerm) throws IOException {
assert writer.testPoint("DocumentsWriterPerThread addDocument start");
assert deleteQueue != null;
docState.doc = doc;
docState.analyzer = analyzer;
docState.docID = numDocsInRAM;
if (segmentInfo == null) {
initSegmentInfo();
}
if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name);
}
boolean success = false;
try {
try {
consumer.processDocument(fieldInfos);
} finally {
docState.clear();
}
success = true;
} finally {
if (!success) {
if (!aborting) {
// mark document as deleted
deleteDocID(docState.docID);
numDocsInRAM++;
} else {
abort();
}
}
}
success = false;
try {
consumer.finishDocument();
success = true;
} finally {
if (!success) {
abort();
}
}
finishDocument(delTerm);
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
public int updateDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer analyzer, Term delTerm) throws IOException {
assert writer.testPoint("DocumentsWriterPerThread addDocuments start");
assert deleteQueue != null;
docState.analyzer = analyzer;
if (segmentInfo == null) {
initSegmentInfo();
}
if (INFO_VERBOSE && infoStream.isEnabled("DWPT")) {
infoStream.message("DWPT", Thread.currentThread().getName() + " update delTerm=" + delTerm + " docID=" + docState.docID + " seg=" + segmentInfo.name);
}
int docCount = 0;
try {
for(Iterable<? extends IndexableField> doc : docs) {
docState.doc = doc;
docState.docID = numDocsInRAM;
docCount++;
boolean success = false;
try {
consumer.processDocument(fieldInfos);
success = true;
} finally {
if (!success) {
// An exc is being thrown...
if (!aborting) {
// One of the documents hit a non-aborting
// exception (eg something happened during
// analysis). We now go and mark any docs
// from this batch that we had already indexed
// as deleted:
int docID = docState.docID;
final int endDocID = docID - docCount;
while (docID > endDocID) {
deleteDocID(docID);
docID--;
}
// Incr here because finishDocument will not
// be called (because an exc is being thrown):
numDocsInRAM++;
} else {
abort();
}
}
}
success = false;
try {
consumer.finishDocument();
success = true;
} finally {
if (!success) {
abort();
}
}
finishDocument(null);
}
// Apply delTerm only after all indexing has
// succeeded, but apply it only to docs prior to when
// this batch started:
if (delTerm != null) {
deleteQueue.add(delTerm, deleteSlice);
assert deleteSlice.isTailItem(delTerm) : "expected the delete term as the tail item";
deleteSlice.apply(pendingDeletes, numDocsInRAM-docCount);
}
} finally {
docState.clear();
}
return docCount;
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
private void finishDocument(Term delTerm) throws IOException {
/*
* here we actually finish the document in two steps 1. push the delete into
* the queue and update our slice. 2. increment the DWPT private document
* id.
*
* the updated slice we get from 1. holds all the deletes that have occurred
* since we updated the slice the last time.
*/
if (deleteSlice == null) {
deleteSlice = deleteQueue.newSlice();
if (delTerm != null) {
deleteQueue.add(delTerm, deleteSlice);
deleteSlice.reset();
}
} else {
if (delTerm != null) {
deleteQueue.add(delTerm, deleteSlice);
assert deleteSlice.isTailItem(delTerm) : "expected the delete term as the tail item";
deleteSlice.apply(pendingDeletes, numDocsInRAM);
} else if (deleteQueue.updateSlice(deleteSlice)) {
deleteSlice.apply(pendingDeletes, numDocsInRAM);
}
}
++numDocsInRAM;
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
private void doAfterFlush() throws IOException {
segmentInfo = null;
consumer.doAfterFlush();
directory.getCreatedFiles().clear();
fieldInfos = new FieldInfos.Builder(fieldInfos.globalFieldNumbers);
parent.subtractFlushedNumDocs(numDocsInRAM);
numDocsInRAM = 0;
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
FlushedSegment flush() throws IOException {
assert numDocsInRAM > 0;
assert deleteSlice == null : "all deletes must be applied in prepareFlush";
segmentInfo.setDocCount(numDocsInRAM);
flushState = new SegmentWriteState(infoStream, directory, segmentInfo, fieldInfos.finish(),
writer.getConfig().getTermIndexInterval(),
pendingDeletes, new IOContext(new FlushInfo(numDocsInRAM, bytesUsed())));
final double startMBUsed = parent.flushControl.netBytes() / 1024. / 1024.;
// Apply delete-by-docID now (delete-byDocID only
// happens when an exception is hit processing that
// doc, eg if analyzer has some problem w/ the text):
if (pendingDeletes.docIDs.size() > 0) {
flushState.liveDocs = codec.liveDocsFormat().newLiveDocs(numDocsInRAM);
for(int delDocID : pendingDeletes.docIDs) {
flushState.liveDocs.clear(delDocID);
}
flushState.delCountOnFlush = pendingDeletes.docIDs.size();
pendingDeletes.bytesUsed.addAndGet(-pendingDeletes.docIDs.size() * BufferedDeletes.BYTES_PER_DEL_DOCID);
pendingDeletes.docIDs.clear();
}
if (infoStream.isEnabled("DWPT")) {
infoStream.message("DWPT", "flush postings as segment " + flushState.segmentInfo.name + " numDocs=" + numDocsInRAM);
}
if (aborting) {
if (infoStream.isEnabled("DWPT")) {
infoStream.message("DWPT", "flush: skip because aborting is set");
}
return null;
}
boolean success = false;
try {
consumer.flush(flushState);
pendingDeletes.terms.clear();
segmentInfo.setFiles(new HashSet<String>(directory.getCreatedFiles()));
final SegmentInfoPerCommit segmentInfoPerCommit = new SegmentInfoPerCommit(segmentInfo, 0, -1L);
if (infoStream.isEnabled("DWPT")) {
infoStream.message("DWPT", "new segment has " + (flushState.liveDocs == null ? 0 : (flushState.segmentInfo.getDocCount() - flushState.delCountOnFlush)) + " deleted docs");
infoStream.message("DWPT", "new segment has " +
(flushState.fieldInfos.hasVectors() ? "vectors" : "no vectors") + "; " +
(flushState.fieldInfos.hasNorms() ? "norms" : "no norms") + "; " +
(flushState.fieldInfos.hasDocValues() ? "docValues" : "no docValues") + "; " +
(flushState.fieldInfos.hasProx() ? "prox" : "no prox") + "; " +
(flushState.fieldInfos.hasFreq() ? "freqs" : "no freqs"));
infoStream.message("DWPT", "flushedFiles=" + segmentInfoPerCommit.files());
infoStream.message("DWPT", "flushed codec=" + codec);
}
flushedDocCount += flushState.segmentInfo.getDocCount();
final BufferedDeletes segmentDeletes;
if (pendingDeletes.queries.isEmpty()) {
pendingDeletes.clear();
segmentDeletes = null;
} else {
segmentDeletes = pendingDeletes;
pendingDeletes = new BufferedDeletes();
}
if (infoStream.isEnabled("DWPT")) {
final double newSegmentSize = segmentInfo.sizeInBytes()/1024./1024.;
infoStream.message("DWPT", "flushed: segment=" + segmentInfo.name +
" ramUsed=" + nf.format(startMBUsed) + " MB" +
" newFlushedSize(includes docstores)=" + nf.format(newSegmentSize) + " MB" +
" docs/MB=" + nf.format(flushedDocCount / newSegmentSize));
}
assert segmentInfo != null;
FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.fieldInfos,
segmentDeletes, flushState.liveDocs, flushState.delCountOnFlush);
doAfterFlush();
success = true;
return fs;
} finally {
if (!success) {
if (segmentInfo != null) {
synchronized(parent.indexWriter) {
parent.indexWriter.deleter.refresh(segmentInfo.name);
}
}
abort();
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushQueue.java
void addDeletesAndPurge(DocumentsWriter writer,
DocumentsWriterDeleteQueue deleteQueue) throws IOException {
synchronized (this) {
incTickets();// first inc the ticket count - freeze opens
// a window for #anyChanges to fail
boolean success = false;
try {
queue
.add(new GlobalDeletesTicket(deleteQueue.freezeGlobalBuffer(null)));
success = true;
} finally {
if (!success) {
decTickets();
}
}
}
// don't hold the lock on the FlushQueue when forcing the purge - this blocks and deadlocks
// if we hold the lock.
forcePurge(writer);
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushQueue.java
private void innerPurge(DocumentsWriter writer) throws IOException {
assert purgeLock.isHeldByCurrentThread();
while (true) {
final FlushTicket head;
final boolean canPublish;
synchronized (this) {
head = queue.peek();
canPublish = head != null && head.canPublish(); // do this synced
}
if (canPublish) {
try {
/*
* if we block on publish -> lock IW -> lock BufferedDeletes we don't block
* concurrent segment flushes just because they want to append to the queue.
* the downside is that we need to force a purge on fullFlush since there could
* be a ticket still in the queue.
*/
head.publish(writer);
} finally {
synchronized (this) {
// finally remove the published ticket from the queue
final FlushTicket poll = queue.poll();
ticketCount.decrementAndGet();
assert poll == head;
}
}
} else {
break;
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushQueue.java
void forcePurge(DocumentsWriter writer) throws IOException {
assert !Thread.holdsLock(this);
purgeLock.lock();
try {
innerPurge(writer);
} finally {
purgeLock.unlock();
}
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushQueue.java
void tryPurge(DocumentsWriter writer) throws IOException {
assert !Thread.holdsLock(this);
if (purgeLock.tryLock()) {
try {
innerPurge(writer);
} finally {
purgeLock.unlock();
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushQueue.java
protected void publish(DocumentsWriter writer) throws IOException {
assert !published : "ticket was already published - cannot publish twice";
published = true;
// it's a global ticket - no segment to publish
writer.finishFlush(null, frozenDeletes);
}
// in lucene/core/src/java/org/apache/lucene/index/DocumentsWriterFlushQueue.java
protected void publish(DocumentsWriter writer) throws IOException {
assert !published : "ticket was already published - cannot publish twice";
published = true;
writer.finishFlush(segment, frozenDeletes);
}
// in lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
public long totalBytesSize() throws IOException {
long total = 0;
for (SegmentInfoPerCommit info : segments) {
total += info.info.sizeInBytes();
}
return total;
}
// in lucene/core/src/java/org/apache/lucene/index/MergePolicy.java
public int totalNumDocs() throws IOException {
int total = 0;
for (SegmentInfoPerCommit info : segments) {
total += info.info.getDocCount();
}
return total;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java
MultiDocsEnum reset(final EnumWithSlice[] subs, final int numSubs) throws IOException {
this.numSubs = numSubs;
this.subs = new EnumWithSlice[subs.length];
for(int i=0;i<subs.length;i++) {
this.subs[i] = new EnumWithSlice();
this.subs[i].docsEnum = subs[i].docsEnum;
this.subs[i].slice = subs[i].slice;
}
upto = -1;
current = null;
return this;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java
Override
public int freq() throws IOException {
return current.freq();
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java
Override
public int advance(int target) throws IOException {
while(true) {
if (current != null) {
final int doc = current.advance(target-currentBase);
if (doc == NO_MORE_DOCS) {
current = null;
} else {
return this.doc = doc + currentBase;
}
} else if (upto == numSubs-1) {
return this.doc = NO_MORE_DOCS;
} else {
upto++;
current = subs[upto].docsEnum;
currentBase = subs[upto].slice.start;
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocsEnum.java
Override
public int nextDoc() throws IOException {
while(true) {
if (current == null) {
if (upto == numSubs-1) {
return this.doc = NO_MORE_DOCS;
} else {
upto++;
current = subs[upto].docsEnum;
currentBase = subs[upto].slice.start;
}
}
final int doc = current.nextDoc();
if (doc != NO_MORE_DOCS) {
return this.doc = currentBase + doc;
} else {
current = null;
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/NormsConsumer.java
Override
public void flush(Map<String,InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException {
boolean success = false;
boolean anythingFlushed = false;
try {
if (state.fieldInfos.hasNorms()) {
for (FieldInfo fi : state.fieldInfos) {
final NormsConsumerPerField toWrite = (NormsConsumerPerField) fieldsToFlush.get(fi.name);
// we must check the final value of omitNorms for the fieldinfo, it could have
// changed for this field since the first time we added it.
if (!fi.omitsNorms()) {
if (toWrite != null && toWrite.initialized()) {
anythingFlushed = true;
final Type type = toWrite.flush(state.segmentInfo.getDocCount());
assert fi.getNormType() == type;
} else if (fi.isIndexed()) {
anythingFlushed = true;
assert fi.getNormType() == null: "got " + fi.getNormType() + "; field=" + fi.name;
}
}
}
}
success = true;
if (!anythingFlushed && consumer != null) {
consumer.abort();
}
} finally {
if (success) {
IOUtils.close(consumer);
} else {
IOUtils.closeWhileHandlingException(consumer);
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/NormsConsumer.java
Override
void finishDocument() throws IOException {}
// in lucene/core/src/java/org/apache/lucene/index/NormsConsumer.java
Override
void startDocument() throws IOException {}
// in lucene/core/src/java/org/apache/lucene/index/NormsConsumer.java
DocValuesConsumer newConsumer(PerDocWriteState perDocWriteState,
FieldInfo fieldInfo, Type type) throws IOException {
if (consumer == null) {
consumer = normsFormat.docsConsumer(perDocWriteState);
}
DocValuesConsumer addValuesField = consumer.addValuesField(type, fieldInfo);
return addValuesField;
}
// in lucene/core/src/java/org/apache/lucene/index/NoMergePolicy.java
Override
public MergeSpecification findMerges(SegmentInfos segmentInfos)
throws CorruptIndexException, IOException { return null; }
// in lucene/core/src/java/org/apache/lucene/index/NoMergePolicy.java
Override
public MergeSpecification findForcedMerges(SegmentInfos segmentInfos,
int maxSegmentCount, Map<SegmentInfoPerCommit,Boolean> segmentsToMerge)
throws CorruptIndexException, IOException { return null; }
// in lucene/core/src/java/org/apache/lucene/index/NoMergePolicy.java
Override
public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos)
throws CorruptIndexException, IOException { return null; }
// in lucene/core/src/java/org/apache/lucene/index/DocValues.java
public Source getSource() throws IOException {
return cache.load(this);
}
// in lucene/core/src/java/org/apache/lucene/index/DocValues.java
public void close() throws IOException {
cache.close(this);
}
// in lucene/core/src/java/org/apache/lucene/index/DocValues.java
public synchronized Source load(DocValues values) throws IOException {
if (ref == null) {
ref = values.load();
}
return ref;
}
// in lucene/core/src/java/org/apache/lucene/index/IndexUpgrader.java
public void upgrade() throws IOException {
if (!DirectoryReader.indexExists(dir)) {
throw new IndexNotFoundException(dir.toString());
}
if (!deletePriorCommits) {
final Collection<IndexCommit> commits = DirectoryReader.listCommits(dir);
if (commits.size() > 1) {
throw new IllegalArgumentException("This tool was invoked to not delete prior commit points, but the following commits were found: " + commits);
}
}
final IndexWriterConfig c = iwc.clone();
c.setMergePolicy(new UpgradeIndexMergePolicy(c.getMergePolicy()));
c.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
final IndexWriter w = new IndexWriter(dir, c);
try {
InfoStream infoStream = c.getInfoStream();
if (infoStream.isEnabled("IndexUpgrader")) {
infoStream.message("IndexUpgrader", "Upgrading all pre-" + Constants.LUCENE_MAIN_VERSION + " segments of index directory '" + dir + "' to version " + Constants.LUCENE_MAIN_VERSION + "...");
}
w.forceMerge(1);
if (infoStream.isEnabled("IndexUpgrader")) {
infoStream.message("IndexUpgrader", "All segments upgraded to version " + Constants.LUCENE_MAIN_VERSION);
}
} finally {
w.close();
}
}
// in lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java
Override
void finish() throws IOException {
if (fieldInfo.isIndexed() && !fieldInfo.omitsNorms()) {
similarity.computeNorm(fieldState, norm);
if (norm.type() != null) {
IndexableField field = norm.field();
// some similarity might not compute any norms
DocValuesConsumer consumer = getConsumer(norm.type());
consumer.add(docState.docID, field);
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java
Type flush(int docCount) throws IOException {
if (!initialized()) {
return null; // null type - not omitted but not written
}
consumer.finish(docCount);
return initType;
}
// in lucene/core/src/java/org/apache/lucene/index/NormsConsumerPerField.java
private DocValuesConsumer getConsumer(Type type) throws IOException {
if (consumer == null) {
assert fieldInfo.getNormType() == null || fieldInfo.getNormType() == type;
fieldInfo.setNormValueType(type);
consumer = parent.newConsumer(docState.docWriter.newPerDocWriteState(""), fieldInfo, type);
this.initType = type;
}
if (initType != type) {
throw new IllegalArgumentException("NormTypes for field: " + fieldInfo.name + " doesn't match " + initType + " != " + type);
}
return consumer;
}
// in lucene/core/src/java/org/apache/lucene/index/DocInverter.java
Override
void flush(Map<String, DocFieldConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException {
Map<String, InvertedDocConsumerPerField> childFieldsToFlush = new HashMap<String, InvertedDocConsumerPerField>();
Map<String, InvertedDocEndConsumerPerField> endChildFieldsToFlush = new HashMap<String, InvertedDocEndConsumerPerField>();
for (Map.Entry<String, DocFieldConsumerPerField> fieldToFlush : fieldsToFlush.entrySet()) {
DocInverterPerField perField = (DocInverterPerField) fieldToFlush.getValue();
childFieldsToFlush.put(fieldToFlush.getKey(), perField.consumer);
endChildFieldsToFlush.put(fieldToFlush.getKey(), perField.endConsumer);
}
consumer.flush(childFieldsToFlush, state);
endConsumer.flush(endChildFieldsToFlush, state);
}
// in lucene/core/src/java/org/apache/lucene/index/DocInverter.java
Override
public void startDocument() throws IOException {
consumer.startDocument();
endConsumer.startDocument();
}
// in lucene/core/src/java/org/apache/lucene/index/DocInverter.java
Override
public void finishDocument() throws IOException {
// TODO: allow endConsumer.finishDocument to also return
// a DocWriter
endConsumer.finishDocument();
consumer.finishDocument();
}
// in lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
public Status checkIndex() throws IOException {
return checkIndex(null);
}
// in lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
public Status checkIndex(List<String> onlySegments) throws IOException {
NumberFormat nf = NumberFormat.getInstance();
SegmentInfos sis = new SegmentInfos();
Status result = new Status();
result.dir = dir;
try {
sis.read(dir);
} catch (Throwable t) {
msg("ERROR: could not read any segments file in directory");
result.missingSegments = true;
if (infoStream != null)
t.printStackTrace(infoStream);
return result;
}
// find the oldest and newest segment versions
String oldest = Integer.toString(Integer.MAX_VALUE), newest = Integer.toString(Integer.MIN_VALUE);
String oldSegs = null;
boolean foundNonNullVersion = false;
Comparator<String> versionComparator = StringHelper.getVersionComparator();
for (SegmentInfoPerCommit si : sis) {
String version = si.info.getVersion();
if (version == null) {
// pre-3.1 segment
oldSegs = "pre-3.1";
} else {
foundNonNullVersion = true;
if (versionComparator.compare(version, oldest) < 0) {
oldest = version;
}
if (versionComparator.compare(version, newest) > 0) {
newest = version;
}
}
}
final int numSegments = sis.size();
final String segmentsFileName = sis.getSegmentsFileName();
// note: we only read the format byte (required preamble) here!
IndexInput input = null;
try {
input = dir.openInput(segmentsFileName, IOContext.DEFAULT);
} catch (Throwable t) {
msg("ERROR: could not open segments file in directory");
if (infoStream != null)
t.printStackTrace(infoStream);
result.cantOpenSegments = true;
return result;
}
int format = 0;
try {
format = input.readInt();
} catch (Throwable t) {
msg("ERROR: could not read segment file version in directory");
if (infoStream != null)
t.printStackTrace(infoStream);
result.missingSegmentVersion = true;
return result;
} finally {
if (input != null)
input.close();
}
String sFormat = "";
boolean skip = false;
result.segmentsFileName = segmentsFileName;
result.numSegments = numSegments;
result.userData = sis.getUserData();
String userDataString;
if (sis.getUserData().size() > 0) {
userDataString = " userData=" + sis.getUserData();
} else {
userDataString = "";
}
String versionString = null;
if (oldSegs != null) {
if (foundNonNullVersion) {
versionString = "versions=[" + oldSegs + " .. " + newest + "]";
} else {
versionString = "version=" + oldSegs;
}
} else {
versionString = oldest.equals(newest) ? ( "version=" + oldest ) : ("versions=[" + oldest + " .. " + newest + "]");
}
msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments
+ " " + versionString + " format=" + sFormat + userDataString);
if (onlySegments != null) {
result.partial = true;
if (infoStream != null)
infoStream.print("\nChecking only these segments:");
for (String s : onlySegments) {
if (infoStream != null)
infoStream.print(" " + s);
}
result.segmentsChecked.addAll(onlySegments);
msg(":");
}
if (skip) {
msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
result.toolOutOfDate = true;
return result;
}
result.newSegments = sis.clone();
result.newSegments.clear();
result.maxSegmentName = -1;
for(int i=0;i<numSegments;i++) {
final SegmentInfoPerCommit info = sis.info(i);
int segmentName = Integer.parseInt(info.info.name.substring(1), Character.MAX_RADIX);
if (segmentName > result.maxSegmentName) {
result.maxSegmentName = segmentName;
}
if (onlySegments != null && !onlySegments.contains(info.info.name)) {
continue;
}
Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
result.segmentInfos.add(segInfoStat);
msg(" " + (1+i) + " of " + numSegments + ": name=" + info.info.name + " docCount=" + info.info.getDocCount());
segInfoStat.name = info.info.name;
segInfoStat.docCount = info.info.getDocCount();
int toLoseDocCount = info.info.getDocCount();
SegmentReader reader = null;
try {
final Codec codec = info.info.getCodec();
msg(" codec=" + codec);
segInfoStat.codec = codec;
msg(" compound=" + info.info.getUseCompoundFile());
segInfoStat.compound = info.info.getUseCompoundFile();
msg(" numFiles=" + info.files().size());
segInfoStat.numFiles = info.files().size();
segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.);
msg(" size (MB)=" + nf.format(segInfoStat.sizeMB));
Map<String,String> diagnostics = info.info.getDiagnostics();
segInfoStat.diagnostics = diagnostics;
if (diagnostics.size() > 0) {
msg(" diagnostics = " + diagnostics);
}
// TODO: we could append the info attributes() to the msg?
if (!info.hasDeletions()) {
msg(" no deletions");
segInfoStat.hasDeletions = false;
}
else{
msg(" has deletions [delGen=" + info.getDelGen() + "]");
segInfoStat.hasDeletions = true;
segInfoStat.deletionsGen = info.getDelGen();
}
if (infoStream != null)
infoStream.print(" test: open reader.........");
reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.DEFAULT);
segInfoStat.openReaderPassed = true;
final int numDocs = reader.numDocs();
toLoseDocCount = numDocs;
if (reader.hasDeletions()) {
if (reader.numDocs() != info.info.getDocCount() - info.getDelCount()) {
throw new RuntimeException("delete count mismatch: info=" + (info.info.getDocCount() - info.getDelCount()) + " vs reader=" + reader.numDocs());
}
if ((info.info.getDocCount()-reader.numDocs()) > reader.maxDoc()) {
throw new RuntimeException("too many deleted docs: maxDoc()=" + reader.maxDoc() + " vs del count=" + (info.info.getDocCount()-reader.numDocs()));
}
if (info.info.getDocCount() - numDocs != info.getDelCount()) {
throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.info.getDocCount() - numDocs));
}
Bits liveDocs = reader.getLiveDocs();
if (liveDocs == null) {
throw new RuntimeException("segment should have deletions, but liveDocs is null");
} else {
int numLive = 0;
for (int j = 0; j < liveDocs.length(); j++) {
if (liveDocs.get(j)) {
numLive++;
}
}
if (numLive != numDocs) {
throw new RuntimeException("liveDocs count mismatch: info=" + numDocs + ", vs bits=" + numLive);
}
}
segInfoStat.numDeleted = info.info.getDocCount() - numDocs;
msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
} else {
if (info.getDelCount() != 0) {
throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.info.getDocCount() - numDocs));
}
Bits liveDocs = reader.getLiveDocs();
if (liveDocs != null) {
// it's ok for liveDocs to be non-null here, as long as every bit is still set (i.e. no doc is actually marked deleted)
for (int j = 0; j < liveDocs.length(); j++) {
if (!liveDocs.get(j)) {
throw new RuntimeException("liveDocs mismatch: info says no deletions but doc " + j + " is deleted.");
}
}
}
msg("OK");
}
if (reader.maxDoc() != info.info.getDocCount()) {
throw new RuntimeException("SegmentReader.maxDoc() " + reader.maxDoc() + " != SegmentInfos.docCount " + info.info.getDocCount());
}
// Test getFieldInfos()
if (infoStream != null) {
infoStream.print(" test: fields..............");
}
FieldInfos fieldInfos = reader.getFieldInfos();
msg("OK [" + fieldInfos.size() + " fields]");
segInfoStat.numFields = fieldInfos.size();
// Test Field Norms
segInfoStat.fieldNormStatus = testFieldNorms(fieldInfos, reader);
// Test the Term Index
segInfoStat.termIndexStatus = testPostings(fieldInfos, reader);
// Test Stored Fields
segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf);
// Test Term Vectors
segInfoStat.termVectorStatus = testTermVectors(fieldInfos, info, reader, nf);
segInfoStat.docValuesStatus = testDocValues(info, fieldInfos, reader);
// Rethrow the first exception we encountered
// This will cause stats for failed segments to be incremented properly
if (segInfoStat.fieldNormStatus.error != null) {
throw new RuntimeException("Field Norm test failed");
} else if (segInfoStat.termIndexStatus.error != null) {
throw new RuntimeException("Term Index test failed");
} else if (segInfoStat.storedFieldStatus.error != null) {
throw new RuntimeException("Stored Field test failed");
} else if (segInfoStat.termVectorStatus.error != null) {
throw new RuntimeException("Term Vector test failed");
} else if (segInfoStat.docValuesStatus.error != null) {
throw new RuntimeException("DocValues test failed");
}
msg("");
} catch (Throwable t) {
msg("FAILED");
String comment;
comment = "fixIndex() would remove reference to this segment";
msg(" WARNING: " + comment + "; full exception:");
if (infoStream != null)
t.printStackTrace(infoStream);
msg("");
result.totLoseDocCount += toLoseDocCount;
result.numBadSegments++;
continue;
} finally {
if (reader != null)
reader.close();
}
// Keeper
result.newSegments.add(info.clone());
}
if (0 == result.numBadSegments) {
result.clean = true;
} else
msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
if ( ! (result.validCounter = (result.maxSegmentName < sis.counter))) {
result.clean = false;
result.newSegments.counter = result.maxSegmentName + 1;
msg("ERROR: Next segment name counter " + sis.counter + " is not greater than max segment name " + result.maxSegmentName);
}
if (result.clean) {
msg("No problems were detected with this index.\n");
}
return result;
}
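// Example (added sketch, not part of the Lucene sources): a minimal programmatic health check
// built on the CheckIndex API shown above. Signatures follow the 4.0-era code in this file;
// the helper name and the boolean return are illustrative only.
// imports assumed: java.io.File, org.apache.lucene.store.Directory, org.apache.lucene.store.FSDirectory
public static boolean isIndexClean(File indexDir) throws IOException {
Directory dir = FSDirectory.open(indexDir);
try {
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(System.out, false); // progress output, non-verbose
CheckIndex.Status status = checker.checkIndex(); // check every segment
return status.clean; // true only when no broken segments were found
} finally {
dir.close();
}
}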
// in lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
private Status.TermIndexStatus checkFields(Fields fields, Bits liveDocs, int maxDoc, FieldInfos fieldInfos, IndexSearcher searcher, boolean doPrint) throws IOException {
// TODO: we should probably return our own stats thing...?!
final Status.TermIndexStatus status = new Status.TermIndexStatus();
int computedFieldCount = 0;
if (fields == null) {
msg("OK [no fields/terms]");
return status;
}
DocsEnum docs = null;
DocsEnum docsAndFreqs = null;
DocsAndPositionsEnum postings = null;
String lastField = null;
final FieldsEnum fieldsEnum = fields.iterator();
while(true) {
final String field = fieldsEnum.next();
if (field == null) {
break;
}
// MultiFieldsEnum relies upon this order...
if (lastField != null && field.compareTo(lastField) <= 0) {
throw new RuntimeException("fields out of order: lastField=" + lastField + " field=" + field);
}
lastField = field;
// check that the field is in fieldinfos, and is indexed.
// TODO: add a separate test to check this for different reader impls
FieldInfo fi = fieldInfos.fieldInfo(field);
if (fi == null) {
throw new RuntimeException("fieldsEnum inconsistent with fieldInfos, no fieldInfos for: " + field);
}
if (!fi.isIndexed()) {
throw new RuntimeException("fieldsEnum inconsistent with fieldInfos, isIndexed == false for: " + field);
}
// TODO: really the codec should not return a field
// from FieldsEnum if it has no Terms... but we do
// this today:
// assert fields.terms(field) != null;
computedFieldCount++;
final Terms terms = fieldsEnum.terms();
if (terms == null) {
continue;
}
final TermsEnum termsEnum = terms.iterator(null);
boolean hasOrd = true;
final long termCountStart = status.termCount;
BytesRef lastTerm = null;
Comparator<BytesRef> termComp = terms.getComparator();
long sumTotalTermFreq = 0;
long sumDocFreq = 0;
FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
while(true) {
final BytesRef term = termsEnum.next();
if (term == null) {
break;
}
// make sure terms arrive in order according to
// the comp
if (lastTerm == null) {
lastTerm = BytesRef.deepCopyOf(term);
} else {
if (termComp.compare(lastTerm, term) >= 0) {
throw new RuntimeException("terms out of order: lastTerm=" + lastTerm + " term=" + term);
}
lastTerm.copyBytes(term);
}
final int docFreq = termsEnum.docFreq();
if (docFreq <= 0) {
throw new RuntimeException("docfreq: " + docFreq + " is out of bounds");
}
status.totFreq += docFreq;
sumDocFreq += docFreq;
docs = termsEnum.docs(liveDocs, docs, false);
docsAndFreqs = termsEnum.docs(liveDocs, docsAndFreqs, true);
postings = termsEnum.docsAndPositions(liveDocs, postings, false);
if (hasOrd) {
long ord = -1;
try {
ord = termsEnum.ord();
} catch (UnsupportedOperationException uoe) {
hasOrd = false;
}
if (hasOrd) {
final long ordExpected = status.termCount - termCountStart;
if (ord != ordExpected) {
throw new RuntimeException("ord mismatch: TermsEnum has ord=" + ord + " vs actual=" + ordExpected);
}
}
}
status.termCount++;
final DocsEnum docs2;
final DocsEnum docsAndFreqs2;
final boolean hasPositions;
final boolean hasFreqs;
if (postings != null) {
docs2 = postings;
docsAndFreqs2 = postings;
hasPositions = true;
hasFreqs = true;
} else if (docsAndFreqs != null) {
docs2 = docsAndFreqs;
docsAndFreqs2 = docsAndFreqs;
hasPositions = false;
hasFreqs = true;
} else {
docs2 = docs;
docsAndFreqs2 = null;
hasPositions = false;
hasFreqs = false;
}
int lastDoc = -1;
int docCount = 0;
long totalTermFreq = 0;
while(true) {
final int doc = docs2.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
visitedDocs.set(doc);
int freq = -1;
if (hasFreqs) {
freq = docsAndFreqs2.freq();
if (freq <= 0) {
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
}
status.totPos += freq;
totalTermFreq += freq;
}
docCount++;
if (doc <= lastDoc) {
throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
}
if (doc >= maxDoc) {
throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
}
lastDoc = doc;
int lastPos = -1;
if (hasPositions) {
for(int j=0;j<freq;j++) {
final int pos = postings.nextPosition();
// NOTE: pos=-1 is allowed because of ancient bug
// (LUCENE-1542) whereby IndexWriter could
// write pos=-1 when first token's posInc is 0
// (separately: analyzers should not give
// posInc=0 to first token); also, term
// vectors are allowed to return pos=-1 if
// they indexed offset but not positions:
if (pos < -1) {
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
}
if (pos < lastPos) {
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
}
lastPos = pos;
if (postings.hasPayload()) {
postings.getPayload();
}
}
}
}
final long totalTermFreq2 = termsEnum.totalTermFreq();
final boolean hasTotalTermFreq = postings != null && totalTermFreq2 != -1;
// Re-count if there are deleted docs:
if (liveDocs != null) {
if (hasFreqs) {
final DocsEnum docsNoDel = termsEnum.docs(null, docsAndFreqs, true);
docCount = 0;
totalTermFreq = 0;
while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
visitedDocs.set(docsNoDel.docID());
docCount++;
totalTermFreq += docsNoDel.freq();
}
} else {
final DocsEnum docsNoDel = termsEnum.docs(null, docs, false);
docCount = 0;
totalTermFreq = -1;
while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
visitedDocs.set(docsNoDel.docID());
docCount++;
}
}
}
if (docCount != docFreq) {
throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount);
}
if (hasTotalTermFreq) {
if (totalTermFreq2 <= 0) {
throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds");
}
sumTotalTermFreq += totalTermFreq;
if (totalTermFreq != totalTermFreq2) {
throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq);
}
}
// Test skipping
if (hasPositions) {
for(int idx=0;idx<7;idx++) {
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
postings = termsEnum.docsAndPositions(liveDocs, postings, false);
final int docID = postings.advance(skipDocID);
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
} else {
if (docID < skipDocID) {
throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
}
final int freq = postings.freq();
if (freq <= 0) {
throw new RuntimeException("termFreq " + freq + " is out of bounds");
}
int lastPosition = -1;
for(int posUpto=0;posUpto<freq;posUpto++) {
final int pos = postings.nextPosition();
// NOTE: pos=-1 is allowed because of ancient bug
// (LUCENE-1542) whereby IndexWriter could
// write pos=-1 when first token's posInc is 0
// (separately: analyzers should not give
// posInc=0 to first token); also, term
// vectors are allowed to return pos=-1 if
// they indexed offset but not positions:
if (pos < -1) {
throw new RuntimeException("position " + pos + " is out of bounds");
}
if (pos < lastPosition) {
throw new RuntimeException("position " + pos + " is < lastPosition " + lastPosition);
}
lastPosition = pos;
}
final int nextDocID = postings.nextDoc();
if (nextDocID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
if (nextDocID <= docID) {
throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
}
}
}
} else {
for(int idx=0;idx<7;idx++) {
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
docs = termsEnum.docs(liveDocs, docs, false);
final int docID = docs.advance(skipDocID);
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
} else {
if (docID < skipDocID) {
throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
}
final int nextDocID = docs.nextDoc();
if (nextDocID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
if (nextDocID <= docID) {
throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
}
}
}
}
}
final Terms fieldTerms = fields.terms(field);
if (fieldTerms == null) {
// Unusual: the FieldsEnum returned a field but
// the Terms for that field is null; this should
// only happen if it's a ghost field (field with
// no terms, eg there used to be terms but all
// docs got deleted and then merged away):
// make sure TermsEnum is empty:
final Terms fieldTerms2 = fieldsEnum.terms();
if (fieldTerms2 != null && fieldTerms2.iterator(null).next() != null) {
throw new RuntimeException("Fields.terms(field=" + field + ") returned null yet the field appears to have terms");
}
} else {
if (fieldTerms instanceof BlockTreeTermsReader.FieldReader) {
final BlockTreeTermsReader.Stats stats = ((BlockTreeTermsReader.FieldReader) fieldTerms).computeStats();
assert stats != null;
if (status.blockTreeStats == null) {
status.blockTreeStats = new HashMap<String,BlockTreeTermsReader.Stats>();
}
status.blockTreeStats.put(field, stats);
}
if (sumTotalTermFreq != 0) {
final long v = fields.terms(field).getSumTotalTermFreq();
if (v != -1 && sumTotalTermFreq != v) {
throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
}
}
if (sumDocFreq != 0) {
final long v = fields.terms(field).getSumDocFreq();
if (v != -1 && sumDocFreq != v) {
throw new RuntimeException("sumDocFreq for field " + field + "=" + v + " != recomputed sumDocFreq=" + sumDocFreq);
}
}
if (fieldTerms != null) {
final int v = fieldTerms.getDocCount();
if (v != -1 && visitedDocs.cardinality() != v) {
throw new RuntimeException("docCount for field " + field + "=" + v + " != recomputed docCount=" + visitedDocs.cardinality());
}
}
// Test seek to last term:
if (lastTerm != null) {
if (termsEnum.seekCeil(lastTerm) != TermsEnum.SeekStatus.FOUND) {
throw new RuntimeException("seek to last term " + lastTerm + " failed");
}
if (searcher != null) {
searcher.search(new TermQuery(new Term(field, lastTerm)), 1);
}
}
// check unique term count
long termCount = -1;
if (status.termCount-termCountStart > 0) {
termCount = fields.terms(field).size();
if (termCount != -1 && termCount != status.termCount - termCountStart) {
throw new RuntimeException("termCount mismatch " + termCount + " vs " + (status.termCount - termCountStart));
}
}
// Test seeking by ord
if (hasOrd && status.termCount-termCountStart > 0) {
int seekCount = (int) Math.min(10000L, termCount);
if (seekCount > 0) {
BytesRef[] seekTerms = new BytesRef[seekCount];
// Seek by ord
for(int i=seekCount-1;i>=0;i--) {
long ord = i*(termCount/seekCount);
termsEnum.seekExact(ord);
seekTerms[i] = BytesRef.deepCopyOf(termsEnum.term());
}
// Seek by term
long totDocCount = 0;
for(int i=seekCount-1;i>=0;i--) {
if (termsEnum.seekCeil(seekTerms[i]) != TermsEnum.SeekStatus.FOUND) {
throw new RuntimeException("seek to existing term " + seekTerms[i] + " failed");
}
docs = termsEnum.docs(liveDocs, docs, false);
if (docs == null) {
throw new RuntimeException("null DocsEnum from to existing term " + seekTerms[i]);
}
while(docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
totDocCount++;
}
}
// TermQuery
if (searcher != null) {
long totDocCount2 = 0;
for(int i=0;i<seekCount;i++) {
totDocCount2 += searcher.search(new TermQuery(new Term(field, seekTerms[i])), 1).totalHits;
}
if (totDocCount != totDocCount2) {
throw new RuntimeException("search to seek terms produced wrong number of hits: " + totDocCount + " vs " + totDocCount2);
}
}
}
}
}
}
int fieldCount = fields.size();
if (fieldCount != -1) {
if (fieldCount < 0) {
throw new RuntimeException("invalid fieldCount: " + fieldCount);
}
if (fieldCount != computedFieldCount) {
throw new RuntimeException("fieldCount mismatch " + fieldCount + " vs recomputed field count " + computedFieldCount);
}
}
// for most implementations, this is boring (just the sum across all fields)
// but codecs that don't work per-field like preflex actually implement this,
// but don't implement it on Terms, so the check isn't redundant.
long uniqueTermCountAllFields = fields.getUniqueTermCount();
// this means something is seriously screwed, e.g. we are somehow getting enclosed in PFCW!!!!!!
if (uniqueTermCountAllFields == -1) {
throw new RuntimeException("invalid termCount: -1");
}
if (status.termCount != uniqueTermCountAllFields) {
throw new RuntimeException("termCount mismatch " + uniqueTermCountAllFields + " vs " + (status.termCount));
}
if (doPrint) {
msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
}
if (verbose && status.blockTreeStats != null && infoStream != null && status.termCount > 0) {
for(Map.Entry<String,BlockTreeTermsReader.Stats> ent : status.blockTreeStats.entrySet()) {
infoStream.println(" field \"" + ent.getKey() + "\":");
infoStream.println(" " + ent.getValue().toString().replace("\n", "\n "));
}
}
return status;
}
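// Example (added sketch, not part of the Lucene sources): walking every field, term and posting
// of an AtomicReader with the same FieldsEnum/TermsEnum/DocsEnum pattern that checkFields()
// validates above. Signatures follow the 4.0-era API used in this file.
public static long countPostings(AtomicReader reader) throws IOException {
long postings = 0;
Fields fields = reader.fields();
if (fields == null) {
return 0; // no indexed fields at all
}
FieldsEnum fieldsEnum = fields.iterator();
while (fieldsEnum.next() != null) {
Terms terms = fieldsEnum.terms();
if (terms == null) {
continue; // ghost field: no terms remain
}
TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
while (termsEnum.next() != null) {
docs = termsEnum.docs(reader.getLiveDocs(), docs, false); // reuse enum, no freqs needed
while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
postings++;
}
}
}
return postings;
}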
// in lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
private void checkDocValues(DocValues docValues, String fieldName, DocValues.Type expectedType, int expectedDocs) throws IOException {
if (docValues == null) {
throw new RuntimeException("field: " + fieldName + " omits docvalues but should have them!");
}
DocValues.Type type = docValues.getType();
if (type != expectedType) {
throw new RuntimeException("field: " + fieldName + " has type: " + type + " but fieldInfos says:" + expectedType);
}
final Source values = docValues.getDirectSource();
int size = docValues.getValueSize();
for (int i = 0; i < expectedDocs; i++) {
switch (type) {
case BYTES_FIXED_SORTED:
case BYTES_VAR_SORTED:
case BYTES_FIXED_DEREF:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_STRAIGHT:
BytesRef bytes = new BytesRef();
values.getBytes(i, bytes);
if (size != -1 && size != bytes.length) {
throw new RuntimeException("field: " + fieldName + " returned wrongly sized bytes, was: " + bytes.length + " should be: " + size);
}
break;
case FLOAT_32:
assert size == 4;
values.getFloat(i);
break;
case FLOAT_64:
assert size == 8;
values.getFloat(i);
break;
case VAR_INTS:
assert size == -1;
values.getInt(i);
break;
case FIXED_INTS_16:
assert size == 2;
values.getInt(i);
break;
case FIXED_INTS_32:
assert size == 4;
values.getInt(i);
break;
case FIXED_INTS_64:
assert size == 8;
values.getInt(i);
break;
case FIXED_INTS_8:
assert size == 1;
values.getInt(i);
break;
default:
throw new IllegalArgumentException("Field: " + fieldName
+ " - no such DocValues type: " + type);
}
}
if (type == DocValues.Type.BYTES_FIXED_SORTED || type == DocValues.Type.BYTES_VAR_SORTED) {
// check sorted bytes
SortedSource sortedValues = values.asSortedSource();
Comparator<BytesRef> comparator = sortedValues.getComparator();
int lastOrd = -1;
BytesRef lastBytes = new BytesRef();
for (int i = 0; i < expectedDocs; i++) {
int ord = sortedValues.ord(i);
if (ord < 0 || ord > expectedDocs) {
throw new RuntimeException("field: " + fieldName + " ord is out of bounds: " + ord);
}
BytesRef bytes = new BytesRef();
sortedValues.getByOrd(ord, bytes);
if (lastOrd != -1) {
int ordComp = Integer.signum(new Integer(ord).compareTo(new Integer(lastOrd)));
int bytesComp = Integer.signum(comparator.compare(bytes, lastBytes));
if (ordComp != bytesComp) {
throw new RuntimeException("field: " + fieldName + " ord comparison is wrong: " + ordComp + " comparator claims: " + bytesComp);
}
}
lastOrd = ord;
lastBytes = bytes;
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
public void fixIndex(Status result, Codec codec) throws IOException {
if (result.partial)
throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
result.newSegments.changed();
result.newSegments.commit(result.dir);
}
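// Note (added, not part of the Lucene sources): fixIndex() above is only legal on a Status
// produced by a full checkIndex() run (result.partial must be false); the intended sequence,
// as main() below shows, is checkIndex(), inspect result.clean, and only then fixIndex(result, codec).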
// in lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
public static void main(String[] args) throws IOException, InterruptedException {
boolean doFix = false;
boolean doCrossCheckTermVectors = false;
Codec codec = Codec.getDefault(); // only used when fixing
boolean verbose = false;
List<String> onlySegments = new ArrayList<String>();
String indexPath = null;
String dirImpl = null;
int i = 0;
while(i < args.length) {
String arg = args[i];
if ("-fix".equals(arg)) {
doFix = true;
} else if ("-crossCheckTermVectors".equals(arg)) {
doCrossCheckTermVectors = true;
} else if ("-codec".equals(arg)) {
if (i == args.length-1) {
System.out.println("ERROR: missing name for -codec option");
System.exit(1);
}
i++;
codec = Codec.forName(args[i]);
} else if (arg.equals("-verbose")) {
verbose = true;
} else if (arg.equals("-segment")) {
if (i == args.length-1) {
System.out.println("ERROR: missing name for -segment option");
System.exit(1);
}
i++;
onlySegments.add(args[i]);
} else if ("-dir-impl".equals(arg)) {
if (i == args.length - 1) {
System.out.println("ERROR: missing value for -dir-impl option");
System.exit(1);
}
i++;
dirImpl = args[i];
} else {
if (indexPath != null) {
System.out.println("ERROR: unexpected extra argument '" + args[i] + "'");
System.exit(1);
}
indexPath = args[i];
}
i++;
}
if (indexPath == null) {
System.out.println("\nERROR: index path not specified");
System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-crossCheckTermVectors] [-segment X] [-segment Y] [-dir-impl X]\n" +
"\n" +
" -fix: actually write a new segments_N file, removing any problematic segments\n" +
" -crossCheckTermVectors: verifies that term vectors match postings; THIS IS VERY SLOW!\n" +
" -codec X: when fixing, codec to write the new segments_N file with\n" +
" -verbose: print additional details\n" +
" -segment X: only check the specified segments. This can be specified multiple\n" +
" times, to check more than one segment, eg '-segment _2 -segment _a'.\n" +
" You can't use this with the -fix option\n" +
" -dir-impl X: use a specific " + FSDirectory.class.getSimpleName() + " implementation. " +
"If no package is specified the " + FSDirectory.class.getPackage().getName() + " package will be used.\n" +
"**WARNING**: -fix should only be used on an emergency basis as it will cause\n" +
"documents (perhaps many) to be permanently removed from the index. Always make\n" +
"a backup copy of your index before running this! Do not run this tool on an index\n" +
"that is actively being written to. You have been warned!\n" +
"\n" +
"Run without -fix, this tool will open the index, report version information\n" +
"and report any exceptions it hits and what action it would take if -fix were\n" +
"specified. With -fix, this tool will remove any segments that have issues and\n" +
"write a new segments_N file. This means all documents contained in the affected\n" +
"segments will be removed.\n" +
"\n" +
"This tool exits with exit code 1 if the index cannot be opened or has any\n" +
"corruption, else 0.\n");
System.exit(1);
}
if (!assertsOn())
System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
if (onlySegments.size() == 0)
onlySegments = null;
else if (doFix) {
System.out.println("ERROR: cannot specify both -fix and -segment");
System.exit(1);
}
System.out.println("\nOpening index @ " + indexPath + "\n");
Directory dir = null;
try {
if (dirImpl == null) {
dir = FSDirectory.open(new File(indexPath));
} else {
dir = CommandLineUtil.newFSDirectory(dirImpl, new File(indexPath));
}
} catch (Throwable t) {
System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting");
t.printStackTrace(System.out);
System.exit(1);
}
CheckIndex checker = new CheckIndex(dir);
checker.setCrossCheckTermVectors(doCrossCheckTermVectors);
checker.setInfoStream(System.out, verbose);
Status result = checker.checkIndex(onlySegments);
if (result.missingSegments) {
System.exit(1);
}
if (!result.clean) {
if (!doFix) {
System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
} else {
System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
for(int s=0;s<5;s++) {
Thread.sleep(1000);
System.out.println(" " + (5-s) + "...");
}
System.out.println("Writing...");
checker.fixIndex(result, codec);
System.out.println("OK");
System.out.println("Wrote new segments file \"" + result.newSegments.getSegmentsFileName() + "\"");
}
}
System.out.println("");
final int exitCode;
if (result.clean == true)
exitCode = 0;
else
exitCode = 1;
System.exit(exitCode);
}
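// Example (added, not part of the Lucene sources): typical invocations of the tool above,
// assembled from the usage text it prints; the index path is a placeholder.
//
// java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex /path/to/index -verbose
// java org.apache.lucene.index.CheckIndex /path/to/index -segment _2 -segment _a
// java org.apache.lucene.index.CheckIndex /path/to/index -fix   (removes broken segments; back up the index first)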
// in lucene/core/src/java/org/apache/lucene/index/NoDeletionPolicy.java
public void onCommit(List<? extends IndexCommit> commits) throws IOException {}
// in lucene/core/src/java/org/apache/lucene/index/NoDeletionPolicy.java
public void onInit(List<? extends IndexCommit> commits) throws IOException {}
// in lucene/core/src/java/org/apache/lucene/index/MultiFields.java
public static Fields getFields(IndexReader r) throws IOException {
if (r instanceof AtomicReader) {
// already an atomic reader
return ((AtomicReader) r).fields();
}
assert r instanceof CompositeReader;
final IndexReader[] subs = ((CompositeReader) r).getSequentialSubReaders();
if (subs.length == 0) {
// no fields
return null;
} else {
final List<Fields> fields = new ArrayList<Fields>();
final List<ReaderUtil.Slice> slices = new ArrayList<ReaderUtil.Slice>();
new ReaderUtil.Gather(r) {
@Override
protected void add(int base, AtomicReader r) throws IOException {
final Fields f = r.fields();
if (f != null) {
fields.add(f);
slices.add(new ReaderUtil.Slice(base, r.maxDoc(), fields.size()-1));
}
}
}.run();
if (fields.isEmpty()) {
return null;
} else if (fields.size() == 1) {
return fields.get(0);
} else {
return new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
slices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/MultiFields.java
Override
protected void add(int base, AtomicReader r) throws IOException {
final Fields f = r.fields();
if (f != null) {
fields.add(f);
slices.add(new ReaderUtil.Slice(base, r.maxDoc(), fields.size()-1));
}
}
// in lucene/core/src/java/org/apache/lucene/index/MultiFields.java
public static Bits getLiveDocs(IndexReader r) {
if (r.hasDeletions()) {
final List<Bits> liveDocs = new ArrayList<Bits>();
final List<Integer> starts = new ArrayList<Integer>();
try {
final int maxDoc = new ReaderUtil.Gather(r) {
@Override
protected void add(int base, AtomicReader r) throws IOException {
// record all liveDocs, even if they are null
liveDocs.add(r.getLiveDocs());
starts.add(base);
}
}.run();
starts.add(maxDoc);
} catch (IOException ioe) {
// should not happen
throw new RuntimeException(ioe);
}
assert liveDocs.size() > 0;
if (liveDocs.size() == 1) {
// Only one actual sub reader -- optimize this case
return liveDocs.get(0);
} else {
return new MultiBits(liveDocs, starts, true);
}
} else {
return null;
}
}
// in lucene/core/src/java/org/apache/lucene/index/MultiFields.java
Override
protected void add(int base, AtomicReader r) throws IOException {
// record all liveDocs, even if they are null
liveDocs.add(r.getLiveDocs());
starts.add(base);
}
// in lucene/core/src/java/org/apache/lucene/index/MultiFields.java
public static Terms getTerms(IndexReader r, String field) throws IOException {
final Fields fields = getFields(r);
if (fields == null) {
return null;
} else {
return fields.terms(field);
}
}
// in lucene/core/src/java/org/apache/lucene/index/MultiFields.java
public static DocsEnum getTermDocsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, boolean needsFreqs) throws IOException {
assert field != null;
assert term != null;
final Terms terms = getTerms(r, field);
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
return termsEnum.docs(liveDocs, null, needsFreqs);
}
}
return null;
}
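// Example (added sketch, not part of the Lucene sources): counting the live documents that
// contain a given term across a possibly composite reader, using the MultiFields helpers above.
// Field and term values are supplied by the caller; signatures follow the 4.0-era API.
public static int countDocsWithTerm(IndexReader reader, String field, BytesRef term) throws IOException {
Bits liveDocs = MultiFields.getLiveDocs(reader); // null when the reader has no deletions
DocsEnum docs = MultiFields.getTermDocsEnum(reader, liveDocs, field, term, false);
if (docs == null) {
return 0; // field or term does not exist
}
int count = 0;
while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
count++;
}
return count;
}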
// in lucene/core/src/java/org/apache/lucene/index/MultiFields.java
public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, boolean needsOffsets) throws IOException {
assert field != null;
assert term != null;
final Terms terms = getTerms(r, field);
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
return termsEnum.docsAndPositions(liveDocs, null, needsOffsets);
}
}
return null;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiFields.java
Override
public FieldsEnum iterator() throws IOException {
final List<FieldsEnum> fieldsEnums = new ArrayList<FieldsEnum>();
final List<ReaderUtil.Slice> fieldsSlices = new ArrayList<ReaderUtil.Slice>();
for(int i=0;i<subs.length;i++) {
fieldsEnums.add(subs[i].iterator());
fieldsSlices.add(subSlices[i]);
}
if (fieldsEnums.size() == 0) {
return FieldsEnum.EMPTY;
} else {
return new MultiFieldsEnum(this,
fieldsEnums.toArray(FieldsEnum.EMPTY_ARRAY),
fieldsSlices.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
}
}
// in lucene/core/src/java/org/apache/lucene/index/MultiFields.java
Override
public Terms terms(String field) throws IOException {
Terms result = terms.get(field);
if (result != null)
return result;
// Lazy init: first time this field is requested, we
// create & add to terms:
final List<Terms> subs2 = new ArrayList<Terms>();
final List<ReaderUtil.Slice> slices2 = new ArrayList<ReaderUtil.Slice>();
// Gather all sub-readers that share this field
for(int i=0;i<subs.length;i++) {
final Terms terms = subs[i].terms(field);
if (terms != null) {
subs2.add(terms);
slices2.add(subSlices[i]);
}
}
if (subs2.size() == 0) {
result = null;
// don't cache this case with an unbounded cache, since the number of fields that don't exist
// is unbounded.
} else {
result = new MultiTerms(subs2.toArray(Terms.EMPTY_ARRAY),
slices2.toArray(ReaderUtil.Slice.EMPTY_ARRAY));
terms.put(field, result);
}
return result;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiFields.java
public static long totalTermFreq(IndexReader r, String field, BytesRef text) throws IOException {
final Terms terms = getTerms(r, field);
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(text, true)) {
return termsEnum.totalTermFreq();
}
}
return 0;
}
// in lucene/core/src/java/org/apache/lucene/index/IndexReader.java
public final void decRef() throws IOException {
// only check refcount here (don't call ensureOpen()), so we can
// still close the reader if it was made invalid by a child:
if (refCount.get() <= 0) {
throw new AlreadyClosedException("this IndexReader is closed");
}
final int rc = refCount.decrementAndGet();
if (rc == 0) {
boolean success = false;
try {
doClose();
success = true;
} finally {
if (!success) {
// Put reference back on failure
refCount.incrementAndGet();
}
}
reportCloseToParentReaders();
notifyReaderClosedListeners();
} else if (rc < 0) {
throw new IllegalStateException("too many decRef calls: refCount is " + rc + " after decrement");
}
}
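// Example (added sketch, not part of the Lucene sources): the incRef()/decRef() discipline that
// decRef() above participates in. A component that borrows a reader takes its own reference and
// releases it in a finally block, so the reader is closed only when the last reference is dropped.
public static void useShared(IndexReader reader) throws IOException {
reader.incRef(); // take our own reference
try {
// ... use the reader: searches, term lookups, stored field loads ...
} finally {
reader.decRef(); // release; actually closes the reader if this was the last reference
}
}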
// in lucene/core/src/java/org/apache/lucene/index/IndexReader.java
public final Terms getTermVector(int docID, String field)
throws IOException {
Fields vectors = getTermVectors(docID);
if (vectors == null) {
return null;
}
return vectors.terms(field);
}
// in lucene/core/src/java/org/apache/lucene/index/IndexReader.java
public final Document document(int docID) throws CorruptIndexException, IOException {
final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
document(docID, visitor);
return visitor.getDocument();
}
// in lucene/core/src/java/org/apache/lucene/index/IndexReader.java
public final Document document(int docID, Set<String> fieldsToLoad) throws CorruptIndexException, IOException {
final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(fieldsToLoad);
document(docID, visitor);
return visitor.getDocument();
}
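// Example (added sketch, not part of the Lucene sources): loading a single stored field instead
// of the whole document, via the fieldsToLoad overload above. The field name "title" is a
// placeholder; imports assumed: java.util.Collections, org.apache.lucene.document.Document.
public static String loadTitle(IndexReader reader, int docID) throws IOException {
Document doc = reader.document(docID, Collections.singleton("title")); // only "title" is materialized
return doc.get("title"); // null if the document stores no "title" field
}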
// in lucene/core/src/java/org/apache/lucene/index/IndexReader.java
public final synchronized void close() throws IOException {
if (!closed) {
decRef();
closed = true;
}
}
// in lucene/core/src/java/org/apache/lucene/index/IndexReader.java
public final int docFreq(Term term) throws IOException {
return docFreq(term.field(), term.bytes());
}
// in lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
Override
public final int docFreq(String field, BytesRef term) throws IOException {
final Fields fields = fields();
if (fields == null) {
return 0;
}
final Terms terms = fields.terms(field);
if (terms == null) {
return 0;
}
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
return termsEnum.docFreq();
} else {
return 0;
}
}
// in lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
public final long totalTermFreq(String field, BytesRef term) throws IOException {
final Fields fields = fields();
if (fields == null) {
return 0;
}
final Terms terms = fields.terms(field);
if (terms == null) {
return 0;
}
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
return termsEnum.totalTermFreq();
} else {
return 0;
}
}
// in lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
public final Terms terms(String field) throws IOException {
final Fields fields = fields();
if (fields == null) {
return null;
}
return fields.terms(field);
}
// in lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
public final DocsEnum termDocsEnum(Bits liveDocs, String field, BytesRef term, boolean needsFreqs) throws IOException {
assert field != null;
assert term != null;
final Fields fields = fields();
if (fields != null) {
final Terms terms = fields.terms(field);
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
return termsEnum.docs(liveDocs, null, needsFreqs);
}
}
}
return null;
}
// in lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
public final DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term, boolean needsOffsets) throws IOException {
assert field != null;
assert term != null;
final Fields fields = fields();
if (fields != null) {
final Terms terms = fields.terms(field);
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
return termsEnum.docsAndPositions(liveDocs, null, needsOffsets);
}
}
}
return null;
}
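// Example (added sketch, not part of the Lucene sources): reading the positions of a term within
// each matching document through termPositionsEnum() above. Assumes the field was indexed with
// positions; field and term are supplied by the caller.
public static void printPositions(AtomicReader reader, String field, BytesRef term) throws IOException {
DocsAndPositionsEnum postings = reader.termPositionsEnum(reader.getLiveDocs(), field, term, false);
if (postings == null) {
return; // term absent, or the field was indexed without positions
}
while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
int freq = postings.freq();
System.out.print("doc " + postings.docID() + ":");
for (int i = 0; i < freq; i++) {
System.out.print(" " + postings.nextPosition());
}
System.out.println();
}
}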
// in lucene/core/src/java/org/apache/lucene/index/AtomicReader.java
public final long getUniqueTermCount() throws IOException {
final Fields fields = fields();
if (fields == null) {
return 0;
}
return fields.getUniqueTermCount();
}
// in lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
return seekCeil(text, useCache) == SeekStatus.FOUND;
}
// in lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
public final SeekStatus seekCeil(BytesRef text) throws IOException {
return seekCeil(text, true);
}
// in lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
public void seekExact(BytesRef term, TermState state) throws IOException {
if (!seekExact(term, true)) {
throw new IllegalArgumentException("term=" + term + " does not exist");
}
}
// in lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
public TermState termState() throws IOException {
return new TermState() {
@Override
public void copyFrom(TermState other) {
}
};
}
// in lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
Override
public TermState termState() throws IOException {
throw new IllegalStateException("this method should never be called");
}
// in lucene/core/src/java/org/apache/lucene/index/TermsEnum.java
Override
public void seekExact(BytesRef term, TermState state) throws IOException {
throw new IllegalStateException("this method should never be called");
}
// in lucene/core/src/java/org/apache/lucene/index/ParallelCompositeReader.java
private static IndexReader[] prepareSubReaders(CompositeReader[] readers, CompositeReader[] storedFieldsReaders) throws IOException {
if (readers.length == 0) {
if (storedFieldsReaders.length > 0)
throw new IllegalArgumentException("There must be at least one main reader if storedFieldsReaders are used.");
return new IndexReader[0];
} else {
final IndexReader[] firstSubReaders = readers[0].getSequentialSubReaders();
// check compatibility:
final int maxDoc = readers[0].maxDoc();
final int[] childMaxDoc = new int[firstSubReaders.length];
final boolean[] childAtomic = new boolean[firstSubReaders.length];
for (int i = 0; i < firstSubReaders.length; i++) {
childMaxDoc[i] = firstSubReaders[i].maxDoc();
childAtomic[i] = firstSubReaders[i] instanceof AtomicReader;
}
validate(readers, maxDoc, childMaxDoc, childAtomic);
validate(storedFieldsReaders, maxDoc, childMaxDoc, childAtomic);
// hierarchically build the same subreader structure as the first CompositeReader with Parallel*Readers:
final IndexReader[] subReaders = new IndexReader[firstSubReaders.length];
for (int i = 0; i < subReaders.length; i++) {
if (firstSubReaders[i] instanceof AtomicReader) {
final AtomicReader[] atomicSubs = new AtomicReader[readers.length];
for (int j = 0; j < readers.length; j++) {
atomicSubs[j] = (AtomicReader) readers[j].getSequentialSubReaders()[i];
}
final AtomicReader[] storedSubs = new AtomicReader[storedFieldsReaders.length];
for (int j = 0; j < storedFieldsReaders.length; j++) {
storedSubs[j] = (AtomicReader) storedFieldsReaders[j].getSequentialSubReaders()[i];
}
// we simply enable closing of subReaders, to prevent incRefs on subReaders
// -> for synthetic subReaders, close() is never
// called by our doClose()
subReaders[i] = new ParallelAtomicReader(true, atomicSubs, storedSubs);
} else {
assert firstSubReaders[i] instanceof CompositeReader;
final CompositeReader[] compositeSubs = new CompositeReader[readers.length];
for (int j = 0; j < readers.length; j++) {
compositeSubs[j] = (CompositeReader) readers[j].getSequentialSubReaders()[i];
}
final CompositeReader[] storedSubs = new CompositeReader[storedFieldsReaders.length];
for (int j = 0; j < storedFieldsReaders.length; j++) {
storedSubs[j] = (CompositeReader) storedFieldsReaders[j].getSequentialSubReaders()[i];
}
// we simply enable closing of subReaders, to prevent incRefs on subReaders
// -> for synthetic subReaders, close() is never called by our doClose()
subReaders[i] = new ParallelCompositeReader(true, compositeSubs, storedSubs);
}
}
return subReaders;
}
}
// in lucene/core/src/java/org/apache/lucene/index/ParallelCompositeReader.java
Override
protected synchronized void doClose() throws IOException {
IOException ioe = null;
for (final CompositeReader reader : completeReaderSet) {
try {
if (closeSubReaders) {
reader.close();
} else {
reader.decRef();
}
} catch (IOException e) {
if (ioe == null) ioe = e;
}
}
// throw the first exception
if (ioe != null) throw ioe;
}
// in lucene/core/src/java/org/apache/lucene/index/LogDocMergePolicy.java
Override
protected long size(SegmentInfoPerCommit info) throws IOException {
return sizeDocs(info);
}
// in lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java
Override
public void processFields(final IndexableField[] fields,
final int count) throws IOException {
fieldState.reset();
final boolean doInvert = consumer.start(fields, count);
for(int i=0;i<count;i++) {
final IndexableField field = fields[i];
final IndexableFieldType fieldType = field.fieldType();
// TODO FI: this should be "genericized" to querying
// consumer if it wants to see this particular field
// tokenized.
if (fieldType.indexed() && doInvert) {
// if the field omits norms, the boost cannot be indexed.
if (fieldType.omitNorms() && field.boost() != 1.0f) {
throw new UnsupportedOperationException("You cannot set an index-time boost: norms are omitted for field '" + field.name() + "'");
}
if (i > 0) {
fieldState.position += docState.analyzer == null ? 0 : docState.analyzer.getPositionIncrementGap(fieldInfo.name);
}
final TokenStream stream = field.tokenStream(docState.analyzer);
// reset the TokenStream to the first token
stream.reset();
try {
boolean hasMoreTokens = stream.incrementToken();
fieldState.attributeSource = stream;
OffsetAttribute offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
PositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.addAttribute(PositionIncrementAttribute.class);
consumer.start(field);
for (;;) {
// If we hit an exception in stream.next below
// (which is fairly common, eg if analyzer
// chokes on a given document), then it's
// non-aborting and (above) this one document
// will be marked as deleted, but still
// consume a docID
if (!hasMoreTokens) break;
final int posIncr = posIncrAttribute.getPositionIncrement();
int position = fieldState.position + posIncr;
if (position > 0) {
position--;
} else if (position < 0) {
throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
}
// position is legal, we can safely place it in fieldState now.
// not sure if anything will use fieldState after non-aborting exc...
fieldState.position = position;
if (posIncr == 0)
fieldState.numOverlap++;
boolean success = false;
try {
// If we hit an exception in here, we abort
// all buffered documents since the last
// flush, on the likelihood that the
// internal state of the consumer is now
// corrupt and should not be flushed to a
// new segment:
consumer.add();
success = true;
} finally {
if (!success) {
docState.docWriter.setAborting();
}
}
fieldState.length++;
fieldState.position++;
hasMoreTokens = stream.incrementToken();
}
// trigger streams to perform end-of-stream operations
stream.end();
fieldState.offset += offsetAttribute.endOffset();
} finally {
stream.close();
}
fieldState.offset += docState.analyzer == null ? 0 : docState.analyzer.getOffsetGap(field);
fieldState.boost *= field.boost();
}
// LUCENE-2387: don't hang onto the field, so GC can
// reclaim
fields[i] = null;
}
consumer.finish();
endConsumer.finish();
}
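// Example (added sketch, not part of the Lucene sources): consuming an analyzer's TokenStream with
// the same reset()/incrementToken()/end()/close() contract that processFields() follows above.
// The analyzer and field name are supplied by the caller; imports assumed: java.io.StringReader,
// java.util.*, org.apache.lucene.analysis.*, org.apache.lucene.analysis.tokenattributes.CharTermAttribute.
public static List<String> analyze(Analyzer analyzer, String field, String text) throws IOException {
List<String> tokens = new ArrayList<String>();
TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
stream.reset(); // position the stream before the first token
try {
while (stream.incrementToken()) {
tokens.add(termAtt.toString());
}
stream.end(); // record end-of-stream state such as the final offset
} finally {
stream.close();
}
return tokens;
}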
// in lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java
public void binaryField(FieldInfo fieldInfo, byte[] value, int offset, int length) throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java
public void intField(FieldInfo fieldInfo, int value) throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java
public void longField(FieldInfo fieldInfo, long value) throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java
public void floatField(FieldInfo fieldInfo, float value) throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/index/StoredFieldVisitor.java
public void doubleField(FieldInfo fieldInfo, double value) throws IOException {
}
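// Example (added sketch, not part of the Lucene sources): a custom StoredFieldVisitor that
// collects one stored string field and skips everything else, overriding only what it needs
// from the no-op defaults above. The needsField()/Status API is assumed from the 4.0-era
// StoredFieldVisitor; usage would be reader.document(docID, new SingleFieldVisitor("title")).
public class SingleFieldVisitor extends StoredFieldVisitor {
private final String fieldName;
private String value;
public SingleFieldVisitor(String fieldName) {
this.fieldName = fieldName;
}
@Override
public Status needsField(FieldInfo fieldInfo) {
// load only the wanted field; stop scanning once we have it
if (fieldInfo.name.equals(fieldName)) {
return Status.YES;
}
return value == null ? Status.NO : Status.STOP;
}
@Override
public void stringField(FieldInfo fieldInfo, String value) {
this.value = value;
}
public String getValue() {
return value;
}
}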
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java
public long sizeInBytes() throws IOException {
if (sizeInBytes == -1) {
final Collection<String> files = new HashSet<String>();
info.getCodec().liveDocsFormat().files(this, files);
long sum = info.sizeInBytes();
for (final String fileName : files()) {
sum += info.dir.fileLength(fileName);
}
sizeInBytes = sum;
}
return sizeInBytes;
}
// in lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java
public Collection<String> files() throws IOException {
Collection<String> files = new HashSet<String>(info.files());
// Must separately add any live docs files:
info.getCodec().liveDocsFormat().files(this, files);
return files;
}
// in lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java
Override
protected BytesRef nextSeekTerm(final BytesRef term) throws IOException {
//System.out.println("ATE.nextSeekTerm term=" + term);
if (term == null) {
assert seekBytesRef.length == 0;
// return the empty term, as it's valid
if (runAutomaton.isAccept(runAutomaton.getInitialState())) {
return seekBytesRef;
}
} else {
seekBytesRef.copyBytes(term);
}
// seek to the next possible string;
if (nextString()) {
return seekBytesRef; // reposition
} else {
return null; // no more possible strings can match
}
}
// in lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java
static DirectoryReader open(final Directory directory, final IndexCommit commit,
final int termInfosIndexDivisor) throws CorruptIndexException, IOException {
return (DirectoryReader) new SegmentInfos.FindSegmentsFile(directory) {
@Override
protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException {
SegmentInfos sis = new SegmentInfos();
sis.read(directory, segmentFileName);
final SegmentReader[] readers = new SegmentReader[sis.size()];
for (int i = sis.size()-1; i >= 0; i--) {
IOException prior = null;
boolean success = false;
try {
readers[i] = new SegmentReader(sis.info(i), termInfosIndexDivisor, IOContext.READ);
success = true;
} catch(IOException ex) {
prior = ex;
} finally {
if (!success)
IOUtils.closeWhileHandlingException(prior, readers);
}
}
return new StandardDirectoryReader(directory, readers, null, sis, termInfosIndexDivisor, false);
}
}.run(commit);
}
// in lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java
Override
protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException {
SegmentInfos sis = new SegmentInfos();
sis.read(directory, segmentFileName);
final SegmentReader[] readers = new SegmentReader[sis.size()];
for (int i = sis.size()-1; i >= 0; i--) {
IOException prior = null;
boolean success = false;
try {
readers[i] = new SegmentReader(sis.info(i), termInfosIndexDivisor, IOContext.READ);
success = true;
} catch(IOException ex) {
prior = ex;
} finally {
if (!success)
IOUtils.closeWhileHandlingException(prior, readers);
}
}
return new StandardDirectoryReader(directory, readers, null, sis, termInfosIndexDivisor, false);
}
// in lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java
static DirectoryReader open(IndexWriter writer, SegmentInfos infos, boolean applyAllDeletes) throws IOException {
// IndexWriter synchronizes externally before calling
// us, which ensures infos will not change; so there's
// no need to process segments in reverse order
final int numSegments = infos.size();
List<SegmentReader> readers = new ArrayList<SegmentReader>();
final Directory dir = writer.getDirectory();
final SegmentInfos segmentInfos = infos.clone();
int infosUpto = 0;
for (int i=0;i<numSegments;i++) {
IOException prior = null;
boolean success = false;
try {
final SegmentInfoPerCommit info = infos.info(i);
assert info.info.dir == dir;
final ReadersAndLiveDocs rld = writer.readerPool.get(info, true);
try {
final SegmentReader reader = rld.getReadOnlyClone(IOContext.READ);
if (reader.numDocs() > 0 || writer.getKeepFullyDeletedSegments()) {
// Steal the ref:
readers.add(reader);
infosUpto++;
} else {
reader.close();
segmentInfos.remove(infosUpto);
}
} finally {
writer.readerPool.release(rld);
}
success = true;
} catch(IOException ex) {
prior = ex;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(prior, readers);
}
}
}
return new StandardDirectoryReader(dir, readers.toArray(new SegmentReader[readers.size()]),
writer, segmentInfos, writer.getConfig().getReaderTermsIndexDivisor(), applyAllDeletes);
}
// in lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java
private static DirectoryReader open(Directory directory, IndexWriter writer, SegmentInfos infos, AtomicReader[] oldReaders,
int termInfosIndexDivisor) throws IOException {
// we put the old SegmentReaders in a map, that allows us
// to lookup a reader using its segment name
final Map<String,Integer> segmentReaders = new HashMap<String,Integer>();
if (oldReaders != null) {
// create a Map SegmentName->SegmentReader
for (int i = 0; i < oldReaders.length; i++) {
segmentReaders.put(((SegmentReader) oldReaders[i]).getSegmentName(), Integer.valueOf(i));
}
}
SegmentReader[] newReaders = new SegmentReader[infos.size()];
// remember which readers are shared between the old and the re-opened
// DirectoryReader - we have to incRef those readers
boolean[] readerShared = new boolean[infos.size()];
for (int i = infos.size() - 1; i>=0; i--) {
// find SegmentReader for this segment
Integer oldReaderIndex = segmentReaders.get(infos.info(i).info.name);
if (oldReaderIndex == null) {
// this is a new segment, no old SegmentReader can be reused
newReaders[i] = null;
} else {
// there is an old reader for this segment - we'll try to reopen it
newReaders[i] = (SegmentReader) oldReaders[oldReaderIndex.intValue()];
}
boolean success = false;
IOException prior = null;
try {
SegmentReader newReader;
if (newReaders[i] == null || infos.info(i).info.getUseCompoundFile() != newReaders[i].getSegmentInfo().info.getUseCompoundFile()) {
// this is a new reader; in case we hit an exception we can close it safely
newReader = new SegmentReader(infos.info(i), termInfosIndexDivisor, IOContext.READ);
readerShared[i] = false;
newReaders[i] = newReader;
} else {
if (newReaders[i].getSegmentInfo().getDelGen() == infos.info(i).getDelGen()) {
// No change; this reader will be shared between
// the old and the new one, so we must incRef
// it:
readerShared[i] = true;
newReaders[i].incRef();
} else {
readerShared[i] = false;
// Steal the ref returned by SegmentReader ctor:
assert infos.info(i).info.dir == newReaders[i].getSegmentInfo().info.dir;
assert infos.info(i).hasDeletions();
newReaders[i] = new SegmentReader(infos.info(i), newReaders[i].core, IOContext.READ);
}
}
success = true;
} catch (IOException ex) {
prior = ex;
} finally {
if (!success) {
for (i++; i < infos.size(); i++) {
if (newReaders[i] != null) {
try {
if (!readerShared[i]) {
// this is a new subReader that is not used by the old one,
// we can close it
newReaders[i].close();
} else {
// this subReader is also used by the old reader, so instead
// closing we must decRef it
newReaders[i].decRef();
}
} catch (IOException ex) {
if (prior == null) prior = ex;
}
}
}
}
// throw the first exception
if (prior != null) throw prior;
}
}
return new StandardDirectoryReader(directory, newReaders, writer, infos, termInfosIndexDivisor, false);
}
// in lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java
Override
protected DirectoryReader doOpenIfChanged() throws CorruptIndexException, IOException {
return doOpenIfChanged(null);
}
// in lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java
Override
protected DirectoryReader doOpenIfChanged(final IndexCommit commit) throws CorruptIndexException, IOException {
ensureOpen();
// If we were obtained by writer.getReader(), re-ask the
// writer to get a new reader.
if (writer != null) {
return doOpenFromWriter(commit);
} else {
return doOpenNoWriter(commit);
}
}
// in lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java
Override
protected DirectoryReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) throws CorruptIndexException, IOException {
ensureOpen();
if (writer == this.writer && applyAllDeletes == this.applyAllDeletes) {
return doOpenFromWriter(null);
} else {
return writer.getReader(applyAllDeletes);
}
}
// in lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java
private DirectoryReader doOpenFromWriter(IndexCommit commit) throws CorruptIndexException, IOException {
if (commit != null) {
throw new IllegalArgumentException("a reader obtained from IndexWriter.getReader() cannot currently accept a commit");
}
if (writer.nrtIsCurrent(segmentInfos)) {
return null;
}
DirectoryReader reader = writer.getReader(applyAllDeletes);
// If in fact no changes took place, return null:
if (reader.getVersion() == segmentInfos.getVersion()) {
reader.decRef();
return null;
}
return reader;
}
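// Example (added sketch, not part of the Lucene sources): the application-level refresh pattern
// that the doOpenIfChanged() machinery above serves. DirectoryReader.openIfChanged() returns null
// when the index is unchanged, otherwise a new reader; the caller remains responsible for closing
// the old reader.
public static DirectoryReader refresh(DirectoryReader current) throws IOException {
DirectoryReader newer = DirectoryReader.openIfChanged(current);
if (newer == null) {
return current; // nothing changed; keep using the existing reader
}
current.close(); // release the old reader now that the new one is open
return newer;
}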
// in lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java
private synchronized DirectoryReader doOpenNoWriter(IndexCommit commit) throws CorruptIndexException, IOException {
if (commit == null) {
if (isCurrent()) {
return null;
}
} else {
if (directory != commit.getDirectory()) {
throw new IOException("the specified commit does not match the specified Directory");
}
if (segmentInfos != null && commit.getSegmentsFileName().equals(segmentInfos.getSegmentsFileName())) {
return null;
}
}
return (DirectoryReader) new SegmentInfos.FindSegmentsFile(directory) {
@Override
protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException {
final SegmentInfos infos = new SegmentInfos();
infos.read(directory, segmentFileName);
return doOpenIfChanged(infos, null);
}
}.run(commit);
}
// in lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java
Override
protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException {
final SegmentInfos infos = new SegmentInfos();
infos.read(directory, segmentFileName);
return doOpenIfChanged(infos, null);
}
// in lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java
synchronized DirectoryReader doOpenIfChanged(SegmentInfos infos, IndexWriter writer) throws CorruptIndexException, IOException {
return StandardDirectoryReader.open(directory, writer, infos, subReaders, termInfosIndexDivisor);
}
// in lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java
Override
public boolean isCurrent() throws CorruptIndexException, IOException {
ensureOpen();
if (writer == null || writer.isClosed()) {
// Fully read the segments file: this ensures that it's
// completely written so that if
// IndexWriter.prepareCommit has been called (but not
// yet commit), then the reader will still see itself as
// current:
SegmentInfos sis = new SegmentInfos();
sis.read(directory);
// we loaded SegmentInfos from the directory
return sis.getVersion() == segmentInfos.getVersion();
} else {
return writer.nrtIsCurrent(segmentInfos);
}
}
// in lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java
Override
protected synchronized void doClose() throws IOException {
IOException ioe = null;
for (int i = 0; i < subReaders.length; i++) {
// try to close each reader, even if an exception is thrown
try {
subReaders[i].decRef();
} catch (IOException e) {
if (ioe == null) ioe = e;
}
}
if (writer != null) {
// Since we just closed, writer may now be able to
// delete unused files:
writer.deletePendingFiles();
}
// throw the first exception
if (ioe != null) throw ioe;
}
// in lucene/core/src/java/org/apache/lucene/index/StandardDirectoryReader.java
Override
public IndexCommit getIndexCommit() throws IOException {
ensureOpen();
return new ReaderCommit(segmentInfos, directory);
}
// in lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
protected final void setInitialSeekTerm(BytesRef term) throws IOException {
this.initialSeekTerm = term;
}
// in lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
protected BytesRef nextSeekTerm(final BytesRef currentTerm) throws IOException {
final BytesRef t = initialSeekTerm;
initialSeekTerm = null;
return t;
}
// in lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
Override
public BytesRef term() throws IOException {
return tenum.term();
}
// in lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
Override
public int docFreq() throws IOException {
return tenum.docFreq();
}
// in lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
Override
public long totalTermFreq() throws IOException {
return tenum.totalTermFreq();
}
// in lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
Override
public boolean seekExact(BytesRef term, boolean useCache) throws IOException {
throw new UnsupportedOperationException(getClass().getName()+" does not support seeking");
}
// in lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
Override
public SeekStatus seekCeil(BytesRef term, boolean useCache) throws IOException {
throw new UnsupportedOperationException(getClass().getName()+" does not support seeking");
}
// in lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
Override
public void seekExact(long ord) throws IOException {
throw new UnsupportedOperationException(getClass().getName()+" does not support seeking");
}
// in lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
Override
public long ord() throws IOException {
return tenum.ord();
}
// in lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
Override
public DocsEnum docs(Bits bits, DocsEnum reuse, boolean needsFreqs) throws IOException {
return tenum.docs(bits, reuse, needsFreqs);
}
// in lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
Override
public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
return tenum.docsAndPositions(bits, reuse, needsOffsets);
}
// in lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
Override
public void seekExact(BytesRef term, TermState state) throws IOException {
throw new UnsupportedOperationException(getClass().getName()+" does not support seeking");
}
// in lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
Override
public TermState termState() throws IOException {
assert tenum != null;
return tenum.termState();
}
// in lucene/core/src/java/org/apache/lucene/index/ByteSliceReader.java
public long writeTo(DataOutput out) throws IOException {
long size = 0;
while(true) {
if (limit + bufferOffset == endIndex) {
assert endIndex - bufferOffset >= upto;
out.writeBytes(buffer, upto, limit-upto);
size += limit-upto;
break;
} else {
out.writeBytes(buffer, upto, limit-upto);
size += limit-upto;
nextSlice();
}
}
return size;
}
// in lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
Override
public String next() throws IOException {
if (keys.hasNext()) {
currentField = keys.next();
} else {
currentField = null;
}
return currentField;
}
// in lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
Override
public Terms terms() throws IOException {
return fields.terms(currentField);
}
// in lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
void addField(String fieldName, Terms terms) throws IOException {
fields.put(fieldName, terms);
}
// in lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
Override
public FieldsEnum iterator() throws IOException {
return new ParallelFieldsEnum(this);
}
// in lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
Override
public Terms terms(String field) throws IOException {
return fields.get(field);
}
// in lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
Override
public int size() throws IOException {
return fields.size();
}
// in lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
Override
public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
ensureOpen();
for (final AtomicReader reader: storedFieldsReaders) {
reader.document(docID, visitor);
}
}
// in lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
Override
public Fields getTermVectors(int docID) throws IOException {
ensureOpen();
ParallelFields fields = null;
for (Map.Entry<String,AtomicReader> ent : tvFieldToReader.entrySet()) {
String fieldName = ent.getKey();
Terms vector = ent.getValue().getTermVector(docID, fieldName);
if (vector != null) {
if (fields == null) {
fields = new ParallelFields();
}
fields.addField(fieldName, vector);
}
}
return fields;
}
// in lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
Override
protected synchronized void doClose() throws IOException {
IOException ioe = null;
for (AtomicReader reader : completeReaderSet) {
try {
if (closeSubReaders) {
reader.close();
} else {
reader.decRef();
}
} catch (IOException e) {
if (ioe == null) ioe = e;
}
}
// throw the first exception
if (ioe != null) throw ioe;
}
// in lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
Override
public DocValues docValues(String field) throws IOException {
ensureOpen();
AtomicReader reader = fieldToReader.get(field);
return reader == null ? null : reader.docValues(field);
}
// in lucene/core/src/java/org/apache/lucene/index/ParallelAtomicReader.java
Override
public DocValues normValues(String field) throws IOException {
ensureOpen();
AtomicReader reader = fieldToReader.get(field);
return reader == null ? null : reader.normValues(field);
}
// in lucene/core/src/java/org/apache/lucene/index/SerialMergeScheduler.java
Override
synchronized public void merge(IndexWriter writer)
throws CorruptIndexException, IOException {
while(true) {
MergePolicy.OneMerge merge = writer.getNextMerge();
if (merge == null)
break;
writer.merge(merge);
}
}
// in lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java
public static AtomicReader wrap(IndexReader reader) throws IOException {
if (reader instanceof CompositeReader) {
return new SlowCompositeReaderWrapper((CompositeReader) reader);
} else {
assert reader instanceof AtomicReader;
return (AtomicReader) reader;
}
}
// in lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java
Override
public Fields fields() throws IOException {
ensureOpen();
return fields;
}
// in lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java
Override
public DocValues docValues(String field) throws IOException {
ensureOpen();
return MultiDocValues.getDocValues(in, field);
}
// in lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java
Override
public synchronized DocValues normValues(String field) throws IOException {
ensureOpen();
DocValues values = normsCache.get(field);
if (values == null) {
values = MultiDocValues.getNormDocValues(in, field);
normsCache.put(field, values);
}
return values;
}
// in lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java
Override
public Fields getTermVectors(int docID)
throws IOException {
ensureOpen();
return in.getTermVectors(docID);
}
// in lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java
Override
public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
ensureOpen();
in.document(docID, visitor);
}
// in lucene/core/src/java/org/apache/lucene/index/SlowCompositeReaderWrapper.java
Override
protected void doClose() throws IOException {
// TODO: as this is a wrapper, should we really close the delegate?
in.close();
}
// in lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java
Override
public void flush(SegmentWriteState state) throws IOException {
Map<String,DocFieldConsumerPerField> childFields = new HashMap<String,DocFieldConsumerPerField>();
Collection<DocFieldConsumerPerField> fields = fields();
for (DocFieldConsumerPerField f : fields) {
childFields.put(f.getFieldInfo().name, f);
}
fieldsWriter.flush(state);
consumer.flush(childFields, state);
for (DocValuesConsumerAndDocID consumer : docValues.values()) {
consumer.docValuesConsumer.finish(state.segmentInfo.getDocCount());
}
// close perDocConsumer during flush to ensure all files are flushed due to PerCodec CFS
IOUtils.close(perDocConsumer);
// Important to save after asking consumer to flush so
// consumer can alter the FieldInfo* if necessary. EG,
// FreqProxTermsWriter does this with
// FieldInfo.storePayload.
FieldInfosWriter infosWriter = codec.fieldInfosFormat().getFieldInfosWriter();
infosWriter.write(state.directory, state.segmentInfo.name, state.fieldInfos, IOContext.DEFAULT);
}
// in lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java
Override
public void processDocument(FieldInfos.Builder fieldInfos) throws IOException {
consumer.startDocument();
fieldsWriter.startDocument();
fieldCount = 0;
final int thisFieldGen = fieldGen++;
// Absorb any new fields first seen in this document.
// Also absorb any changes to fields we had already
// seen before (eg suddenly turning on norms or
// vectors, etc.):
for(IndexableField field : docState.doc) {
final String fieldName = field.name();
// Make sure we have a PerField allocated
final int hashPos = fieldName.hashCode() & hashMask;
DocFieldProcessorPerField fp = fieldHash[hashPos];
while(fp != null && !fp.fieldInfo.name.equals(fieldName)) {
fp = fp.next;
}
if (fp == null) {
// TODO FI: we need to genericize the "flags" that a
// field holds, and, how these flags are merged; it
// needs to be more "pluggable" such that if I want
// to have a new "thing" my Fields can do, I can
// easily add it
FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.fieldType());
fp = new DocFieldProcessorPerField(this, fi);
fp.next = fieldHash[hashPos];
fieldHash[hashPos] = fp;
totalFieldCount++;
if (totalFieldCount >= fieldHash.length/2) {
rehash();
}
} else {
fieldInfos.addOrUpdate(fp.fieldInfo.name, field.fieldType());
}
if (thisFieldGen != fp.lastGen) {
// First time we're seeing this field for this doc
fp.fieldCount = 0;
if (fieldCount == fields.length) {
final int newSize = fields.length*2;
DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize];
System.arraycopy(fields, 0, newArray, 0, fieldCount);
fields = newArray;
}
fields[fieldCount++] = fp;
fp.lastGen = thisFieldGen;
}
fp.addField(field);
if (field.fieldType().stored()) {
fieldsWriter.addField(field, fp.fieldInfo);
}
final DocValues.Type dvType = field.fieldType().docValueType();
if (dvType != null) {
docValuesConsumer(dvType, docState, fp.fieldInfo).add(docState.docID, field);
}
}
// If we are writing vectors then we must visit
// fields in sorted order so they are written in
// sorted order. TODO: we actually only need to
// sort the subset of fields that have vectors
// enabled; we could save [small amount of] CPU
// here.
ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp);
for(int i=0;i<fieldCount;i++) {
final DocFieldProcessorPerField perField = fields[i];
perField.consumer.processFields(perField.fields, perField.fieldCount);
}
if (docState.maxTermPrefix != null && docState.infoStream.isEnabled("IW")) {
docState.infoStream.message("IW", "WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
docState.maxTermPrefix = null;
}
}
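// Illustrative sketch (not from the Lucene/Solr sources): the per-field lookup in
// processDocument above is a power-of-two hash table with chained buckets, rehashed once it
// is half full. A stripped-down version of the same scheme; all names here are hypothetical.
import java.util.Objects;

final class FieldTableSketch {
  static final class Entry {
    final String name;
    Entry next;          // collision chain, newest entry at the head
    Entry(String name) { this.name = name; }
  }

  private Entry[] buckets = new Entry[2];  // always a power of two
  private int count;

  Entry getOrAdd(String name) {
    int pos = name.hashCode() & (buckets.length - 1);
    Entry e = buckets[pos];
    while (e != null && !e.name.equals(name)) {
      e = e.next;
    }
    if (e == null) {
      e = new Entry(Objects.requireNonNull(name));
      e.next = buckets[pos];
      buckets[pos] = e;
      if (++count >= buckets.length / 2) {
        rehash();                          // keep the load factor below 0.5
      }
    }
    return e;
  }

  private void rehash() {
    Entry[] old = buckets;
    buckets = new Entry[old.length * 2];
    for (Entry head : old) {
      while (head != null) {
        Entry next = head.next;
        int pos = head.name.hashCode() & (buckets.length - 1);
        head.next = buckets[pos];
        buckets[pos] = head;
        head = next;
      }
    }
  }
}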
// in lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java
Override
void finishDocument() throws IOException {
try {
fieldsWriter.finishDocument();
} finally {
consumer.finishDocument();
}
}
// in lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java
DocValuesConsumer docValuesConsumer(DocValues.Type valueType, DocState docState, FieldInfo fieldInfo)
throws IOException {
DocValuesConsumerAndDocID docValuesConsumerAndDocID = docValues.get(fieldInfo.name);
if (docValuesConsumerAndDocID != null) {
if (docState.docID == docValuesConsumerAndDocID.docID) {
throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" appears more than once in this document (only one value is allowed, per field)");
}
assert docValuesConsumerAndDocID.docID < docState.docID;
docValuesConsumerAndDocID.docID = docState.docID;
return docValuesConsumerAndDocID.docValuesConsumer;
}
if (perDocConsumer == null) {
PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState("");
perDocConsumer = docState.docWriter.codec.docValuesFormat().docsConsumer(perDocWriteState);
if (perDocConsumer == null) {
throw new IllegalStateException("codec=" + docState.docWriter.codec + " does not support docValues: from docValuesFormat().docsConsumer(...) returned null; field=" + fieldInfo.name);
}
}
DocValuesConsumer docValuesConsumer = perDocConsumer.addValuesField(valueType, fieldInfo);
assert fieldInfo.getDocValuesType() == null || fieldInfo.getDocValuesType() == valueType;
fieldInfo.setDocValuesType(valueType);
docValuesConsumerAndDocID = new DocValuesConsumerAndDocID(docValuesConsumer);
docValuesConsumerAndDocID.docID = docState.docID;
docValues.put(fieldInfo.name, docValuesConsumerAndDocID);
return docValuesConsumer;
}
// in lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java
public static List<SortedSourceSlice> buildSlices(
int[] docBases, MergeState.DocMap[] docMaps,
DocValues[] docValues, MergeContext ctx) throws IOException {
final List<SortedSourceSlice> slices = new ArrayList<SortedSourceSlice>();
for (int i = 0; i < docValues.length; i++) {
final SortedSourceSlice nextSlice;
final Source directSource;
if (docValues[i] != null
&& (directSource = docValues[i].getDirectSource()) != null) {
final SortedSourceSlice slice = new SortedSourceSlice(i, directSource
.asSortedSource(), docBases, ctx.getMergeDocCount(), ctx.docToEntry);
nextSlice = slice;
} else {
nextSlice = new SortedSourceSlice(i, new MissingValueSource(ctx),
docBases, ctx.getMergeDocCount(), ctx.docToEntry);
}
createOrdMapping(docBases, docMaps, nextSlice);
slices.add(nextSlice);
}
return Collections.unmodifiableList(slices);
}
// in lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java
public static int mergeRecords(MergeContext ctx, BytesRefConsumer consumer,
List<SortedSourceSlice> slices) throws IOException {
final RecordMerger merger = new RecordMerger(new MergeQueue(slices.size(),
ctx.comp), slices.toArray(new SortedSourceSlice[0]));
long[] offsets = ctx.offsets;
final boolean recordOffsets = offsets != null;
long offset = 0;
BytesRef currentMergedBytes;
merger.pushTop();
while (merger.queue.size() > 0) {
merger.pullTop();
currentMergedBytes = merger.current;
assert ctx.sizePerValues == -1 || ctx.sizePerValues == currentMergedBytes.length : "size: "
+ ctx.sizePerValues + " spare: " + currentMergedBytes.length;
offset += currentMergedBytes.length;
if (recordOffsets) {
if (merger.currentOrd >= offsets.length) {
offsets = ArrayUtil.grow(offsets, merger.currentOrd + 1);
}
offsets[merger.currentOrd] = offset;
}
consumer.consume(currentMergedBytes, merger.currentOrd, offset);
merger.pushTop();
}
ctx.offsets = offsets;
assert offsets == null || offsets[merger.currentOrd - 1] == offset;
return merger.currentOrd;
}
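// Illustrative sketch (not from the Lucene/Solr sources): mergeRecords above is a k-way merge
// driven by a priority queue of slices. This generic version shows the same pull/advance/re-add
// pattern with plain Java collections; unlike the real MergeQueue it does NOT collapse equal
// values from different slices into a single ord.
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;
import java.util.function.BiConsumer;

final class KWayMergeSketch {
  /** Feed the globally sorted stream of values from several sorted iterators to a consumer. */
  static <T> void merge(List<? extends Iterator<T>> sources, Comparator<? super T> comp,
                        BiConsumer<T, Integer> consumer) {
    final class Slot {
      final Iterator<T> it;
      T current;
      Slot(Iterator<T> it) { this.it = it; }
    }
    PriorityQueue<Slot> queue = new PriorityQueue<>((a, b) -> comp.compare(a.current, b.current));
    for (Iterator<T> it : sources) {
      Slot s = new Slot(it);
      if (it.hasNext()) {            // push each slice's first value
        s.current = it.next();
        queue.add(s);
      }
    }
    int ord = 0;                     // ordinal of the merged value, like currentOrd above
    while (!queue.isEmpty()) {
      Slot top = queue.poll();       // smallest current value across all slices
      consumer.accept(top.current, ord++);
      if (top.it.hasNext()) {        // advance that slice and re-insert it
        top.current = top.it.next();
        queue.add(top);
      }
    }
  }
}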
// in lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java
Override
public void consume(BytesRef currentMergedBytes, int ord, long offset) throws IOException {
datOut.writeBytes(currentMergedBytes.bytes, currentMergedBytes.offset,
currentMergedBytes.length);
}
// in lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java
private void pushTop() throws IOException {
// call next() on each top, and put back into queue
for (int i = 0; i < numTop; i++) {
top[i].current = top[i].next();
if (top[i].current != null) {
queue.add(top[i]);
}
}
currentOrd++;
numTop = 0;
}
// in lucene/core/src/java/org/apache/lucene/index/SortedBytesMergeUtils.java
public void writeOrds(PackedInts.Writer writer) throws IOException {
for (int i = docToOrdStart; i < docToOrdEnd; i++) {
final int mappedOrd = docIDToRelativeOrd[i];
assert mappedOrd < ordMapping.length;
assert ordMapping[mappedOrd] > 0 : "illegal mapping ord maps to an unreferenced value";
writer.add(ordMapping[mappedOrd] - 1);
}
}
// in lucene/core/src/java/org/apache/lucene/index/LogByteSizeMergePolicy.java
Override
protected long size(SegmentInfoPerCommit info) throws IOException {
return sizeBytes(info);
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public FieldsEnum iterator() throws IOException {
return in.iterator();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public Terms terms(String field) throws IOException {
return in.terms(field);
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public int size() throws IOException {
return in.size();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public long getUniqueTermCount() throws IOException {
return in.getUniqueTermCount();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
return in.iterator(reuse);
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public Comparator<BytesRef> getComparator() throws IOException {
return in.getComparator();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public long size() throws IOException {
return in.size();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public long getSumTotalTermFreq() throws IOException {
return in.getSumTotalTermFreq();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public long getSumDocFreq() throws IOException {
return in.getSumDocFreq();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public int getDocCount() throws IOException {
return in.getDocCount();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws java.io.IOException {
return in.intersect(automaton, bytes);
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public String next() throws IOException {
return in.next();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public Terms terms() throws IOException {
return in.terms();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
return in.seekExact(text, useCache);
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException {
return in.seekCeil(text, useCache);
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public void seekExact(long ord) throws IOException {
in.seekExact(ord);
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public BytesRef next() throws IOException {
return in.next();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public BytesRef term() throws IOException {
return in.term();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public long ord() throws IOException {
return in.ord();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public int docFreq() throws IOException {
return in.docFreq();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public long totalTermFreq() throws IOException {
return in.totalTermFreq();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
return in.docs(liveDocs, reuse, needsFreqs);
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
return in.docsAndPositions(liveDocs, reuse, needsOffsets);
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public void seekExact(BytesRef term, TermState state) throws IOException {
in.seekExact(term, state);
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public TermState termState() throws IOException {
return in.termState();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public int freq() throws IOException {
return in.freq();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public int nextDoc() throws IOException {
return in.nextDoc();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public int advance(int target) throws IOException {
return in.advance(target);
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public int freq() throws IOException {
return in.freq();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public int nextDoc() throws IOException {
return in.nextDoc();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public int advance(int target) throws IOException {
return in.advance(target);
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public int nextPosition() throws IOException {
return in.nextPosition();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public int startOffset() throws IOException {
return in.startOffset();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public int endOffset() throws IOException {
return in.endOffset();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public BytesRef getPayload() throws IOException {
return in.getPayload();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public Fields getTermVectors(int docID)
throws IOException {
ensureOpen();
return in.getTermVectors(docID);
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
ensureOpen();
in.document(docID, visitor);
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
protected void doClose() throws IOException {
in.close();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public Fields fields() throws IOException {
ensureOpen();
return in.fields();
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public DocValues docValues(String field) throws IOException {
ensureOpen();
return in.docValues(field);
}
// in lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
Override
public DocValues normValues(String field) throws IOException {
ensureOpen();
return in.normValues(field);
}
// in lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
Override
public void merge(IndexWriter writer) throws IOException {
assert !Thread.holdsLock(writer);
this.writer = writer;
initMergeThreadPriority();
dir = writer.getDirectory();
// First, quickly run through the newly proposed merges
// and add any orthogonal merges (ie a merge not
// involving segments already pending to be merged) to
// the queue. If we are way behind on merging, many of
// these newly proposed merges will likely already be
// registered.
if (verbose()) {
message("now merge");
message(" index: " + writer.segString());
}
// Iterate, pulling from the IndexWriter's queue of
// pending merges, until it's empty:
while (true) {
synchronized(this) {
long startStallTime = 0;
while (mergeThreadCount() >= 1+maxMergeCount) {
startStallTime = System.currentTimeMillis();
if (verbose()) {
message(" too many merges; stalling...");
}
try {
wait();
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
}
}
if (verbose()) {
if (startStallTime != 0) {
message(" stalled for " + (System.currentTimeMillis()-startStallTime) + " msec");
}
}
}
// TODO: we could be careful about which merges to do in
// the BG (eg maybe the "biggest" ones) vs FG, which
// merges to do first (the easiest ones?), etc.
MergePolicy.OneMerge merge = writer.getNextMerge();
if (merge == null) {
if (verbose()) {
message(" no more merges pending; now return");
}
return;
}
// We do this w/ the primary thread to keep
// deterministic assignment of segment names
writer.mergeInit(merge);
boolean success = false;
try {
synchronized(this) {
if (verbose()) {
message(" consider merge " + writer.segString(merge.segments));
}
// OK to spawn a new merge thread to handle this
// merge:
final MergeThread merger = getMergeThread(writer, merge);
mergeThreads.add(merger);
if (verbose()) {
message(" launch new thread [" + merger.getName() + "]");
}
merger.start();
// Must call this after starting the thread else
// the new thread is removed from mergeThreads
// (since it's not alive yet):
updateMergeThreads();
success = true;
}
} finally {
if (!success) {
writer.mergeFinish(merge);
}
}
}
}
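// Illustrative sketch (not from the Lucene/Solr sources): the synchronized stall loop in
// merge() above is a standard bounded-concurrency wait -- the submitter blocks in wait()
// until a running worker finishes and notifies. Hypothetical names, not the
// ConcurrentMergeScheduler API.
final class BoundedRunnerSketch {
  private final int maxRunning;
  private int running;

  BoundedRunnerSketch(int maxRunning) { this.maxRunning = maxRunning; }

  /** Blocks until a slot is free, then runs the task on a new daemon thread. */
  synchronized void submit(Runnable task) throws InterruptedException {
    while (running >= maxRunning) {
      wait();                       // stalled: too many tasks already in flight
    }
    running++;
    Thread t = new Thread(() -> {
      try {
        task.run();
      } finally {
        taskDone();                 // wake up a stalled submitter
      }
    });
    t.setDaemon(true);
    t.start();
  }

  private synchronized void taskDone() {
    running--;
    notifyAll();
  }
}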
// in lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
protected void doMerge(MergePolicy.OneMerge merge) throws IOException {
writer.merge(merge);
}
// in lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
protected synchronized MergeThread getMergeThread(IndexWriter writer, MergePolicy.OneMerge merge) throws IOException {
final MergeThread thread = new MergeThread(writer, merge);
thread.setThreadPriority(mergeThreadPriority);
thread.setDaemon(true);
thread.setName("Lucene Merge Thread #" + mergeThreadCount++);
return thread;
}
// in lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
Override
public void flush(Map<String,TermsHashConsumerPerField> fieldsToFlush, final SegmentWriteState state) throws IOException {
// Gather all FieldData's that have postings, across all
// ThreadStates
List<FreqProxTermsWriterPerField> allFields = new ArrayList<FreqProxTermsWriterPerField>();
for (TermsHashConsumerPerField f : fieldsToFlush.values()) {
final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) f;
if (perField.termsHashPerField.bytesHash.size() > 0) {
allFields.add(perField);
}
}
final int numAllFields = allFields.size();
// Sort by field name
CollectionUtil.quickSort(allFields);
final FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
boolean success = false;
try {
TermsHash termsHash = null;
/*
Current writer chain:
FieldsConsumer
-> IMPL: FormatPostingsTermsDictWriter
-> TermsConsumer
-> IMPL: FormatPostingsTermsDictWriter.TermsWriter
-> DocsConsumer
-> IMPL: FormatPostingsDocsWriter
-> PositionsConsumer
-> IMPL: FormatPostingsPositionsWriter
*/
for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) {
final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo;
final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber);
// If this field has postings then add them to the
// segment
fieldWriter.flush(fieldInfo.name, consumer, state);
TermsHashPerField perField = fieldWriter.termsHashPerField;
assert termsHash == null || termsHash == perField.termsHash;
termsHash = perField.termsHash;
int numPostings = perField.bytesHash.size();
perField.reset();
perField.shrinkHash(numPostings);
fieldWriter.reset();
}
if (termsHash != null) {
termsHash.reset();
}
success = true;
} finally {
if (success) {
IOUtils.close(consumer);
} else {
IOUtils.closeWhileHandlingException(consumer);
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
Override
void finishDocument(TermsHash termsHash) throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
Override
void startDocument() throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
public MultiDocsAndPositionsEnum reset(final EnumWithSlice[] subs, final int numSubs) throws IOException {
this.numSubs = numSubs;
this.subs = new EnumWithSlice[subs.length];
for(int i=0;i<subs.length;i++) {
this.subs[i] = new EnumWithSlice();
this.subs[i].docsAndPositionsEnum = subs[i].docsAndPositionsEnum;
this.subs[i].slice = subs[i].slice;
}
upto = -1;
current = null;
return this;
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
Override
public int freq() throws IOException {
return current.freq();
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
Override
public int advance(int target) throws IOException {
while(true) {
if (current != null) {
final int doc = current.advance(target-currentBase);
if (doc == NO_MORE_DOCS) {
current = null;
} else {
return this.doc = doc + currentBase;
}
} else if (upto == numSubs-1) {
return this.doc = NO_MORE_DOCS;
} else {
upto++;
current = subs[upto].docsAndPositionsEnum;
currentBase = subs[upto].slice.start;
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
Override
public int nextDoc() throws IOException {
while(true) {
if (current == null) {
if (upto == numSubs-1) {
return this.doc = NO_MORE_DOCS;
} else {
upto++;
current = subs[upto].docsAndPositionsEnum;
currentBase = subs[upto].slice.start;
}
}
final int doc = current.nextDoc();
if (doc != NO_MORE_DOCS) {
return this.doc = currentBase + doc;
} else {
current = null;
}
}
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
Override
public int nextPosition() throws IOException {
return current.nextPosition();
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
Override
public int startOffset() throws IOException {
return current.startOffset();
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
Override
public int endOffset() throws IOException {
return current.endOffset();
}
// in lucene/core/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
Override
public BytesRef getPayload() throws IOException {
return current.getPayload();
}
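// Illustrative sketch (not from the Lucene/Solr sources): the enum above remaps each
// sub-reader docID to a global docID by adding the slice's start base, exactly the
// "doc + currentBase" step in nextDoc()/advance(). A tiny standalone version of that remapping.
import java.util.Arrays;
import java.util.List;

final class BaseOffsetConcatSketch {
  /** Concatenate per-segment docID lists into global docIDs by adding each segment's base. */
  static int[] concat(List<int[]> perSegmentDocs, int[] bases) {
    int total = 0;
    for (int[] docs : perSegmentDocs) total += docs.length;
    int[] out = new int[total];
    int upto = 0;
    for (int seg = 0; seg < perSegmentDocs.size(); seg++) {
      for (int doc : perSegmentDocs.get(seg)) {
        out[upto++] = bases[seg] + doc;   // same remapping as "doc + currentBase"
      }
    }
    return out;
  }

  public static void main(String[] args) {
    // segment 0 has docs {0,3}; segment 1 (base 5) has docs {1,2}
    int[] merged = concat(Arrays.asList(new int[]{0, 3}, new int[]{1, 2}), new int[]{0, 5});
    System.out.println(Arrays.toString(merged));   // [0, 3, 6, 7]
  }
}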
// in lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
Override
void skippingLongTerm() throws IOException {}
// in lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
void flush(String fieldName, FieldsConsumer consumer, final SegmentWriteState state)
throws CorruptIndexException, IOException {
if (!fieldInfo.isIndexed()) {
return; // nothing to flush, don't bother the codec with the unindexed field
}
final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
final Comparator<BytesRef> termComp = termsConsumer.getComparator();
// CONFUSING: this.indexOptions holds the index options
// that were current when we first saw this field. But
// it's possible this has changed, eg when other
// documents are indexed that cause a "downgrade" of the
// IndexOptions. So we must decode the in-RAM buffer
// according to this.indexOptions, but then write the
// new segment to the directory according to
// currentFieldIndexOptions:
final IndexOptions currentFieldIndexOptions = fieldInfo.getIndexOptions();
assert currentFieldIndexOptions != null;
final boolean writeTermFreq = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
final boolean writePositions = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
final boolean writeOffsets = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
final boolean readTermFreq = this.hasFreq;
final boolean readPositions = this.hasProx;
final boolean readOffsets = this.hasOffsets;
//System.out.println("flush readTF=" + readTermFreq + " readPos=" + readPositions + " readOffs=" + readOffsets);
// Make sure FieldInfo.update is working correctly!:
assert !writeTermFreq || readTermFreq;
assert !writePositions || readPositions;
assert !writeOffsets || readOffsets;
assert !writeOffsets || writePositions;
final Map<Term,Integer> segDeletes;
if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
segDeletes = state.segDeletes.terms;
} else {
segDeletes = null;
}
final int[] termIDs = termsHashPerField.sortPostings(termComp);
final int numTerms = termsHashPerField.bytesHash.size();
final BytesRef text = new BytesRef();
final FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
final ByteSliceReader freq = new ByteSliceReader();
final ByteSliceReader prox = new ByteSliceReader();
FixedBitSet visitedDocs = new FixedBitSet(state.segmentInfo.getDocCount());
long sumTotalTermFreq = 0;
long sumDocFreq = 0;
for (int i = 0; i < numTerms; i++) {
final int termID = termIDs[i];
//System.out.println("term=" + termID);
// Get BytesRef
final int textStart = postings.textStarts[termID];
termsHashPerField.bytePool.setBytesRef(text, textStart);
termsHashPerField.initReader(freq, termID, 0);
if (readPositions || readOffsets) {
termsHashPerField.initReader(prox, termID, 1);
}
// TODO: really TermsHashPerField should take over most
// of this loop, including merge sort of terms from
// multiple threads and interacting with the
// TermsConsumer, only calling out to us (passing us the
// DocsConsumer) to handle delivery of docs/positions
final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text);
final int delDocLimit;
if (segDeletes != null) {
final Integer docIDUpto = segDeletes.get(new Term(fieldName, text));
if (docIDUpto != null) {
delDocLimit = docIDUpto;
} else {
delDocLimit = 0;
}
} else {
delDocLimit = 0;
}
// Now termStates has numToMerge FieldMergeStates
// which all share the same term. Now we must
// interleave the docID streams.
int numDocs = 0;
long totTF = 0;
int docID = 0;
while(true) {
//System.out.println(" cycle");
final int termDocFreq;
if (freq.eof()) {
if (postings.lastDocCodes[termID] != -1) {
// Return last doc
docID = postings.lastDocIDs[termID];
if (readTermFreq) {
termDocFreq = postings.docFreqs[termID];
} else {
termDocFreq = 0;
}
postings.lastDocCodes[termID] = -1;
} else {
// EOF
break;
}
} else {
final int code = freq.readVInt();
if (!readTermFreq) {
docID += code;
termDocFreq = 0;
} else {
docID += code >>> 1;
if ((code & 1) != 0) {
termDocFreq = 1;
} else {
termDocFreq = freq.readVInt();
}
}
assert docID != postings.lastDocIDs[termID];
}
numDocs++;
assert docID < state.segmentInfo.getDocCount(): "doc=" + docID + " maxDoc=" + state.segmentInfo.getDocCount();
// NOTE: we could check here if the docID was
// deleted, and skip it. However, this is somewhat
// dangerous because it can yield non-deterministic
// behavior since we may see the docID before we see
// the term that caused it to be deleted. This
// would mean some (but not all) of its postings may
// make it into the index, which'd alter the docFreq
// for those terms. We could fix this by doing two
// passes, ie first sweep marks all del docs, and
// 2nd sweep does the real flush, but I suspect
// that'd add too much time to flush.
visitedDocs.set(docID);
postingsConsumer.startDoc(docID, termDocFreq);
if (docID < delDocLimit) {
// Mark it deleted. TODO: we could also skip
// writing its postings; this would be
// deterministic (just for this Term's docs).
// TODO: can we do this reach-around in a cleaner way????
if (state.liveDocs == null) {
state.liveDocs = docState.docWriter.codec.liveDocsFormat().newLiveDocs(state.segmentInfo.getDocCount());
}
if (state.liveDocs.get(docID)) {
state.delCountOnFlush++;
state.liveDocs.clear(docID);
}
}
totTF += termDocFreq;
// Carefully copy over the prox + payload info,
// changing the format to match Lucene's segment
// format.
if (readPositions || readOffsets) {
// we did record positions (& maybe payload) and/or offsets
int position = 0;
int offset = 0;
for(int j=0;j<termDocFreq;j++) {
final BytesRef thisPayload;
if (readPositions) {
final int code = prox.readVInt();
position += code >>> 1;
if ((code & 1) != 0) {
// This position has a payload
final int payloadLength = prox.readVInt();
if (payload == null) {
payload = new BytesRef();
payload.bytes = new byte[payloadLength];
} else if (payload.bytes.length < payloadLength) {
payload.grow(payloadLength);
}
prox.readBytes(payload.bytes, 0, payloadLength);
payload.length = payloadLength;
thisPayload = payload;
} else {
thisPayload = null;
}
if (readOffsets) {
final int startOffset = offset + prox.readVInt();
final int endOffset = startOffset + prox.readVInt();
offset = startOffset;
if (writePositions) {
if (writeOffsets) {
postingsConsumer.addPosition(position, thisPayload, startOffset, endOffset);
} else {
postingsConsumer.addPosition(position, thisPayload, -1, -1);
}
}
} else if (writePositions) {
postingsConsumer.addPosition(position, thisPayload, -1, -1);
}
}
}
}
postingsConsumer.finishDoc();
}
termsConsumer.finishTerm(text, new TermStats(numDocs, totTF));
sumTotalTermFreq += totTF;
sumDocFreq += numDocs;
}
termsConsumer.finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
}
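// Illustrative sketch (not from the Lucene/Solr sources): the in-RAM postings decoded in
// flush() above pack each document as a delta-coded docID whose low bit flags freq == 1;
// only when that bit is clear does a separate freq value follow. A toy encode/decode pair
// for that packing, with plain ints standing in for the byte-slice VInt stream.
import java.util.ArrayList;
import java.util.List;

final class DocFreqPackingSketch {
  /** Encode (docID, freq) pairs, docIDs strictly increasing, as the code stream read above. */
  static List<Integer> encode(int[] docIDs, int[] freqs) {
    List<Integer> out = new ArrayList<>();
    int lastDoc = 0;
    for (int i = 0; i < docIDs.length; i++) {
      int delta = docIDs[i] - lastDoc;
      lastDoc = docIDs[i];
      if (freqs[i] == 1) {
        out.add((delta << 1) | 1);   // low bit set: freq is implicitly 1
      } else {
        out.add(delta << 1);         // low bit clear: freq follows as its own value
        out.add(freqs[i]);
      }
    }
    return out;
  }

  /** Decode the stream back into "doc=.. freq=.." strings, mirroring the flush loop. */
  static List<String> decode(List<Integer> codes) {
    List<String> out = new ArrayList<>();
    int docID = 0;
    for (int i = 0; i < codes.size(); i++) {
      int code = codes.get(i);
      docID += code >>> 1;
      int freq = ((code & 1) != 0) ? 1 : codes.get(++i);
      out.add("doc=" + docID + " freq=" + freq);
    }
    return out;
  }
}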
// in lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
Override
public MergeSpecification findMerges(SegmentInfos infos) throws IOException {
if (verbose()) {
message("findMerges: " + infos.size() + " segments");
}
if (infos.size() == 0) {
return null;
}
final Collection<SegmentInfoPerCommit> merging = writer.get().getMergingSegments();
final Collection<SegmentInfoPerCommit> toBeMerged = new HashSet<SegmentInfoPerCommit>();
final List<SegmentInfoPerCommit> infosSorted = new ArrayList<SegmentInfoPerCommit>(infos.asList());
Collections.sort(infosSorted, new SegmentByteSizeDescending());
// Compute total index bytes & print details about the index
long totIndexBytes = 0;
long minSegmentBytes = Long.MAX_VALUE;
for(SegmentInfoPerCommit info : infosSorted) {
final long segBytes = size(info);
if (verbose()) {
String extra = merging.contains(info) ? " [merging]" : "";
if (segBytes >= maxMergedSegmentBytes/2.0) {
extra += " [skip: too large]";
} else if (segBytes < floorSegmentBytes) {
extra += " [floored]";
}
message(" seg=" + writer.get().segString(info) + " size=" + String.format("%.3f", segBytes/1024/1024.) + " MB" + extra);
}
minSegmentBytes = Math.min(segBytes, minSegmentBytes);
// Accum total byte size
totIndexBytes += segBytes;
}
// If we have too-large segments, grace them out
// of the maxSegmentCount:
int tooBigCount = 0;
while (tooBigCount < infosSorted.size() && size(infosSorted.get(tooBigCount)) >= maxMergedSegmentBytes/2.0) {
totIndexBytes -= size(infosSorted.get(tooBigCount));
tooBigCount++;
}
minSegmentBytes = floorSize(minSegmentBytes);
// Compute max allowed segs in the index
long levelSize = minSegmentBytes;
long bytesLeft = totIndexBytes;
double allowedSegCount = 0;
while(true) {
final double segCountLevel = bytesLeft / (double) levelSize;
if (segCountLevel < segsPerTier) {
allowedSegCount += Math.ceil(segCountLevel);
break;
}
allowedSegCount += segsPerTier;
bytesLeft -= segsPerTier * levelSize;
levelSize *= maxMergeAtOnce;
}
int allowedSegCountInt = (int) allowedSegCount;
MergeSpecification spec = null;
// Cycle to possibly select more than one merge:
while(true) {
long mergingBytes = 0;
// Gather eligible segments for merging, ie segments
// not already being merged and not already picked (by
// prior iteration of this loop) for merging:
final List<SegmentInfoPerCommit> eligible = new ArrayList<SegmentInfoPerCommit>();
for(int idx = tooBigCount; idx<infosSorted.size(); idx++) {
final SegmentInfoPerCommit info = infosSorted.get(idx);
if (merging.contains(info)) {
mergingBytes += info.info.sizeInBytes();
} else if (!toBeMerged.contains(info)) {
eligible.add(info);
}
}
final boolean maxMergeIsRunning = mergingBytes >= maxMergedSegmentBytes;
if (verbose()) {
message(" allowedSegmentCount=" + allowedSegCountInt + " vs count=" + infosSorted.size() + " (eligible count=" + eligible.size() + ") tooBigCount=" + tooBigCount);
}
if (eligible.size() == 0) {
return spec;
}
if (eligible.size() >= allowedSegCountInt) {
// OK we are over budget -- find best merge!
MergeScore bestScore = null;
List<SegmentInfoPerCommit> best = null;
boolean bestTooLarge = false;
long bestMergeBytes = 0;
// Consider all merge starts:
for(int startIdx = 0;startIdx <= eligible.size()-maxMergeAtOnce; startIdx++) {
long totAfterMergeBytes = 0;
final List<SegmentInfoPerCommit> candidate = new ArrayList<SegmentInfoPerCommit>();
boolean hitTooLarge = false;
for(int idx = startIdx;idx<eligible.size() && candidate.size() < maxMergeAtOnce;idx++) {
final SegmentInfoPerCommit info = eligible.get(idx);
final long segBytes = size(info);
if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) {
hitTooLarge = true;
// NOTE: we continue, so that we can try
// "packing" smaller segments into this merge
// to see if we can get closer to the max
// size; this in general is not perfect since
// this is really "bin packing" and we'd have
// to try different permutations.
continue;
}
candidate.add(info);
totAfterMergeBytes += segBytes;
}
final MergeScore score = score(candidate, hitTooLarge, mergingBytes);
if (verbose()) {
message(" maybe=" + writer.get().segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format("%.3f MB", totAfterMergeBytes/1024./1024.));
}
// If we are already running a max sized merge
// (maxMergeIsRunning), don't allow another max
// sized merge to kick off:
if ((bestScore == null || score.getScore() < bestScore.getScore()) && (!hitTooLarge || !maxMergeIsRunning)) {
best = candidate;
bestScore = score;
bestTooLarge = hitTooLarge;
bestMergeBytes = totAfterMergeBytes;
}
}
if (best != null) {
if (spec == null) {
spec = new MergeSpecification();
}
final OneMerge merge = new OneMerge(best);
spec.add(merge);
for(SegmentInfoPerCommit info : merge.segments) {
toBeMerged.add(info);
}
if (verbose()) {
message(" add merge=" + writer.get().segString(merge.segments) + " size=" + String.format("%.3f MB", bestMergeBytes/1024./1024.) + " score=" + String.format("%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : ""));
}
} else {
return spec;
}
} else {
return spec;
}
}
}
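// Illustrative sketch (not from the Lucene/Solr sources): the allowedSegCount budget loop in
// findMerges above works tier by tier -- each tier may hold segsPerTier segments of the current
// level size, and each higher tier's level is maxMergeAtOnce times larger. A standalone version
// of just that calculation; the helper name and example values are hypothetical.
final class TierBudgetSketch {
  /** Number of segments the index may hold before a merge is selected. */
  static int allowedSegmentCount(long totalBytes, long minSegmentBytes,
                                 double segsPerTier, int maxMergeAtOnce) {
    long levelSize = minSegmentBytes;
    long bytesLeft = totalBytes;
    double allowed = 0;
    while (true) {
      double segCountLevel = bytesLeft / (double) levelSize;
      if (segCountLevel < segsPerTier) {
        allowed += Math.ceil(segCountLevel);   // last, partially filled tier
        break;
      }
      allowed += segsPerTier;                  // a full tier at this level
      bytesLeft -= segsPerTier * levelSize;
      levelSize *= maxMergeAtOnce;             // the next tier holds bigger segments
    }
    return (int) allowed;
  }

  public static void main(String[] args) {
    // e.g. a 10 GB index, 2 MB floor, 10 segments per tier, merge factor 10
    System.out.println(allowedSegmentCount(10L << 30, 2L << 20, 10.0, 10));
  }
}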
// in lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
protected MergeScore score(List<SegmentInfoPerCommit> candidate, boolean hitTooLarge, long mergingBytes) throws IOException {
long totBeforeMergeBytes = 0;
long totAfterMergeBytes = 0;
long totAfterMergeBytesFloored = 0;
for(SegmentInfoPerCommit info : candidate) {
final long segBytes = size(info);
totAfterMergeBytes += segBytes;
totAfterMergeBytesFloored += floorSize(segBytes);
totBeforeMergeBytes += info.info.sizeInBytes();
}
// Measure "skew" of the merge, which can range
// from 1.0/numSegsBeingMerged (good) to 1.0
// (poor):
final double skew;
if (hitTooLarge) {
// Pretend the merge has perfect skew; skew doesn't
// matter in this case because this merge will not
// "cascade" and so it cannot lead to N^2 merge cost
// over time:
skew = 1.0/maxMergeAtOnce;
} else {
skew = ((double) floorSize(size(candidate.get(0))))/totAfterMergeBytesFloored;
}
// Strongly favor merges with less skew (smaller
// mergeScore is better):
double mergeScore = skew;
// Gently favor smaller merges over bigger ones. We
// don't want to make this exponent too large else we
// can end up doing poor merges of small segments in
// order to avoid the large merges:
mergeScore *= Math.pow(totAfterMergeBytes, 0.05);
// Strongly favor merges that reclaim deletes:
final double nonDelRatio = ((double) totAfterMergeBytes)/totBeforeMergeBytes;
mergeScore *= Math.pow(nonDelRatio, reclaimDeletesWeight);
final double finalMergeScore = mergeScore;
return new MergeScore() {
@Override
public double getScore() {
return finalMergeScore;
}
@Override
public String getExplanation() {
return "skew=" + String.format("%.3f", skew) + " nonDelRatio=" + String.format("%.3f", nonDelRatio);
}
};
}
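// Illustrative sketch (not from the Lucene/Solr sources): putting the comments in score()
// above together, the score is skew * totAfterMergeBytes^0.05 * nonDelRatio^reclaimDeletesWeight,
// where lower is better. A tiny worked example under assumed segment sizes; it ignores the
// floor applied to tiny segments and is not an actual policy run.
final class MergeScoreSketch {
  public static void main(String[] args) {
    // Hypothetical candidate: live sizes after deletes, and raw sizes including deleted docs.
    long[] afterBytes  = {100L << 20, 90L << 20, 80L << 20};   // ~100, 90, 80 MB live
    long[] beforeBytes = {120L << 20, 95L << 20, 85L << 20};
    double reclaimDeletesWeight = 2.0;                         // assumed weight

    long totAfter = 0, totBefore = 0;
    for (int i = 0; i < afterBytes.length; i++) {
      totAfter += afterBytes[i];
      totBefore += beforeBytes[i];
    }
    // Skew: share of the merged size contributed by the largest segment
    // (1/N is perfectly balanced, 1.0 is one dominant segment).
    double skew = (double) afterBytes[0] / totAfter;
    double nonDelRatio = (double) totAfter / totBefore;

    double mergeScore = skew;
    mergeScore *= Math.pow(totAfter, 0.05);                    // gently favor smaller merges
    mergeScore *= Math.pow(nonDelRatio, reclaimDeletesWeight); // strongly favor reclaiming deletes
    System.out.printf("skew=%.3f nonDelRatio=%.3f score=%.3f%n", skew, nonDelRatio, mergeScore);
  }
}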
// in lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
Override
public MergeSpecification findForcedMerges(SegmentInfos infos, int maxSegmentCount, Map<SegmentInfoPerCommit,Boolean> segmentsToMerge) throws IOException {
if (verbose()) {
message("findForcedMerges maxSegmentCount=" + maxSegmentCount + " infos=" + writer.get().segString(infos) + " segmentsToMerge=" + segmentsToMerge);
}
List<SegmentInfoPerCommit> eligible = new ArrayList<SegmentInfoPerCommit>();
boolean forceMergeRunning = false;
final Collection<SegmentInfoPerCommit> merging = writer.get().getMergingSegments();
boolean segmentIsOriginal = false;
for(SegmentInfoPerCommit info : infos) {
final Boolean isOriginal = segmentsToMerge.get(info);
if (isOriginal != null) {
segmentIsOriginal = isOriginal;
if (!merging.contains(info)) {
eligible.add(info);
} else {
forceMergeRunning = true;
}
}
}
if (eligible.size() == 0) {
return null;
}
if ((maxSegmentCount > 1 && eligible.size() <= maxSegmentCount) ||
(maxSegmentCount == 1 && eligible.size() == 1 && (!segmentIsOriginal || isMerged(eligible.get(0))))) {
if (verbose()) {
message("already merged");
}
return null;
}
Collections.sort(eligible, new SegmentByteSizeDescending());
if (verbose()) {
message("eligible=" + eligible);
message("forceMergeRunning=" + forceMergeRunning);
}
int end = eligible.size();
MergeSpecification spec = null;
// Do full merges, first, backwards:
while(end >= maxMergeAtOnceExplicit + maxSegmentCount - 1) {
if (spec == null) {
spec = new MergeSpecification();
}
final OneMerge merge = new OneMerge(eligible.subList(end-maxMergeAtOnceExplicit, end));
if (verbose()) {
message("add merge=" + writer.get().segString(merge.segments));
}
spec.add(merge);
end -= maxMergeAtOnceExplicit;
}
if (spec == null && !forceMergeRunning) {
// Do final merge
final int numToMerge = end - maxSegmentCount + 1;
final OneMerge merge = new OneMerge(eligible.subList(end-numToMerge, end));
if (verbose()) {
message("add final merge=" + merge.segString(writer.get().getDirectory()));
}
spec = new MergeSpecification();
spec.add(merge);
}
return spec;
}
// in lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
Override
public MergeSpecification findForcedDeletesMerges(SegmentInfos infos)
throws CorruptIndexException, IOException {
if (verbose()) {
message("findForcedDeletesMerges infos=" + writer.get().segString(infos) + " forceMergeDeletesPctAllowed=" + forceMergeDeletesPctAllowed);
}
final List<SegmentInfoPerCommit> eligible = new ArrayList<SegmentInfoPerCommit>();
final Collection<SegmentInfoPerCommit> merging = writer.get().getMergingSegments();
for(SegmentInfoPerCommit info : infos) {
double pctDeletes = 100.*((double) writer.get().numDeletedDocs(info))/info.info.getDocCount();
if (pctDeletes > forceMergeDeletesPctAllowed && !merging.contains(info)) {
eligible.add(info);
}
}
if (eligible.size() == 0) {
return null;
}
Collections.sort(eligible, new SegmentByteSizeDescending());
if (verbose()) {
message("eligible=" + eligible);
}
int start = 0;
MergeSpecification spec = null;
while(start < eligible.size()) {
// Don't enforce max merged size here: app is explicitly
// calling forceMergeDeletes, and knows this may take a
// long time / produce big segments (like forceMerge):
final int end = Math.min(start + maxMergeAtOnceExplicit, eligible.size());
if (spec == null) {
spec = new MergeSpecification();
}
final OneMerge merge = new OneMerge(eligible.subList(start, end));
if (verbose()) {
message("add merge=" + writer.get().segString(merge.segments));
}
spec.add(merge);
start = end;
}
return spec;
}
// in lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
Override
public boolean useCompoundFile(SegmentInfos infos, SegmentInfoPerCommit mergedInfo) throws IOException {
final boolean doCFS;
if (!useCompoundFile) {
doCFS = false;
} else if (noCFSRatio == 1.0) {
doCFS = true;
} else {
long totalSize = 0;
for (SegmentInfoPerCommit info : infos) {
totalSize += size(info);
}
doCFS = size(mergedInfo) <= noCFSRatio * totalSize;
}
return doCFS;
}
// in lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
private boolean isMerged(SegmentInfoPerCommit info)
throws IOException {
IndexWriter w = writer.get();
assert w != null;
boolean hasDeletions = w.numDeletedDocs(info) > 0;
return !hasDeletions &&
info.info.dir == w.getDirectory() &&
(info.info.getUseCompoundFile() == useCompoundFile || noCFSRatio < 1.0);
}
// in lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
private long size(SegmentInfoPerCommit info) throws IOException {
final long byteSize = info.info.sizeInBytes();
final int delCount = writer.get().numDeletedDocs(info);
final double delRatio = (info.info.getDocCount() <= 0 ? 0.0f : ((double)delCount / (double)info.info.getDocCount()));
assert delRatio <= 1.0;
return (long) (byteSize * (1.0-delRatio));
}
// in lucene/core/src/java/org/apache/lucene/util/IOUtils.java
public static <E extends Exception> void closeWhileHandlingException(E priorException, Closeable... objects) throws E, IOException {
Throwable th = null;
for (Closeable object : objects) {
try {
if (object != null) {
object.close();
}
} catch (Throwable t) {
addSuppressed((priorException == null) ? th : priorException, t);
if (th == null) {
th = t;
}
}
}
if (priorException != null) {
throw priorException;
} else if (th != null) {
if (th instanceof IOException) throw (IOException) th;
if (th instanceof RuntimeException) throw (RuntimeException) th;
if (th instanceof Error) throw (Error) th;
throw new RuntimeException(th);
}
}
// in lucene/core/src/java/org/apache/lucene/util/IOUtils.java
public static <E extends Exception> void closeWhileHandlingException(E priorException, Iterable<? extends Closeable> objects) throws E, IOException {
Throwable th = null;
for (Closeable object : objects) {
try {
if (object != null) {
object.close();
}
} catch (Throwable t) {
addSuppressed((priorException == null) ? th : priorException, t);
if (th == null) {
th = t;
}
}
}
if (priorException != null) {
throw priorException;
} else if (th != null) {
if (th instanceof IOException) throw (IOException) th;
if (th instanceof RuntimeException) throw (RuntimeException) th;
if (th instanceof Error) throw (Error) th;
throw new RuntimeException(th);
}
}
// in lucene/core/src/java/org/apache/lucene/util/IOUtils.java
public static void close(Closeable... objects) throws IOException {
Throwable th = null;
for (Closeable object : objects) {
try {
if (object != null) {
object.close();
}
} catch (Throwable t) {
addSuppressed(th, t);
if (th == null) {
th = t;
}
}
}
if (th != null) {
if (th instanceof IOException) throw (IOException) th;
if (th instanceof RuntimeException) throw (RuntimeException) th;
if (th instanceof Error) throw (Error) th;
throw new RuntimeException(th);
}
}
// in lucene/core/src/java/org/apache/lucene/util/IOUtils.java
public static void close(Iterable<? extends Closeable> objects) throws IOException {
Throwable th = null;
for (Closeable object : objects) {
try {
if (object != null) {
object.close();
}
} catch (Throwable t) {
addSuppressed(th, t);
if (th == null) {
th = t;
}
}
}
if (th != null) {
if (th instanceof IOException) throw (IOException) th;
if (th instanceof RuntimeException) throw (RuntimeException) th;
if (th instanceof Error) throw (Error) th;
throw new RuntimeException(th);
}
}
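// Illustrative sketch (not from the Lucene/Solr sources): a typical caller pairs the close
// variants above -- on the success path any close failure propagates, on the error path the
// original exception wins -- the same shape as the finally block in FreqProxTermsWriter.flush
// earlier. Assumes the no-prior-exception closeWhileHandlingException overload used there.
import java.io.IOException;
import java.io.OutputStream;
import org.apache.lucene.util.IOUtils;

final class WriteThenCloseSketch {
  static void writeAll(OutputStream out, byte[] payload) throws IOException {
    boolean success = false;
    try {
      out.write(payload);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(out);                        // propagate close failures normally
      } else {
        IOUtils.closeWhileHandlingException(out);  // suppress them; the write failure matters more
      }
    }
  }
}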
// in lucene/core/src/java/org/apache/lucene/util/IOUtils.java
public static Reader getDecodingReader(File file, Charset charSet) throws IOException {
FileInputStream stream = null;
boolean success = false;
try {
stream = new FileInputStream(file);
final Reader reader = getDecodingReader(stream, charSet);
success = true;
return reader;
} finally {
if (!success) {
IOUtils.close(stream);
}
}
}
// in lucene/core/src/java/org/apache/lucene/util/IOUtils.java
public static Reader getDecodingReader(Class<?> clazz, String resource, Charset charSet) throws IOException {
InputStream stream = null;
boolean success = false;
try {
stream = clazz.getResourceAsStream(resource);
stream = clazz.getResourceAsStream(resource);
final Reader reader = getDecodingReader(stream, charSet);
success = true;
return reader;
} finally {
if (!success) {
IOUtils.close(stream);
}
}
}
// in lucene/core/src/java/org/apache/lucene/util/TwoPhaseCommitTool.java
public void prepareCommit() throws IOException {
prepareCommit(commitData);
}
// in lucene/core/src/java/org/apache/lucene/util/TwoPhaseCommitTool.java
public void prepareCommit(Map<String, String> commitData) throws IOException {
tpc.prepareCommit(this.commitData);
}
// in lucene/core/src/java/org/apache/lucene/util/TwoPhaseCommitTool.java
public void commit() throws IOException {
commit(commitData);
}
// in lucene/core/src/java/org/apache/lucene/util/TwoPhaseCommitTool.java
public void commit(Map<String, String> commitData) throws IOException {
tpc.commit(this.commitData);
}
// in lucene/core/src/java/org/apache/lucene/util/TwoPhaseCommitTool.java
public void rollback() throws IOException {
tpc.rollback();
}
// in lucene/core/src/java/org/apache/lucene/util/PrintStreamInfoStream.java
Override
public void close() throws IOException {
if (!isSystemStream()) {
stream.close();
}
}
// in lucene/core/src/java/org/apache/lucene/util/ScorerDocQueue.java
public final float topScore() throws IOException {
// assert size > 0;
return topHSD.scorer.score();
}
// in lucene/core/src/java/org/apache/lucene/util/ScorerDocQueue.java
public final boolean topNextAndAdjustElsePop() throws IOException {
return checkAdjustElsePop(topHSD.scorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
}
// in lucene/core/src/java/org/apache/lucene/util/ScorerDocQueue.java
public final boolean topSkipToAndAdjustElsePop(int target) throws IOException {
return checkAdjustElsePop(topHSD.scorer.advance(target) != DocIdSetIterator.NO_MORE_DOCS);
}
// in lucene/core/src/java/org/apache/lucene/util/CodecUtil.java
public static void writeHeader(DataOutput out, String codec, int version)
throws IOException {
BytesRef bytes = new BytesRef(codec);
if (bytes.length != codec.length() || bytes.length >= 128) {
throw new IllegalArgumentException("codec must be simple ASCII, less than 128 characters in length [got " + codec + "]");
}
out.writeInt(CODEC_MAGIC);
out.writeString(codec);
out.writeInt(version);
}
// in lucene/core/src/java/org/apache/lucene/util/CodecUtil.java
public static int checkHeader(DataInput in, String codec, int minVersion, int maxVersion)
throws IOException {
// Safety to guard against reading a bogus string:
final int actualHeader = in.readInt();
if (actualHeader != CODEC_MAGIC) {
throw new CorruptIndexException("codec header mismatch: actual header=" + actualHeader + " vs expected header=" + CODEC_MAGIC + " (resource: " + in + ")");
}
return checkHeaderNoMagic(in, codec, minVersion, maxVersion);
}
// in lucene/core/src/java/org/apache/lucene/util/CodecUtil.java
public static int checkHeaderNoMagic(DataInput in, String codec, int minVersion, int maxVersion) throws IOException {
final String actualCodec = in.readString();
if (!actualCodec.equals(codec)) {
throw new CorruptIndexException("codec mismatch: actual codec=" + actualCodec + " vs expected codec=" + codec + " (resource: " + in + ")");
}
final int actualVersion = in.readInt();
if (actualVersion < minVersion) {
throw new IndexFormatTooOldException(in, actualVersion, minVersion, maxVersion);
}
if (actualVersion > maxVersion) {
throw new IndexFormatTooNewException(in, actualVersion, minVersion, maxVersion);
}
return actualVersion;
}
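The header layout is just a magic int, the codec name, and a version int; checkHeader validates each in turn and rejects versions outside [minVersion, maxVersion]. A simplified standalone round-trip over JDK streams (writeUTF stands in for Lucene's own length-prefixed string encoding; the magic constant is copied here for illustration only):
// editor's sketch (not from the Lucene sources): simplified codec-header round trip
import java.io.*;

final class HeaderSketch {
  static final int CODEC_MAGIC = 0x3fd76c17;   // illustrative; matches Lucene's constant

  static void writeHeader(DataOutputStream out, String codec, int version) throws IOException {
    out.writeInt(CODEC_MAGIC);
    out.writeUTF(codec);                       // simplified string encoding
    out.writeInt(version);
  }

  static int checkHeader(DataInputStream in, String codec, int minVersion, int maxVersion) throws IOException {
    if (in.readInt() != CODEC_MAGIC) {
      throw new IOException("codec header mismatch");
    }
    String actualCodec = in.readUTF();
    if (!actualCodec.equals(codec)) {
      throw new IOException("codec mismatch: " + actualCodec + " vs " + codec);
    }
    int actualVersion = in.readInt();
    if (actualVersion < minVersion || actualVersion > maxVersion) {
      throw new IOException("unsupported version: " + actualVersion);
    }
    return actualVersion;
  }
}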
// in lucene/core/src/java/org/apache/lucene/util/TermContext.java
public static TermContext build(IndexReaderContext context, Term term, boolean cache)
throws IOException {
assert context != null && context.isTopLevel;
final String field = term.field();
final BytesRef bytes = term.bytes();
final TermContext perReaderTermState = new TermContext(context);
final AtomicReaderContext[] leaves = context.leaves();
//if (DEBUG) System.out.println("prts.build term=" + term);
for (int i = 0; i < leaves.length; i++) {
//if (DEBUG) System.out.println(" r=" + leaves[i].reader);
final Fields fields = leaves[i].reader().fields();
if (fields != null) {
final Terms terms = fields.terms(field);
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(bytes, cache)) {
final TermState termState = termsEnum.termState();
//if (DEBUG) System.out.println(" found");
perReaderTermState.register(termState, leaves[i].ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
}
}
}
}
return perReaderTermState;
}
// in lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
@Override
public void close() throws IOException {
in.close();
}
// in lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
public static Reader getReader(DataInput in) throws IOException {
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
final int format = in.readVInt();
switch (format) {
case PACKED:
switch (bitsPerValue) {
case 8:
return new Direct8(in, valueCount);
case 16:
return new Direct16(in, valueCount);
case 24:
return new Packed8ThreeBlocks(in, valueCount);
case 32:
return new Direct32(in, valueCount);
case 48:
return new Packed16ThreeBlocks(in, valueCount);
case 64:
return new Direct64(in, valueCount);
default:
return new Packed64(in, valueCount, bitsPerValue);
}
case PACKED_SINGLE_BLOCK:
return Packed64SingleBlock.create(in, valueCount, bitsPerValue);
default:
throw new AssertionError("Unknown Writer format: " + format);
}
}
// in lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
public static ReaderIterator getReaderIterator(IndexInput in) throws IOException {
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
final int format = in.readVInt();
switch (format) {
case PACKED:
return new PackedReaderIterator(valueCount, bitsPerValue, in);
case PACKED_SINGLE_BLOCK:
return new Packed64SingleBlockReaderIterator(valueCount, bitsPerValue, in);
default:
throw new AssertionError("Unknown Writer format: " + format);
}
}
// in lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
public static Reader getDirectReader(IndexInput in) throws IOException {
CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_START);
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64: "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
final int format = in.readVInt();
switch (format) {
case PACKED:
return new DirectPackedReader(bitsPerValue, valueCount, in);
case PACKED_SINGLE_BLOCK:
return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
default:
throw new AssertionError("Unknown Writer format: " + format);
}
}
// in lucene/core/src/java/org/apache/lucene/util/packed/PackedInts.java
public static Writer getWriter(DataOutput out,
int valueCount, int bitsPerValue, float acceptableOverheadRatio)
throws IOException {
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
return new PackedWriter(out, valueCount, 8);
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
return new PackedWriter(out, valueCount, 16);
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
return new PackedWriter(out, valueCount, 32);
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
return new PackedWriter(out, valueCount, 64);
} else if (valueCount <= Packed8ThreeBlocks.MAX_SIZE && bitsPerValue <= 24 && maxBitsPerValue >= 24) {
return new PackedWriter(out, valueCount, 24);
} else if (valueCount <= Packed16ThreeBlocks.MAX_SIZE && bitsPerValue <= 48 && maxBitsPerValue >= 48) {
return new PackedWriter(out, valueCount, bitsPerValue);
} else {
for (int bpv = bitsPerValue; bpv <= maxBitsPerValue; ++bpv) {
if (Packed64SingleBlock.isSupported(bpv)) {
float overhead = Packed64SingleBlock.overheadPerValue(bpv);
float acceptableOverhead = acceptableOverheadPerValue + bitsPerValue - bpv;
if (overhead <= acceptableOverhead) {
return new Packed64SingleBlockWriter(out, valueCount, bpv);
}
}
}
return new PackedWriter(out, valueCount, bitsPerValue);
}
}
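getWriter rounds bitsPerValue up to a byte-aligned width (8, 16, 24, 32, 48 or 64 bits) whenever the wasted bits fit within acceptableOverheadRatio, and otherwise falls back to exact packing. A small worked example of that arithmetic, using the same formula as the excerpt (the size-capped 24/48 three-block cases are omitted for brevity):
// editor's sketch (not from the Lucene sources): the overhead-ratio rounding decision
final class OverheadMath {
  // For bitsPerValue = 5 and acceptableOverheadRatio = 0.5f:
  //   acceptableOverheadPerValue = 0.5 * 5 = 2.5 bits
  //   maxBitsPerValue            = 5 + (int) 2.5 = 7, which is < 8,
  //   so values stay 5 bits wide (exact packing).
  // With acceptableOverheadRatio = 1.0f: maxBitsPerValue = 10 >= 8, so whole bytes are used.
  static int roundedBitsPerValue(int bitsPerValue, float acceptableOverheadRatio) {
    int maxBitsPerValue = bitsPerValue + (int) (acceptableOverheadRatio * bitsPerValue);
    for (int aligned : new int[] {8, 16, 32, 64}) {
      if (bitsPerValue <= aligned && maxBitsPerValue >= aligned) {
        return aligned;           // the aligned width fits inside the overhead budget
      }
    }
    return bitsPerValue;          // otherwise keep the exact width
  }
}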
// in lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
public long next() throws IOException {
if (pendingBitsLeft == 0) {
pending = in.readLong();
pendingBitsLeft = 64;
}
final long result;
if (pendingBitsLeft >= bitsPerValue) { // not split
result = (pending >> (pendingBitsLeft - bitsPerValue)) & masks[bitsPerValue-1];
pendingBitsLeft -= bitsPerValue;
} else { // split
final int bits1 = bitsPerValue - pendingBitsLeft;
final long result1 = (pending & masks[pendingBitsLeft-1]) << bits1;
pending = in.readLong();
final long result2 = (pending >> (64 - bits1)) & masks[bits1-1];
pendingBitsLeft = 64 + pendingBitsLeft - bitsPerValue;
result = result1 | result2;
}
++position;
return result;
}
// in lucene/core/src/java/org/apache/lucene/util/packed/PackedReaderIterator.java
public long advance(final int ord) throws IOException{
assert ord < valueCount : "ord must be less than valueCount";
assert ord > position : "ord must be greater than the current position";
final long bits = (long) bitsPerValue;
final int posToSkip = ord - 1 - position;
final long bitsToSkip = (bits * (long)posToSkip);
if (bitsToSkip < pendingBitsLeft) { // enough bits left - no seek required
pendingBitsLeft -= bitsToSkip;
} else {
final long skip = bitsToSkip-pendingBitsLeft;
final long closestByte = (skip >> 6) << 3;
if (closestByte != 0) { // need to seek
final long filePointer = in.getFilePointer();
in.seek(filePointer + closestByte);
}
pending = in.readLong();
pendingBitsLeft = 64 - (int)(skip % 64);
}
position = ord-1;
return next();
}
// in lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java
@Override
public void add(long v) throws IOException {
assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v
+ " maxValue=" + PackedInts.maxValue(bitsPerValue);
assert v >= 0;
//System.out.println(" packedw add v=" + v + " pendingBitPos=" + pendingBitPos);
// TODO
if (pendingBitPos >= bitsPerValue) {
// not split
// write-once, so we can |= w/o first masking to 0s
pending |= v << (pendingBitPos - bitsPerValue);
if (pendingBitPos == bitsPerValue) {
// flush
out.writeLong(pending);
pending = 0;
pendingBitPos = 64;
} else {
pendingBitPos -= bitsPerValue;
}
} else {
// split
// write top pendingBitPos bits of value into bottom bits of pending
pending |= (v >> (bitsPerValue - pendingBitPos)) & masks[pendingBitPos - 1];
//System.out.println(" part1 (v >> " + (bitsPerValue - pendingBitPos) + ") & " + masks[pendingBitPos-1]);
// flush
out.writeLong(pending);
// write bottom (bitsPerValue - pendingBitPos) bits of value into top bits of pending
pendingBitPos = 64 - bitsPerValue + pendingBitPos;
//System.out.println(" part2 v << " + pendingBitPos);
pending = (v << pendingBitPos);
}
written++;
}
// in lucene/core/src/java/org/apache/lucene/util/packed/PackedWriter.java
@Override
public void finish() throws IOException {
while (written < valueCount) {
add(0L); // Auto flush
}
if (pendingBitPos != 64) {
out.writeLong(pending);
}
}
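PackedWriter lays values out MSB-first inside 64-bit blocks, splitting a value across two blocks when fewer than bitsPerValue bits remain, and finish() pads the stream with zeros and flushes the last partial block. A self-contained sketch of the same layout over a long[] array, with the matching read side (illustrative, not the Lucene classes):
// editor's sketch (not from the Lucene sources): MSB-first bit packing into 64-bit blocks
final class PackedBitsSketch {
  // values[i] must fit in bitsPerValue bits (0 <= values[i] < 2^bitsPerValue)
  static long[] pack(long[] values, int bitsPerValue) {
    long[] blocks = new long[(int) (((long) values.length * bitsPerValue + 63) / 64)];
    for (int i = 0; i < values.length; i++) {
      long bitPos = (long) i * bitsPerValue;    // bit index of this value's highest bit
      int block = (int) (bitPos >>> 6);
      int bitsLeft = 64 - (int) (bitPos & 63);  // unused bits remaining in this block
      if (bitsLeft >= bitsPerValue) {           // value fits entirely in the current block
        blocks[block] |= values[i] << (bitsLeft - bitsPerValue);
      } else {                                  // split: high bits here, low bits in the next block
        blocks[block] |= values[i] >>> (bitsPerValue - bitsLeft);
        blocks[block + 1] |= values[i] << (64 - (bitsPerValue - bitsLeft));
      }
    }
    return blocks;
  }

  static long get(long[] blocks, int bitsPerValue, int index) {
    long mask = bitsPerValue == 64 ? ~0L : (1L << bitsPerValue) - 1;
    long bitPos = (long) index * bitsPerValue;
    int block = (int) (bitPos >>> 6);
    int bitsLeft = 64 - (int) (bitPos & 63);
    if (bitsLeft >= bitsPerValue) {             // not split
      return (blocks[block] >>> (bitsLeft - bitsPerValue)) & mask;
    }
    int lowBits = bitsPerValue - bitsLeft;      // number of bits stored in the next block
    long high = blocks[block] & ((1L << bitsLeft) - 1);
    long low = blocks[block + 1] >>> (64 - lowBits);
    return ((high << lowBits) | low) & mask;
  }

  public static void main(String[] args) {
    long[] packed = pack(new long[] {3, 31, 17, 0, 9}, 5);
    for (int i = 0; i < 5; i++) {
      System.out.println(get(packed, 5, i));    // prints 3, 31, 17, 0, 9
    }
  }
}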
// in lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlockReaderIterator.java
@Override
public long next() throws IOException {
if (shift + bitsPerValue > 64) {
pending = in.readLong();
shift = 0;
}
final long next = (pending >>> shift) & mask;
shift += bitsPerValue;
++position;
return next;
}
// in lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlockReaderIterator.java
@Override
public long advance(int ord) throws IOException {
assert ord < valueCount : "ord must be less than valueCount";
assert ord > position : "ord must be greater than the current position";
final int valuesPerBlock = 64 / bitsPerValue;
final long nextBlock = (position + valuesPerBlock) / valuesPerBlock;
final long targetBlock = ord / valuesPerBlock;
final long blocksToSkip = targetBlock - nextBlock;
if (blocksToSkip > 0) {
final long skip = blocksToSkip << 3;
final long filePointer = in.getFilePointer();
in.seek(filePointer + skip);
shift = 64;
final int offsetInBlock = ord % valuesPerBlock;
for (int i = 0; i < offsetInBlock; ++i) {
next();
}
} else {
for (int i = position; i < ord - 1; ++i) {
next();
}
}
position = ord - 1;
return next();
}
// in lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlockWriter.java
@Override
public void add(long v) throws IOException {
assert v <= PackedInts.maxValue(bitsPerValue) : "v=" + v
+ " maxValue=" + PackedInts.maxValue(bitsPerValue);
assert v >= 0;
if (shift + bitsPerValue > Long.SIZE) {
out.writeLong(pending);
pending = 0;
shift = 0;
}
pending |= v << shift;
shift += bitsPerValue;
++written;
}
// in lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlockWriter.java
@Override
public void finish() throws IOException {
while (written < valueCount) {
add(0L); // Auto flush
}
if (shift > 0) {
// add was called at least once
out.writeLong(pending);
}
}
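In the single-block variant a value never crosses a 64-bit boundary: each block holds 64 / bitsPerValue values and the leftover bits are simply wasted, which is the overhead the writer trades for simpler reads. A small worked example of that arithmetic:
// editor's sketch (not from the Lucene sources): per-value overhead of the single-block layout
final class SingleBlockMath {
  // For bitsPerValue = 7: valuesPerBlock = 64 / 7 = 9, bits used = 63, so 1 bit per block
  // is wasted and the overhead is 1/9 (~0.11) extra bits per value.
  static double overheadPerValue(int bitsPerValue) {
    int valuesPerBlock = 64 / bitsPerValue;
    int wastedBits = 64 - valuesPerBlock * bitsPerValue;
    return (double) wastedBits / valuesPerBlock;
  }
}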
// in lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java
public static Packed64SingleBlock create(DataInput in,
int valueCount, int bitsPerValue) throws IOException {
Packed64SingleBlock reader = create(valueCount, bitsPerValue);
for (int i = 0; i < reader.blocks.length; ++i) {
reader.blocks[i] = in.readLong();
}
return reader;
}
// in lucene/core/src/java/org/apache/lucene/util/RollingCharBuffer.java
public int get(int pos) throws IOException {
//System.out.println(" get pos=" + pos + " nextPos=" + nextPos + " count=" + count);
if (pos == nextPos) {
if (end) {
return -1;
}
if (count == buffer.length) {
// Grow
final char[] newBuffer = new char[ArrayUtil.oversize(1+count, RamUsageEstimator.NUM_BYTES_CHAR)];
//System.out.println(Thread.currentThread().getName() + ": cb grow " + newBuffer.length);
System.arraycopy(buffer, nextWrite, newBuffer, 0, buffer.length - nextWrite);
System.arraycopy(buffer, 0, newBuffer, buffer.length - nextWrite, nextWrite);
nextWrite = buffer.length;
buffer = newBuffer;
}
if (nextWrite == buffer.length) {
nextWrite = 0;
}
final int toRead = buffer.length - Math.max(count, nextWrite);
final int readCount = reader.read(buffer, nextWrite, toRead);
if (readCount == -1) {
end = true;
return -1;
}
final int ch = buffer[nextWrite];
nextWrite += readCount;
count += readCount;
nextPos += readCount;
return ch;
} else {
// Cannot read from future (except by 1):
assert pos < nextPos;
// Cannot read from already freed past:
assert nextPos - pos <= count: "nextPos=" + nextPos + " pos=" + pos + " count=" + count;
return buffer[getIndex(pos)];
}
}
// in lucene/core/src/java/org/apache/lucene/util/ReaderUtil.java
public int run() throws IOException {
return run(0, topReader);
}
// in lucene/core/src/java/org/apache/lucene/util/ReaderUtil.java
public int run(int docBase) throws IOException {
return run(docBase, topReader);
}
// in lucene/core/src/java/org/apache/lucene/util/ReaderUtil.java
private int run(int base, IndexReader reader) throws IOException {
if (reader instanceof AtomicReader) {
// atomic reader
add(base, (AtomicReader) reader);
base += reader.maxDoc();
} else {
assert reader instanceof CompositeReader : "must be a composite reader";
IndexReader[] subReaders = ((CompositeReader) reader).getSequentialSubReaders();
for (int i = 0; i < subReaders.length; i++) {
base = run(base, subReaders[i]);
}
}
return base;
}
// in lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java
@Override
public BytesRef next() throws IOException {
return null;
}
// in lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
public void or(DocIdSetIterator iter) throws IOException {
if (iter instanceof OpenBitSetIterator && iter.docID() == -1) {
final OpenBitSetIterator obs = (OpenBitSetIterator) iter;
or(obs.arr, obs.words);
// advance after last doc that would be accepted if standard
// iteration is used (to exhaust it):
obs.advance(numBits);
} else {
int doc;
while ((doc = iter.nextDoc()) < numBits) {
set(doc);
}
}
}
// in lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
public void and(DocIdSetIterator iter) throws IOException {
if (iter instanceof OpenBitSetIterator && iter.docID() == -1) {
final OpenBitSetIterator obs = (OpenBitSetIterator) iter;
and(obs.arr, obs.words);
// advance after last doc that would be accepted if standard
// iteration is used (to exhaust it):
obs.advance(numBits);
} else {
if (numBits == 0) return;
int disiDoc, bitSetDoc = nextSetBit(0);
while (bitSetDoc != -1 && (disiDoc = iter.advance(bitSetDoc)) < numBits) {
clear(bitSetDoc, disiDoc);
disiDoc++;
bitSetDoc = (disiDoc < numBits) ? nextSetBit(disiDoc) : -1;
}
if (bitSetDoc != -1) {
clear(bitSetDoc, numBits);
}
}
}
// in lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
public void andNot(DocIdSetIterator iter) throws IOException {
if (iter instanceof OpenBitSetIterator && iter.docID() == -1) {
final OpenBitSetIterator obs = (OpenBitSetIterator) iter;
andNot(obs.arr, obs.words);
// advance after last doc that would be accepted if standard
// iteration is used (to exhaust it):
obs.advance(numBits);
} else {
int doc;
while ((doc = iter.nextDoc()) < numBits) {
clear(doc);
}
}
}
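or(), and() and andNot() implement plain set algebra over the iterator's doc IDs: or() sets every doc the iterator produces, and() keeps only docs the iterator also produces, and andNot() clears the docs it produces. A tiny standalone illustration of the same semantics with java.util.BitSet (not the Lucene classes):
// editor's sketch (not from the Lucene sources): the set algebra these methods implement
import java.util.BitSet;

final class BitSetAlgebraDemo {
  public static void main(String[] args) {
    BitSet fixed = new BitSet();
    fixed.set(1); fixed.set(3); fixed.set(5);

    BitSet other = new BitSet();
    other.set(3); other.set(4);

    BitSet or = (BitSet) fixed.clone();  or.or(other);      // {1, 3, 4, 5}
    BitSet and = (BitSet) fixed.clone(); and.and(other);    // {3}
    BitSet not = (BitSet) fixed.clone(); not.andNot(other); // {1, 5}

    System.out.println(or + " " + and + " " + not);
  }
}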
// in lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java
public final void writePool(final DataOutput out) throws IOException {
int bytesOffset = byteOffset;
int block = 0;
while (bytesOffset > 0) {
out.writeBytes(buffers[block++], BYTE_BLOCK_SIZE);
bytesOffset -= BYTE_BLOCK_SIZE;
}
out.writeBytes(buffers[block], byteUpto);
}
// in lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
private CompiledNode compileNode(UnCompiledNode<T> nodeIn, int tailLength) throws IOException {
final int node;
if (dedupHash != null && (doShareNonSingletonNodes || nodeIn.numArcs <= 1) && tailLength <= shareMaxTailLength) {
if (nodeIn.numArcs == 0) {
node = fst.addNode(nodeIn);
} else {
node = dedupHash.add(nodeIn);
}
} else {
node = fst.addNode(nodeIn);
}
assert node != -2;
nodeIn.clear();
final CompiledNode fn = new CompiledNode();
fn.node = node;
return fn;
}
// in lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
private void freezeTail(int prefixLenPlus1) throws IOException {
if (freezeTail != null) {
// Custom plugin:
freezeTail.freeze(frontier, prefixLenPlus1, lastInput);
} else {
//System.out.println(" compileTail " + prefixLenPlus1);
final int downTo = Math.max(1, prefixLenPlus1);
for(int idx=lastInput.length; idx >= downTo; idx--) {
boolean doPrune = false;
boolean doCompile = false;
final UnCompiledNode<T> node = frontier[idx];
final UnCompiledNode<T> parent = frontier[idx-1];
if (node.inputCount < minSuffixCount1) {
doPrune = true;
doCompile = true;
} else if (idx > prefixLenPlus1) {
// prune if parent's inputCount is less than suffixMinCount2
if (parent.inputCount < minSuffixCount2 || (minSuffixCount2 == 1 && parent.inputCount == 1 && idx > 1)) {
// my parent, about to be compiled, doesn't make the cut, so
// I'm definitely pruned
// if minSuffixCount2 is 1, we keep only up
// until the 'distinguished edge', ie we keep only the
// 'divergent' part of the FST. if my parent, about to be
// compiled, has inputCount 1 then we are already past the
// distinguished edge. NOTE: this only works if
// the FST outputs are not "compressible" (simple
// ords ARE compressible).
doPrune = true;
} else {
// my parent, about to be compiled, does make the cut, so
// I'm definitely not pruned
doPrune = false;
}
doCompile = true;
} else {
// if pruning is disabled (count is 0) we can always
// compile current node
doCompile = minSuffixCount2 == 0;
}
//System.out.println(" label=" + ((char) lastInput.ints[lastInput.offset+idx-1]) + " idx=" + idx + " inputCount=" + frontier[idx].inputCount + " doCompile=" + doCompile + " doPrune=" + doPrune);
if (node.inputCount < minSuffixCount2 || (minSuffixCount2 == 1 && node.inputCount == 1 && idx > 1)) {
// drop all arcs
for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
@SuppressWarnings({"rawtypes","unchecked"}) final UnCompiledNode<T> target =
(UnCompiledNode<T>) node.arcs[arcIdx].target;
target.clear();
}
node.numArcs = 0;
}
if (doPrune) {
// this node doesn't make it -- deref it
node.clear();
parent.deleteLast(lastInput.ints[lastInput.offset+idx-1], node);
} else {
if (minSuffixCount2 != 0) {
compileAllTargets(node, lastInput.length-idx);
}
final T nextFinalOutput = node.output;
// We "fake" the node as being final if it has no
// outgoing arcs; in theory we could leave it
// as non-final (the FST can represent this), but
// FSTEnum, Util, etc., have trouble w/ non-final
// dead-end states:
final boolean isFinal = node.isFinal || node.numArcs == 0;
if (doCompile) {
// this node makes it and we now compile it. first,
// compile any targets that were previously
// undecided:
parent.replaceLast(lastInput.ints[lastInput.offset + idx-1],
compileNode(node, 1+lastInput.length-idx),
nextFinalOutput,
isFinal);
} else {
// replaceLast just to install
// nextFinalOutput/isFinal onto the arc
parent.replaceLast(lastInput.ints[lastInput.offset + idx-1],
node,
nextFinalOutput,
isFinal);
// this node will stay in play for now, since we are
// undecided on whether to prune it. later, it
// will be either compiled or pruned, so we must
// allocate a new node:
frontier[idx] = new UnCompiledNode<T>(this, idx);
}
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
public void add(IntsRef input, T output) throws IOException {
/*
if (DEBUG) {
BytesRef b = new BytesRef(input.length);
for(int x=0;x<input.length;x++) {
b.bytes[x] = (byte) input.ints[x];
}
b.length = input.length;
if (output == NO_OUTPUT) {
System.out.println("\nFST ADD: input=" + toString(b) + " " + b);
} else {
System.out.println("\nFST ADD: input=" + toString(b) + " " + b + " output=" + fst.outputs.outputToString(output));
}
}
*/
// De-dup NO_OUTPUT since it must be a singleton:
if (output.equals(NO_OUTPUT)) {
output = NO_OUTPUT;
}
assert lastInput.length == 0 || input.compareTo(lastInput) >= 0: "inputs are added out of order lastInput=" + lastInput + " vs input=" + input;
assert validOutput(output);
//System.out.println("\nadd: " + input);
if (input.length == 0) {
// empty input: only allowed as first input. we have
// to special case this because the packed FST
// format cannot represent the empty input since
// 'finalness' is stored on the incoming arc, not on
// the node
frontier[0].inputCount++;
frontier[0].isFinal = true;
fst.setEmptyOutput(output);
return;
}
// compare shared prefix length
int pos1 = 0;
int pos2 = input.offset;
final int pos1Stop = Math.min(lastInput.length, input.length);
while(true) {
frontier[pos1].inputCount++;
//System.out.println(" incr " + pos1 + " ct=" + frontier[pos1].inputCount + " n=" + frontier[pos1]);
if (pos1 >= pos1Stop || lastInput.ints[pos1] != input.ints[pos2]) {
break;
}
pos1++;
pos2++;
}
final int prefixLenPlus1 = pos1+1;
if (frontier.length < input.length+1) {
@SuppressWarnings({"rawtypes","unchecked"}) final UnCompiledNode<T>[] next =
new UnCompiledNode[ArrayUtil.oversize(input.length+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
System.arraycopy(frontier, 0, next, 0, frontier.length);
for(int idx=frontier.length;idx<next.length;idx++) {
next[idx] = new UnCompiledNode<T>(this, idx);
}
frontier = next;
}
// in lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
public FST<T> finish() throws IOException {
final UnCompiledNode<T> root = frontier[0];
// minimize nodes in the last word's suffix
freezeTail(0);
if (root.inputCount < minSuffixCount1 || root.inputCount < minSuffixCount2 || root.numArcs == 0) {
if (fst.emptyOutput == null) {
return null;
} else if (minSuffixCount1 > 0 || minSuffixCount2 > 0) {
// empty string got pruned
return null;
}
} else {
if (minSuffixCount2 != 0) {
compileAllTargets(root, lastInput.length);
}
}
//if (DEBUG) System.out.println(" builder.finish root.isFinal=" + root.isFinal + " root.output=" + root.output);
fst.finish(compileNode(root, lastInput.length).node);
return fst;
}
// in lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
private void compileAllTargets(UnCompiledNode<T> node, int tailLength) throws IOException {
for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
final Arc<T> arc = node.arcs[arcIdx];
if (!arc.target.isCompiled()) {
// not yet compiled
@SuppressWarnings({"rawtypes","unchecked"}) final UnCompiledNode<T> n = (UnCompiledNode<T>) arc.target;
if (n.numArcs == 0) {
//System.out.println("seg=" + segment + " FORCE final arc=" + (char) arc.label);
arc.isFinal = n.isFinal = true;
}
arc.target = compileNode(n, tailLength-1);
}
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/IntsRefFSTEnum.java
public InputOutput<T> next() throws IOException {
//System.out.println(" enum.next");
doNext();
return setResult();
}
// in lucene/core/src/java/org/apache/lucene/util/fst/IntsRefFSTEnum.java
public InputOutput<T> seekCeil(IntsRef target) throws IOException {
this.target = target;
targetLength = target.length;
super.doSeekCeil();
return setResult();
}
// in lucene/core/src/java/org/apache/lucene/util/fst/IntsRefFSTEnum.java
public InputOutput<T> seekFloor(IntsRef target) throws IOException {
this.target = target;
targetLength = target.length;
super.doSeekFloor();
return setResult();
}
// in lucene/core/src/java/org/apache/lucene/util/fst/IntsRefFSTEnum.java
public InputOutput<T> seekExact(IntsRef target) throws IOException {
this.target = target;
targetLength = target.length;
if (super.doSeekExact()) {
assert upto == 1+target.length;
return setResult();
} else {
return null;
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java
@Override
public void write(BytesRef prefix, DataOutput out) throws IOException {
assert prefix != null;
out.writeVInt(prefix.length);
out.writeBytes(prefix.bytes, prefix.offset, prefix.length);
}
// in lucene/core/src/java/org/apache/lucene/util/fst/ByteSequenceOutputs.java
@Override
public BytesRef read(DataInput in) throws IOException {
final int len = in.readVInt();
if (len == 0) {
return NO_OUTPUT;
} else {
final BytesRef output = new BytesRef(len);
in.readBytes(output.bytes, 0, len);
output.length = len;
return output;
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java
private boolean nodesEqual(Builder.UnCompiledNode<T> node, int address, FST.BytesReader in) throws IOException {
fst.readFirstRealTargetArc(address, scratchArc, in);
if (scratchArc.bytesPerArc != 0 && node.numArcs != scratchArc.numArcs) {
return false;
}
for(int arcUpto=0;arcUpto<node.numArcs;arcUpto++) {
final Builder.Arc<T> arc = node.arcs[arcUpto];
if (arc.label != scratchArc.label ||
!arc.output.equals(scratchArc.output) ||
((Builder.CompiledNode) arc.target).node != scratchArc.target ||
!arc.nextFinalOutput.equals(scratchArc.nextFinalOutput) ||
arc.isFinal != scratchArc.isFinal()) {
return false;
}
if (scratchArc.isLast()) {
if (arcUpto == node.numArcs-1) {
return true;
} else {
return false;
}
}
fst.readNextRealArc(scratchArc, in);
}
return false;
}
// in lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java
private int hash(int node) throws IOException {
final int PRIME = 31;
final FST.BytesReader in = fst.getBytesReader(0);
//System.out.println("hash frozen node=" + node);
int h = 0;
fst.readFirstRealTargetArc(node, scratchArc, in);
while(true) {
//System.out.println(" label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal());
h = PRIME * h + scratchArc.label;
h = PRIME * h + scratchArc.target;
h = PRIME * h + scratchArc.output.hashCode();
h = PRIME * h + scratchArc.nextFinalOutput.hashCode();
if (scratchArc.isFinal()) {
h += 17;
}
if (scratchArc.isLast()) {
break;
}
fst.readNextRealArc(scratchArc, in);
}
//System.out.println(" ret " + (h&Integer.MAX_VALUE));
return h & Integer.MAX_VALUE;
}
// in lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java
public int add(Builder.UnCompiledNode<T> nodeIn) throws IOException {
// System.out.println("hash: add count=" + count + " vs " + table.length);
final FST.BytesReader in = fst.getBytesReader(0);
final int h = hash(nodeIn);
int pos = h & mask;
int c = 0;
while(true) {
final int v = table[pos];
if (v == 0) {
// freeze & add
final int node = fst.addNode(nodeIn);
//System.out.println(" now freeze node=" + node);
assert hash(node) == h : "frozenHash=" + hash(node) + " vs h=" + h;
count++;
table[pos] = node;
if (table.length < 2*count) {
rehash();
}
return node;
} else if (nodesEqual(nodeIn, v, in)) {
// same node is already here
return v;
}
// quadratic probe
pos = (pos + (++c)) & mask;
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java
private void addNew(int address) throws IOException {
int pos = hash(address) & mask;
int c = 0;
while(true) {
if (table[pos] == 0) {
table[pos] = address;
break;
}
// quadratic probe
pos = (pos + (++c)) & mask;
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/NodeHash.java
private void rehash() throws IOException {
final int[] oldTable = table;
table = new int[2*table.length];
mask = table.length-1;
for(int idx=0;idx<oldTable.length;idx++) {
final int address = oldTable[idx];
if (address != 0) {
addNew(address);
}
}
}
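NodeHash is an open-addressed table over frozen nodes: add() probes quadratically from the node's hash, freezes the node into the FST only when no equal node is already stored, and doubles the table once it is more than half full. A self-contained sketch of the same probe-and-rehash scheme over plain ints (illustrative, not the Lucene class):
// editor's sketch (not from the Lucene sources): open addressing with quadratic probing,
// using 0 as the "empty slot" sentinel just like NodeHash (values must be non-zero).
final class IntHashSet {
  private int[] table = new int[16];
  private int mask = table.length - 1;
  private int count;

  boolean add(int value) {
    assert value != 0;
    int pos = hash(value) & mask;
    int c = 0;
    while (true) {
      if (table[pos] == 0) {
        table[pos] = value;
        if (++count * 2 > table.length) {
          rehash();                  // keep the load factor at or below 1/2
        }
        return true;                 // newly added
      } else if (table[pos] == value) {
        return false;                // already present
      }
      pos = (pos + (++c)) & mask;    // quadratic probe
    }
  }

  private void rehash() {
    int[] old = table;
    table = new int[2 * old.length];
    mask = table.length - 1;
    count = 0;
    for (int v : old) {
      if (v != 0) {
        add(v);                      // re-insert into the doubled table
      }
    }
  }

  private static int hash(int v) {
    return (v * 31) & Integer.MAX_VALUE;
  }
}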
// in lucene/core/src/java/org/apache/lucene/util/fst/Util.java
public static<T> T get(FST<T> fst, IntsRef input) throws IOException {
// TODO: would be nice not to alloc this on every lookup
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
final FST.BytesReader fstReader = fst.getBytesReader(0);
// Accumulate output as we go
T output = fst.outputs.getNoOutput();
for(int i=0;i<input.length;i++) {
if (fst.findTargetArc(input.ints[input.offset + i], arc, arc, fstReader) == null) {
return null;
}
output = fst.outputs.add(output, arc.output);
}
if (arc.isFinal()) {
return fst.outputs.add(output, arc.nextFinalOutput);
} else {
return null;
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/Util.java
public static<T> T get(FST<T> fst, BytesRef input) throws IOException {
assert fst.inputType == FST.INPUT_TYPE.BYTE1;
final FST.BytesReader fstReader = fst.getBytesReader(0);
// TODO: would be nice not to alloc this on every lookup
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
// Accumulate output as we go
T output = fst.outputs.getNoOutput();
for(int i=0;i<input.length;i++) {
if (fst.findTargetArc(input.bytes[i+input.offset] & 0xFF, arc, arc, fstReader) == null) {
return null;
}
output = fst.outputs.add(output, arc.output);
}
if (arc.isFinal()) {
return fst.outputs.add(output, arc.nextFinalOutput);
} else {
return null;
}
}
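Together with the Builder.add/finish methods shown earlier, Util.get is all that is needed to build a small FST map and look keys up. A hedged sketch; the Builder constructor, the PositiveIntOutputs.getSingleton argument and the Util.toIntsRef helper are assumptions about the surrounding 4.x-era API, and their exact signatures vary between Lucene versions:
// editor's sketch (not from the Lucene sources): build an FST<Long> and query it
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

final class FstBuildSketch {
  static FST<Long> buildAndQuery() throws IOException {
    // getSingleton's argument and the Builder constructor are version-dependent assumptions.
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
    Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
    IntsRef scratch = new IntsRef();
    // Inputs must be added in sorted order (Builder.add asserts this).
    builder.add(Util.toIntsRef(new BytesRef("cat"), scratch), 5L);
    builder.add(Util.toIntsRef(new BytesRef("dog"), scratch), 7L);
    FST<Long> fst = builder.finish();
    Long dog = Util.get(fst, new BytesRef("dog"));   // 7L; null for absent keys
    assert dog != null && dog == 7L;
    return fst;
  }
}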
// in lucene/core/src/java/org/apache/lucene/util/fst/Util.java
public static IntsRef getByOutput(FST<Long> fst, long targetOutput) throws IOException {
final FST.BytesReader in = fst.getBytesReader(0);
// TODO: would be nice not to alloc this on every lookup
FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>());
FST.Arc<Long> scratchArc = new FST.Arc<Long>();
final IntsRef result = new IntsRef();
long output = arc.output;
int upto = 0;
//System.out.println("reverseLookup output=" + targetOutput);
while(true) {
//System.out.println("loop: output=" + output + " upto=" + upto + " arc=" + arc);
if (arc.isFinal()) {
final long finalOutput = output + arc.nextFinalOutput;
//System.out.println(" isFinal finalOutput=" + finalOutput);
if (finalOutput == targetOutput) {
result.length = upto;
//System.out.println(" found!");
return result;
} else if (finalOutput > targetOutput) {
//System.out.println(" not found!");
return null;
}
}
if (FST.targetHasArcs(arc)) {
//System.out.println(" targetHasArcs");
if (result.ints.length == upto) {
result.grow(1+upto);
}
fst.readFirstRealTargetArc(arc.target, arc, in);
if (arc.bytesPerArc != 0) {
int low = 0;
int high = arc.numArcs-1;
int mid = 0;
//System.out.println("bsearch: numArcs=" + arc.numArcs + " target=" + targetOutput + " output=" + output);
boolean exact = false;
while (low <= high) {
mid = (low + high) >>> 1;
in.pos = arc.posArcsStart;
in.skip(arc.bytesPerArc*mid);
final byte flags = in.readByte();
fst.readLabel(in);
final long minArcOutput;
if ((flags & FST.BIT_ARC_HAS_OUTPUT) != 0) {
final long arcOutput = fst.outputs.read(in);
minArcOutput = output + arcOutput;
} else {
minArcOutput = output;
}
//System.out.println(" cycle mid=" + mid + " label=" + (char) label + " output=" + minArcOutput);
if (minArcOutput == targetOutput) {
exact = true;
break;
} else if (minArcOutput < targetOutput) {
low = mid + 1;
} else {
high = mid - 1;
}
}
if (high == -1) {
return null;
} else if (exact) {
arc.arcIdx = mid-1;
} else {
arc.arcIdx = low-2;
}
fst.readNextRealArc(arc, in);
result.ints[upto++] = arc.label;
output += arc.output;
} else {
FST.Arc<Long> prevArc = null;
while(true) {
//System.out.println(" cycle label=" + arc.label + " output=" + arc.output);
// This is the min output we'd hit if we follow
// this arc:
final long minArcOutput = output + arc.output;
if (minArcOutput == targetOutput) {
// Recurse on this arc:
//System.out.println(" match! break");
output = minArcOutput;
result.ints[upto++] = arc.label;
break;
} else if (minArcOutput > targetOutput) {
if (prevArc == null) {
// Output doesn't exist
return null;
} else {
// Recurse on previous arc:
arc.copyFrom(prevArc);
result.ints[upto++] = arc.label;
output += arc.output;
//System.out.println(" recurse prev label=" + (char) arc.label + " output=" + output);
break;
}
} else if (arc.isLast()) {
// Recurse on this arc:
output = minArcOutput;
//System.out.println(" recurse last label=" + (char) arc.label + " output=" + output);
result.ints[upto++] = arc.label;
break;
} else {
// Read next arc in this node:
prevArc = scratchArc;
prevArc.copyFrom(arc);
//System.out.println(" after copy label=" + (char) prevArc.label + " vs " + (char) arc.label);
fst.readNextRealArc(arc, in);
}
}
}
} else {
//System.out.println(" no target arcs; not found!");
return null;
}
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/Util.java
public MinResult<T>[] search() throws IOException {
//System.out.println(" search topN=" + topN);
final FST.Arc<T> scratchArc = new FST.Arc<T>();
final List<MinResult<T>> results = new ArrayList<MinResult<T>>();
final T NO_OUTPUT = fst.outputs.getNoOutput();
// TODO: we could enable FST to sorting arcs by weight
// as it freezes... can easily do this on first pass
// (w/o requiring rewrite)
// TODO: maybe we should make an FST.INPUT_TYPE.BYTE0.5!?
// (nibbles)
// For each top N path:
while (results.size() < topN) {
//System.out.println("\nfind next path");
FSTPath<T> path;
if (queue == null) {
if (results.size() != 0) {
// Ran out of paths
break;
}
// First pass (top path): start from original fromNode
if (topN > 1) {
queue = new TreeSet<FSTPath<T>>();
}
T minArcCost = null;
FST.Arc<T> minArc = null;
path = new FSTPath<T>(NO_OUTPUT, fromNode, comparator);
fst.readFirstTargetArc(fromNode, path.arc);
// Bootstrap: find the min starting arc
while (true) {
T arcScore = path.arc.output;
//System.out.println(" arc=" + (char) path.arc.label + " cost=" + arcScore);
if (minArcCost == null || comparator.compare(arcScore, minArcCost) < 0) {
minArcCost = arcScore;
minArc = scratchArc.copyFrom(path.arc);
//System.out.println(" **");
}
if (queue != null) {
addIfCompetitive(path);
}
if (path.arc.isLast()) {
break;
}
fst.readNextArc(path.arc);
}
assert minArc != null;
if (queue != null) {
// Remove top path since we are now going to
// pursue it:
path = queue.pollFirst();
//System.out.println(" remove init path=" + path);
assert path.arc.label == minArc.label;
if (bottom != null && queue.size() == topN-1) {
bottom = queue.last();
//System.out.println(" set init bottom: " + bottom);
}
} else {
path.arc.copyFrom(minArc);
path.input.grow(1);
path.input.ints[0] = minArc.label;
path.input.length = 1;
path.cost = minArc.output;
}
} else {
path = queue.pollFirst();
if (path == null) {
// There were less than topN paths available:
break;
}
}
if (path.arc.label == FST.END_LABEL) {
//System.out.println(" empty string! cost=" + path.cost);
// Empty string!
path.input.length--;
results.add(new MinResult<T>(path.input, path.cost, comparator));
continue;
}
if (results.size() == topN-1) {
// Last path -- don't bother w/ queue anymore:
queue = null;
}
//System.out.println(" path: " + path);
// We take path and find its "0 output completion",
// ie, just keep traversing the first arc with
// NO_OUTPUT that we can find, since this must lead
// to the minimum path that completes from
// path.arc.
// For each input letter:
while (true) {
//System.out.println("\n cycle path: " + path);
fst.readFirstTargetArc(path.arc, path.arc);
// For each arc leaving this node:
boolean foundZero = false;
while(true) {
//System.out.println(" arc=" + (char) path.arc.label + " cost=" + path.arc.output);
// tricky: instead of comparing output == 0, we must
// express it via the comparator compare(output, 0) == 0
if (comparator.compare(NO_OUTPUT, path.arc.output) == 0) {
if (queue == null) {
foundZero = true;
break;
} else if (!foundZero) {
scratchArc.copyFrom(path.arc);
foundZero = true;
} else {
addIfCompetitive(path);
}
} else if (queue != null) {
addIfCompetitive(path);
}
if (path.arc.isLast()) {
break;
}
fst.readNextArc(path.arc);
}
assert foundZero;
if (queue != null) {
// TODO: maybe we can save this copyFrom if we
// are more clever above... eg on finding the
// first NO_OUTPUT arc we'd switch to using
// scratchArc
path.arc.copyFrom(scratchArc);
}
if (path.arc.label == FST.END_LABEL) {
// Add final output:
//System.out.println(" done!: " + path);
results.add(new MinResult<T>(path.input, fst.outputs.add(path.cost, path.arc.output), comparator));
break;
} else {
path.input.grow(1+path.input.length);
path.input.ints[path.input.length] = path.arc.label;
path.input.length++;
path.cost = fst.outputs.add(path.cost, path.arc.output);
}
}
}
@SuppressWarnings({"rawtypes","unchecked"}) final MinResult<T>[] arr =
(MinResult<T>[]) new MinResult[results.size()];
// in lucene/core/src/java/org/apache/lucene/util/fst/Util.java
public static <T> MinResult<T>[] shortestPaths(FST<T> fst, FST.Arc<T> fromNode, Comparator<T> comparator, int topN) throws IOException {
return new TopNSearcher<T>(fst, fromNode, topN, comparator).search();
}
// in lucene/core/src/java/org/apache/lucene/util/fst/Util.java
public static <T> void toDot(FST<T> fst, Writer out, boolean sameRank, boolean labelStates)
throws IOException {
final String expandedNodeColor = "blue";
// This is the start arc in the automaton (from the epsilon state to the first state
// with outgoing transitions).
final FST.Arc<T> startArc = fst.getFirstArc(new FST.Arc<T>());
// A queue of transitions to consider for the next level.
final List<FST.Arc<T>> thisLevelQueue = new ArrayList<FST.Arc<T>>();
// A queue of transitions to consider when processing the next level.
final List<FST.Arc<T>> nextLevelQueue = new ArrayList<FST.Arc<T>>();
nextLevelQueue.add(startArc);
//System.out.println("toDot: startArc: " + startArc);
// A list of states on the same level (for ranking).
final List<Integer> sameLevelStates = new ArrayList<Integer>();
// A bitset of already seen states (target offset).
final BitSet seen = new BitSet();
seen.set(startArc.target);
// Shape for states.
final String stateShape = "circle";
final String finalStateShape = "doublecircle";
// Emit DOT prologue.
out.write("digraph FST {\n");
out.write(" rankdir = LR; splines=true; concentrate=true; ordering=out; ranksep=2.5; \n");
if (!labelStates) {
out.write(" node [shape=circle, width=.2, height=.2, style=filled]\n");
}
emitDotState(out, "initial", "point", "white", "");
final T NO_OUTPUT = fst.outputs.getNoOutput();
// final FST.Arc<T> scratchArc = new FST.Arc<T>();
{
final String stateColor;
if (fst.isExpandedTarget(startArc)) {
stateColor = expandedNodeColor;
} else {
stateColor = null;
}
final boolean isFinal;
final T finalOutput;
if (startArc.isFinal()) {
isFinal = true;
finalOutput = startArc.nextFinalOutput == NO_OUTPUT ? null : startArc.nextFinalOutput;
} else {
isFinal = false;
finalOutput = null;
}
emitDotState(out, Integer.toString(startArc.target), isFinal ? finalStateShape : stateShape, stateColor, finalOutput == null ? "" : fst.outputs.outputToString(finalOutput));
}
out.write(" initial -> " + startArc.target + "\n");
int level = 0;
final FST.BytesReader r = fst.getBytesReader(0);
while (!nextLevelQueue.isEmpty()) {
// we could double buffer here, but it doesn't matter probably.
//System.out.println("next level=" + level);
thisLevelQueue.addAll(nextLevelQueue);
nextLevelQueue.clear();
level++;
out.write("\n // Transitions and states at level: " + level + "\n");
while (!thisLevelQueue.isEmpty()) {
final FST.Arc<T> arc = thisLevelQueue.remove(thisLevelQueue.size() - 1);
//System.out.println(" pop: " + arc);
if (FST.targetHasArcs(arc)) {
// scan all target arcs
//System.out.println(" readFirstTarget...");
final int node = arc.target;
fst.readFirstRealTargetArc(arc.target, arc, r);
//System.out.println(" firstTarget: " + arc);
while (true) {
//System.out.println(" cycle arc=" + arc);
// Emit the unseen state and add it to the queue for the next level.
if (arc.target >= 0 && !seen.get(arc.target)) {
/*
boolean isFinal = false;
T finalOutput = null;
fst.readFirstTargetArc(arc, scratchArc);
if (scratchArc.isFinal() && fst.targetHasArcs(scratchArc)) {
// target is final
isFinal = true;
finalOutput = scratchArc.output == NO_OUTPUT ? null : scratchArc.output;
System.out.println("dot hit final label=" + (char) scratchArc.label);
}
*/
final String stateColor;
if (fst.isExpandedTarget(arc)) {
stateColor = expandedNodeColor;
} else {
stateColor = null;
}
final String finalOutput;
if (arc.nextFinalOutput != null && arc.nextFinalOutput != NO_OUTPUT) {
finalOutput = fst.outputs.outputToString(arc.nextFinalOutput);
} else {
finalOutput = "";
}
emitDotState(out, Integer.toString(arc.target), stateShape, stateColor, finalOutput);
// To see the node address, use this instead:
//emitDotState(out, Integer.toString(arc.target), stateShape, stateColor, String.valueOf(arc.target));
seen.set(arc.target);
nextLevelQueue.add(new FST.Arc<T>().copyFrom(arc));
sameLevelStates.add(arc.target);
}
String outs;
if (arc.output != NO_OUTPUT) {
outs = "/" + fst.outputs.outputToString(arc.output);
} else {
outs = "";
}
if (!FST.targetHasArcs(arc) && arc.isFinal() && arc.nextFinalOutput != NO_OUTPUT) {
// Tricky special case: sometimes, due to
// pruning, the builder can [sillily] produce
// an FST with an arc into the final end state
// (-1) but also with a next final output; in
// this case we pull that output up onto this
// arc
outs = outs + "/[" + fst.outputs.outputToString(arc.nextFinalOutput) + "]";
}
final String arcColor;
if (arc.flag(FST.BIT_TARGET_NEXT)) {
arcColor = "red";
} else {
arcColor = "black";
}
assert arc.label != FST.END_LABEL;
out.write(" " + node + " -> " + arc.target + " [label=\"" + printableLabel(arc.label) + outs + "\"" + (arc.isFinal() ? " style=\"bold\"" : "" ) + " color=\"" + arcColor + "\"]\n");
// Break the loop if we're on the last arc of this state.
if (arc.isLast()) {
//System.out.println(" break");
break;
}
fst.readNextRealArc(arc, r);
}
}
}
// Emit state ranking information.
if (sameRank && sameLevelStates.size() > 1) {
out.write(" {rank=same; ");
for (int state : sameLevelStates) {
out.write(state + "; ");
}
out.write(" }\n");
}
sameLevelStates.clear();
}
// Emit terminating state (always there anyway).
out.write(" -1 [style=filled, color=black, shape=doublecircle, label=\"\"]\n\n");
out.write(" {rank=sink; -1 }\n");
out.write("}\n");
out.flush();
}
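toDot writes a GraphViz description of the FST to any Writer; a typical caller dumps it to a .dot file and renders it with the dot tool. A hedged usage sketch around the signature shown above:
// editor's sketch (not from the Lucene sources): dump an FST to a GraphViz .dot file
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;

final class FstDotDump {
  static <T> void saveDot(FST<T> fst, String path) throws IOException {
    Writer w = new OutputStreamWriter(new FileOutputStream(path), "UTF-8");
    try {
      Util.toDot(fst, w, /*sameRank=*/ true, /*labelStates=*/ true);
    } finally {
      w.close();
    }
    // Render with GraphViz afterwards, e.g.:  dot -Tpng fst.dot -o fst.png
  }
}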
// in lucene/core/src/java/org/apache/lucene/util/fst/Util.java
private static void emitDotState(Writer out, String name, String shape,
String color, String label) throws IOException {
out.write(" " + name
+ " ["
+ (shape != null ? "shape=" + shape : "") + " "
+ (color != null ? "color=" + color : "") + " "
+ (label != null ? "label=\"" + label + "\"" : "label=\"\"") + " "
+ "]\n");
}
// in lucene/core/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java
@Override
public void write(Object _output, DataOutput out) throws IOException {
assert valid(_output, true);
if (_output instanceof Long) {
final Long output = (Long) _output;
out.writeVLong(output<<1);
} else {
final TwoLongs output = (TwoLongs) _output;
out.writeVLong((output.first<<1) | 1);
out.writeVLong(output.second);
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/UpToTwoPositiveIntOutputs.java
@Override
public Object read(DataInput in) throws IOException {
final long code = in.readVLong();
if ((code & 1) == 0) {
// single long
final long v = code >>> 1;
if (v == 0) {
return NO_OUTPUT;
} else {
return Long.valueOf(v);
}
} else {
// two longs
final long first = code >>> 1;
final long second = in.readVLong();
return new TwoLongs(first, second);
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
void finish(int startNode) throws IOException {
if (startNode == FINAL_END_NODE && emptyOutput != null) {
startNode = 0;
}
if (this.startNode != -1) {
throw new IllegalStateException("already finished");
}
byte[] finalBytes = new byte[writer.posWrite];
System.arraycopy(bytes, 0, finalBytes, 0, writer.posWrite);
bytes = finalBytes;
this.startNode = startNode;
cacheRootArcs();
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
private void cacheRootArcs() throws IOException {
cachedRootArcs = (Arc<T>[]) new Arc[0x80];
final Arc<T> arc = new Arc<T>();
getFirstArc(arc);
final BytesReader in = getBytesReader(0);
if (targetHasArcs(arc)) {
readFirstRealTargetArc(arc.target, arc, in);
while(true) {
assert arc.label != END_LABEL;
if (arc.label < cachedRootArcs.length) {
cachedRootArcs[arc.label] = new Arc<T>().copyFrom(arc);
} else {
break;
}
if (arc.isLast()) {
break;
}
readNextRealArc(arc, in);
}
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
void setEmptyOutput(T v) throws IOException {
if (emptyOutput != null) {
emptyOutput = outputs.merge(emptyOutput, v);
} else {
emptyOutput = v;
}
// TODO: this is messy -- replace with sillyBytesWriter; maybe make
// bytes private
final int posSave = writer.posWrite;
outputs.write(emptyOutput, writer);
emptyOutputBytes = new byte[writer.posWrite-posSave];
if (!packed) {
// reverse
final int stopAt = (writer.posWrite - posSave)/2;
int upto = 0;
while(upto < stopAt) {
final byte b = bytes[posSave + upto];
bytes[posSave+upto] = bytes[writer.posWrite-upto-1];
bytes[writer.posWrite-upto-1] = b;
upto++;
}
}
System.arraycopy(bytes, posSave, emptyOutputBytes, 0, writer.posWrite-posSave);
writer.posWrite = posSave;
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
public void save(DataOutput out) throws IOException {
if (startNode == -1) {
throw new IllegalStateException("call finish first");
}
if (nodeAddress != null) {
throw new IllegalStateException("cannot save an FST pre-packed FST; it must first be packed");
}
CodecUtil.writeHeader(out, FILE_FORMAT_NAME, VERSION_CURRENT);
if (packed) {
out.writeByte((byte) 1);
} else {
out.writeByte((byte) 0);
}
// TODO: really we should encode this as an arc, arriving
// to the root node, instead of special casing here:
if (emptyOutput != null) {
out.writeByte((byte) 1);
out.writeVInt(emptyOutputBytes.length);
out.writeBytes(emptyOutputBytes, 0, emptyOutputBytes.length);
} else {
out.writeByte((byte) 0);
}
final byte t;
if (inputType == INPUT_TYPE.BYTE1) {
t = 0;
} else if (inputType == INPUT_TYPE.BYTE2) {
t = 1;
} else {
t = 2;
}
out.writeByte(t);
if (packed) {
assert nodeRefToAddress != null;
out.writeVInt(nodeRefToAddress.length);
for(int idx=0;idx<nodeRefToAddress.length;idx++) {
out.writeVInt(nodeRefToAddress[idx]);
}
}
out.writeVInt(startNode);
out.writeVInt(nodeCount);
out.writeVInt(arcCount);
out.writeVInt(arcWithOutputCount);
out.writeVInt(bytes.length);
out.writeBytes(bytes, 0, bytes.length);
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
public void save(final File file) throws IOException {
boolean success = false;
OutputStream os = new BufferedOutputStream(new FileOutputStream(file));
try {
save(new OutputStreamDataOutput(os));
success = true;
} finally {
if (success) {
IOUtils.close(os);
} else {
IOUtils.closeWhileHandlingException(os);
}
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
public static <T> FST<T> read(File file, Outputs<T> outputs) throws IOException {
InputStream is = new BufferedInputStream(new FileInputStream(file));
boolean success = false;
try {
FST<T> fst = new FST<T>(new InputStreamDataInput(is), outputs);
success = true;
return fst;
} finally {
if (success) {
IOUtils.close(is);
} else {
IOUtils.closeWhileHandlingException(is);
}
}
}
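The save(File)/read(File, Outputs) pair above round-trips an FST through disk; the reader only needs the same Outputs implementation that built the FST. A hedged sketch (the getSingleton argument is a version-dependent assumption, as noted earlier):
// editor's sketch (not from the Lucene sources): persist an FST and read it back
import java.io.File;
import java.io.IOException;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;

final class FstPersistenceSketch {
  static FST<Long> roundTrip(FST<Long> fst, File file) throws IOException {
    fst.save(file);   // buffered write; the stream is closed even if writing fails
    // Reading back requires the same Outputs implementation that built the FST.
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
    return FST.read(file, outputs);
  }
}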
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
private void writeLabel(int v) throws IOException {
assert v >= 0: "v=" + v;
if (inputType == INPUT_TYPE.BYTE1) {
assert v <= 255: "v=" + v;
writer.writeByte((byte) v);
} else if (inputType == INPUT_TYPE.BYTE2) {
assert v <= 65535: "v=" + v;
writer.writeShort((short) v);
} else {
//writeInt(v);
writer.writeVInt(v);
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
int readLabel(DataInput in) throws IOException {
final int v;
if (inputType == INPUT_TYPE.BYTE1) {
// Unsigned byte:
v = in.readByte()&0xFF;
} else if (inputType == INPUT_TYPE.BYTE2) {
// Unsigned short:
v = in.readShort()&0xFFFF;
} else {
v = in.readVInt();
}
return v;
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
int addNode(Builder.UnCompiledNode<T> nodeIn) throws IOException {
//System.out.println("FST.addNode pos=" + writer.posWrite + " numArcs=" + nodeIn.numArcs);
if (nodeIn.numArcs == 0) {
if (nodeIn.isFinal) {
return FINAL_END_NODE;
} else {
return NON_FINAL_END_NODE;
}
}
int startAddress = writer.posWrite;
//System.out.println(" startAddr=" + startAddress);
final boolean doFixedArray = shouldExpand(nodeIn);
final int fixedArrayStart;
if (doFixedArray) {
if (bytesPerArc.length < nodeIn.numArcs) {
bytesPerArc = new int[ArrayUtil.oversize(nodeIn.numArcs, 1)];
}
// write a "false" first arc:
writer.writeByte(ARCS_AS_FIXED_ARRAY);
writer.writeVInt(nodeIn.numArcs);
// placeholder -- we'll come back and write the number
// of bytes per arc (int) here:
// TODO: we could make this a vInt instead
writer.writeInt(0);
fixedArrayStart = writer.posWrite;
//System.out.println(" do fixed arcs array arcsStart=" + fixedArrayStart);
} else {
fixedArrayStart = 0;
}
arcCount += nodeIn.numArcs;
final int lastArc = nodeIn.numArcs-1;
int lastArcStart = writer.posWrite;
int maxBytesPerArc = 0;
for(int arcIdx=0;arcIdx<nodeIn.numArcs;arcIdx++) {
final Builder.Arc<T> arc = nodeIn.arcs[arcIdx];
final Builder.CompiledNode target = (Builder.CompiledNode) arc.target;
int flags = 0;
if (arcIdx == lastArc) {
flags += BIT_LAST_ARC;
}
if (lastFrozenNode == target.node && !doFixedArray) {
// TODO: for better perf (but more RAM used) we
// could avoid this except when arc is "near" the
// last arc:
flags += BIT_TARGET_NEXT;
}
if (arc.isFinal) {
flags += BIT_FINAL_ARC;
if (arc.nextFinalOutput != NO_OUTPUT) {
flags += BIT_ARC_HAS_FINAL_OUTPUT;
}
} else {
assert arc.nextFinalOutput == NO_OUTPUT;
}
boolean targetHasArcs = target.node > 0;
if (!targetHasArcs) {
flags += BIT_STOP_NODE;
} else if (inCounts != null) {
inCounts[target.node]++;
}
if (arc.output != NO_OUTPUT) {
flags += BIT_ARC_HAS_OUTPUT;
}
writer.writeByte((byte) flags);
writeLabel(arc.label);
// System.out.println(" write arc: label=" + (char) arc.label + " flags=" + flags + " target=" + target.node + " pos=" + writer.posWrite + " output=" + outputs.outputToString(arc.output));
if (arc.output != NO_OUTPUT) {
outputs.write(arc.output, writer);
//System.out.println(" write output");
arcWithOutputCount++;
}
if (arc.nextFinalOutput != NO_OUTPUT) {
//System.out.println(" write final output");
outputs.write(arc.nextFinalOutput, writer);
}
if (targetHasArcs && (flags & BIT_TARGET_NEXT) == 0) {
assert target.node > 0;
//System.out.println(" write target");
writer.writeInt(target.node);
}
// just write the arcs "like normal" on first pass,
// but record how many bytes each one took, and max
// byte size:
if (doFixedArray) {
bytesPerArc[arcIdx] = writer.posWrite - lastArcStart;
lastArcStart = writer.posWrite;
maxBytesPerArc = Math.max(maxBytesPerArc, bytesPerArc[arcIdx]);
//System.out.println(" bytes=" + bytesPerArc[arcIdx]);
}
}
// TODO: if arc'd arrays will be "too wasteful" by some
// measure, eg if arcs have vastly different sized
// outputs, then we should selectively disable array for
// such cases
if (doFixedArray) {
//System.out.println(" doFixedArray");
assert maxBytesPerArc > 0;
// 2nd pass just "expands" all arcs to take up a fixed
// byte size
final int sizeNeeded = fixedArrayStart + nodeIn.numArcs * maxBytesPerArc;
bytes = ArrayUtil.grow(bytes, sizeNeeded);
// TODO: we could make this a vInt instead
bytes[fixedArrayStart-4] = (byte) (maxBytesPerArc >> 24);
bytes[fixedArrayStart-3] = (byte) (maxBytesPerArc >> 16);
bytes[fixedArrayStart-2] = (byte) (maxBytesPerArc >> 8);
bytes[fixedArrayStart-1] = (byte) maxBytesPerArc;
// expand the arcs in place, backwards
int srcPos = writer.posWrite;
int destPos = fixedArrayStart + nodeIn.numArcs*maxBytesPerArc;
writer.posWrite = destPos;
for(int arcIdx=nodeIn.numArcs-1;arcIdx>=0;arcIdx--) {
//System.out.println(" repack arcIdx=" + arcIdx + " srcPos=" + srcPos + " destPos=" + destPos);
destPos -= maxBytesPerArc;
srcPos -= bytesPerArc[arcIdx];
if (srcPos != destPos) {
assert destPos > srcPos;
System.arraycopy(bytes, srcPos, bytes, destPos, bytesPerArc[arcIdx]);
}
}
}
// reverse bytes in-place; we do this so that the
// "BIT_TARGET_NEXT" opto can work, ie, it reads the
// node just before the current one
final int endAddress = writer.posWrite - 1;
int left = startAddress;
int right = endAddress;
while (left < right) {
final byte b = bytes[left];
bytes[left++] = bytes[right];
bytes[right--] = b;
}
//System.out.println(" endAddress=" + endAddress);
nodeCount++;
final int node;
if (nodeAddress != null) {
// Nodes are addressed by 1+ord:
if (nodeCount == nodeAddress.length) {
nodeAddress = ArrayUtil.grow(nodeAddress);
inCounts = ArrayUtil.grow(inCounts);
}
nodeAddress[nodeCount] = endAddress;
// System.out.println(" write nodeAddress[" + nodeCount + "] = " + endAddress);
node = nodeCount;
} else {
node = endAddress;
}
lastFrozenNode = node;
return node;
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
public Arc<T> readLastTargetArc(Arc<T> follow, Arc<T> arc) throws IOException {
//System.out.println("readLast");
if (!targetHasArcs(follow)) {
//System.out.println(" end node");
assert follow.isFinal();
arc.label = END_LABEL;
arc.target = FINAL_END_NODE;
arc.output = follow.nextFinalOutput;
arc.flags = BIT_LAST_ARC;
return arc;
} else {
final BytesReader in = getBytesReader(getNodeAddress(follow.target));
arc.node = follow.target;
final byte b = in.readByte();
if (b == ARCS_AS_FIXED_ARRAY) {
// array: jump straight to end
arc.numArcs = in.readVInt();
if (packed) {
arc.bytesPerArc = in.readVInt();
} else {
arc.bytesPerArc = in.readInt();
}
//System.out.println(" array numArcs=" + arc.numArcs + " bpa=" + arc.bytesPerArc);
arc.posArcsStart = in.pos;
arc.arcIdx = arc.numArcs - 2;
} else {
arc.flags = b;
// non-array: linear scan
arc.bytesPerArc = 0;
//System.out.println(" scan");
while(!arc.isLast()) {
// skip this arc:
readLabel(in);
if (arc.flag(BIT_ARC_HAS_OUTPUT)) {
outputs.read(in);
}
if (arc.flag(BIT_ARC_HAS_FINAL_OUTPUT)) {
outputs.read(in);
}
if (arc.flag(BIT_STOP_NODE)) {
} else if (arc.flag(BIT_TARGET_NEXT)) {
} else {
if (packed) {
in.readVInt();
} else {
in.skip(4);
}
}
arc.flags = in.readByte();
}
// Undo the byte flags we read:
in.skip(-1);
arc.nextArc = in.pos;
}
readNextRealArc(arc, in);
assert arc.isLast();
return arc;
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
public Arc<T> readFirstTargetArc(Arc<T> follow, Arc<T> arc) throws IOException {
//int pos = address;
//System.out.println(" readFirstTarget follow.target=" + follow.target + " isFinal=" + follow.isFinal());
if (follow.isFinal()) {
// Insert "fake" final first arc:
arc.label = END_LABEL;
arc.output = follow.nextFinalOutput;
arc.flags = BIT_FINAL_ARC;
if (follow.target <= 0) {
arc.flags |= BIT_LAST_ARC;
} else {
arc.node = follow.target;
// NOTE: nextArc is a node (not an address!) in this case:
arc.nextArc = follow.target;
}
arc.target = FINAL_END_NODE;
//System.out.println(" insert isFinal; nextArc=" + follow.target + " isLast=" + arc.isLast() + " output=" + outputs.outputToString(arc.output));
return arc;
} else {
return readFirstRealTargetArc(follow.target, arc, getBytesReader(0));
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
public Arc<T> readFirstRealTargetArc(int node, Arc<T> arc, final BytesReader in) throws IOException {
assert in.bytes == bytes;
final int address = getNodeAddress(node);
in.pos = address;
//System.out.println(" readFirstRealTargtArc address="
//+ address);
//System.out.println(" flags=" + arc.flags);
arc.node = node;
if (in.readByte() == ARCS_AS_FIXED_ARRAY) {
//System.out.println(" fixedArray");
// this is first arc in a fixed-array
arc.numArcs = in.readVInt();
if (packed) {
arc.bytesPerArc = in.readVInt();
} else {
arc.bytesPerArc = in.readInt();
}
arc.arcIdx = -1;
arc.nextArc = arc.posArcsStart = in.pos;
//System.out.println(" bytesPer=" + arc.bytesPerArc + " numArcs=" + arc.numArcs + " arcsStart=" + pos);
} else {
//arc.flags = b;
arc.nextArc = address;
arc.bytesPerArc = 0;
}
return readNextRealArc(arc, in);
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
boolean isExpandedTarget(Arc<T> follow) throws IOException {
if (!targetHasArcs(follow)) {
return false;
} else {
final BytesReader in = getBytesReader(getNodeAddress(follow.target));
return in.readByte() == ARCS_AS_FIXED_ARRAY;
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
public Arc<T> readNextArc(Arc<T> arc) throws IOException {
if (arc.label == END_LABEL) {
// This was a fake inserted "final" arc
if (arc.nextArc <= 0) {
throw new IllegalArgumentException("cannot readNextArc when arc.isLast()=true");
}
return readFirstRealTargetArc(arc.nextArc, arc, getBytesReader(0));
} else {
return readNextRealArc(arc, getBytesReader(0));
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
public int readNextArcLabel(Arc<T> arc) throws IOException {
assert !arc.isLast();
final BytesReader in;
if (arc.label == END_LABEL) {
//System.out.println(" nextArc fake " + arc.nextArc);
in = getBytesReader(getNodeAddress(arc.nextArc));
final byte b = bytes[in.pos];
if (b == ARCS_AS_FIXED_ARRAY) {
//System.out.println(" nextArc fake array");
in.skip(1);
in.readVInt();
if (packed) {
in.readVInt();
} else {
in.readInt();
}
}
} else {
if (arc.bytesPerArc != 0) {
//System.out.println(" nextArc real array");
// arcs are at fixed entries
in = getBytesReader(arc.posArcsStart);
in.skip((1+arc.arcIdx)*arc.bytesPerArc);
} else {
// arcs are packed
//System.out.println(" nextArc real packed");
in = getBytesReader(arc.nextArc);
}
}
// skip flags
in.readByte();
return readLabel(in);
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
public Arc<T> readNextRealArc(Arc<T> arc, final BytesReader in) throws IOException {
assert in.bytes == bytes;
// TODO: can't assert this because we call from readFirstArc
// assert !flag(arc.flags, BIT_LAST_ARC);
// this is a continuing arc in a fixed array
if (arc.bytesPerArc != 0) {
// arcs are at fixed entries
arc.arcIdx++;
assert arc.arcIdx < arc.numArcs;
in.skip(arc.posArcsStart, arc.arcIdx*arc.bytesPerArc);
} else {
// arcs are packed
in.pos = arc.nextArc;
}
arc.flags = in.readByte();
arc.label = readLabel(in);
if (arc.flag(BIT_ARC_HAS_OUTPUT)) {
arc.output = outputs.read(in);
} else {
arc.output = outputs.getNoOutput();
}
if (arc.flag(BIT_ARC_HAS_FINAL_OUTPUT)) {
arc.nextFinalOutput = outputs.read(in);
} else {
arc.nextFinalOutput = outputs.getNoOutput();
}
if (arc.flag(BIT_STOP_NODE)) {
if (arc.flag(BIT_FINAL_ARC)) {
arc.target = FINAL_END_NODE;
} else {
arc.target = NON_FINAL_END_NODE;
}
arc.nextArc = in.pos;
} else if (arc.flag(BIT_TARGET_NEXT)) {
arc.nextArc = in.pos;
// TODO: would be nice to make this lazy -- maybe
// caller doesn't need the target and is scanning arcs...
if (nodeAddress == null) {
if (!arc.flag(BIT_LAST_ARC)) {
if (arc.bytesPerArc == 0) {
// must scan
seekToNextNode(in);
} else {
in.skip(arc.posArcsStart, arc.bytesPerArc * arc.numArcs);
}
}
arc.target = in.pos;
} else {
arc.target = arc.node - 1;
assert arc.target > 0;
}
} else {
if (packed) {
final int pos = in.pos;
final int code = in.readVInt();
if (arc.flag(BIT_TARGET_DELTA)) {
// Address is delta-coded from current address:
arc.target = pos + code;
//System.out.println(" delta pos=" + pos + " delta=" + code + " target=" + arc.target);
} else if (code < nodeRefToAddress.length) {
// Deref
arc.target = nodeRefToAddress[code];
//System.out.println(" deref code=" + code + " target=" + arc.target);
} else {
// Absolute
arc.target = code;
//System.out.println(" abs code=" + code + " derefLen=" + nodeRefToAddress.length);
}
} else {
arc.target = in.readInt();
}
arc.nextArc = in.pos;
}
return arc;
}
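// NOTE: illustrative sketch, not part of the Lucene source above. readNextRealArc
// resolves a non-next target in one of three ways: delta-coded from the position where
// the code was read (BIT_TARGET_DELTA), an index into the nodeRefToAddress table for
// frequently-referenced nodes, or the absolute address itself. The hypothetical helper
// below restates that decision; resolveTarget/table are made-up names, not Lucene APIs.
static int resolveTarget(boolean targetDelta, int code, int pos, int[] table) {
  if (targetDelta) {
    // Target address is delta-coded from the position where the vInt code started:
    return pos + code;
  } else if (code < table.length) {
    // Small codes index the table of top (most-referenced) nodes:
    return table[code];
  } else {
    // Anything else is the absolute address:
    return code;
  }
}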
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
public Arc<T> findTargetArc(int labelToMatch, Arc<T> follow, Arc<T> arc, BytesReader in) throws IOException {
assert cachedRootArcs != null;
assert in.bytes == bytes;
if (labelToMatch == END_LABEL) {
if (follow.isFinal()) {
if (follow.target <= 0) {
arc.flags = BIT_LAST_ARC;
} else {
arc.flags = 0;
// NOTE: nextArc is a node (not an address!) in this case:
arc.nextArc = follow.target;
arc.node = follow.target;
}
arc.output = follow.nextFinalOutput;
arc.label = END_LABEL;
return arc;
} else {
return null;
}
}
// Short-circuit if this arc is in the root arc cache:
if (follow.target == startNode && labelToMatch < cachedRootArcs.length) {
final Arc<T> result = cachedRootArcs[labelToMatch];
if (result == null) {
return result;
} else {
arc.copyFrom(result);
return arc;
}
}
if (!targetHasArcs(follow)) {
return null;
}
in.pos = getNodeAddress(follow.target);
arc.node = follow.target;
// System.out.println("fta label=" + (char) labelToMatch);
if (in.readByte() == ARCS_AS_FIXED_ARRAY) {
// Arcs are full array; do binary search:
arc.numArcs = in.readVInt();
if (packed) {
arc.bytesPerArc = in.readVInt();
} else {
arc.bytesPerArc = in.readInt();
}
arc.posArcsStart = in.pos;
int low = 0;
int high = arc.numArcs-1;
while (low <= high) {
//System.out.println(" cycle");
int mid = (low + high) >>> 1;
in.skip(arc.posArcsStart, arc.bytesPerArc*mid + 1);
int midLabel = readLabel(in);
final int cmp = midLabel - labelToMatch;
if (cmp < 0) {
low = mid + 1;
} else if (cmp > 0) {
high = mid - 1;
} else {
arc.arcIdx = mid-1;
//System.out.println(" found!");
return readNextRealArc(arc, in);
}
}
return null;
}
// Linear scan
readFirstRealTargetArc(follow.target, arc, in);
while(true) {
//System.out.println(" non-bs cycle");
// TODO: we should fix this code to not have to create
// object for the output of every arc we scan... only
// for the matching arc, if found
if (arc.label == labelToMatch) {
//System.out.println(" found!");
return arc;
} else if (arc.label > labelToMatch) {
return null;
} else if (arc.isLast()) {
return null;
} else {
readNextRealArc(arc, in);
}
}
}
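// NOTE: illustrative sketch. findTargetArc binary-searches the fixed-width records of
// an array'd node: every arc occupies bytesPerArc bytes starting at posArcsStart, with
// the flags byte first and the label right after it, so probing record "mid" means
// seeking to posArcsStart + bytesPerArc*mid + 1. The standalone method below mimics
// that layout with single-byte labels; all names here are hypothetical.
static int findRecordByLabel(byte[] block, int start, int recordSize, int numRecords, int label) {
  int low = 0;
  int high = numRecords - 1;
  while (low <= high) {
    int mid = (low + high) >>> 1;
    // +1 skips the flags byte that precedes the label in every record:
    int midLabel = block[start + recordSize * mid + 1] & 0xFF;
    if (midLabel < label) {
      low = mid + 1;
    } else if (midLabel > label) {
      high = mid - 1;
    } else {
      return mid;   // index of the record carrying this label
    }
  }
  return -1;        // no record carries this label
}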
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
private void seekToNextNode(BytesReader in) throws IOException {
while(true) {
final int flags = in.readByte();
readLabel(in);
if (flag(flags, BIT_ARC_HAS_OUTPUT)) {
outputs.read(in);
}
if (flag(flags, BIT_ARC_HAS_FINAL_OUTPUT)) {
outputs.read(in);
}
if (!flag(flags, BIT_STOP_NODE) && !flag(flags, BIT_TARGET_NEXT)) {
if (packed) {
in.readVInt();
} else {
in.readInt();
}
}
if (flag(flags, BIT_LAST_ARC)) {
return;
}
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FST.java
public FST<T> pack(int minInCountDeref, int maxDerefNodes) throws IOException {
// TODO: other things to try
// - renumber the nodes to get more next / better locality?
// - allow multiple input labels on an arc, so
// singular chain of inputs can take one arc (on
// wikipedia terms this could save another ~6%)
// - in the ord case, the output '1' is presumably
// very common (after NO_OUTPUT)... maybe use a bit
// for it..?
// - use spare bits in flags.... for top few labels /
// outputs / targets
if (nodeAddress == null) {
throw new IllegalArgumentException("this FST was not built with willPackFST=true");
}
Arc<T> arc = new Arc<T>();
final BytesReader r = getBytesReader(0);
final int topN = Math.min(maxDerefNodes, inCounts.length);
// Find top nodes with highest number of incoming arcs:
NodeQueue q = new NodeQueue(topN);
// TODO: we could use more RAM efficient selection algo here...
NodeAndInCount bottom = null;
for(int node=0;node<inCounts.length;node++) {
if (inCounts[node] >= minInCountDeref) {
if (bottom == null) {
q.add(new NodeAndInCount(node, inCounts[node]));
if (q.size() == topN) {
bottom = q.top();
}
} else if (inCounts[node] > bottom.count) {
q.insertWithOverflow(new NodeAndInCount(node, inCounts[node]));
}
}
}
// Free up RAM:
inCounts = null;
final Map<Integer,Integer> topNodeMap = new HashMap<Integer,Integer>();
for(int downTo=q.size()-1;downTo>=0;downTo--) {
NodeAndInCount n = q.pop();
topNodeMap.put(n.node, downTo);
//System.out.println("map node=" + n.node + " inCount=" + n.count + " to newID=" + downTo);
}
// TODO: we can use packed ints:
// +1 because node ords start at 1 (0 is reserved as
// stop node):
final int[] nodeRefToAddressIn = new int[topNodeMap.size()];
final FST<T> fst = new FST<T>(inputType, nodeRefToAddressIn, outputs);
final BytesWriter writer = fst.writer;
final int[] newNodeAddress = new int[1+nodeCount];
// Fill initial coarse guess:
for(int node=1;node<=nodeCount;node++) {
newNodeAddress[node] = 1 + bytes.length - nodeAddress[node];
}
int absCount;
int deltaCount;
int topCount;
int nextCount;
// Iterate until we converge:
while(true) {
//System.out.println("\nITER");
boolean changed = false;
// for assert:
boolean negDelta = false;
writer.posWrite = 0;
// Skip 0 byte since 0 is reserved target:
writer.writeByte((byte) 0);
fst.arcWithOutputCount = 0;
fst.nodeCount = 0;
fst.arcCount = 0;
absCount = deltaCount = topCount = nextCount = 0;
int changedCount = 0;
int addressError = 0;
//int totWasted = 0;
// Since we re-reverse the bytes, we now write the
// nodes backwards, so that BIT_TARGET_NEXT is
// unchanged:
for(int node=nodeCount;node>=1;node--) {
fst.nodeCount++;
final int address = writer.posWrite;
//System.out.println(" node: " + node + " address=" + address);
if (address != newNodeAddress[node]) {
addressError = address - newNodeAddress[node];
//System.out.println(" change: " + (address - newNodeAddress[node]));
changed = true;
newNodeAddress[node] = address;
changedCount++;
}
int nodeArcCount = 0;
int bytesPerArc = 0;
boolean retry = false;
// for assert:
boolean anyNegDelta = false;
// Retry loop: possibly iterate more than once, if
// this is an array'd node and bytesPerArc changes:
writeNode:
while(true) { // retry writing this node
readFirstRealTargetArc(node, arc, r);
final boolean useArcArray = arc.bytesPerArc != 0;
if (useArcArray) {
// Write false first arc:
if (bytesPerArc == 0) {
bytesPerArc = arc.bytesPerArc;
}
writer.writeByte(ARCS_AS_FIXED_ARRAY);
writer.writeVInt(arc.numArcs);
writer.writeVInt(bytesPerArc);
//System.out.println("node " + node + ": " + arc.numArcs + " arcs");
}
int maxBytesPerArc = 0;
//int wasted = 0;
while(true) { // iterate over all arcs for this node
//System.out.println(" arc label=" + arc.label + " target=" + arc.target + " pos=" + writer.posWrite);
final int arcStartPos = writer.posWrite;
nodeArcCount++;
byte flags = 0;
if (arc.isLast()) {
flags += BIT_LAST_ARC;
}
/*
if (!useArcArray && nodeUpto < nodes.length-1 && arc.target == nodes[nodeUpto+1]) {
flags += BIT_TARGET_NEXT;
}
*/
if (!useArcArray && node != 1 && arc.target == node-1) {
flags += BIT_TARGET_NEXT;
if (!retry) {
nextCount++;
}
}
if (arc.isFinal()) {
flags += BIT_FINAL_ARC;
if (arc.nextFinalOutput != NO_OUTPUT) {
flags += BIT_ARC_HAS_FINAL_OUTPUT;
}
} else {
assert arc.nextFinalOutput == NO_OUTPUT;
}
if (!targetHasArcs(arc)) {
flags += BIT_STOP_NODE;
}
if (arc.output != NO_OUTPUT) {
flags += BIT_ARC_HAS_OUTPUT;
}
final Integer ptr;
final int absPtr;
final boolean doWriteTarget = targetHasArcs(arc) && (flags & BIT_TARGET_NEXT) == 0;
if (doWriteTarget) {
ptr = topNodeMap.get(arc.target);
if (ptr != null) {
absPtr = ptr;
} else {
absPtr = topNodeMap.size() + newNodeAddress[arc.target] + addressError;
}
int delta = newNodeAddress[arc.target] + addressError - writer.posWrite - 2;
if (delta < 0) {
//System.out.println("neg: " + delta);
anyNegDelta = true;
delta = 0;
}
if (delta < absPtr) {
flags |= BIT_TARGET_DELTA;
}
} else {
ptr = null;
absPtr = 0;
}
writer.writeByte(flags);
fst.writeLabel(arc.label);
if (arc.output != NO_OUTPUT) {
outputs.write(arc.output, writer);
if (!retry) {
fst.arcWithOutputCount++;
}
}
if (arc.nextFinalOutput != NO_OUTPUT) {
outputs.write(arc.nextFinalOutput, writer);
}
if (doWriteTarget) {
int delta = newNodeAddress[arc.target] + addressError - writer.posWrite;
if (delta < 0) {
anyNegDelta = true;
//System.out.println("neg: " + delta);
delta = 0;
}
if (flag(flags, BIT_TARGET_DELTA)) {
//System.out.println(" delta");
writer.writeVInt(delta);
if (!retry) {
deltaCount++;
}
} else {
/*
if (ptr != null) {
System.out.println(" deref");
} else {
System.out.println(" abs");
}
*/
writer.writeVInt(absPtr);
if (!retry) {
if (absPtr >= topNodeMap.size()) {
absCount++;
} else {
topCount++;
}
}
}
}
if (useArcArray) {
final int arcBytes = writer.posWrite - arcStartPos;
//System.out.println(" " + arcBytes + " bytes");
maxBytesPerArc = Math.max(maxBytesPerArc, arcBytes);
// NOTE: this may in fact go "backwards", if
// somehow (rarely, possibly never) we use
// more bytesPerArc in this rewrite than the
// incoming FST did... but in this case we
// will retry (below) so it's OK to overwrite
// bytes:
//wasted += bytesPerArc - arcBytes;
writer.setPosWrite(arcStartPos + bytesPerArc);
}
if (arc.isLast()) {
break;
}
readNextRealArc(arc, r);
}
if (useArcArray) {
if (maxBytesPerArc == bytesPerArc || (retry && maxBytesPerArc <= bytesPerArc)) {
// converged
//System.out.println(" bba=" + bytesPerArc + " wasted=" + wasted);
//totWasted += wasted;
break;
}
} else {
break;
}
//System.out.println(" retry this node maxBytesPerArc=" + maxBytesPerArc + " vs " + bytesPerArc);
// Retry:
bytesPerArc = maxBytesPerArc;
writer.posWrite = address;
nodeArcCount = 0;
retry = true;
anyNegDelta = false;
}
negDelta |= anyNegDelta;
fst.arcCount += nodeArcCount;
}
if (!changed) {
// We don't renumber the nodes (just reverse their
// order) so nodes should only point forward to
// other nodes because we only produce acyclic FSTs
// w/ nodes only pointing "forwards":
assert !negDelta;
//System.out.println("TOT wasted=" + totWasted);
// Converged!
break;
}
//System.out.println(" " + changedCount + " of " + fst.nodeCount + " changed; retry");
}
for(Map.Entry<Integer,Integer> ent : topNodeMap.entrySet()) {
nodeRefToAddressIn[ent.getValue()] = newNodeAddress[ent.getKey()];
}
fst.startNode = newNodeAddress[startNode];
//System.out.println("new startNode=" + fst.startNode + " old startNode=" + startNode);
if (emptyOutput != null) {
fst.setEmptyOutput(emptyOutput);
}
assert fst.nodeCount == nodeCount: "fst.nodeCount=" + fst.nodeCount + " nodeCount=" + nodeCount;
assert fst.arcCount == arcCount;
assert fst.arcWithOutputCount == arcWithOutputCount: "fst.arcWithOutputCount=" + fst.arcWithOutputCount + " arcWithOutputCount=" + arcWithOutputCount;
final byte[] finalBytes = new byte[writer.posWrite];
//System.out.println("resize " + fst.bytes.length + " down to " + writer.posWrite);
System.arraycopy(fst.bytes, 0, finalBytes, 0, writer.posWrite);
fst.bytes = finalBytes;
fst.cacheRootArcs();
//final int size = fst.sizeInBytes();
//System.out.println("nextCount=" + nextCount + " topCount=" + topCount + " deltaCount=" + deltaCount + " absCount=" + absCount);
return fst;
}
// in lucene/core/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java
Override
public void write(IntsRef prefix, DataOutput out) throws IOException {
assert prefix != null;
out.writeVInt(prefix.length);
for(int idx=0;idx<prefix.length;idx++) {
out.writeVInt(prefix.ints[prefix.offset+idx]);
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/IntSequenceOutputs.java
Override
public IntsRef read(DataInput in) throws IOException {
final int len = in.readVInt();
if (len == 0) {
return NO_OUTPUT;
} else {
final IntsRef output = new IntsRef(len);
for(int idx=0;idx<len;idx++) {
output.ints[idx] = in.readVInt();
}
output.length = len;
return output;
}
}
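// NOTE: illustrative sketch. Both methods above lean on Lucene's vInt encoding: 7 bits
// of payload per byte, high bit set on every byte except the last. The hypothetical
// pair below re-implements that byte layout against a plain ByteArrayOutputStream and
// byte[] purely to make the wire format concrete; it is not the Lucene API.
static void writeVInt(java.io.ByteArrayOutputStream out, int v) {
  while ((v & ~0x7F) != 0) {
    out.write((v & 0x7F) | 0x80);   // high bit set: more bytes follow
    v >>>= 7;
  }
  out.write(v);                     // final byte has the high bit clear
}

static int readVInt(byte[] in, int[] pos) {
  byte b = in[pos[0]++];
  int v = b & 0x7F;
  for (int shift = 7; (b & 0x80) != 0; shift += 7) {
    b = in[pos[0]++];
    v |= (b & 0x7F) << shift;
  }
  return v;
}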
// in lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java
protected final void rewindPrefix() throws IOException {
if (upto == 0) {
//System.out.println(" init");
upto = 1;
fst.readFirstTargetArc(getArc(0), getArc(1));
return;
}
//System.out.println(" rewind upto=" + upto + " vs targetLength=" + targetLength);
final int currentLimit = upto;
upto = 1;
while (upto < currentLimit && upto <= targetLength+1) {
final int cmp = getCurrentLabel() - getTargetLabel();
if (cmp < 0) {
// seek forward
//System.out.println(" seek fwd");
break;
} else if (cmp > 0) {
// seek backwards -- reset this arc to the first arc
final FST.Arc<T> arc = getArc(upto);
fst.readFirstTargetArc(getArc(upto-1), arc);
//System.out.println(" seek first arc");
break;
}
upto++;
}
//System.out.println(" fall through upto=" + upto);
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java
protected void doNext() throws IOException {
//System.out.println("FE: next upto=" + upto);
if (upto == 0) {
//System.out.println(" init");
upto = 1;
fst.readFirstTargetArc(getArc(0), getArc(1));
} else {
// pop
//System.out.println(" check pop curArc target=" + arcs[upto].target + " label=" + arcs[upto].label + " isLast?=" + arcs[upto].isLast());
while (arcs[upto].isLast()) {
upto--;
if (upto == 0) {
//System.out.println(" eof");
return;
}
}
fst.readNextArc(arcs[upto]);
}
pushFirst();
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java
protected void doSeekCeil() throws IOException {
//System.out.println(" advance len=" + target.length + " curlen=" + current.length);
// TODO: possibly caller could/should provide common
// prefix length? ie this work may be redundant if
// caller is in fact intersecting against its own
// automaton
//System.out.println("FE.seekCeil upto=" + upto);
// Save time by starting at the end of the shared prefix
// b/w our current term & the target:
rewindPrefix();
//System.out.println(" after rewind upto=" + upto);
FST.Arc<T> arc = getArc(upto);
int targetLabel = getTargetLabel();
//System.out.println(" init targetLabel=" + targetLabel);
// Now scan forward, matching the new suffix of the target
while(true) {
//System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") vs targetLabel=" + targetLabel);
if (arc.bytesPerArc != 0 && arc.label != -1) {
// Arcs are fixed array -- use binary search to find
// the target.
final FST.BytesReader in = fst.getBytesReader(0);
int low = arc.arcIdx;
int high = arc.numArcs-1;
int mid = 0;
//System.out.println("do arc array low=" + low + " high=" + high + " targetLabel=" + targetLabel);
boolean found = false;
while (low <= high) {
mid = (low + high) >>> 1;
in.pos = arc.posArcsStart;
in.skip(arc.bytesPerArc*mid+1);
final int midLabel = fst.readLabel(in);
final int cmp = midLabel - targetLabel;
//System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
if (cmp < 0)
low = mid + 1;
else if (cmp > 0)
high = mid - 1;
else {
found = true;
break;
}
}
// NOTE: this code is dup'd w/ the code below (in
// the outer else clause):
if (found) {
// Match
arc.arcIdx = mid-1;
fst.readNextRealArc(arc, in);
assert arc.arcIdx == mid;
assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid;
output[upto] = fst.outputs.add(output[upto-1], arc.output);
if (targetLabel == FST.END_LABEL) {
return;
}
setCurrentLabel(arc.label);
incr();
arc = fst.readFirstTargetArc(arc, getArc(upto));
targetLabel = getTargetLabel();
continue;
} else if (low == arc.numArcs) {
// Dead end
arc.arcIdx = arc.numArcs-2;
fst.readNextRealArc(arc, in);
assert arc.isLast();
// Dead end (target is after the last arc);
// rollback to last fork then push
upto--;
while(true) {
if (upto == 0) {
return;
}
final FST.Arc<T> prevArc = getArc(upto);
//System.out.println(" rollback upto=" + upto + " arc.label=" + prevArc.label + " isLast?=" + prevArc.isLast());
if (!prevArc.isLast()) {
fst.readNextArc(prevArc);
pushFirst();
return;
}
upto--;
}
} else {
arc.arcIdx = (low > high ? low : high)-1;
fst.readNextRealArc(arc, in);
assert arc.label > targetLabel;
pushFirst();
return;
}
} else {
// Arcs are not array'd -- must do linear scan:
if (arc.label == targetLabel) {
// recurse
output[upto] = fst.outputs.add(output[upto-1], arc.output);
if (targetLabel == FST.END_LABEL) {
return;
}
setCurrentLabel(arc.label);
incr();
arc = fst.readFirstTargetArc(arc, getArc(upto));
targetLabel = getTargetLabel();
} else if (arc.label > targetLabel) {
pushFirst();
return;
} else if (arc.isLast()) {
// Dead end (target is after the last arc);
// rollback to last fork then push
upto--;
while(true) {
if (upto == 0) {
return;
}
final FST.Arc<T> prevArc = getArc(upto);
//System.out.println(" rollback upto=" + upto + " arc.label=" + prevArc.label + " isLast?=" + prevArc.isLast());
if (!prevArc.isLast()) {
fst.readNextArc(prevArc);
pushFirst();
return;
}
upto--;
}
} else {
// keep scanning
//System.out.println(" next scan");
fst.readNextArc(arc);
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java
protected void doSeekFloor() throws IOException {
// TODO: possibly caller could/should provide common
// prefix length? ie this work may be redundant if
// caller is in fact intersecting against its own
// automaton
//System.out.println("FE: seek floor upto=" + upto);
// Save CPU by starting at the end of the shared prefix
// b/w our current term & the target:
rewindPrefix();
//System.out.println("FE: after rewind upto=" + upto);
FST.Arc<T> arc = getArc(upto);
int targetLabel = getTargetLabel();
//System.out.println("FE: init targetLabel=" + targetLabel);
// Now scan forward, matching the new suffix of the target
while(true) {
//System.out.println(" cycle upto=" + upto + " arc.label=" + arc.label + " (" + (char) arc.label + ") targetLabel=" + targetLabel + " isLast?=" + arc.isLast() + " bba=" + arc.bytesPerArc);
if (arc.bytesPerArc != 0 && arc.label != FST.END_LABEL) {
// Arcs are fixed array -- use binary search to find
// the target.
final FST.BytesReader in = fst.getBytesReader(0);
int low = arc.arcIdx;
int high = arc.numArcs-1;
int mid = 0;
//System.out.println("do arc array low=" + low + " high=" + high + " targetLabel=" + targetLabel);
boolean found = false;
while (low <= high) {
mid = (low + high) >>> 1;
in.pos = arc.posArcsStart;
in.skip(arc.bytesPerArc*mid+1);
final int midLabel = fst.readLabel(in);
final int cmp = midLabel - targetLabel;
//System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
if (cmp < 0) {
low = mid + 1;
} else if (cmp > 0) {
high = mid - 1;
} else {
found = true;
break;
}
}
// NOTE: this code is dup'd w/ the code below (in
// the outer else clause):
if (found) {
// Match -- recurse
//System.out.println(" match! arcIdx=" + mid);
arc.arcIdx = mid-1;
fst.readNextRealArc(arc, in);
assert arc.arcIdx == mid;
assert arc.label == targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel + " mid=" + mid;
output[upto] = fst.outputs.add(output[upto-1], arc.output);
if (targetLabel == FST.END_LABEL) {
return;
}
setCurrentLabel(arc.label);
incr();
arc = fst.readFirstTargetArc(arc, getArc(upto));
targetLabel = getTargetLabel();
continue;
} else if (high == -1) {
//System.out.println(" before first");
// Very first arc is after our target
// TODO: if each arc could somehow read the arc just
// before, we can save this re-scan. The ceil case
// doesn't need this because it reads the next arc
// instead:
while(true) {
// First, walk backwards until we find a first arc
// that's before our target label:
fst.readFirstTargetArc(getArc(upto-1), arc);
if (arc.label < targetLabel) {
// Then, scan forwards to the arc just before
// the targetLabel:
while(!arc.isLast() && fst.readNextArcLabel(arc) < targetLabel) {
fst.readNextArc(arc);
}
pushLast();
return;
}
upto--;
if (upto == 0) {
return;
}
targetLabel = getTargetLabel();
arc = getArc(upto);
}
} else {
// There is a floor arc:
arc.arcIdx = (low > high ? high : low)-1;
//System.out.println(" hasFloor arcIdx=" + (arc.arcIdx+1));
fst.readNextRealArc(arc, in);
assert arc.isLast() || fst.readNextArcLabel(arc) > targetLabel;
assert arc.label < targetLabel: "arc.label=" + arc.label + " vs targetLabel=" + targetLabel;
pushLast();
return;
}
} else {
if (arc.label == targetLabel) {
// Match -- recurse
output[upto] = fst.outputs.add(output[upto-1], arc.output);
if (targetLabel == FST.END_LABEL) {
return;
}
setCurrentLabel(arc.label);
incr();
arc = fst.readFirstTargetArc(arc, getArc(upto));
targetLabel = getTargetLabel();
} else if (arc.label > targetLabel) {
// TODO: if each arc could somehow read the arc just
// before, we can save this re-scan. The ceil case
// doesn't need this because it reads the next arc
// instead:
while(true) {
// First, walk backwards until we find a first arc
// that's before our target label:
fst.readFirstTargetArc(getArc(upto-1), arc);
if (arc.label < targetLabel) {
// Then, scan forwards to the arc just before
// the targetLabel:
while(!arc.isLast() && fst.readNextArcLabel(arc) < targetLabel) {
fst.readNextArc(arc);
}
pushLast();
return;
}
upto--;
if (upto == 0) {
return;
}
targetLabel = getTargetLabel();
arc = getArc(upto);
}
} else if (!arc.isLast()) {
//System.out.println(" check next label=" + fst.readNextArcLabel(arc) + " (" + (char) fst.readNextArcLabel(arc) + ")");
if (fst.readNextArcLabel(arc) > targetLabel) {
pushLast();
return;
} else {
// keep scanning
fst.readNextArc(arc);
}
} else {
pushLast();
return;
}
}
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java
protected boolean doSeekExact() throws IOException {
// TODO: possibly caller could/should provide common
// prefix length? ie this work may be redundant if
// caller is in fact intersecting against its own
// automaton
//System.out.println("FE: seek exact upto=" + upto);
// Save time by starting at the end of the shared prefix
// b/w our current term & the target:
rewindPrefix();
//System.out.println("FE: after rewind upto=" + upto);
FST.Arc<T> arc = getArc(upto-1);
int targetLabel = getTargetLabel();
final FST.BytesReader fstReader = fst.getBytesReader(0);
while(true) {
//System.out.println(" cycle target=" + (targetLabel == -1 ? "-1" : (char) targetLabel));
final FST.Arc<T> nextArc = fst.findTargetArc(targetLabel, arc, getArc(upto), fstReader);
if (nextArc == null) {
// short circuit
//upto--;
//upto = 0;
fst.readFirstTargetArc(arc, getArc(upto));
//System.out.println(" no match upto=" + upto);
return false;
}
// Match -- recurse:
output[upto] = fst.outputs.add(output[upto-1], nextArc.output);
if (targetLabel == FST.END_LABEL) {
//System.out.println(" return found; upto=" + upto + " output=" + output[upto] + " nextArc=" + nextArc.isLast());
return true;
}
setCurrentLabel(targetLabel);
incr();
targetLabel = getTargetLabel();
arc = nextArc;
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java
private void pushFirst() throws IOException {
FST.Arc<T> arc = arcs[upto];
assert arc != null;
while (true) {
output[upto] = fst.outputs.add(output[upto-1], arc.output);
if (arc.label == FST.END_LABEL) {
// Final node
break;
}
//System.out.println(" pushFirst label=" + (char) arc.label + " upto=" + upto + " output=" + fst.outputs.outputToString(output[upto]));
setCurrentLabel(arc.label);
incr();
final FST.Arc<T> nextArc = getArc(upto);
fst.readFirstTargetArc(arc, nextArc);
arc = nextArc;
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/FSTEnum.java
private void pushLast() throws IOException {
FST.Arc<T> arc = arcs[upto];
assert arc != null;
while (true) {
setCurrentLabel(arc.label);
output[upto] = fst.outputs.add(output[upto-1], arc.output);
if (arc.label == FST.END_LABEL) {
// Final node
break;
}
incr();
arc = fst.readLastTargetArc(arc, getArc(upto));
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/PairOutputs.java
Override
public void write(Pair<A,B> output, DataOutput writer) throws IOException {
assert valid(output);
outputs1.write(output.output1, writer);
outputs2.write(output.output2, writer);
}
// in lucene/core/src/java/org/apache/lucene/util/fst/PairOutputs.java
Override
public Pair<A,B> read(DataInput in) throws IOException {
A output1 = outputs1.read(in);
B output2 = outputs2.read(in);
return newPair(output1, output2);
}
// in lucene/core/src/java/org/apache/lucene/util/fst/PositiveIntOutputs.java
Override
public void write(Long output, DataOutput out) throws IOException {
assert valid(output);
out.writeVLong(output);
}
// in lucene/core/src/java/org/apache/lucene/util/fst/PositiveIntOutputs.java
Override
public Long read(DataInput in) throws IOException {
long v = in.readVLong();
if (v == 0) {
return NO_OUTPUT;
} else {
return v;
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/CharSequenceOutputs.java
Override
public void write(CharsRef prefix, DataOutput out) throws IOException {
assert prefix != null;
out.writeVInt(prefix.length);
// TODO: maybe UTF8?
for(int idx=0;idx<prefix.length;idx++) {
out.writeVInt(prefix.chars[prefix.offset+idx]);
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/CharSequenceOutputs.java
Override
public CharsRef read(DataInput in) throws IOException {
final int len = in.readVInt();
if (len == 0) {
return NO_OUTPUT;
} else {
final CharsRef output = new CharsRef(len);
for(int idx=0;idx<len;idx++) {
output.chars[idx] = (char) in.readVInt();
}
output.length = len;
return output;
}
}
// in lucene/core/src/java/org/apache/lucene/util/fst/BytesRefFSTEnum.java
public InputOutput<T> next() throws IOException {
//System.out.println(" enum.next");
doNext();
return setResult();
}
// in lucene/core/src/java/org/apache/lucene/util/fst/BytesRefFSTEnum.java
public InputOutput<T> seekCeil(BytesRef target) throws IOException {
this.target = target;
targetLength = target.length;
super.doSeekCeil();
return setResult();
}
// in lucene/core/src/java/org/apache/lucene/util/fst/BytesRefFSTEnum.java
public InputOutput<T> seekFloor(BytesRef target) throws IOException {
this.target = target;
targetLength = target.length;
super.doSeekFloor();
return setResult();
}
// in lucene/core/src/java/org/apache/lucene/util/fst/BytesRefFSTEnum.java
public InputOutput<T> seekExact(BytesRef target) throws IOException {
this.target = target;
targetLength = target.length;
if (super.doSeekExact()) {
assert upto == 1+target.length;
return setResult();
} else {
return null;
}
}
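// NOTE: illustrative sketch. Semantically, seekCeil/seekFloor/seekExact behave like
// ceiling/floor/exact lookups over the FST's sorted set of input terms, returning the
// matched term together with its output. The hypothetical snippet below shows the same
// contract on a java.util.TreeMap purely to clarify the semantics; it says nothing
// about how the enum is implemented.
static void seekContractDemo() {
  java.util.TreeMap<String, Long> terms = new java.util.TreeMap<String, Long>();
  terms.put("cat", 1L);
  terms.put("dog", 2L);
  java.util.Map.Entry<String, Long> ceil = terms.ceilingEntry("cow");   // ("dog", 2): what seekCeil would return
  java.util.Map.Entry<String, Long> floor = terms.floorEntry("cow");    // ("cat", 1): what seekFloor would return
  Long exact = terms.get("cow");                                        // null: seekExact finds no match
}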
// in lucene/core/src/java/org/apache/lucene/util/OpenBitSetDISI.java
public void inPlaceOr(DocIdSetIterator disi) throws IOException {
int doc;
long size = size();
while ((doc = disi.nextDoc()) < size) {
fastSet(doc);
}
}
// in lucene/core/src/java/org/apache/lucene/util/OpenBitSetDISI.java
public void inPlaceAnd(DocIdSetIterator disi) throws IOException {
int bitSetDoc = nextSetBit(0);
int disiDoc;
// For each gap between our current set bit and the iterator's next matching doc,
// clear the bits the iterator does not contain, then resume from the bit after it:
while (bitSetDoc != -1 && (disiDoc = disi.advance(bitSetDoc)) != DocIdSetIterator.NO_MORE_DOCS) {
clear(bitSetDoc, disiDoc);
bitSetDoc = nextSetBit(disiDoc + 1);
}
// Iterator is exhausted: clear everything from the last set bit to the end:
if (bitSetDoc != -1) {
clear(bitSetDoc, size());
}
}
// in lucene/core/src/java/org/apache/lucene/util/OpenBitSetDISI.java
public void inPlaceNot(DocIdSetIterator disi) throws IOException {
int doc;
long size = size();
while ((doc = disi.nextDoc()) < size) {
fastClear(doc);
}
}
// in lucene/core/src/java/org/apache/lucene/util/OpenBitSetDISI.java
public void inPlaceXor(DocIdSetIterator disi) throws IOException {
int doc;
long size = size();
while ((doc = disi.nextDoc()) < size) {
fastFlip(doc);
}
}
// in lucene/core/src/java/org/apache/lucene/util/PagedBytes.java
public void copy(IndexInput in, long byteCount) throws IOException {
while (byteCount > 0) {
int left = blockSize - upto;
if (left == 0) {
if (currentBlock != null) {
blocks.add(currentBlock);
blockEnd.add(upto);
}
currentBlock = new byte[blockSize];
upto = 0;
left = blockSize;
}
if (left < byteCount) {
in.readBytes(currentBlock, upto, left, false);
upto = blockSize;
byteCount -= left;
} else {
in.readBytes(currentBlock, upto, (int) byteCount, false);
upto += byteCount;
break;
}
}
}
// in lucene/core/src/java/org/apache/lucene/util/PagedBytes.java
public void copy(BytesRef bytes) throws IOException {
int byteCount = bytes.length;
int bytesUpto = bytes.offset;
while (byteCount > 0) {
int left = blockSize - upto;
if (left == 0) {
if (currentBlock != null) {
blocks.add(currentBlock);
blockEnd.add(upto);
}
currentBlock = new byte[blockSize];
upto = 0;
left = blockSize;
}
if (left < byteCount) {
System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, left);
upto = blockSize;
byteCount -= left;
bytesUpto += left;
} else {
System.arraycopy(bytes.bytes, bytesUpto, currentBlock, upto, byteCount);
upto += byteCount;
break;
}
}
}
// in lucene/core/src/java/org/apache/lucene/util/PagedBytes.java
public void copy(BytesRef bytes, BytesRef out) throws IOException {
int left = blockSize - upto;
if (bytes.length > left || currentBlock==null) {
if (currentBlock != null) {
blocks.add(currentBlock);
blockEnd.add(upto);
didSkipBytes = true;
}
currentBlock = new byte[blockSize];
upto = 0;
left = blockSize;
assert bytes.length <= blockSize;
// TODO: we could also support variable block sizes
}
out.bytes = currentBlock;
out.offset = upto;
out.length = bytes.length;
System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length);
upto += bytes.length;
}
// in lucene/core/src/java/org/apache/lucene/util/PagedBytes.java
public long copyUsingLengthPrefix(BytesRef bytes) throws IOException {
if (bytes.length >= 32768) {
throw new IllegalArgumentException("max length is 32767 (got " + bytes.length + ")");
}
if (upto + bytes.length + 2 > blockSize) {
if (bytes.length + 2 > blockSize) {
throw new IllegalArgumentException("block size " + blockSize + " is too small to store length " + bytes.length + " bytes");
}
if (currentBlock != null) {
blocks.add(currentBlock);
blockEnd.add(upto);
}
currentBlock = new byte[blockSize];
upto = 0;
}
final long pointer = getPointer();
if (bytes.length < 128) {
currentBlock[upto++] = (byte) bytes.length;
} else {
currentBlock[upto++] = (byte) (0x80 | (bytes.length >> 8));
currentBlock[upto++] = (byte) (bytes.length & 0xff);
}
System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length);
upto += bytes.length;
return pointer;
}
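// NOTE: illustrative sketch. copyUsingLengthPrefix stores the length in a single byte
// when it is under 128, otherwise in two bytes with the high bit of the first byte set,
// which is why lengths are capped at 32767. A hypothetical decoder for that prefix:
static int readLengthPrefix(byte[] block, int[] pos) {
  int b = block[pos[0]++] & 0xFF;
  if ((b & 0x80) == 0) {
    return b;                          // one-byte length: 0..127
  } else {
    int b2 = block[pos[0]++] & 0xFF;
    return ((b & 0x7F) << 8) | b2;     // two-byte length: 128..32767
  }
}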
// in lucene/core/src/java/org/apache/lucene/util/PagedBytes.java
Override
public void writeBytes(byte[] b, int offset, int length) throws IOException {
assert b.length >= offset + length;
if (length == 0) {
return;
}
if (upto == blockSize) {
if (currentBlock != null) {
blocks.add(currentBlock);
blockEnd.add(upto);
}
currentBlock = new byte[blockSize];
upto = 0;
}
final int offsetEnd = offset + length;
while(true) {
final int left = offsetEnd - offset;
final int blockLeft = blockSize - upto;
if (blockLeft < left) {
System.arraycopy(b, offset, currentBlock, upto, blockLeft);
blocks.add(currentBlock);
blockEnd.add(blockSize);
currentBlock = new byte[blockSize];
upto = 0;
offset += blockLeft;
} else {
// Last block
System.arraycopy(b, offset, currentBlock, upto, left);
upto += left;
break;
}
}
}
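// NOTE: illustrative sketch. The PagedBytes copy/write paths above all follow the same
// pattern: fill the remainder of the current fixed-size block, then roll over to a
// fresh block and continue. A hypothetical minimal version of that loop:
static java.util.List<byte[]> copyIntoBlocks(byte[] src, int blockSize) {
  java.util.List<byte[]> blocks = new java.util.ArrayList<byte[]>();
  byte[] current = new byte[blockSize];
  int upto = 0;
  int offset = 0;
  while (offset < src.length) {
    if (upto == blockSize) {
      // current block is full; retire it and start a new one
      blocks.add(current);
      current = new byte[blockSize];
      upto = 0;
    }
    int chunk = Math.min(blockSize - upto, src.length - offset);
    System.arraycopy(src, offset, current, upto, chunk);
    upto += chunk;
    offset += chunk;
  }
  blocks.add(current);                 // last, possibly partially filled, block
  return blocks;
}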
// in lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
public TermsEnum getTermsEnum(Terms terms) throws IOException {
switch(type) {
case NONE:
return TermsEnum.EMPTY;
case ALL:
return terms.iterator(null);
case SINGLE:
return new SingleTermsEnum(terms.iterator(null), term);
case PREFIX:
// TODO: this is very likely faster than .intersect,
// but we should test and maybe cutover
return new PrefixTermsEnum(terms.iterator(null), term);
case NORMAL:
return terms.intersect(this, null);
default:
// unreachable
throw new RuntimeException("unhandled case");
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/PerDocConsumer.java
public void merge(MergeState mergeState) throws IOException {
final DocValues[] docValues = new DocValues[mergeState.readers.size()];
for (FieldInfo fieldInfo : mergeState.fieldInfos) {
mergeState.fieldInfo = fieldInfo; // set the field we are merging
if (canMerge(fieldInfo)) {
for (int i = 0; i < docValues.length; i++) {
docValues[i] = getDocValuesForMerge(mergeState.readers.get(i).reader, fieldInfo);
}
Type docValuesType = getDocValuesType(fieldInfo);
assert docValuesType != null;
final DocValuesConsumer docValuesConsumer = addValuesField(docValuesType, fieldInfo);
assert docValuesConsumer != null;
docValuesConsumer.merge(mergeState, docValues);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/PerDocConsumer.java
protected DocValues getDocValuesForMerge(AtomicReader reader, FieldInfo info) throws IOException {
return reader.docValues(info.name);
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java
Override
public Index index() throws IOException {
return new Index();
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java
Override
public void mark() throws IOException {
fp = out.getFilePointer();
upto = VariableIntBlockIndexOutput.this.upto;
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java
Override
public void copyFrom(IntIndexOutput.Index other, boolean copyLast) throws IOException {
Index idx = (Index) other;
fp = idx.fp;
upto = idx.upto;
if (copyLast) {
lastFP = fp;
lastUpto = upto;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java
Override
public void write(IndexOutput indexOut, boolean absolute) throws IOException {
assert upto >= 0;
if (absolute) {
indexOut.writeVInt(upto);
indexOut.writeVLong(fp);
} else if (fp == lastFP) {
// same block
assert upto >= lastUpto;
int uptoDelta = upto - lastUpto;
indexOut.writeVInt(uptoDelta << 1 | 1);
} else {
// new block
indexOut.writeVInt(upto << 1);
indexOut.writeVLong(fp - lastFP);
}
lastUpto = upto;
lastFP = fp;
}
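// NOTE: illustrative sketch. The relative form of write() above tags the vInt with its
// low bit: odd means "same block, the value is a delta of upto"; even means "new block,
// the value is upto itself and a file-pointer delta follows". A hypothetical decoder
// mirroring that tag (the real read() methods further below do the same with
// vInt/vLong; plain readInt/readLong are used here only to stay self-contained):
static long[] readIndexEntry(java.io.DataInputStream in, long lastFP, long lastUpto) throws java.io.IOException {
  int tagged = in.readInt();
  if ((tagged & 1) == 1) {
    return new long[] { lastFP, lastUpto + (tagged >>> 1) };    // same block: delta of upto
  } else {
    return new long[] { lastFP + in.readLong(), tagged >>> 1 }; // new block: fp delta follows
  }
}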
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java
Override
public void write(int v) throws IOException {
hitExcDuringWrite = true;
upto -= add(v)-1;
hitExcDuringWrite = false;
assert upto >= 0;
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexOutput.java
Override
public void close() throws IOException {
try {
if (!hitExcDuringWrite) {
// stuff 0s in until the "real" data is flushed:
int stuffed = 0;
while(upto > stuffed) {
upto -= add(0)-1;
assert upto >= 0;
stuffed += 1;
}
}
} finally {
out.close();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexInput.java
Override
public Reader reader() throws IOException {
final int[] buffer = new int[blockSize];
final IndexInput clone = (IndexInput) in.clone();
// TODO: can this be simplified?
return new Reader(clone, buffer, this.getBlockReader(clone, buffer));
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexInput.java
Override
public void close() throws IOException {
in.close();
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexInput.java
private void maybeSeek() throws IOException {
if (seekPending) {
if (pendingFP != lastBlockFP) {
// need new block
in.seek(pendingFP);
lastBlockFP = pendingFP;
blockReader.readBlock();
}
upto = pendingUpto;
seekPending = false;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexInput.java
Override
public int next() throws IOException {
this.maybeSeek();
if (upto == blockSize) {
lastBlockFP = in.getFilePointer();
blockReader.readBlock();
upto = 0;
}
return pending[upto++];
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexInput.java
Override
public IntsRef read(final int count) throws IOException {
this.maybeSeek();
if (upto == blockSize) {
blockReader.readBlock();
upto = 0;
}
bulkResult.offset = upto;
if (upto + count < blockSize) {
bulkResult.length = count;
upto += count;
} else {
bulkResult.length = blockSize - upto;
upto = blockSize;
}
return bulkResult;
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexInput.java
Override
public void read(final DataInput indexIn, final boolean absolute) throws IOException {
if (absolute) {
upto = indexIn.readVInt();
fp = indexIn.readVLong();
} else {
final int uptoDelta = indexIn.readVInt();
if ((uptoDelta & 1) == 1) {
// same block
upto += uptoDelta >>> 1;
} else {
// new block
upto = uptoDelta >>> 1;
fp += indexIn.readVLong();
}
}
assert upto < blockSize;
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexInput.java
Override
public void seek(final IntIndexInput.Reader other) throws IOException {
((Reader) other).seek(fp, upto);
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexInput.java
Override
public Reader reader() throws IOException {
final int[] buffer = new int[maxBlockSize];
final IndexInput clone = (IndexInput) in.clone();
// TODO: can this be simplified?
return new Reader(clone, buffer, this.getBlockReader(clone, buffer));
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexInput.java
Override
public void close() throws IOException {
in.close();
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexInput.java
void seek(final long fp, final int upto) throws IOException {
// TODO: should we do this in real-time, not lazy?
pendingFP = fp;
pendingUpto = upto;
assert pendingUpto >= 0: "pendingUpto=" + pendingUpto;
seekPending = true;
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexInput.java
private final void maybeSeek() throws IOException {
if (seekPending) {
if (pendingFP != lastBlockFP) {
// need new block
in.seek(pendingFP);
blockReader.seek(pendingFP);
lastBlockFP = pendingFP;
blockSize = blockReader.readBlock();
}
upto = pendingUpto;
// TODO: if we were more clever when writing the
// index, such that a seek point wouldn't be written
// until the int encoder "committed", we could avoid
// this (likely minor) inefficiency:
// This is necessary for int encoders that are
// non-causal, ie must see future int values to
// encode the current ones.
while(upto >= blockSize) {
upto -= blockSize;
lastBlockFP = in.getFilePointer();
blockSize = blockReader.readBlock();
}
seekPending = false;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexInput.java
Override
public int next() throws IOException {
this.maybeSeek();
if (upto == blockSize) {
lastBlockFP = in.getFilePointer();
blockSize = blockReader.readBlock();
upto = 0;
}
return pending[upto++];
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexInput.java
Override
public IntsRef read(final int count) throws IOException {
this.maybeSeek();
if (upto == blockSize) {
lastBlockFP = in.getFilePointer();
blockSize = blockReader.readBlock();
upto = 0;
}
bulkResult.offset = upto;
if (upto + count < blockSize) {
bulkResult.length = count;
upto += count;
} else {
bulkResult.length = blockSize - upto;
upto = blockSize;
}
return bulkResult;
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexInput.java
Override
public void read(final DataInput indexIn, final boolean absolute) throws IOException {
if (absolute) {
upto = indexIn.readVInt();
fp = indexIn.readVLong();
} else {
final int uptoDelta = indexIn.readVInt();
if ((uptoDelta & 1) == 1) {
// same block
upto += uptoDelta >>> 1;
} else {
// new block
upto = uptoDelta >>> 1;
fp += indexIn.readVLong();
}
}
// TODO: we can't do this assert because non-causal
// int encoders can have upto over the buffer size
//assert upto < maxBlockSize: "upto=" + upto + " max=" + maxBlockSize;
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/VariableIntBlockIndexInput.java
Override
public void seek(final IntIndexInput.Reader other) throws IOException {
((Reader) other).seek(fp, upto);
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java
Override
public Index index() throws IOException {
return new Index();
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java
Override
public void mark() throws IOException {
fp = out.getFilePointer();
upto = FixedIntBlockIndexOutput.this.upto;
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java
Override
public void copyFrom(IntIndexOutput.Index other, boolean copyLast) throws IOException {
Index idx = (Index) other;
fp = idx.fp;
upto = idx.upto;
if (copyLast) {
lastFP = fp;
lastUpto = upto;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java
Override
public void write(IndexOutput indexOut, boolean absolute) throws IOException {
if (absolute) {
indexOut.writeVInt(upto);
indexOut.writeVLong(fp);
} else if (fp == lastFP) {
// same block
assert upto >= lastUpto;
int uptoDelta = upto - lastUpto;
indexOut.writeVInt(uptoDelta << 1 | 1);
} else {
// new block
indexOut.writeVInt(upto << 1);
indexOut.writeVLong(fp - lastFP);
}
lastUpto = upto;
lastFP = fp;
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java
Override
public void write(int v) throws IOException {
buffer[upto++] = v;
if (upto == blockSize) {
flushBlock();
upto = 0;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/intblock/FixedIntBlockIndexOutput.java
Override
public void close() throws IOException {
try {
if (upto > 0) {
// NOTE: entries in the block after current upto are
// invalid
flushBlock();
}
} finally {
out.close();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
public int skipTo(int target) throws IOException {
if (!haveSkipped) {
// first time, load skip levels
loadSkipLevels();
haveSkipped = true;
}
// walk up the levels until highest level is found that has a skip
// for this target
int level = 0;
while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
level++;
}
while (level >= 0) {
if (target > skipDoc[level]) {
if (!loadNextSkip(level)) {
continue;
}
} else {
// no more skips on this level, go down one level
if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) {
seekChild(level - 1);
}
level--;
}
}
return numSkipped[0] - skipInterval[0] - 1;
}
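// NOTE: illustrative sketch. skipTo() climbs to the highest level whose next skip entry
// still precedes the target, then descends, advancing along each level and seeking the
// child level to the last entry it passed. The hypothetical in-memory version below
// walks precomputed per-level doc arrays the same way; all names are made up.
static int skipToSketch(int[][] levels, int skipInterval, int target) {
  // levels[k][i] = doc id of the i-th skip entry on level k (level 0 is the densest);
  // each level-k entry equals the last of the skipInterval level-(k-1) entries it covers.
  int lastSkipped = -1;
  int start = 0;                       // where to resume scanning on the next denser level
  for (int level = levels.length - 1; level >= 0; level--) {
    int i = start;
    while (i < levels[level].length && levels[level][i] <= target) {
      lastSkipped = levels[level][i];
      i++;
    }
    start = i * skipInterval;          // first child entry not already covered above
  }
  return lastSkipped;                  // last doc we can skip to without passing the target
}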
// in lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
private boolean loadNextSkip(int level) throws IOException {
// we have to skip, the target document is greater than the current
// skip list entry
setLastSkipData(level);
numSkipped[level] += skipInterval[level];
if (numSkipped[level] > docCount) {
// this skip list is exhausted
skipDoc[level] = Integer.MAX_VALUE;
if (numberOfSkipLevels > level) numberOfSkipLevels = level;
return false;
}
// read next skip entry
skipDoc[level] += readSkipData(level, skipStream[level]);
if (level != 0) {
// read the child pointer if we are not on the leaf level
childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
}
return true;
}
// in lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
protected void seekChild(int level) throws IOException {
skipStream[level].seek(lastChildPointer);
numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1];
skipDoc[level] = lastDoc;
if (level > 0) {
childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
public void close() throws IOException {
for (int i = 1; i < skipStream.length; i++) {
if (skipStream[i] != null) {
skipStream[i].close();
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
private void loadSkipLevels() throws IOException {
numberOfSkipLevels = MathUtil.log(docCount, skipInterval[0]);
if (numberOfSkipLevels > maxNumberOfSkipLevels) {
numberOfSkipLevels = maxNumberOfSkipLevels;
}
skipStream[0].seek(skipPointer[0]);
int toBuffer = numberOfLevelsToBuffer;
for (int i = numberOfSkipLevels - 1; i > 0; i--) {
// the length of the current level
long length = skipStream[0].readVLong();
// the start pointer of the current level
skipPointer[i] = skipStream[0].getFilePointer();
if (toBuffer > 0) {
// buffer this level
skipStream[i] = new SkipBuffer(skipStream[0], (int) length);
toBuffer--;
} else {
// clone this stream, it is already at the start of the current level
skipStream[i] = (IndexInput) skipStream[0].clone();
if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) {
((BufferedIndexInput) skipStream[i]).setBufferSize((int) length);
}
// move base stream beyond the current level
skipStream[0].seek(skipStream[0].getFilePointer() + length);
}
}
// use base stream for the lowest level
skipPointer[0] = skipStream[0].getFilePointer();
}
// in lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
Override
public void close() throws IOException {
data = null;
}
// in lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
Override
public byte readByte() throws IOException {
return data[pos++];
}
// in lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
Override
public void readBytes(byte[] b, int offset, int len) throws IOException {
System.arraycopy(data, pos, b, offset, len);
pos += len;
}
// in lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListReader.java
Override
public void seek(long pos) throws IOException {
this.pos = (int) (pos - pointer);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsWriter.java
protected void writeHeader(IndexOutput out) throws IOException {
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
out.writeLong(0); // leave space for end index pointer
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsWriter.java
Override
public TermsConsumer addField(FieldInfo field) throws IOException {
//System.out.println("\nBTW.addField seg=" + segment + " field=" + field.name);
assert currentField == null || currentField.name.compareTo(field.name) < 0;
currentField = field;
TermsIndexWriterBase.FieldWriter fieldIndexWriter = termsIndexWriter.addField(field, out.getFilePointer());
final TermsWriter terms = new TermsWriter(fieldIndexWriter, field, postingsWriter);
fields.add(terms);
return terms;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsWriter.java
Override
public void close() throws IOException {
try {
int nonZeroCount = 0;
for(TermsWriter field : fields) {
if (field.numTerms > 0) {
nonZeroCount++;
}
}
final long dirStart = out.getFilePointer();
out.writeVInt(nonZeroCount);
for(TermsWriter field : fields) {
if (field.numTerms > 0) {
out.writeVInt(field.fieldInfo.number);
out.writeVLong(field.numTerms);
out.writeVLong(field.termsStartPointer);
if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
out.writeVLong(field.sumTotalTermFreq);
}
out.writeVLong(field.sumDocFreq);
out.writeVInt(field.docCount);
}
}
writeTrailer(dirStart);
} finally {
IOUtils.close(out, postingsWriter, termsIndexWriter);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsWriter.java
protected void writeTrailer(long dirStart) throws IOException {
out.seek(CodecUtil.headerLength(CODEC_NAME));
out.writeLong(dirStart);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsWriter.java
Override
public PostingsConsumer startTerm(BytesRef text) throws IOException {
//System.out.println("BTW: startTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment);
postingsWriter.startTerm();
return postingsWriter;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsWriter.java
Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
assert stats.docFreq > 0;
//System.out.println("BTW: finishTerm term=" + fieldInfo.name + ":" + text.utf8ToString() + " " + text + " seg=" + segment + " df=" + stats.docFreq);
final boolean isIndexTerm = fieldIndexWriter.checkIndexTerm(text, stats);
if (isIndexTerm) {
if (pendingCount > 0) {
// Instead of writing each term, live, we gather terms
// in RAM in a pending buffer, and then write the
// entire block in between index terms:
flushBlock();
}
fieldIndexWriter.add(text, stats, out.getFilePointer());
//System.out.println(" index term!");
}
if (pendingTerms.length == pendingCount) {
final TermEntry[] newArray = new TermEntry[ArrayUtil.oversize(pendingCount+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
System.arraycopy(pendingTerms, 0, newArray, 0, pendingCount);
for(int i=pendingCount;i<newArray.length;i++) {
newArray[i] = new TermEntry();
}
pendingTerms = newArray;
}
final TermEntry te = pendingTerms[pendingCount];
te.term.copyBytes(text);
te.stats = stats;
pendingCount++;
postingsWriter.finishTerm(stats);
numTerms++;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsWriter.java
Override
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
if (pendingCount > 0) {
flushBlock();
}
// EOF marker:
out.writeVInt(0);
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
fieldIndexWriter.finish(out.getFilePointer());
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsWriter.java
private void flushBlock() throws IOException {
//System.out.println("BTW.flushBlock seg=" + segment + " pendingCount=" + pendingCount + " fp=" + out.getFilePointer());
// First pass: compute common prefix for all terms
// in the block, against term before first term in
// this block:
int commonPrefix = sharedPrefix(lastPrevTerm, pendingTerms[0].term);
for(int termCount=1;termCount<pendingCount;termCount++) {
commonPrefix = Math.min(commonPrefix,
sharedPrefix(lastPrevTerm,
pendingTerms[termCount].term));
}
out.writeVInt(pendingCount);
out.writeVInt(commonPrefix);
// 2nd pass: write suffixes, as separate byte[] blob
for(int termCount=0;termCount<pendingCount;termCount++) {
final int suffix = pendingTerms[termCount].term.length - commonPrefix;
// TODO: cutover to better intblock codec, instead
// of interleaving here:
bytesWriter.writeVInt(suffix);
bytesWriter.writeBytes(pendingTerms[termCount].term.bytes, commonPrefix, suffix);
}
out.writeVInt((int) bytesWriter.getFilePointer());
bytesWriter.writeTo(out);
bytesWriter.reset();
// 3rd pass: write the freqs as byte[] blob
// TODO: cutover to better intblock codec. simple64?
// write prefix, suffix first:
for(int termCount=0;termCount<pendingCount;termCount++) {
final TermStats stats = pendingTerms[termCount].stats;
assert stats != null;
bytesWriter.writeVInt(stats.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
bytesWriter.writeVLong(stats.totalTermFreq-stats.docFreq);
}
}
out.writeVInt((int) bytesWriter.getFilePointer());
bytesWriter.writeTo(out);
bytesWriter.reset();
postingsWriter.flushTermsBlock(pendingCount, pendingCount);
lastPrevTerm.copyBytes(pendingTerms[pendingCount-1].term);
pendingCount = 0;
}
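// NOTE: illustrative sketch. flushBlock() front-codes the block: it finds the prefix
// that every pending term shares with the term just before the block, then writes only
// the per-term suffixes. A hypothetical standalone version of that first pass over
// plain byte arrays (names are made up):
static int commonPrefix(byte[][] blockTerms, byte[] prevTerm) {
  int prefix = Integer.MAX_VALUE;
  for (byte[] term : blockTerms) {
    int i = 0;
    final int limit = Math.min(prevTerm.length, term.length);
    while (i < limit && prevTerm[i] == term[i]) {
      i++;
    }
    prefix = Math.min(prefix, i);      // shrink to the prefix shared with every term
  }
  return prefix;
}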
// in lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
public void merge(MergeState mergeState, DocValues[] docValues) throws IOException {
assert mergeState != null;
boolean hasMerged = false;
for(int readerIDX=0;readerIDX<mergeState.readers.size();readerIDX++) {
final org.apache.lucene.index.MergeState.IndexReaderAndLiveDocs reader = mergeState.readers.get(readerIDX);
if (docValues[readerIDX] != null) {
hasMerged = true;
merge(docValues[readerIDX], mergeState.docBase[readerIDX],
reader.reader.maxDoc(), reader.liveDocs);
mergeState.checkAbort.work(reader.reader.maxDoc());
}
}
// only finish if no exception is thrown!
if (hasMerged) {
finish(mergeState.segmentInfo.getDocCount());
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
protected void merge(DocValues reader, int docBase, int docCount, Bits liveDocs) throws IOException {
// This enables bulk copies in subclasses per MergeState, subclasses can
// simply override this and decide if they want to merge
// segments using this generic implementation or if a bulk merge is possible
// / feasible.
final Source source = reader.getDirectSource();
assert source != null;
int docID = docBase;
final Type type = getType();
final Field scratchField;
switch(type) {
case VAR_INTS:
scratchField = new PackedLongDocValuesField("", (long) 0);
break;
case FIXED_INTS_8:
scratchField = new ByteDocValuesField("", (byte) 0);
break;
case FIXED_INTS_16:
scratchField = new ShortDocValuesField("", (short) 0);
break;
case FIXED_INTS_32:
scratchField = new IntDocValuesField("", 0);
break;
case FIXED_INTS_64:
scratchField = new LongDocValuesField("", (long) 0);
break;
case FLOAT_32:
scratchField = new FloatDocValuesField("", 0f);
break;
case FLOAT_64:
scratchField = new DoubleDocValuesField("", 0d);
break;
case BYTES_FIXED_STRAIGHT:
scratchField = new StraightBytesDocValuesField("", new BytesRef(), true);
break;
case BYTES_VAR_STRAIGHT:
scratchField = new StraightBytesDocValuesField("", new BytesRef(), false);
break;
case BYTES_FIXED_DEREF:
scratchField = new DerefBytesDocValuesField("", new BytesRef(), true);
break;
case BYTES_VAR_DEREF:
scratchField = new DerefBytesDocValuesField("", new BytesRef(), false);
break;
case BYTES_FIXED_SORTED:
scratchField = new SortedBytesDocValuesField("", new BytesRef(), true);
break;
case BYTES_VAR_SORTED:
scratchField = new SortedBytesDocValuesField("", new BytesRef(), false);
break;
default:
throw new IllegalStateException("unknown Type: " + type);
}
for (int i = 0; i < docCount; i++) {
if (liveDocs == null || liveDocs.get(i)) {
mergeDoc(scratchField, source, docID++, i);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
protected void mergeDoc(Field scratchField, Source source, int docID, int sourceDoc)
throws IOException {
switch(getType()) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
scratchField.setBytesValue(source.getBytes(sourceDoc, spare));
break;
case FIXED_INTS_8:
scratchField.setByteValue((byte) source.getInt(sourceDoc));
break;
case FIXED_INTS_16:
scratchField.setShortValue((short) source.getInt(sourceDoc));
break;
case FIXED_INTS_32:
scratchField.setIntValue((int) source.getInt(sourceDoc));
break;
case FIXED_INTS_64:
scratchField.setLongValue(source.getInt(sourceDoc));
break;
case VAR_INTS:
scratchField.setLongValue(source.getInt(sourceDoc));
break;
case FLOAT_32:
scratchField.setFloatValue((float) source.getFloat(sourceDoc));
break;
case FLOAT_64:
scratchField.setDoubleValue(source.getFloat(sourceDoc));
break;
}
add(docID, scratchField);
}
// in lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexWriter.java
protected void writeHeader(IndexOutput out) throws IOException {
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
// Placeholder for dir offset
out.writeLong(0);
}
// in lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexWriter.java
Override
public boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException {
// First term is first indexed term:
//System.out.println("FGW: checkIndexTerm text=" + text.utf8ToString());
if (0 == (numTerms++ % termIndexInterval)) {
return true;
} else {
if (0 == numTerms % termIndexInterval) {
// save last term just before next index term so we
// can compute wasted suffix
lastTerm.copyBytes(text);
}
return false;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexWriter.java
Override
public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
final int indexedTermLength = indexedTermPrefixLength(lastTerm, text);
//System.out.println("FGW: add text=" + text.utf8ToString() + " " + text + " fp=" + termsFilePointer);
// write only the min prefix that shows the diff
// against prior term
out.writeBytes(text.bytes, text.offset, indexedTermLength);
if (termLengths.length == numIndexTerms) {
termLengths = ArrayUtil.grow(termLengths);
}
if (termsPointerDeltas.length == numIndexTerms) {
termsPointerDeltas = ArrayUtil.grow(termsPointerDeltas);
}
// save delta terms pointer
termsPointerDeltas[numIndexTerms] = (int) (termsFilePointer - lastTermsPointer);
lastTermsPointer = termsFilePointer;
// save term length (in bytes)
assert indexedTermLength <= Short.MAX_VALUE;
termLengths[numIndexTerms] = (short) indexedTermLength;
totTermLength += indexedTermLength;
lastTerm.copyBytes(text);
numIndexTerms++;
}
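// A minimal standalone sketch (not from the Lucene sources; the helper name is
// illustrative) of the "min prefix that shows the diff against the prior term" idea
// used by add() above: the shared-prefix length plus one byte, capped at the current
// term's length, is enough to tell the index term apart from its predecessor.
static int minDiffPrefixLength(byte[] prior, byte[] current) {
int limit = Math.min(prior.length, current.length);
int i = 0;
while (i < limit && prior[i] == current[i]) {
i++;
}
// one byte past the first difference distinguishes the two terms
return Math.min(i + 1, current.length);
}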
// in lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexWriter.java
Override
public void finish(long termsFilePointer) throws IOException {
// write primary terms dict offsets
packedIndexStart = out.getFilePointer();
PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(termsFilePointer), PackedInts.DEFAULT);
// relative to our indexStart
long upto = 0;
for(int i=0;i<numIndexTerms;i++) {
upto += termsPointerDeltas[i];
w.add(upto);
}
w.finish();
packedOffsetsStart = out.getFilePointer();
// write offsets into the byte[] terms
w = PackedInts.getWriter(out, 1+numIndexTerms, PackedInts.bitsRequired(totTermLength), PackedInts.DEFAULT);
upto = 0;
for(int i=0;i<numIndexTerms;i++) {
w.add(upto);
upto += termLengths[i];
}
w.add(upto);
w.finish();
// our referrer holds onto us, while other fields are
// being written, so don't tie up this RAM:
termLengths = null;
termsPointerDeltas = null;
}
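// A small self-contained sketch (plain arrays, not PackedInts) of the conversion
// finish() performs above: the stored per-term deltas are summed back into absolute
// file pointers ("upto") before being handed to the packed writer.
static long[] deltasToAbsolute(int[] deltas) {
long[] absolute = new long[deltas.length];
long upto = 0;
for (int i = 0; i < deltas.length; i++) {
upto += deltas[i]; // running sum rebuilds the original terms-file pointer
absolute[i] = upto;
}
return absolute;
}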
// in lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexWriter.java
public void close() throws IOException {
boolean success = false;
try {
final long dirStart = out.getFilePointer();
final int fieldCount = fields.size();
int nonNullFieldCount = 0;
for(int i=0;i<fieldCount;i++) {
SimpleFieldWriter field = fields.get(i);
if (field.numIndexTerms > 0) {
nonNullFieldCount++;
}
}
out.writeVInt(nonNullFieldCount);
for(int i=0;i<fieldCount;i++) {
SimpleFieldWriter field = fields.get(i);
if (field.numIndexTerms > 0) {
out.writeVInt(field.fieldInfo.number);
out.writeVInt(field.numIndexTerms);
out.writeVLong(field.termsStart);
out.writeVLong(field.indexStart);
out.writeVLong(field.packedIndexStart);
out.writeVLong(field.packedOffsetsStart);
}
}
writeTrailer(dirStart);
success = true;
} finally {
if (success) {
IOUtils.close(out);
} else {
IOUtils.closeWhileHandlingException(out);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexWriter.java
protected void writeTrailer(long dirStart) throws IOException {
out.seek(CodecUtil.headerLength(CODEC_NAME));
out.writeLong(dirStart);
}
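// A minimal sketch of the "write a placeholder, seek back, patch the real offset"
// pattern used by writeHeader/writeTrailer above. It uses java.io.RandomAccessFile
// purely for illustration and writes the placeholder at offset 0; the real code seeks
// back to just after the codec header instead.
static void writeWithTrailer(java.io.File file, byte[] body) throws java.io.IOException {
try (java.io.RandomAccessFile out = new java.io.RandomAccessFile(file, "rw")) {
out.writeLong(0L); // placeholder for the directory start offset
out.write(body); // everything else is written after the placeholder
long dirStart = out.getFilePointer(); // where the directory would begin
out.seek(0L); // jump back to the placeholder
out.writeLong(dirStart); // patch in the real offset
}
}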
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoWriter.java
Override
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
si.addFile(fileName);
final IndexOutput output = dir.createOutput(fileName, ioContext);
boolean success = false;
try {
CodecUtil.writeHeader(output, Lucene40SegmentInfoFormat.CODEC_NAME, Lucene40SegmentInfoFormat.VERSION_CURRENT);
// Write the Lucene version that created this segment, since 3.1
output.writeString(si.getVersion());
output.writeInt(si.getDocCount());
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
output.writeStringStringMap(si.getDiagnostics());
output.writeStringStringMap(si.attributes());
output.writeStringSet(si.files());
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(output);
si.dir.deleteFile(fileName);
} else {
output.close();
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosWriter.java
Override
public void write(Directory directory, String segmentName, FieldInfos infos, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
IndexOutput output = directory.createOutput(fileName, context);
try {
CodecUtil.writeHeader(output, CODEC_NAME, FORMAT_CURRENT);
output.writeVInt(infos.size());
for (FieldInfo fi : infos) {
IndexOptions indexOptions = fi.getIndexOptions();
byte bits = 0x0;
if (fi.hasVectors()) bits |= STORE_TERMVECTOR;
if (fi.omitsNorms()) bits |= OMIT_NORMS;
if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
if (fi.isIndexed()) {
bits |= IS_INDEXED;
assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
if (indexOptions == IndexOptions.DOCS_ONLY) {
bits |= OMIT_TERM_FREQ_AND_POSITIONS;
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
bits |= STORE_OFFSETS_IN_POSTINGS;
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
bits |= OMIT_POSITIONS;
}
}
output.writeString(fi.name);
output.writeVInt(fi.number);
output.writeByte(bits);
// pack the DV types in one byte
final byte dv = docValuesByte(fi.getDocValuesType());
final byte nrm = docValuesByte(fi.getNormType());
assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
byte val = (byte) (0xff & ((nrm << 4) | dv));
output.writeByte(val);
output.writeStringStringMap(fi.attributes());
}
} finally {
output.close();
}
}
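// A tiny standalone sketch (method names are illustrative, not Lucene APIs) of the
// nibble packing done above: the norms type code goes in the high 4 bits and the
// doc-values type code in the low 4 bits of a single byte, mirroring how
// Lucene40FieldInfosReader unpacks them further below.
static byte packNibbles(byte docValuesCode, byte normsCode) {
assert (docValuesCode & ~0x0F) == 0 && (normsCode & ~0x0F) == 0;
return (byte) (0xFF & ((normsCode << 4) | docValuesCode));
}
static byte unpackDocValuesCode(byte packed) {
return (byte) (packed & 0x0F);
}
static byte unpackNormsCode(byte packed) {
return (byte) ((packed >>> 4) & 0x0F);
}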
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
public void startDocument(int numStoredFields) throws IOException {
indexStream.writeLong(fieldsStream.getFilePointer());
fieldsStream.writeVInt(numStoredFields);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
public void close() throws IOException {
try {
IOUtils.close(fieldsStream, indexStream);
} finally {
fieldsStream = indexStream = null;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
public void writeField(FieldInfo info, IndexableField field) throws IOException {
fieldsStream.writeVInt(info.number);
int bits = 0;
final BytesRef bytes;
final String string;
// TODO: maybe a field should serialize itself?
// this way we don't bake into indexer all these
// specific encodings for different fields? and apps
// can customize...
Number number = field.numericValue();
if (number != null) {
if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
bits |= FIELD_IS_NUMERIC_INT;
} else if (number instanceof Long) {
bits |= FIELD_IS_NUMERIC_LONG;
} else if (number instanceof Float) {
bits |= FIELD_IS_NUMERIC_FLOAT;
} else if (number instanceof Double) {
bits |= FIELD_IS_NUMERIC_DOUBLE;
} else {
throw new IllegalArgumentException("cannot store numeric type " + number.getClass());
}
string = null;
bytes = null;
} else {
bytes = field.binaryValue();
if (bytes != null) {
bits |= FIELD_IS_BINARY;
string = null;
} else {
string = field.stringValue();
if (string == null) {
throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue");
}
}
}
fieldsStream.writeByte((byte) bits);
if (bytes != null) {
fieldsStream.writeVInt(bytes.length);
fieldsStream.writeBytes(bytes.bytes, bytes.offset, bytes.length);
} else if (string != null) {
fieldsStream.writeString(field.stringValue());
} else {
if (number instanceof Byte || number instanceof Short || number instanceof Integer) {
fieldsStream.writeInt(number.intValue());
} else if (number instanceof Long) {
fieldsStream.writeLong(number.longValue());
} else if (number instanceof Float) {
fieldsStream.writeInt(Float.floatToIntBits(number.floatValue()));
} else if (number instanceof Double) {
fieldsStream.writeLong(Double.doubleToLongBits(number.doubleValue()));
} else {
assert false;
}
}
}
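// A short self-contained sketch (values chosen only for illustration) of the numeric
// encodings writeField relies on: ints and longs are written as-is, while floats and
// doubles are stored via their IEEE-754 bit patterns so the reader reconstructs them
// exactly with Float.intBitsToFloat / Double.longBitsToDouble.
static void numericRoundTrip() {
float f = 3.14f;
int floatBits = Float.floatToIntBits(f); // written with writeInt(...)
assert Float.intBitsToFloat(floatBits) == f; // what readField recovers
double d = 2.718281828;
long doubleBits = Double.doubleToLongBits(d); // written with writeLong(...)
assert Double.longBitsToDouble(doubleBits) == d;
}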
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
public void addRawDocuments(IndexInput stream, int[] lengths, int numDocs) throws IOException {
long position = fieldsStream.getFilePointer();
long start = position;
for(int i=0;i<numDocs;i++) {
indexStream.writeLong(position);
position += lengths[i];
}
fieldsStream.copyBytes(stream, position-start);
assert fieldsStream.getFilePointer() == position;
}
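// A minimal sketch (plain arrays, no Lucene streams) of how addRawDocuments derives
// the per-document index entries: each document's start offset is the running sum of
// the preceding raw lengths, beginning at the current end of the fields data.
static long[] docStartPointers(long fieldsFileEnd, int[] docLengths) {
long[] starts = new long[docLengths.length];
long position = fieldsFileEnd;
for (int i = 0; i < docLengths.length; i++) {
starts[i] = position; // pointer that would go into the .fdx index stream
position += docLengths[i]; // next document begins right after this one
}
return starts;
}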
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
Override
public void finish(FieldInfos fis, int numDocs) throws IOException {
if (HEADER_LENGTH_IDX + ((long) numDocs) * 8 != indexStream.getFilePointer()) {
// This is most likely a bug in Sun JRE 1.6.0_04/_05;
// we detect that the bug has struck here, and
// throw an exception to prevent the corruption from
// entering the index. See LUCENE-1282 for details.
throw new RuntimeException("fdx size mismatch: docCount is " + numDocs + " but fdx file size is " + indexStream.getFilePointer() + " file=" + indexStream.toString() + "; now aborting this merge to prevent index corruption");
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
Override
public int merge(MergeState mergeState) throws IOException {
int docCount = 0;
// Used for bulk-reading raw bytes for stored fields
int rawDocLengths[] = new int[MAX_RAW_MERGE_DOCS];
int idx = 0;
for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
Lucene40StoredFieldsReader matchingFieldsReader = null;
if (matchingSegmentReader != null) {
final StoredFieldsReader fieldsReader = matchingSegmentReader.getFieldsReader();
// we can only bulk-copy if the matching reader is also a Lucene40StoredFieldsReader
if (fieldsReader != null && fieldsReader instanceof Lucene40StoredFieldsReader) {
matchingFieldsReader = (Lucene40StoredFieldsReader) fieldsReader;
}
}
if (reader.liveDocs != null) {
docCount += copyFieldsWithDeletions(mergeState,
reader, matchingFieldsReader, rawDocLengths);
} else {
docCount += copyFieldsNoDeletions(mergeState,
reader, matchingFieldsReader, rawDocLengths);
}
}
finish(mergeState.fieldInfos, docCount);
return docCount;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
private int copyFieldsWithDeletions(MergeState mergeState, final MergeState.IndexReaderAndLiveDocs reader,
final Lucene40StoredFieldsReader matchingFieldsReader, int rawDocLengths[])
throws IOException, MergeAbortedException, CorruptIndexException {
int docCount = 0;
final int maxDoc = reader.reader.maxDoc();
final Bits liveDocs = reader.liveDocs;
assert liveDocs != null;
if (matchingFieldsReader != null) {
// We can bulk-copy because the fieldInfos are "congruent"
for (int j = 0; j < maxDoc;) {
if (!liveDocs.get(j)) {
// skip deleted docs
++j;
continue;
}
// We can optimize this case (doing a bulk byte copy) since the field
// numbers are identical
int start = j, numDocs = 0;
do {
j++;
numDocs++;
if (j >= maxDoc) break;
if (!liveDocs.get(j)) {
j++;
break;
}
} while(numDocs < MAX_RAW_MERGE_DOCS);
IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, start, numDocs);
addRawDocuments(stream, rawDocLengths, numDocs);
docCount += numDocs;
mergeState.checkAbort.work(300 * numDocs);
}
} else {
for (int j = 0; j < maxDoc; j++) {
if (!liveDocs.get(j)) {
// skip deleted docs
continue;
}
// TODO: this could be more efficient using
// FieldVisitor instead of loading/writing entire
// doc; ie we just have to renumber the field number
// on the fly?
// NOTE: it's very important to first assign to doc then pass it to
// fieldsWriter.addDocument; see LUCENE-1282
Document doc = reader.reader.document(j);
addDocument(doc, mergeState.fieldInfos);
docCount++;
mergeState.checkAbort.work(300);
}
}
return docCount;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsWriter.java
private int copyFieldsNoDeletions(MergeState mergeState, final MergeState.IndexReaderAndLiveDocs reader,
final Lucene40StoredFieldsReader matchingFieldsReader, int rawDocLengths[])
throws IOException, MergeAbortedException, CorruptIndexException {
final int maxDoc = reader.reader.maxDoc();
int docCount = 0;
if (matchingFieldsReader != null) {
// We can bulk-copy because the fieldInfos are "congruent"
while (docCount < maxDoc) {
int len = Math.min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, docCount, len);
addRawDocuments(stream, rawDocLengths, len);
docCount += len;
mergeState.checkAbort.work(300 * len);
}
} else {
for (; docCount < maxDoc; docCount++) {
// NOTE: it's very important to first assign to doc then pass it to
// fieldsWriter.addDocument; see LUCENE-1282
Document doc = reader.reader.document(docCount);
addDocument(doc, mergeState.fieldInfos);
mergeState.checkAbort.work(300);
}
}
return docCount;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java
Override
public MutableBits newLiveDocs(int size) throws IOException {
BitVector bitVector = new BitVector(size);
bitVector.invertAll();
return bitVector;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java
Override
public MutableBits newLiveDocs(Bits existing) throws IOException {
final BitVector liveDocs = (BitVector) existing;
return liveDocs.clone();
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java
Override
public Bits readLiveDocs(Directory dir, SegmentInfoPerCommit info, IOContext context) throws IOException {
String filename = IndexFileNames.fileNameFromGeneration(info.info.name, DELETES_EXTENSION, info.getDelGen());
final BitVector liveDocs = new BitVector(dir, filename, context);
assert liveDocs.count() == info.info.getDocCount() - info.getDelCount():
"liveDocs.count()=" + liveDocs.count() + " info.docCount=" + info.info.getDocCount() + " info.getDelCount()=" + info.getDelCount();
assert liveDocs.length() == info.info.getDocCount();
return liveDocs;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java
Override
public void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfoPerCommit info, int newDelCount, IOContext context) throws IOException {
String filename = IndexFileNames.fileNameFromGeneration(info.info.name, DELETES_EXTENSION, info.getNextDelGen());
final BitVector liveDocs = (BitVector) bits;
assert liveDocs.count() == info.info.getDocCount() - info.getDelCount() - newDelCount;
assert liveDocs.length() == info.info.getDocCount();
liveDocs.write(dir, filename, context);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40LiveDocsFormat.java
Override
public void files(SegmentInfoPerCommit info, Collection<String> files) throws IOException {
if (info.hasDeletions()) {
files.add(IndexFileNames.fileNameFromGeneration(info.info.name, DELETES_EXTENSION, info.getDelGen()));
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesProducer.java
Override
protected void closeInternal(Collection<? extends Closeable> closeables) throws IOException {
if (cfs != null) {
final ArrayList<Closeable> list = new ArrayList<Closeable>(closeables);
list.add(cfs);
IOUtils.close(list);
} else {
IOUtils.close(closeables);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesProducer.java
Override
protected DocValues loadDocValues(int docCount, Directory dir, String id,
Type type, IOContext context) throws IOException {
switch (type) {
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
return Ints.getValues(dir, id, docCount, type, context);
case FLOAT_32:
return Floats.getValues(dir, id, docCount, context, type);
case FLOAT_64:
return Floats.getValues(dir, id, docCount, context, type);
case BYTES_FIXED_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount, getComparator(), context);
case BYTES_FIXED_DEREF:
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, true, docCount, getComparator(), context);
case BYTES_FIXED_SORTED:
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, true, docCount, getComparator(), context);
case BYTES_VAR_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, false, docCount, getComparator(), context);
case BYTES_VAR_DEREF:
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, false, docCount, getComparator(), context);
case BYTES_VAR_SORTED:
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount, getComparator(), context);
default:
throw new IllegalStateException("unrecognized index values mode " + type);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java
Override
public FieldInfosReader getFieldInfosReader() throws IOException {
return reader;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosFormat.java
Override
public FieldInfosWriter getFieldInfosWriter() throws IOException {
return writer;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
Override
public void merge(MergeState mergeState, DocValues[] docValues)
throws IOException {
boolean success = false;
try {
MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_VAR_SORTED, docValues, comp, mergeState.segmentInfo.getDocCount());
final List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState.docBase, mergeState.docMaps, docValues, ctx);
IndexOutput datOut = getOrCreateDataOut();
ctx.offsets = new long[1];
final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, new IndexOutputBytesRefConsumer(datOut), slices);
final long[] offsets = ctx.offsets;
maxBytes = offsets[maxOrd-1];
final IndexOutput idxOut = getOrCreateIndexOut();
idxOut.writeLong(maxBytes);
final PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, maxOrd+1,
PackedInts.bitsRequired(maxBytes), PackedInts.DEFAULT);
offsetWriter.add(0);
for (int i = 0; i < maxOrd; i++) {
offsetWriter.add(offsets[i]);
}
offsetWriter.finish();
final PackedInts.Writer ordsWriter = PackedInts.getWriter(idxOut, ctx.docToEntry.length,
PackedInts.bitsRequired(maxOrd-1), PackedInts.DEFAULT);
for (SortedSourceSlice slice : slices) {
slice.writeOrds(ordsWriter);
}
ordsWriter.finish();
success = true;
} finally {
releaseResources();
if (success) {
IOUtils.close(getIndexOut(), getDataOut());
} else {
IOUtils.closeWhileHandlingException(getIndexOut(), getDataOut());
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
Override
public void finishInternal(int docCount) throws IOException {
fillDefault(docCount);
final int count = hash.size();
final IndexOutput datOut = getOrCreateDataOut();
final IndexOutput idxOut = getOrCreateIndexOut();
long offset = 0;
final int[] index = new int[count];
final int[] sortedEntries = hash.sort(comp);
// total bytes of data
idxOut.writeLong(maxBytes);
PackedInts.Writer offsetWriter = PackedInts.getWriter(idxOut, count+1,
PackedInts.bitsRequired(maxBytes), PackedInts.DEFAULT);
// first dump bytes data, recording index & write offset as
// we go
final BytesRef spare = new BytesRef();
for (int i = 0; i < count; i++) {
final int e = sortedEntries[i];
offsetWriter.add(offset);
index[e] = i;
final BytesRef bytes = hash.get(e, spare);
// TODO: we could prefix code...
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
offset += bytes.length;
}
// write sentinel
offsetWriter.add(offset);
offsetWriter.finish();
// write index
writeIndex(idxOut, docCount, count, index, docToEntry);
}
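// A compact sketch (java.util collections instead of BytesRefHash/PackedInts) of the
// layout finishInternal builds above: values are de-duplicated and sorted, a start
// offset is recorded per value with one trailing sentinel, and each document keeps
// the ordinal of its value in sorted order.
static int[] sortedOrdsSketch(java.util.List<String> perDocValues) {
java.util.List<String> values =
new java.util.ArrayList<String>(new java.util.TreeSet<String>(perDocValues));
long[] offsets = new long[values.size() + 1]; // +1 for the sentinel entry
long offset = 0;
for (int i = 0; i < values.size(); i++) {
offsets[i] = offset;
offset += values.get(i).length(); // stand-in for the value's byte length
}
offsets[values.size()] = offset; // sentinel: total bytes of value data
int[] docToOrd = new int[perDocValues.size()];
for (int doc = 0; doc < perDocValues.size(); doc++) {
docToOrd[doc] = values.indexOf(perDocValues.get(doc));
}
return docToOrd;
}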
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
Override
public org.apache.lucene.index.DocValues.Source load()
throws IOException {
return new VarSortedSource(cloneData(), cloneIndex(), comparator);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarSortedBytesImpl.java
Override
public Source getDirectSource() throws IOException {
return new DirectSortedSource(cloneData(), cloneIndex(), comparator, getType());
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java
Override
public void finish(int docCount) throws IOException {
boolean success = false;
final IndexOutput dataOut = getOrCreateDataOut();
try {
if (!started) {
minValue = maxValue = 0;
}
final long delta = maxValue - minValue;
// if we exceed the range of positive longs we must switch to fixed
// ints
if (delta <= (maxValue >= 0 && minValue <= 0 ? Long.MAX_VALUE
: Long.MAX_VALUE - 1) && delta >= 0) {
dataOut.writeByte(PACKED);
writePackedInts(dataOut, docCount);
return; // done
} else {
dataOut.writeByte(FIXED_64);
}
writeData(dataOut);
writeZeros(docCount - (lastDocID + 1), dataOut);
success = true;
} finally {
resetPool();
if (success) {
IOUtils.close(dataOut);
} else {
IOUtils.closeWhileHandlingException(dataOut);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java
private void writePackedInts(IndexOutput datOut, int docCount) throws IOException {
datOut.writeLong(minValue);
// write a default value to recognize docs without a value for that
// field
final long defaultValue = maxValue >= 0 && minValue <= 0 ? 0 - minValue
: ++maxValue - minValue;
datOut.writeLong(defaultValue);
PackedInts.Writer w = PackedInts.getWriter(datOut, docCount,
PackedInts.bitsRequired(maxValue - minValue), PackedInts.DEFAULT);
for (int i = 0; i < lastDocID + 1; i++) {
set(bytesRef, i);
byte[] bytes = bytesRef.bytes;
int offset = bytesRef.offset;
long asLong =
(((long)(bytes[offset+0] & 0xff) << 56) |
((long)(bytes[offset+1] & 0xff) << 48) |
((long)(bytes[offset+2] & 0xff) << 40) |
((long)(bytes[offset+3] & 0xff) << 32) |
((long)(bytes[offset+4] & 0xff) << 24) |
((long)(bytes[offset+5] & 0xff) << 16) |
((long)(bytes[offset+6] & 0xff) << 8) |
((long)(bytes[offset+7] & 0xff)));
w.add(asLong == 0 ? defaultValue : asLong - minValue);
}
for (int i = lastDocID + 1; i < docCount; i++) {
w.add(defaultValue);
}
w.finish();
}
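// A self-contained sketch of the big-endian byte[8] <-> long conversion spelled out
// with shifts above; java.nio.ByteBuffer performs the same reconstruction (shown only
// as an equivalent illustration, not what the production code uses).
static long bytesToLongBigEndian(byte[] bytes, int offset) {
return java.nio.ByteBuffer.wrap(bytes, offset, 8).getLong();
}
static byte[] longToBytesBigEndian(long value) {
return java.nio.ByteBuffer.allocate(8).putLong(value).array();
}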
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java
Override
public void add(int docID, IndexableField docValue) throws IOException {
final long v = docValue.numericValue().longValue();
assert lastDocId < docID;
if (!started) {
started = true;
minValue = maxValue = v;
} else {
if (v < minValue) {
minValue = v;
} else if (v > maxValue) {
maxValue = v;
}
}
lastDocId = docID;
DocValuesArraySource.copyLong(bytesRef, v);
bytesSpareField.setBytesValue(bytesRef);
super.add(docID, bytesSpareField);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java
Override
public Source load() throws IOException {
boolean success = false;
final Source source;
IndexInput input = null;
try {
input = (IndexInput) datIn.clone();
if (values == null) {
source = new PackedIntsSource(input, false);
} else {
source = values.newFromInput(input, numDocs);
}
success = true;
return source;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(input, datIn);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java
Override
public void close() throws IOException {
super.close();
datIn.close();
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/PackedIntValues.java
Override
public Source getDirectSource() throws IOException {
return values != null ? new FixedStraightBytesImpl.DirectFixedStraightSource((IndexInput) datIn.clone(), 8, Type.FIXED_INTS_64) : new PackedIntsSource((IndexInput) datIn.clone(), true);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
Override
public void merge(MergeState mergeState, DocValues[] docValues)
throws IOException {
boolean success = false;
try {
final MergeContext ctx = SortedBytesMergeUtils.init(Type.BYTES_FIXED_SORTED, docValues, comp, mergeState.segmentInfo.getDocCount());
List<SortedSourceSlice> slices = SortedBytesMergeUtils.buildSlices(mergeState.docBase, mergeState.docMaps, docValues, ctx);
final IndexOutput datOut = getOrCreateDataOut();
datOut.writeInt(ctx.sizePerValues);
final int maxOrd = SortedBytesMergeUtils.mergeRecords(ctx, new IndexOutputBytesRefConsumer(datOut), slices);
final IndexOutput idxOut = getOrCreateIndexOut();
idxOut.writeInt(maxOrd);
final PackedInts.Writer ordsWriter = PackedInts.getWriter(idxOut, ctx.docToEntry.length,
PackedInts.bitsRequired(maxOrd), PackedInts.DEFAULT);
for (SortedSourceSlice slice : slices) {
slice.writeOrds(ordsWriter);
}
ordsWriter.finish();
success = true;
} finally {
releaseResources();
if (success) {
IOUtils.close(getIndexOut(), getDataOut());
} else {
IOUtils.closeWhileHandlingException(getIndexOut(), getDataOut());
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
Override
public void finishInternal(int docCount) throws IOException {
fillDefault(docCount);
final IndexOutput datOut = getOrCreateDataOut();
final int count = hash.size();
final int[] address = new int[count];
datOut.writeInt(size);
if (size != -1) {
final int[] sortedEntries = hash.sort(comp);
// first dump bytes data, recording address as we go
final BytesRef spare = new BytesRef(size);
for (int i = 0; i < count; i++) {
final int e = sortedEntries[i];
final BytesRef bytes = hash.get(e, spare);
assert bytes.length == size;
datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
address[e] = i;
}
}
final IndexOutput idxOut = getOrCreateIndexOut();
idxOut.writeInt(count);
writeIndex(idxOut, docCount, count, address, docToEntry);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
Override
public Source load() throws IOException {
return new FixedSortedSource(cloneData(), cloneIndex(), size, valueCount,
comparator);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedSortedBytesImpl.java
Override
public Source getDirectSource() throws IOException {
return new DirectFixedSortedSource(cloneData(), cloneIndex(), size,
valueCount, comparator, type);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DirectSource.java
double toDouble(IndexInput input) throws IOException {
return toLong(input);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DirectSource.java
Override
long toLong(IndexInput input) throws IOException {
return input.readByte();
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DirectSource.java
Override
long toLong(IndexInput input) throws IOException {
return input.readShort();
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DirectSource.java
Override
long toLong(IndexInput input) throws IOException {
return input.readInt();
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DirectSource.java
double toDouble(IndexInput input) throws IOException {
return Float.intBitsToFloat(input.readInt());
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DirectSource.java
Override
long toLong(IndexInput input) throws IOException {
return input.readLong();
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DirectSource.java
double toDouble(IndexInput input) throws IOException {
return Double.longBitsToDouble(input.readLong());
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Writer.java
public static DocValuesConsumer create(Type type, String id, Directory directory,
Comparator<BytesRef> comp, Counter bytesUsed, IOContext context, float acceptableOverheadRatio) throws IOException {
if (comp == null) {
comp = BytesRef.getUTF8SortedAsUnicodeComparator();
}
switch (type) {
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
return Ints.getWriter(directory, id, bytesUsed, type, context);
case FLOAT_32:
return Floats.getWriter(directory, id, bytesUsed, context, type);
case FLOAT_64:
return Floats.getWriter(directory, id, bytesUsed, context, type);
case BYTES_FIXED_STRAIGHT:
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, true, comp,
bytesUsed, context, acceptableOverheadRatio);
case BYTES_FIXED_DEREF:
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, true, comp,
bytesUsed, context, acceptableOverheadRatio);
case BYTES_FIXED_SORTED:
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, true, comp,
bytesUsed, context, acceptableOverheadRatio);
case BYTES_VAR_STRAIGHT:
return Bytes.getWriter(directory, id, Bytes.Mode.STRAIGHT, false, comp,
bytesUsed, context, acceptableOverheadRatio);
case BYTES_VAR_DEREF:
return Bytes.getWriter(directory, id, Bytes.Mode.DEREF, false, comp,
bytesUsed, context, acceptableOverheadRatio);
case BYTES_VAR_SORTED:
return Bytes.getWriter(directory, id, Bytes.Mode.SORTED, false, comp,
bytesUsed, context, acceptableOverheadRatio);
default:
throw new IllegalArgumentException("Unknown Values: " + type);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java
public static DocValuesConsumer getWriter(Directory dir, String id, Counter bytesUsed,
IOContext context, Type type) throws IOException {
return new FloatsWriter(dir, id, bytesUsed, context, type);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java
public static DocValues getValues(Directory dir, String id, int maxDoc, IOContext context, Type type)
throws IOException {
return new FloatsReader(dir, id, maxDoc, context, type);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java
Override
public void add(int docID, IndexableField value) throws IOException {
template.toBytes(value.numericValue().doubleValue(), bytesRef);
bytesSpareField.setBytesValue(bytesRef);
super.add(docID, bytesSpareField);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Floats.java
Override
public Source load() throws IOException {
final IndexInput indexInput = cloneData();
try {
return arrayTemplate.newFromInput(indexInput, maxDoc);
} finally {
IOUtils.close(indexInput);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java
public static DocValuesConsumer getWriter(Directory dir, String id, Counter bytesUsed,
Type type, IOContext context) throws IOException {
return type == Type.VAR_INTS ? new PackedIntValues.PackedIntsWriter(dir, id,
bytesUsed, context) : new IntsWriter(dir, id, bytesUsed, context, type);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java
public static DocValues getValues(Directory dir, String id, int numDocs,
Type type, IOContext context) throws IOException {
return type == Type.VAR_INTS ? new PackedIntValues.PackedIntsReader(dir, id,
numDocs, context) : new IntsReader(dir, id, numDocs, context, type);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java
Override
public void add(int docID, IndexableField value) throws IOException {
template.toBytes(value.numericValue().longValue(), bytesRef);
bytesSpareField.setBytesValue(bytesRef);
super.add(docID, bytesSpareField);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Ints.java
Override
public Source load() throws IOException {
final IndexInput indexInput = cloneData();
try {
return arrayTemplate.newFromInput(indexInput, maxDoc);
} finally {
IOUtils.close(indexInput);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
Override
public void add(int docID, IndexableField value) throws IOException {
final BytesRef bytes = value.binaryValue();
assert bytes != null;
assert !merge;
if (bytes.length == 0) {
return; // default
}
fill(docID, address);
docToAddress[docID] = address;
pool.copy(bytes);
address += bytes.length;
lastDocID = docID;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
Override
protected void merge(DocValues readerIn, int docBase, int docCount, Bits liveDocs) throws IOException {
merge = true;
datOut = getOrCreateDataOut();
boolean success = false;
try {
if (liveDocs == null && readerIn instanceof VarStraightReader) {
// bulk merge since we don't have any deletes
VarStraightReader reader = (VarStraightReader) readerIn;
final int maxDocs = reader.maxDoc;
if (maxDocs == 0) {
return;
}
if (lastDocID+1 < docBase) {
fill(docBase, address);
lastDocID = docBase-1;
}
final long numDataBytes;
final IndexInput cloneIdx = reader.cloneIndex();
try {
numDataBytes = cloneIdx.readVLong();
final ReaderIterator iter = PackedInts.getReaderIterator(cloneIdx);
for (int i = 0; i < maxDocs; i++) {
long offset = iter.next();
++lastDocID;
if (lastDocID >= docToAddress.length) {
int oldSize = docToAddress.length;
docToAddress = ArrayUtil.grow(docToAddress, 1 + lastDocID);
bytesUsed.addAndGet((docToAddress.length - oldSize)
* RamUsageEstimator.NUM_BYTES_INT);
}
docToAddress[lastDocID] = address + offset;
}
address += numDataBytes; // this is the address after all addr pointers are updated
iter.close();
} finally {
IOUtils.close(cloneIdx);
}
final IndexInput cloneData = reader.cloneData();
try {
datOut.copyBytes(cloneData, numDataBytes);
} finally {
IOUtils.close(cloneData);
}
} else {
super.merge(readerIn, docBase, docCount, liveDocs);
}
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(datOut);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
Override
protected void mergeDoc(Field scratchField, Source source, int docID, int sourceDoc) throws IOException {
assert merge;
assert lastDocID < docID;
source.getBytes(sourceDoc, bytesRef);
if (bytesRef.length == 0) {
return; // default
}
fill(docID, address);
datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
docToAddress[docID] = address;
address += bytesRef.length;
lastDocID = docID;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
Override
public void finish(int docCount) throws IOException {
boolean success = false;
assert (!merge && datOut == null) || (merge && datOut != null);
final IndexOutput datOut = getOrCreateDataOut();
try {
if (!merge) {
// header is already written in getOrCreateDataOut()
pool.writePool(datOut);
}
success = true;
} finally {
if (success) {
IOUtils.close(datOut);
} else {
IOUtils.closeWhileHandlingException(datOut);
}
pool.dropBuffersAndReset();
}
success = false;
final IndexOutput idxOut = getOrCreateIndexOut();
try {
if (lastDocID == -1) {
idxOut.writeVLong(0);
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1,
PackedInts.bitsRequired(0), PackedInts.DEFAULT);
// docCount+1 so we write sentinel
for (int i = 0; i < docCount+1; i++) {
w.add(0);
}
w.finish();
} else {
fill(docCount, address);
idxOut.writeVLong(address);
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount+1,
PackedInts.bitsRequired(address), PackedInts.DEFAULT);
for (int i = 0; i < docCount; i++) {
w.add(docToAddress[i]);
}
// write sentinel
w.add(address);
w.finish();
}
success = true;
} finally {
bytesUsed.addAndGet(-(docToAddress.length)
* RamUsageEstimator.NUM_BYTES_INT);
docToAddress = null;
if (success) {
IOUtils.close(idxOut);
} else {
IOUtils.closeWhileHandlingException(idxOut);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
Override
public Source load() throws IOException {
return new VarStraightSource(cloneData(), cloneIndex());
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
Override
public Source getDirectSource()
throws IOException {
return new DirectVarStraightSource(cloneData(), cloneIndex(), getType());
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarStraightBytesImpl.java
Override
protected int position(int docID) throws IOException {
final long offset = index.get(docID);
data.seek(baseOffset + offset);
// Safe to do 1+docID because we write sentinel at the end:
final long nextOffset = index.get(1+docID);
return (int) (nextOffset - offset);
}
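// A tiny sketch (a plain long[] in place of the packed index) of the sentinel trick
// position() exploits above: finish() writes one extra address past the last document,
// so a value's length is simply the next offset minus its own.
static int lengthAt(long[] offsetsWithSentinel, int docID) {
long offset = offsetsWithSentinel[docID];
long nextOffset = offsetsWithSentinel[docID + 1]; // safe: the sentinel covers the last doc
return (int) (nextOffset - offset);
}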
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
public static DocValuesConsumer getWriter(Directory dir, String id, Mode mode,
boolean fixedSize, Comparator<BytesRef> sortComparator,
Counter bytesUsed, IOContext context, float acceptableOverheadRatio)
throws IOException {
// TODO -- i shouldn't have to specify fixed? can
// track itself & do the write thing at write time?
if (sortComparator == null) {
sortComparator = BytesRef.getUTF8SortedAsUnicodeComparator();
}
if (fixedSize) {
if (mode == Mode.STRAIGHT) {
return new FixedStraightBytesImpl.Writer(dir, id, bytesUsed, context);
} else if (mode == Mode.DEREF) {
return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed, context);
} else if (mode == Mode.SORTED) {
return new FixedSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, acceptableOverheadRatio);
}
} else {
if (mode == Mode.STRAIGHT) {
return new VarStraightBytesImpl.Writer(dir, id, bytesUsed, context);
} else if (mode == Mode.DEREF) {
return new VarDerefBytesImpl.Writer(dir, id, bytesUsed, context);
} else if (mode == Mode.SORTED) {
return new VarSortedBytesImpl.Writer(dir, id, sortComparator, bytesUsed, context, acceptableOverheadRatio);
}
}
throw new IllegalArgumentException("");
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
public static DocValues getValues(Directory dir, String id, Mode mode,
boolean fixedSize, int maxDoc, Comparator<BytesRef> sortComparator, IOContext context) throws IOException {
if (sortComparator == null) {
sortComparator = BytesRef.getUTF8SortedAsUnicodeComparator();
}
// TODO -- I can peek @ header to determine fixed/mode?
if (fixedSize) {
if (mode == Mode.STRAIGHT) {
return new FixedStraightBytesImpl.FixedStraightReader(dir, id, maxDoc, context);
} else if (mode == Mode.DEREF) {
return new FixedDerefBytesImpl.FixedDerefReader(dir, id, maxDoc, context);
} else if (mode == Mode.SORTED) {
return new FixedSortedBytesImpl.Reader(dir, id, maxDoc, context, Type.BYTES_FIXED_SORTED, sortComparator);
}
} else {
if (mode == Mode.STRAIGHT) {
return new VarStraightBytesImpl.VarStraightReader(dir, id, maxDoc, context);
} else if (mode == Mode.DEREF) {
return new VarDerefBytesImpl.VarDerefReader(dir, id, maxDoc, context);
} else if (mode == Mode.SORTED) {
return new VarSortedBytesImpl.Reader(dir, id, maxDoc,context, Type.BYTES_VAR_SORTED, sortComparator);
}
}
throw new IllegalArgumentException("Illegal Mode: " + mode);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
protected IndexOutput getOrCreateDataOut() throws IOException {
if (datOut == null) {
boolean success = false;
assert codecNameDat != null;
try {
datOut = dir.createOutput(IndexFileNames.segmentFileName(id, DV_SEGMENT_SUFFIX,
DocValuesWriterBase.DATA_EXTENSION), context);
CodecUtil.writeHeader(datOut, codecNameDat, version);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(datOut);
}
}
}
return datOut;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
protected IndexOutput getOrCreateIndexOut() throws IOException {
boolean success = false;
try {
if (idxOut == null) {
assert codecNameIdx != null;
idxOut = dir.createOutput(IndexFileNames.segmentFileName(id, DV_SEGMENT_SUFFIX,
DocValuesWriterBase.INDEX_EXTENSION), context);
CodecUtil.writeHeader(idxOut, codecNameIdx, version);
}
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(idxOut);
}
}
return idxOut;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
Override
public void close() throws IOException {
try {
super.close();
} finally {
IOUtils.close(datIn, idxIn);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
protected static int writePrefixLength(DataOutput datOut, BytesRef bytes)
throws IOException {
if (bytes.length < 128) {
datOut.writeByte((byte) bytes.length);
return 1;
} else {
datOut.writeByte((byte) (0x80 | (bytes.length >> 8)));
datOut.writeByte((byte) (bytes.length & 0xff));
return 2;
}
}
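// A standalone sketch (byte arrays in place of DataOutput/IndexInput) of the one-or-
// two-byte length prefix written by writePrefixLength above and decoded again in
// VarDerefBytesImpl.position() further below: lengths under 128 fit in one byte,
// longer lengths set the high bit and spill into a second byte.
static byte[] encodeLength(int length) {
if (length < 128) {
return new byte[] { (byte) length };
} else {
return new byte[] { (byte) (0x80 | (length >> 8)), (byte) (length & 0xFF) };
}
}
static int decodeLength(byte[] encoded) {
int first = encoded[0] & 0xFF;
if ((first & 0x80) == 0) {
return first; // single-byte length
}
return ((first & 0x7F) << 8) | (encoded[1] & 0xFF); // two-byte length
}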
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
Override
public void add(int docID, IndexableField value) throws IOException {
BytesRef bytes = value.binaryValue();
assert bytes != null;
if (bytes.length == 0) { // default value - skip it
return;
}
checkSize(bytes);
fillDefault(docID);
int ord = hash.add(bytes);
if (ord < 0) {
ord = (-ord) - 1;
} else {
maxBytes += bytes.length;
}
docToEntry[docID] = ord;
lastDocId = docID;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
Override
public void finish(int docCount) throws IOException {
boolean success = false;
try {
finishInternal(docCount);
success = true;
} finally {
releaseResources();
if (success) {
IOUtils.close(getIndexOut(), getDataOut());
} else {
IOUtils.closeWhileHandlingException(getIndexOut(), getDataOut());
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
protected void writeIndex(IndexOutput idxOut, int docCount,
long maxValue, int[] toEntry) throws IOException {
writeIndex(idxOut, docCount, maxValue, (int[])null, toEntry);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
protected void writeIndex(IndexOutput idxOut, int docCount,
long maxValue, int[] addresses, int[] toEntry) throws IOException {
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
PackedInts.bitsRequired(maxValue), acceptableOverheadRatio);
final int limit = docCount > docToEntry.length ? docToEntry.length
: docCount;
assert toEntry.length >= limit -1;
if (addresses != null) {
for (int i = 0; i < limit; i++) {
assert addresses[toEntry[i]] >= 0;
w.add(addresses[toEntry[i]]);
}
} else {
for (int i = 0; i < limit; i++) {
assert toEntry[i] >= 0;
w.add(toEntry[i]);
}
}
for (int i = limit; i < docCount; i++) {
w.add(0);
}
w.finish();
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
protected void writeIndex(IndexOutput idxOut, int docCount,
long maxValue, long[] addresses, int[] toEntry) throws IOException {
final PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
PackedInts.bitsRequired(maxValue), acceptableOverheadRatio);
final int limit = docCount > docToEntry.length ? docToEntry.length
: docCount;
assert toEntry.length >= limit -1;
if (addresses != null) {
for (int i = 0; i < limit; i++) {
assert addresses[toEntry[i]] >= 0;
w.add(addresses[toEntry[i]]);
}
} else {
for (int i = 0; i < limit; i++) {
assert toEntry[i] >= 0;
w.add(toEntry[i]);
}
}
for (int i = limit; i < docCount; i++) {
w.add(0);
}
w.finish();
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/Bytes.java
protected void closeIndexInput() throws IOException {
IOUtils.close(datIn, idxIn);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
Override
public void add(int docID, IndexableField value) throws IOException {
final BytesRef bytes = value.binaryValue();
assert bytes != null;
assert lastDocID < docID;
if (size == -1) {
if (bytes.length > BYTE_BLOCK_SIZE) {
throw new IllegalArgumentException("bytes arrays > " + BYTE_BLOCK_SIZE + " are not supported");
}
size = bytes.length;
} else if (bytes.length != size) {
throw new IllegalArgumentException("byte[] length changed for BYTES_FIXED_STRAIGHT type (before=" + size + " now=" + bytes.length);
}
if (lastDocID+1 < docID) {
advancePool(docID);
}
pool.copy(bytes);
lastDocID = docID;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
protected void writeData(IndexOutput out) throws IOException {
pool.writePool(out);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
protected void writeZeros(int num, IndexOutput out) throws IOException {
final byte[] zeros = new byte[size];
for (int i = 0; i < num; i++) {
out.writeBytes(zeros, zeros.length);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
Override
protected void merge(DocValues readerIn, int docBase, int docCount, Bits liveDocs) throws IOException {
datOut = getOrCreateDataOut();
boolean success = false;
try {
if (!hasMerged && size != -1) {
datOut.writeInt(size);
}
if (liveDocs == null && tryBulkMerge(readerIn)) {
FixedStraightReader reader = (FixedStraightReader) readerIn;
final int maxDocs = reader.maxDoc;
if (maxDocs == 0) {
return;
}
if (size == -1) {
size = reader.size;
datOut.writeInt(size);
} else if (size != reader.size) {
throw new IllegalArgumentException("expected bytes size=" + size
+ " but got " + reader.size);
}
if (lastDocID+1 < docBase) {
fill(datOut, docBase);
lastDocID = docBase-1;
}
// TODO: should we add a transferTo API to each reader?
final IndexInput cloneData = reader.cloneData();
try {
datOut.copyBytes(cloneData, size * maxDocs);
} finally {
IOUtils.close(cloneData);
}
lastDocID += maxDocs;
} else {
super.merge(readerIn, docBase, docCount, liveDocs);
}
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(datOut);
}
hasMerged = true;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
Override
protected void mergeDoc(Field scratchField, Source source, int docID, int sourceDoc) throws IOException {
assert lastDocID < docID;
setMergeBytes(source, sourceDoc);
if (size == -1) {
size = bytesRef.length;
datOut.writeInt(size);
}
assert size == bytesRef.length : "size: " + size + " ref: " + bytesRef.length;
if (lastDocID+1 < docID) {
fill(datOut, docID);
}
datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
lastDocID = docID;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
private void fill(IndexOutput datOut, int docID) throws IOException {
assert size >= 0;
writeZeros((docID - (lastDocID+1)), datOut);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
Override
public void finish(int docCount) throws IOException {
boolean success = false;
try {
if (!hasMerged) {
// indexing path - no disk IO until here
assert datOut == null;
datOut = getOrCreateDataOut();
if (size == -1) {
datOut.writeInt(0);
} else {
datOut.writeInt(size);
writeData(datOut);
}
if (lastDocID + 1 < docCount) {
fill(datOut, docCount);
}
} else {
// merge path - datOut should be initialized
assert datOut != null;
if (size == -1) {// no data added
datOut.writeInt(0);
} else {
fill(datOut, docCount);
}
}
success = true;
} finally {
resetPool();
if (success) {
IOUtils.close(datOut);
} else {
IOUtils.closeWhileHandlingException(datOut);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
Override
public Source load() throws IOException {
return size == 1 ? new SingleByteSource(cloneData(), maxDoc) :
new FixedStraightSource(cloneData(), size, maxDoc, type);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
Override
public void close() throws IOException {
datIn.close();
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
Override
public Source getDirectSource() throws IOException {
return new DirectFixedStraightSource(cloneData(), size, getType());
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedStraightBytesImpl.java
Override
protected int position(int docID) throws IOException {
data.seek(baseOffset + size * docID);
return size;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java
Override
protected void finishInternal(int docCount) throws IOException {
final int numValues = hash.size();
final IndexOutput datOut = getOrCreateDataOut();
datOut.writeInt(size);
if (size != -1) {
final BytesRef bytesRef = new BytesRef(size);
for (int i = 0; i < numValues; i++) {
hash.get(i, bytesRef);
datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
}
}
final IndexOutput idxOut = getOrCreateIndexOut();
idxOut.writeInt(numValues);
writeIndex(idxOut, docCount, numValues, docToEntry);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java
Override
public Source load() throws IOException {
return new FixedDerefSource(cloneData(), cloneIndex(), size, numValuesStored);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java
Override
public Source getDirectSource()
throws IOException {
return new DirectFixedDerefSource(cloneData(), cloneIndex(), size, getType());
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/FixedDerefBytesImpl.java
Override
protected int position(int docID) throws IOException {
data.seek(baseOffset + index.get(docID) * size);
return size;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesWriterBase.java
Override
public void close() throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesWriterBase.java
Override
public DocValuesConsumer addValuesField(Type valueType, FieldInfo field) throws IOException {
return Writer.create(valueType,
PerDocProducerBase.docValuesId(segmentName, field.number),
getDirectory(), getComparator(), bytesUsed, context, acceptableOverheadRatio);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/DocValuesWriterBase.java
public Comparator<BytesRef> getComparator() throws IOException {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java
Override
public void finishInternal(int docCount) throws IOException {
fillDefault(docCount);
final int size = hash.size();
final long[] addresses = new long[size];
final IndexOutput datOut = getOrCreateDataOut();
int addr = 0;
final BytesRef bytesRef = new BytesRef();
for (int i = 0; i < size; i++) {
hash.get(i, bytesRef);
addresses[i] = addr;
addr += writePrefixLength(datOut, bytesRef) + bytesRef.length;
datOut.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
}
final IndexOutput idxOut = getOrCreateIndexOut();
// write the max address to read directly on source load
idxOut.writeLong(addr);
writeIndex(idxOut, docCount, addresses[addresses.length-1], addresses, docToEntry);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java
Override
public Source load() throws IOException {
return new VarDerefSource(cloneData(), cloneIndex(), totalBytes);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java
Override
public Source getDirectSource()
throws IOException {
return new DirectVarDerefSource(cloneData(), cloneIndex(), getType());
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/values/VarDerefBytesImpl.java
Override
protected int position(int docID) throws IOException {
data.seek(baseOffset + index.get(docID));
final byte sizeByte = data.readByte();
if ((sizeByte & 128) == 0) {
// length is 1 byte
return sizeByte;
} else {
return ((sizeByte & 0x7f) << 8) | ((data.readByte() & 0xff));
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java
Override
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
return new Lucene40NormsDocValuesConsumer(state, NORMS_SEGMENT_SUFFIX);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java
Override
public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
return new Lucene40NormsDocValuesProducer(state, NORMS_SEGMENT_SUFFIX);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40NormsFormat.java
Override
protected DocValues getDocValuesForMerge(AtomicReader reader, FieldInfo info)
throws IOException {
return reader.normValues(info.name);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java
Override
protected void seekChild(int level) throws IOException {
super.seekChild(level);
freqPointer[level] = lastFreqPointer;
proxPointer[level] = lastProxPointer;
payloadLength[level] = lastPayloadLength;
offsetLength[level] = lastOffsetLength;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListReader.java
Override
protected int readSkipData(int level, IndexInput skipStream) throws IOException {
int delta;
if (currentFieldStoresPayloads || currentFieldStoresOffsets) {
// the current field stores payloads and/or offsets.
// if the doc delta is odd then we have
// to read the current payload/offset lengths
// because they differ from the lengths at the
// previous skip point
delta = skipStream.readVInt();
if ((delta & 1) != 0) {
if (currentFieldStoresPayloads) {
payloadLength[level] = skipStream.readVInt();
}
if (currentFieldStoresOffsets) {
offsetLength[level] = skipStream.readVInt();
}
}
delta >>>= 1;
} else {
delta = skipStream.readVInt();
}
freqPointer[level] += skipStream.readVInt();
proxPointer[level] += skipStream.readVInt();
return delta;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java
Override
public FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentName, "", Lucene40FieldInfosWriter.FIELD_INFOS_EXTENSION);
IndexInput input = directory.openInput(fileName, iocontext);
try {
CodecUtil.checkHeader(input, Lucene40FieldInfosWriter.CODEC_NAME,
Lucene40FieldInfosWriter.FORMAT_START,
Lucene40FieldInfosWriter.FORMAT_CURRENT);
final int size = input.readVInt(); //read in the size
FieldInfo infos[] = new FieldInfo[size];
for (int i = 0; i < size; i++) {
String name = input.readString();
final int fieldNumber = input.readVInt();
byte bits = input.readByte();
boolean isIndexed = (bits & Lucene40FieldInfosWriter.IS_INDEXED) != 0;
boolean storeTermVector = (bits & Lucene40FieldInfosWriter.STORE_TERMVECTOR) != 0;
boolean omitNorms = (bits & Lucene40FieldInfosWriter.OMIT_NORMS) != 0;
boolean storePayloads = (bits & Lucene40FieldInfosWriter.STORE_PAYLOADS) != 0;
final IndexOptions indexOptions;
if (!isIndexed) {
indexOptions = null;
} else if ((bits & Lucene40FieldInfosWriter.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
indexOptions = IndexOptions.DOCS_ONLY;
} else if ((bits & Lucene40FieldInfosWriter.OMIT_POSITIONS) != 0) {
indexOptions = IndexOptions.DOCS_AND_FREQS;
} else if ((bits & Lucene40FieldInfosWriter.STORE_OFFSETS_IN_POSTINGS) != 0) {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
} else {
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
// LUCENE-3027: past indices were able to write
// storePayloads=true when omitTFAP is also true,
// which is invalid. We correct that here:
if (isIndexed && indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
storePayloads = false;
}
// DV Types are packed in one byte
byte val = input.readByte();
final DocValues.Type docValuesType = getDocValuesType((byte) (val & 0x0F));
final DocValues.Type normsType = getDocValuesType((byte) ((val >>> 4) & 0x0F));
final Map<String,String> attributes = input.readStringStringMap();
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(attributes));
}
if (input.getFilePointer() != input.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
}
return new FieldInfos(infos);
} finally {
input.close();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SkipListWriter.java
Override
protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException {
// To efficiently store payloads/offsets in the posting lists we do not store the length of
// every payload/offset. Instead we omit the length if the previous lengths were the same
//
// However, in order to support skipping, the length at every skip point must be known.
// So we use the same length encoding that we use for the posting lists for the skip data as well:
// Case 1: current field does not store payloads/offsets
// SkipDatum --> DocSkip, FreqSkip, ProxSkip
// DocSkip,FreqSkip,ProxSkip --> VInt
// DocSkip records the document number before every SkipInterval-th document in TermFreqs.
// Document numbers are represented as differences from the previous value in the sequence.
// Case 2: current field stores payloads/offsets
// SkipDatum --> DocSkip, PayloadLength?,OffsetLength?,FreqSkip,ProxSkip
// DocSkip,FreqSkip,ProxSkip --> VInt
// PayloadLength,OffsetLength--> VInt
// In this case DocSkip/2 is the difference between
// the current and the previous value. If DocSkip
// is odd, then a PayloadLength encoded as VInt follows,
// if DocSkip is even, then it is assumed that the
// current payload/offset lengths equal the lengths at the previous
// skip point
int delta = curDoc - lastSkipDoc[level];
if (curStorePayloads || curStoreOffsets) {
assert curStorePayloads || curPayloadLength == lastSkipPayloadLength[level];
assert curStoreOffsets || curOffsetLength == lastSkipOffsetLength[level];
if (curPayloadLength == lastSkipPayloadLength[level] && curOffsetLength == lastSkipOffsetLength[level]) {
// the current payload/offset lengths equal the lengths at the previous skip point,
// so we don't store the lengths again
skipBuffer.writeVInt(delta << 1);
} else {
// the payload and/or offset length is different from the previous one. We shift the DocSkip,
// set the lowest bit and store the current payload and/or offset lengths as VInts.
skipBuffer.writeVInt(delta << 1 | 1);
if (curStorePayloads) {
skipBuffer.writeVInt(curPayloadLength);
lastSkipPayloadLength[level] = curPayloadLength;
}
if (curStoreOffsets) {
skipBuffer.writeVInt(curOffsetLength);
lastSkipOffsetLength[level] = curOffsetLength;
}
}
} else {
// current field does not store payloads or offsets
skipBuffer.writeVInt(delta);
}
skipBuffer.writeVInt((int) (curFreqPointer - lastSkipFreqPointer[level]));
skipBuffer.writeVInt((int) (curProxPointer - lastSkipProxPointer[level]));
lastSkipDoc[level] = curDoc;
lastSkipFreqPointer[level] = curFreqPointer;
lastSkipProxPointer[level] = curProxPointer;
}
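// Illustrative sketch (not the Lucene classes; written values are collected in a
// list instead of an IndexOutput): the DocSkip encoding described above shifts
// the delta left by one and uses the low bit to signal that a new payload/offset
// length follows.
final class SkipDatumDemo {
  static java.util.List<Integer> encode(int delta, int length, int lastLength) {
    java.util.List<Integer> out = new java.util.ArrayList<>();
    if (length == lastLength) {
      out.add(delta << 1);      // even: reuse the previous length
    } else {
      out.add(delta << 1 | 1);  // odd: a new length follows
      out.add(length);
    }
    return out;
  }
  public static void main(String[] args) {
    System.out.println(encode(5, 8, 8)); // [10]    - length unchanged
    System.out.println(encode(5, 9, 8)); // [11, 9] - length changed
  }
}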
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
public final void close() throws IOException {
if (!closed) {
IOUtils.close(fieldsStream, indexStream);
closed = true;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
private void seekIndex(int docID) throws IOException {
indexStream.seek(HEADER_LENGTH_IDX + docID * 8L);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
seekIndex(n);
fieldsStream.seek(indexStream.readLong());
final int numFields = fieldsStream.readVInt();
for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++) {
int fieldNumber = fieldsStream.readVInt();
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
int bits = fieldsStream.readByte() & 0xFF;
assert bits <= (FIELD_IS_NUMERIC_MASK | FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
switch(visitor.needsField(fieldInfo)) {
case YES:
readField(visitor, fieldInfo, bits);
break;
case NO:
skipField(bits);
break;
case STOP:
return;
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
private void readField(StoredFieldVisitor visitor, FieldInfo info, int bits) throws IOException {
final int numeric = bits & FIELD_IS_NUMERIC_MASK;
if (numeric != 0) {
switch(numeric) {
case FIELD_IS_NUMERIC_INT:
visitor.intField(info, fieldsStream.readInt());
return;
case FIELD_IS_NUMERIC_LONG:
visitor.longField(info, fieldsStream.readLong());
return;
case FIELD_IS_NUMERIC_FLOAT:
visitor.floatField(info, Float.intBitsToFloat(fieldsStream.readInt()));
return;
case FIELD_IS_NUMERIC_DOUBLE:
visitor.doubleField(info, Double.longBitsToDouble(fieldsStream.readLong()));
return;
default:
throw new CorruptIndexException("Invalid numeric type: " + Integer.toHexString(numeric));
}
} else {
final int length = fieldsStream.readVInt();
byte bytes[] = new byte[length];
fieldsStream.readBytes(bytes, 0, length);
if ((bits & FIELD_IS_BINARY) != 0) {
visitor.binaryField(info, bytes, 0, bytes.length);
} else {
visitor.stringField(info, new String(bytes, 0, bytes.length, IOUtils.CHARSET_UTF_8));
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
private void skipField(int bits) throws IOException {
final int numeric = bits & FIELD_IS_NUMERIC_MASK;
if (numeric != 0) {
switch(numeric) {
case FIELD_IS_NUMERIC_INT:
case FIELD_IS_NUMERIC_FLOAT:
fieldsStream.readInt();
return;
case FIELD_IS_NUMERIC_LONG:
case FIELD_IS_NUMERIC_DOUBLE:
fieldsStream.readLong();
return;
default:
throw new CorruptIndexException("Invalid numeric type: " + Integer.toHexString(numeric));
}
} else {
final int length = fieldsStream.readVInt();
fieldsStream.seek(fieldsStream.getFilePointer() + length);
}
}
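// Illustrative sketch of multiplexing a numeric-type code and a binary flag in
// one "bits" byte, in the spirit of readField/skipField above. The constant
// values below are made up for illustration and are not the on-disk Lucene 4.0
// values.
final class FieldBitsDemo {
  static final int IS_BINARY    = 0x01;
  static final int NUMERIC_MASK = 0x06;
  static final int NUMERIC_INT  = 0x02;
  static final int NUMERIC_LONG = 0x04;
  static String describe(int bits) {
    int numeric = bits & NUMERIC_MASK;
    if (numeric == NUMERIC_INT)  return "int";
    if (numeric == NUMERIC_LONG) return "long";
    return (bits & IS_BINARY) != 0 ? "binary" : "string";
  }
  public static void main(String[] args) {
    System.out.println(describe(NUMERIC_LONG)); // long
    System.out.println(describe(IS_BINARY));    // binary
    System.out.println(describe(0));            // string
  }
}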
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsReader.java
public final IndexInput rawDocs(int[] lengths, int startDocID, int numDocs) throws IOException {
seekIndex(startDocID);
long startOffset = indexStream.readLong();
long lastOffset = startOffset;
int count = 0;
while (count < numDocs) {
final long offset;
final int docID = startDocID + count + 1;
assert docID <= numTotalDocs;
if (docID < numTotalDocs)
offset = indexStream.readLong();
else
offset = fieldsStream.length();
lengths[count++] = (int) (offset-lastOffset);
lastOffset = offset;
}
fieldsStream.seek(startOffset);
return fieldsStream;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java
public final void write(Directory d, String name, IOContext context) throws IOException {
assert !(d instanceof CompoundFileDirectory);
IndexOutput output = d.createOutput(name, context);
try {
output.writeInt(-2);
CodecUtil.writeHeader(output, CODEC, VERSION_CURRENT);
if (isSparse()) {
// sparse bit-set more efficiently saved as d-gaps.
writeClearedDgaps(output);
} else {
writeBits(output);
}
assert verifyCount();
} finally {
output.close();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java
private void writeBits(IndexOutput output) throws IOException {
output.writeInt(size()); // write size
output.writeInt(count()); // write count
output.writeBytes(bits, bits.length);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java
private void writeClearedDgaps(IndexOutput output) throws IOException {
output.writeInt(-1); // mark using d-gaps
output.writeInt(size()); // write size
output.writeInt(count()); // write count
int last=0;
int numCleared = size()-count();
for (int i=0; i<bits.length && numCleared>0; i++) {
if (bits[i] != (byte) 0xff) {
output.writeVInt(i-last);
output.writeByte(bits[i]);
last = i;
numCleared -= (8-BYTE_COUNTS[bits[i] & 0xFF]);
assert numCleared >= 0 || (i == (bits.length-1) && numCleared == -(8-(size&7)));
}
}
}
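// Illustrative sketch (list output instead of an IndexOutput) of the d-gap
// encoding used by writeClearedDgaps above: only bytes containing cleared bits
// are written, each preceded by the gap in bytes since the previous written byte.
final class DgapDemo {
  static java.util.List<Integer> encode(byte[] bits) {
    java.util.List<Integer> out = new java.util.ArrayList<>(); // flat (gap, byteValue) pairs
    int last = 0;
    for (int i = 0; i < bits.length; i++) {
      if (bits[i] != (byte) 0xff) { // this byte has at least one cleared bit
        out.add(i - last);          // gap since the previous written byte
        out.add(bits[i] & 0xFF);
        last = i;
      }
    }
    return out;
  }
  public static void main(String[] args) {
    byte[] bits = new byte[1000];
    java.util.Arrays.fill(bits, (byte) 0xff);
    bits[3]   = (byte) 0xfe;
    bits[700] = (byte) 0x7f;
    System.out.println(encode(bits)); // [3, 254, 697, 127]
  }
}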
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java
private void readBits(IndexInput input) throws IOException {
count = input.readInt(); // read count
bits = new byte[getNumBytes(size)]; // allocate bits
input.readBytes(bits, 0, bits.length);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java
private void readSetDgaps(IndexInput input) throws IOException {
size = input.readInt(); // (re)read size
count = input.readInt(); // read count
bits = new byte[getNumBytes(size)]; // allocate bits
int last=0;
int n = count();
while (n>0) {
last += input.readVInt();
bits[last] = input.readByte();
n -= BYTE_COUNTS[bits[last] & 0xFF];
assert n >= 0;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/BitVector.java
private void readClearedDgaps(IndexInput input) throws IOException {
size = input.readInt(); // (re)read size
count = input.readInt(); // read count
bits = new byte[getNumBytes(size)]; // allocate bits
Arrays.fill(bits, (byte) 0xff);
clearUnusedBits();
int last=0;
int numCleared = size()-count();
while (numCleared>0) {
last += input.readVInt();
bits[last] = input.readByte();
numCleared -= 8-BYTE_COUNTS[bits[last] & 0xFF];
assert numCleared >= 0 || (last == (bits.length-1) && numCleared == -(8-(size&7)));
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
Override
public void startDocument(int numVectorFields) throws IOException {
lastFieldName = null;
this.numVectorFields = numVectorFields;
tvx.writeLong(tvd.getFilePointer());
tvx.writeLong(tvf.getFilePointer());
tvd.writeVInt(numVectorFields);
fieldCount = 0;
fps = ArrayUtil.grow(fps, numVectorFields);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
Override
public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets) throws IOException {
assert lastFieldName == null || info.name.compareTo(lastFieldName) > 0: "fieldName=" + info.name + " lastFieldName=" + lastFieldName;
lastFieldName = info.name;
this.positions = positions;
this.offsets = offsets;
lastTerm.length = 0;
fps[fieldCount++] = tvf.getFilePointer();
tvd.writeVInt(info.number);
tvf.writeVInt(numTerms);
byte bits = 0x0;
if (positions)
bits |= Lucene40TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
if (offsets)
bits |= Lucene40TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
tvf.writeByte(bits);
assert fieldCount <= numVectorFields;
if (fieldCount == numVectorFields) {
// last field of the document
// this is crazy because the file format is crazy!
for (int i = 1; i < fieldCount; i++) {
tvd.writeVLong(fps[i] - fps[i-1]);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
Override
public void startTerm(BytesRef term, int freq) throws IOException {
final int prefix = StringHelper.bytesDifference(lastTerm, term);
final int suffix = term.length - prefix;
tvf.writeVInt(prefix);
tvf.writeVInt(suffix);
tvf.writeBytes(term.bytes, term.offset + prefix, suffix);
tvf.writeVInt(freq);
lastTerm.copyBytes(term);
lastPosition = lastOffset = 0;
if (offsets && positions) {
// we might need to buffer if it's a non-bulk merge
offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq);
offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq);
offsetIndex = 0;
offsetFreq = freq;
}
}
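// Illustrative sketch of the term front coding used by startTerm above: each
// term is written as the length of the prefix it shares with the previous term
// plus the remaining suffix. Plain Strings stand in for BytesRef here.
final class FrontCodingDemo {
  static int sharedPrefix(String a, String b) {
    int n = Math.min(a.length(), b.length());
    int i = 0;
    while (i < n && a.charAt(i) == b.charAt(i)) i++;
    return i;
  }
  public static void main(String[] args) {
    String last = "";
    for (String term : new String[] { "apple", "apply", "banana" }) {
      int prefix = sharedPrefix(last, term);
      System.out.println("prefix=" + prefix + " suffix=" + term.substring(prefix));
      last = term;
    }
    // prefix=0 suffix=apple
    // prefix=4 suffix=y
    // prefix=0 suffix=banana
  }
}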
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
Override
public void addProx(int numProx, DataInput positions, DataInput offsets) throws IOException {
// TODO: technically we could just copy bytes and not re-encode if we knew the length...
if (positions != null) {
for (int i = 0; i < numProx; i++) {
tvf.writeVInt(positions.readVInt());
}
}
if (offsets != null) {
for (int i = 0; i < numProx; i++) {
tvf.writeVInt(offsets.readVInt());
tvf.writeVInt(offsets.readVInt());
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
Override
public void addPosition(int position, int startOffset, int endOffset) throws IOException {
if (positions && offsets) {
// write position delta
tvf.writeVInt(position - lastPosition);
lastPosition = position;
// buffer offsets
offsetStartBuffer[offsetIndex] = startOffset;
offsetEndBuffer[offsetIndex] = endOffset;
offsetIndex++;
// dump buffer if we are done
if (offsetIndex == offsetFreq) {
for (int i = 0; i < offsetIndex; i++) {
tvf.writeVInt(offsetStartBuffer[i] - lastOffset);
tvf.writeVInt(offsetEndBuffer[i] - offsetStartBuffer[i]);
lastOffset = offsetEndBuffer[i];
}
}
} else if (positions) {
// write position delta
tvf.writeVInt(position - lastPosition);
lastPosition = position;
} else if (offsets) {
// write offset deltas
tvf.writeVInt(startOffset - lastOffset);
tvf.writeVInt(endOffset - startOffset);
lastOffset = endOffset;
}
}
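// Illustrative sketch of the offsets-only branch above: each offset pair is
// stored as (startOffset - previousEndOffset, endOffset - startOffset).
final class OffsetDeltaDemo {
  public static void main(String[] args) {
    int[][] offsets = { { 0, 5 }, { 6, 11 }, { 12, 20 } };
    int lastEnd = 0;
    for (int[] o : offsets) {
      System.out.println((o[0] - lastEnd) + " " + (o[1] - o[0]));
      lastEnd = o[1];
    }
    // 0 5
    // 1 5
    // 1 8
  }
}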
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
private void addRawDocuments(Lucene40TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs) throws IOException {
long tvdPosition = tvd.getFilePointer();
long tvfPosition = tvf.getFilePointer();
long tvdStart = tvdPosition;
long tvfStart = tvfPosition;
for(int i=0;i<numDocs;i++) {
tvx.writeLong(tvdPosition);
tvdPosition += tvdLengths[i];
tvx.writeLong(tvfPosition);
tvfPosition += tvfLengths[i];
}
tvd.copyBytes(reader.getTvdStream(), tvdPosition-tvdStart);
tvf.copyBytes(reader.getTvfStream(), tvfPosition-tvfStart);
assert tvd.getFilePointer() == tvdPosition;
assert tvf.getFilePointer() == tvfPosition;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
Override
public final int merge(MergeState mergeState) throws IOException {
// Used for bulk-reading raw bytes for term vectors
int rawDocLengths[] = new int[MAX_RAW_MERGE_DOCS];
int rawDocLengths2[] = new int[MAX_RAW_MERGE_DOCS];
int idx = 0;
int numDocs = 0;
for (final MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
Lucene40TermVectorsReader matchingVectorsReader = null;
if (matchingSegmentReader != null) {
TermVectorsReader vectorsReader = matchingSegmentReader.getTermVectorsReader();
if (vectorsReader != null && vectorsReader instanceof Lucene40TermVectorsReader) {
matchingVectorsReader = (Lucene40TermVectorsReader) vectorsReader;
}
}
if (reader.liveDocs != null) {
numDocs += copyVectorsWithDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
} else {
numDocs += copyVectorsNoDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
}
}
finish(mergeState.fieldInfos, numDocs);
return numDocs;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
private int copyVectorsWithDeletions(MergeState mergeState,
final Lucene40TermVectorsReader matchingVectorsReader,
final MergeState.IndexReaderAndLiveDocs reader,
int rawDocLengths[],
int rawDocLengths2[])
throws IOException, MergeAbortedException {
final int maxDoc = reader.reader.maxDoc();
final Bits liveDocs = reader.liveDocs;
int totalNumDocs = 0;
if (matchingVectorsReader != null) {
// We can bulk-copy because the fieldInfos are "congruent"
for (int docNum = 0; docNum < maxDoc;) {
if (!liveDocs.get(docNum)) {
// skip deleted docs
++docNum;
continue;
}
// We can optimize this case (doing a bulk byte copy) since the field
// numbers are identical
int start = docNum, numDocs = 0;
do {
docNum++;
numDocs++;
if (docNum >= maxDoc) break;
if (!liveDocs.get(docNum)) {
docNum++;
break;
}
} while(numDocs < MAX_RAW_MERGE_DOCS);
matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
totalNumDocs += numDocs;
mergeState.checkAbort.work(300 * numDocs);
}
} else {
for (int docNum = 0; docNum < maxDoc; docNum++) {
if (!liveDocs.get(docNum)) {
// skip deleted docs
continue;
}
// NOTE: it's very important to first assign to vectors then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
Fields vectors = reader.reader.getTermVectors(docNum);
addAllDocVectors(vectors, mergeState.fieldInfos);
totalNumDocs++;
mergeState.checkAbort.work(300);
}
}
return totalNumDocs;
}
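// Illustrative sketch of the chunking in copyVectorsWithDeletions: live
// documents are grouped into contiguous runs, capped at a maximum chunk size,
// so each run can be bulk-copied in one call. A boolean[] stands in for the
// Bits live-docs set.
final class LiveRunDemo {
  static java.util.List<int[]> runs(boolean[] live, int maxChunk) {
    java.util.List<int[]> result = new java.util.ArrayList<>();
    int doc = 0;
    while (doc < live.length) {
      if (!live[doc]) { doc++; continue; } // skip deleted docs
      int start = doc, num = 0;
      do {
        doc++;
        num++;
      } while (num < maxChunk && doc < live.length && live[doc]);
      result.add(new int[] { start, num });
    }
    return result;
  }
  public static void main(String[] args) {
    boolean[] live = { true, true, false, true, true, true, false };
    for (int[] r : runs(live, 4)) {
      System.out.println("start=" + r[0] + " numDocs=" + r[1]); // start=0 numDocs=2, then start=3 numDocs=3
    }
  }
}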
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
private int copyVectorsNoDeletions(MergeState mergeState,
final Lucene40TermVectorsReader matchingVectorsReader,
final MergeState.IndexReaderAndLiveDocs reader,
int rawDocLengths[],
int rawDocLengths2[])
throws IOException, MergeAbortedException {
final int maxDoc = reader.reader.maxDoc();
if (matchingVectorsReader != null) {
// We can bulk-copy because the fieldInfos are "congruent"
int docCount = 0;
while (docCount < maxDoc) {
int len = Math.min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, docCount, len);
addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
docCount += len;
mergeState.checkAbort.work(300 * len);
}
} else {
for (int docNum = 0; docNum < maxDoc; docNum++) {
// NOTE: it's very important to first assign to vectors then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
Fields vectors = reader.reader.getTermVectors(docNum);
addAllDocVectors(vectors, mergeState.fieldInfos);
mergeState.checkAbort.work(300);
}
}
return maxDoc;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
Override
public void finish(FieldInfos fis, int numDocs) throws IOException {
if (HEADER_LENGTH_INDEX+((long) numDocs)*16 != tvx.getFilePointer())
// This is most likely a bug in Sun JRE 1.6.0_04/_05;
// we detect that the bug has struck, here, and
// throw an exception to prevent the corruption from
// entering the index. See LUCENE-1282 for
// details.
throw new RuntimeException("tvx size mismatch: mergedDocs is " + numDocs + " but tvx size is " + tvx.getFilePointer() + " file=" + tvx.toString() + "; now aborting this merge to prevent index corruption");
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
Override
public void close() throws IOException {
// make an effort to close all streams we can but remember and re-throw
// the first exception encountered in this process
IOUtils.close(tvx, tvd, tvf);
tvx = tvd = tvf = null;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsWriter.java
Override
public Comparator<BytesRef> getComparator() throws IOException {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java
Override
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si,
FieldInfos fn, IOContext context) throws IOException {
return new Lucene40StoredFieldsReader(directory, si, fn, context);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40StoredFieldsFormat.java
Override
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si,
IOContext context) throws IOException {
return new Lucene40StoredFieldsWriter(directory, si.name, context);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40SegmentInfoReader.java
Override
public SegmentInfo read(Directory dir, String segment, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segment, "", Lucene40SegmentInfoFormat.SI_EXTENSION);
final IndexInput input = dir.openInput(fileName, context);
boolean success = false;
try {
CodecUtil.checkHeader(input, Lucene40SegmentInfoFormat.CODEC_NAME,
Lucene40SegmentInfoFormat.VERSION_START,
Lucene40SegmentInfoFormat.VERSION_CURRENT);
final String version = input.readString();
final int docCount = input.readInt();
final boolean isCompoundFile = input.readByte() == SegmentInfo.YES;
final Map<String,String> diagnostics = input.readStringStringMap();
final Map<String,String> attributes = input.readStringStringMap();
final Set<String> files = input.readStringSet();
final SegmentInfo si = new SegmentInfo(dir, version, segment, docCount, isCompoundFile,
null, diagnostics, Collections.unmodifiableMap(attributes));
si.setFiles(files);
success = true;
return si;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(input);
} else {
input.close();
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java
Override
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
return new Lucene40DocValuesConsumer(state, Lucene40DocValuesConsumer.DOC_VALUES_SEGMENT_SUFFIX);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesFormat.java
Override
public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
return new Lucene40DocValuesProducer(state, Lucene40DocValuesConsumer.DOC_VALUES_SEGMENT_SUFFIX);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java
Override
public PostingsReaderBase postingsReaderBase(SegmentReadState state) throws IOException {
return new Lucene40PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsBaseFormat.java
Override
public PostingsWriterBase postingsWriterBase(SegmentWriteState state) throws IOException {
return new Lucene40PostingsWriter(state);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
Override
public void start(IndexOutput termsOut) throws IOException {
this.termsOut = termsOut;
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
termsOut.writeInt(skipMinimum); // write skipMinimum
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
Override
public void startDoc(int docID, int termDocFreq) throws IOException {
// if (DEBUG) System.out.println("SPW: startDoc seg=" + segment + " docID=" + docID + " tf=" + termDocFreq + " freqOut.fp=" + freqOut.getFilePointer());
final int delta = docID - lastDocID;
if (docID < 0 || (df > 0 && delta <= 0)) {
throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " ) (freqOut: " + freqOut + ")");
}
if ((++df % skipInterval) == 0) {
skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength, storeOffsets, lastOffsetLength);
skipListWriter.bufferSkip(df);
}
assert docID < totalNumDocs: "docID=" + docID + " totalNumDocs=" + totalNumDocs;
lastDocID = docID;
if (indexOptions == IndexOptions.DOCS_ONLY) {
freqOut.writeVInt(delta);
} else if (1 == termDocFreq) {
freqOut.writeVInt((delta<<1) | 1);
} else {
freqOut.writeVInt(delta<<1);
freqOut.writeVInt(termDocFreq);
}
lastPosition = 0;
lastOffset = 0;
}
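// Illustrative sketch (list output instead of an IndexOutput) of the
// freq-in-low-bit trick used by startDoc above: when the term frequency is 1
// the doc delta is written as (delta << 1) | 1 and the freq is omitted;
// otherwise delta << 1 is followed by the freq.
final class DocFreqCodeDemo {
  static java.util.List<Integer> encode(int delta, int freq) {
    java.util.List<Integer> out = new java.util.ArrayList<>();
    if (freq == 1) {
      out.add((delta << 1) | 1);
    } else {
      out.add(delta << 1);
      out.add(freq);
    }
    return out;
  }
  static int[] decode(java.util.List<Integer> in) {
    int code = in.get(0);
    int delta = code >>> 1;
    int freq = (code & 1) != 0 ? 1 : in.get(1);
    return new int[] { delta, freq };
  }
  public static void main(String[] args) {
    System.out.println(java.util.Arrays.toString(decode(encode(7, 1)))); // [7, 1]
    System.out.println(java.util.Arrays.toString(decode(encode(7, 3)))); // [7, 3]
  }
}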
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
Override
public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
//if (DEBUG) System.out.println("SPW: addPos pos=" + position + " payload=" + (payload == null ? "null" : (payload.length + " bytes")) + " proxFP=" + proxOut.getFilePointer());
assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 : "invalid indexOptions: " + indexOptions;
assert proxOut != null;
final int delta = position - lastPosition;
assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it)
lastPosition = position;
int payloadLength = 0;
if (storePayloads) {
payloadLength = payload == null ? 0 : payload.length;
if (payloadLength != lastPayloadLength) {
lastPayloadLength = payloadLength;
proxOut.writeVInt((delta<<1)|1);
proxOut.writeVInt(payloadLength);
} else {
proxOut.writeVInt(delta << 1);
}
} else {
proxOut.writeVInt(delta);
}
if (storeOffsets) {
// don't use startOffset - lastEndOffset, because this creates lots of negative VInts for synonyms,
// and the numbers aren't that much smaller anyway.
int offsetDelta = startOffset - lastOffset;
int offsetLength = endOffset - startOffset;
if (offsetLength != lastOffsetLength) {
proxOut.writeVInt(offsetDelta << 1 | 1);
proxOut.writeVInt(offsetLength);
} else {
proxOut.writeVInt(offsetDelta << 1);
}
lastOffset = startOffset;
lastOffsetLength = offsetLength;
}
if (payloadLength > 0) {
proxOut.writeBytes(payload.bytes, payload.offset, payloadLength);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
Override
public void finishTerm(TermStats stats) throws IOException {
// if (DEBUG) System.out.println("SPW: finishTerm seg=" + segment + " freqStart=" + freqStart);
assert stats.docFreq > 0;
// TODO: wasteful we are counting this (counting # docs
// for this term) in two places?
assert stats.docFreq == df;
final int skipOffset;
if (df >= skipMinimum) {
skipOffset = (int) (skipListWriter.writeSkip(freqOut)-freqStart);
} else {
skipOffset = -1;
}
pendingTerms.add(new PendingTerm(freqStart, proxStart, skipOffset));
lastDocID = 0;
df = 0;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
Override
public void flushTermsBlock(int start, int count) throws IOException {
//if (DEBUG) System.out.println("SPW: flushTermsBlock start=" + start + " count=" + count + " left=" + (pendingTerms.size()-count) + " pendingTerms.size()=" + pendingTerms.size());
if (count == 0) {
termsOut.writeByte((byte) 0);
return;
}
assert start <= pendingTerms.size();
assert count <= start;
final int limit = pendingTerms.size() - start + count;
final PendingTerm firstTerm = pendingTerms.get(limit - count);
// First term in block is abs coded:
bytesWriter.writeVLong(firstTerm.freqStart);
if (firstTerm.skipOffset != -1) {
assert firstTerm.skipOffset > 0;
bytesWriter.writeVInt(firstTerm.skipOffset);
}
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
bytesWriter.writeVLong(firstTerm.proxStart);
}
long lastFreqStart = firstTerm.freqStart;
long lastProxStart = firstTerm.proxStart;
for(int idx=limit-count+1; idx<limit; idx++) {
final PendingTerm term = pendingTerms.get(idx);
//if (DEBUG) System.out.println(" write term freqStart=" + term.freqStart);
// The rest of the terms are delta coded:
bytesWriter.writeVLong(term.freqStart - lastFreqStart);
lastFreqStart = term.freqStart;
if (term.skipOffset != -1) {
assert term.skipOffset > 0;
bytesWriter.writeVInt(term.skipOffset);
}
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
bytesWriter.writeVLong(term.proxStart - lastProxStart);
lastProxStart = term.proxStart;
}
}
termsOut.writeVInt((int) bytesWriter.getFilePointer());
bytesWriter.writeTo(termsOut);
bytesWriter.reset();
// Remove the terms we just wrote:
pendingTerms.subList(limit-count, limit).clear();
}
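// Illustrative sketch of the block metadata coding in flushTermsBlock: the
// first term's freq file pointer is written absolutely, and each following
// pointer is written as a delta from the previous one.
final class BlockDeltaDemo {
  static java.util.List<Long> encode(long[] freqStarts) {
    java.util.List<Long> out = new java.util.ArrayList<>();
    out.add(freqStarts[0]);                       // first term: absolute
    for (int i = 1; i < freqStarts.length; i++) {
      out.add(freqStarts[i] - freqStarts[i - 1]); // rest: delta coded
    }
    return out;
  }
  public static void main(String[] args) {
    System.out.println(encode(new long[] { 1024, 1040, 1100 })); // [1024, 16, 60]
  }
}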
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
Override
public void close() throws IOException {
try {
freqOut.close();
} finally {
if (proxOut != null) {
proxOut.close();
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesConsumer.java
Override
protected Directory getDirectory() throws IOException {
// lazy init
if (directory == null) {
directory = new CompoundFileDirectory(mainDirectory,
IndexFileNames.segmentFileName(segmentName, segmentSuffix,
IndexFileNames.COMPOUND_FILE_EXTENSION), context, true);
}
return directory;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesConsumer.java
Override
public void close() throws IOException {
if (directory != null) {
directory.close();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
void seekTvx(final int docNum) throws IOException {
tvx.seek(docNum * 16L + HEADER_LENGTH_INDEX);
}
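// Illustrative sketch of the fixed-width addressing behind seekTvx above: with
// two 8-byte pointers per document, document N's index entry starts at
// header + N * 16. The header length used here is a made-up value.
final class TvxAddressDemo {
  public static void main(String[] args) {
    long headerLength = 12; // illustrative value only
    for (int docNum = 0; docNum < 3; docNum++) {
      System.out.println(headerLength + docNum * 16L); // 12, 28, 44
    }
  }
}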
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
final void rawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs) throws IOException {
if (tvx == null) {
Arrays.fill(tvdLengths, 0);
Arrays.fill(tvfLengths, 0);
return;
}
seekTvx(startDocID);
long tvdPosition = tvx.readLong();
tvd.seek(tvdPosition);
long tvfPosition = tvx.readLong();
tvf.seek(tvfPosition);
long lastTvdPosition = tvdPosition;
long lastTvfPosition = tvfPosition;
int count = 0;
while (count < numDocs) {
final int docID = startDocID + count + 1;
assert docID <= numTotalDocs;
if (docID < numTotalDocs) {
tvdPosition = tvx.readLong();
tvfPosition = tvx.readLong();
} else {
tvdPosition = tvd.length();
tvfPosition = tvf.length();
assert count == numDocs-1;
}
tvdLengths[count] = (int) (tvdPosition-lastTvdPosition);
tvfLengths[count] = (int) (tvfPosition-lastTvfPosition);
count++;
lastTvdPosition = tvdPosition;
lastTvfPosition = tvfPosition;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
public void close() throws IOException {
IOUtils.close(tvx, tvd, tvf);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
Override
public FieldsEnum iterator() throws IOException {
return new FieldsEnum() {
private int fieldUpto;
@Override
public String next() throws IOException {
if (fieldNumbers != null && fieldUpto < fieldNumbers.length) {
return fieldInfos.fieldInfo(fieldNumbers[fieldUpto++]).name;
} else {
return null;
}
}
@Override
public Terms terms() throws IOException {
return TVFields.this.terms(fieldInfos.fieldInfo(fieldNumbers[fieldUpto-1]).name);
}
};
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
Override
public String next() throws IOException {
if (fieldNumbers != null && fieldUpto < fieldNumbers.length) {
return fieldInfos.fieldInfo(fieldNumbers[fieldUpto++]).name;
} else {
return null;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
Override
public Terms terms() throws IOException {
return TVFields.this.terms(fieldInfos.fieldInfo(fieldNumbers[fieldUpto-1]).name);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
Override
public Terms terms(String field) throws IOException {
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
if (fieldInfo == null) {
// No such field
return null;
}
final Integer fieldIndex = fieldNumberToIndex.get(fieldInfo.number);
if (fieldIndex == null) {
// Term vectors were not indexed for this field
return null;
}
return new TVTerms(fieldFPs[fieldIndex]);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
TVTermsEnum termsEnum;
if (reuse instanceof TVTermsEnum) {
termsEnum = (TVTermsEnum) reuse;
if (!termsEnum.canReuse(tvf)) {
termsEnum = new TVTermsEnum();
}
} else {
termsEnum = new TVTermsEnum();
}
termsEnum.reset(numTerms, tvfFPStart);
return termsEnum;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
public void reset(int numTerms, long tvfFPStart) throws IOException {
this.numTerms = numTerms;
nextTerm = 0;
tvf.seek(tvfFPStart);
final byte bits = tvf.readByte();
storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
tvfFP = 1+tvfFPStart;
positions = null;
startOffsets = null;
endOffsets = null;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
Override
public SeekStatus seekCeil(BytesRef text, boolean useCache)
throws IOException {
if (nextTerm != 0) {
final int cmp = text.compareTo(term);
if (cmp < 0) {
nextTerm = 0;
tvf.seek(tvfFP);
} else if (cmp == 0) {
return SeekStatus.FOUND;
}
}
while (next() != null) {
final int cmp = text.compareTo(term);
if (cmp < 0) {
return SeekStatus.NOT_FOUND;
} else if (cmp == 0) {
return SeekStatus.FOUND;
}
}
return SeekStatus.END;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
Override
public BytesRef next() throws IOException {
if (nextTerm >= numTerms) {
return null;
}
term.copyBytes(lastTerm);
final int start = tvf.readVInt();
final int deltaLen = tvf.readVInt();
term.length = start + deltaLen;
term.grow(term.length);
tvf.readBytes(term.bytes, start, deltaLen);
freq = tvf.readVInt();
if (storePositions) {
// TODO: we could maybe reuse last array, if we can
// somehow be careful about consumer never using two
// D&PEnums at once...
positions = new int[freq];
int pos = 0;
for(int posUpto=0;posUpto<freq;posUpto++) {
pos += tvf.readVInt();
positions[posUpto] = pos;
}
}
if (storeOffsets) {
startOffsets = new int[freq];
endOffsets = new int[freq];
int offset = 0;
for(int posUpto=0;posUpto<freq;posUpto++) {
startOffsets[posUpto] = offset + tvf.readVInt();
offset = endOffsets[posUpto] = startOffsets[posUpto] + tvf.readVInt();
}
}
lastTerm.copyBytes(term);
nextTerm++;
return term;
}
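// Illustrative sketch of the decode side of the term front coding read in
// next() above: the new term is the first `prefix` characters of the previous
// term followed by the suffix bytes. Plain Strings stand in for BytesRef.
final class FrontDecodeDemo {
  public static void main(String[] args) {
    int[]    prefixes = { 0,       4,   0        };
    String[] suffixes = { "apple", "y", "banana" };
    String last = "";
    for (int i = 0; i < prefixes.length; i++) {
      String term = last.substring(0, prefixes[i]) + suffixes[i];
      System.out.println(term);
      last = term;
    }
    // apple
    // apply
    // banana
  }
}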
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs /* ignored */) throws IOException {
TVDocsEnum docsEnum;
if (reuse != null && reuse instanceof TVDocsEnum) {
docsEnum = (TVDocsEnum) reuse;
} else {
docsEnum = new TVDocsEnum();
}
docsEnum.reset(liveDocs, freq);
return docsEnum;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
if (needsOffsets && !storeOffsets) {
return null;
}
if (!storePositions && !storeOffsets) {
return null;
}
TVDocsAndPositionsEnum docsAndPositionsEnum;
if (reuse != null && reuse instanceof TVDocsAndPositionsEnum) {
docsAndPositionsEnum = (TVDocsAndPositionsEnum) reuse;
} else {
docsAndPositionsEnum = new TVDocsAndPositionsEnum();
}
docsAndPositionsEnum.reset(liveDocs, positions, startOffsets, endOffsets);
return docsAndPositionsEnum;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
Override
public int freq() throws IOException {
return freq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
Override
public int freq() throws IOException {
if (positions != null) {
return positions.length;
} else {
assert startOffsets != null;
return startOffsets.length;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
Override
public Fields get(int docID) throws IOException {
if (docID < 0 || docID >= numTotalDocs) {
throw new IllegalArgumentException("doID=" + docID + " is out of bounds [0.." + (numTotalDocs-1) + "]");
}
if (tvx != null) {
Fields fields = new TVFields(docID);
if (fields.size() == 0) {
// TODO: we can improve writer here, eg write 0 into
// tvx file, so we know on first read from tvx that
// this doc has no TVs
return null;
} else {
return fields;
}
} else {
return null;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase docs = new Lucene40PostingsWriter(state);
// TODO: should we make the terms index more easily
// pluggable? Ie so that this codec would record which
// index impl was used, and switch on loading?
// Or... you must make a new Codec for this?
boolean success = false;
try {
FieldsConsumer ret = new BlockTreeTermsWriter(state, docs, minBlockSize, maxBlockSize);
success = true;
return ret;
} finally {
if (!success) {
docs.close();
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase postings = new Lucene40PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
boolean success = false;
try {
FieldsProducer ret = new BlockTreeTermsReader(
state.dir,
state.fieldInfos,
state.segmentInfo.name,
postings,
state.context,
state.segmentSuffix,
state.termsIndexDivisor);
success = true;
return ret;
} finally {
if (!success) {
postings.close();
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public void init(IndexInput termsIn) throws IOException {
// Make sure we are talking to the matching past writer
CodecUtil.checkHeader(termsIn, Lucene40PostingsWriter.CODEC,
Lucene40PostingsWriter.VERSION_START, Lucene40PostingsWriter.VERSION_START);
skipInterval = termsIn.readInt();
maxSkipLevels = termsIn.readInt();
skipMinimum = termsIn.readInt();
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public void close() throws IOException {
try {
if (freqIn != null) {
freqIn.close();
}
} finally {
if (proxIn != null) {
proxIn.close();
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
final StandardTermState termState = (StandardTermState) _termState;
final int len = termsIn.readVInt();
// if (DEBUG) System.out.println(" SPR.readTermsBlock bytes=" + len + " ts=" + _termState);
if (termState.bytes == null) {
termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
termState.bytesReader = new ByteArrayDataInput();
} else if (termState.bytes.length < len) {
termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
}
termsIn.readBytes(termState.bytes, 0, len);
termState.bytesReader.reset(termState.bytes, 0, len);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState)
throws IOException {
final StandardTermState termState = (StandardTermState) _termState;
// if (DEBUG) System.out.println("SPR: nextTerm seg=" + segment + " tbOrd=" + termState.termBlockOrd + " bytesReader.fp=" + termState.bytesReader.getPosition());
final boolean isFirstTerm = termState.termBlockOrd == 0;
if (isFirstTerm) {
termState.freqOffset = termState.bytesReader.readVLong();
} else {
termState.freqOffset += termState.bytesReader.readVLong();
}
/*
if (DEBUG) {
System.out.println(" dF=" + termState.docFreq);
System.out.println(" freqFP=" + termState.freqOffset);
}
*/
assert termState.freqOffset < freqIn.length();
if (termState.docFreq >= skipMinimum) {
termState.skipOffset = termState.bytesReader.readVInt();
// if (DEBUG) System.out.println(" skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length());
assert termState.freqOffset + termState.skipOffset < freqIn.length();
} else {
// undefined
}
if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
if (isFirstTerm) {
termState.proxOffset = termState.bytesReader.readVLong();
} else {
termState.proxOffset += termState.bytesReader.readVLong();
}
// if (DEBUG) System.out.println(" proxFP=" + termState.proxOffset);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
if (needsFreqs && fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
return null;
} else if (canReuse(reuse, liveDocs)) {
// if (DEBUG) System.out.println("SPR.docs ts=" + termState);
return ((SegmentDocsEnumBase) reuse).reset(fieldInfo, (StandardTermState)termState);
}
return newDocsEnum(liveDocs, fieldInfo, (StandardTermState)termState);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
private DocsEnum newDocsEnum(Bits liveDocs, FieldInfo fieldInfo, StandardTermState termState) throws IOException {
if (liveDocs == null) {
return new AllDocsSegmentDocsEnum(freqIn).reset(fieldInfo, termState);
} else {
return new LiveDocsSegmentDocsEnum(freqIn, liveDocs).reset(fieldInfo, termState);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs,
DocsAndPositionsEnum reuse, boolean needsOffsets)
throws IOException {
boolean hasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (needsOffsets && !hasOffsets) {
return null; // not available
}
// TODO: refactor
if (fieldInfo.hasPayloads() || hasOffsets) {
SegmentFullPositionsEnum docsEnum;
if (reuse == null || !(reuse instanceof SegmentFullPositionsEnum)) {
docsEnum = new SegmentFullPositionsEnum(freqIn, proxIn);
} else {
docsEnum = (SegmentFullPositionsEnum) reuse;
if (docsEnum.startFreqIn != freqIn) {
// If you are using ParallelReader, and pass in a
// reused DocsEnum, it could have come from another
// reader also using the standard codec
docsEnum = new SegmentFullPositionsEnum(freqIn, proxIn);
}
}
return docsEnum.reset(fieldInfo, (StandardTermState) termState, liveDocs);
} else {
SegmentDocsAndPositionsEnum docsEnum;
if (reuse == null || !(reuse instanceof SegmentDocsAndPositionsEnum)) {
docsEnum = new SegmentDocsAndPositionsEnum(freqIn, proxIn);
} else {
docsEnum = (SegmentDocsAndPositionsEnum) reuse;
if (docsEnum.startFreqIn != freqIn) {
// If you are using ParallelReader, and pass in a
// reused DocsEnum, it could have come from another
// reader also using the standard codec
docsEnum = new SegmentDocsAndPositionsEnum(freqIn, proxIn);
}
}
return docsEnum.reset(fieldInfo, (StandardTermState) termState, liveDocs);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
DocsEnum reset(FieldInfo fieldInfo, StandardTermState termState) throws IOException {
indexOmitsTF = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY;
storePayloads = fieldInfo.hasPayloads();
storeOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
freqOffset = termState.freqOffset;
skipOffset = termState.skipOffset;
// TODO: for full enum case (eg segment merging) this
// seek is unnecessary; maybe we can avoid in such
// cases
freqIn.seek(termState.freqOffset);
limit = termState.docFreq;
assert limit > 0;
ord = 0;
doc = -1;
accum = 0;
// if (DEBUG) System.out.println(" sde limit=" + limit + " freqFP=" + freqOffset);
skipped = false;
start = -1;
count = 0;
maxBufferedDocId = -1;
return this;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public final int freq() throws IOException {
assert !indexOmitsTF;
return freq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public final int advance(int target) throws IOException {
// last doc in our buffer is >= target, binary search + next()
if (++start < count && maxBufferedDocId >= target) {
if ((count-start) > 32) { // 32 seemed to be a sweet spot here, so use binary search when many results are pending
start = binarySearch(count - 1, start, target, docs);
return nextDoc();
} else {
return linearScan(target);
}
}
start = count; // buffer is consumed
return doc = skipTo(target);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
final int readFreq(final IndexInput freqIn, final int code)
throws IOException {
if ((code & 1) != 0) { // if low bit is set
return 1; // freq is one
} else {
return freqIn.readVInt(); // else read freq
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
protected final int refill() throws IOException {
final int doc = nextUnreadDoc();
count = 0;
start = -1;
if (doc == NO_MORE_DOCS) {
return NO_MORE_DOCS;
}
final int numDocs = Math.min(docs.length, limit - ord);
ord += numDocs;
if (indexOmitsTF) {
count = fillDocs(numDocs);
} else {
count = fillDocsAndFreqs(numDocs);
}
maxBufferedDocId = count > 0 ? docs[count-1] : NO_MORE_DOCS;
return doc;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
private final int fillDocs(int size) throws IOException {
final IndexInput freqIn = this.freqIn;
final int docs[] = this.docs;
int docAc = accum;
for (int i = 0; i < size; i++) {
docAc += freqIn.readVInt();
docs[i] = docAc;
}
accum = docAc;
return size;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
private final int fillDocsAndFreqs(int size) throws IOException {
final IndexInput freqIn = this.freqIn;
final int docs[] = this.docs;
final int freqs[] = this.freqs;
int docAc = accum;
for (int i = 0; i < size; i++) {
final int code = freqIn.readVInt();
docAc += code >>> 1; // shift off low bit
freqs[i] = readFreq(freqIn, code);
docs[i] = docAc;
}
accum = docAc;
return size;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
private final int skipTo(int target) throws IOException {
if ((target - skipInterval) >= accum && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close.
if (skipper == null) {
// This is the first time this enum has ever been used for skipping -- do lazy init
skipper = new Lucene40SkipListReader((IndexInput) freqIn.clone(), maxSkipLevels, skipInterval);
}
if (!skipped) {
// This is the first time this posting has
// skipped since reset() was called, so now we
// load the skip data for this posting
skipper.init(freqOffset + skipOffset,
freqOffset, 0,
limit, storePayloads, storeOffsets);
skipped = true;
}
final int newOrd = skipper.skipTo(target);
if (newOrd > ord) {
// Skipper moved
ord = newOrd;
accum = skipper.getDoc();
freqIn.seek(skipper.getFreqPointer());
}
}
return scanTo(target);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public final int nextDoc() throws IOException {
if (++start < count) {
freq = freqs[start];
return doc = docs[start];
}
return doc = refill();
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
protected final int linearScan(int scanTo) throws IOException {
final int[] docs = this.docs;
final int upTo = count;
for (int i = start; i < upTo; i++) {
final int d = docs[i];
if (scanTo <= d) {
start = i;
freq = freqs[i];
return doc = docs[i];
}
}
return doc = refill();
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
protected int scanTo(int target) throws IOException {
int docAcc = accum;
int frq = 1;
final IndexInput freqIn = this.freqIn;
final boolean omitTF = indexOmitsTF;
final int loopLimit = limit;
for (int i = ord; i < loopLimit; i++) {
int code = freqIn.readVInt();
if (omitTF) {
docAcc += code;
} else {
docAcc += code >>> 1; // shift off low bit
frq = readFreq(freqIn, code);
}
if (docAcc >= target) {
freq = frq;
ord = i + 1;
return accum = docAcc;
}
}
ord = limit;
freq = frq;
accum = docAcc;
return NO_MORE_DOCS;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
protected final int nextUnreadDoc() throws IOException {
if (ord++ < limit) {
int code = freqIn.readVInt();
if (indexOmitsTF) {
accum += code;
} else {
accum += code >>> 1; // shift off low bit
freq = readFreq(freqIn, code);
}
return accum;
} else {
return NO_MORE_DOCS;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public final int nextDoc() throws IOException {
final Bits liveDocs = this.liveDocs;
for (int i = start+1; i < count; i++) {
int d = docs[i];
if (liveDocs.get(d)) {
start = i;
freq = freqs[i];
return doc = d;
}
}
start = count;
return doc = refill();
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
protected final int linearScan(int scanTo) throws IOException {
final int[] docs = this.docs;
final int upTo = count;
final Bits liveDocs = this.liveDocs;
for (int i = start; i < upTo; i++) {
int d = docs[i];
if (scanTo <= d && liveDocs.get(d)) {
start = i;
freq = freqs[i];
return doc = docs[i];
}
}
return doc = refill();
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
protected int scanTo(int target) throws IOException {
int docAcc = accum;
int frq = 1;
final IndexInput freqIn = this.freqIn;
final boolean omitTF = indexOmitsTF;
final int loopLimit = limit;
final Bits liveDocs = this.liveDocs;
for (int i = ord; i < loopLimit; i++) {
int code = freqIn.readVInt();
if (omitTF) {
docAcc += code;
} else {
docAcc += code >>> 1; // shift off low bit
frq = readFreq(freqIn, code);
}
if (docAcc >= target && liveDocs.get(docAcc)) {
freq = frq;
ord = i + 1;
return accum = docAcc;
}
}
ord = limit;
freq = frq;
accum = docAcc;
return NO_MORE_DOCS;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
protected final int nextUnreadDoc() throws IOException {
int docAcc = accum;
int frq = 1;
final IndexInput freqIn = this.freqIn;
final boolean omitTF = indexOmitsTF;
final int loopLimit = limit;
final Bits liveDocs = this.liveDocs;
for (int i = ord; i < loopLimit; i++) {
int code = freqIn.readVInt();
if (omitTF) {
docAcc += code;
} else {
docAcc += code >>> 1; // shift off low bit
frq = readFreq(freqIn, code);
}
if (liveDocs.get(docAcc)) {
freq = frq;
ord = i + 1;
return accum = docAcc;
}
}
ord = limit;
freq = frq;
accum = docAcc;
return NO_MORE_DOCS;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
assert fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
assert !fieldInfo.hasPayloads();
this.liveDocs = liveDocs;
// TODO: for full enum case (eg segment merging) this
// seek is unnecessary; maybe we can avoid in such
// cases
freqIn.seek(termState.freqOffset);
lazyProxPointer = termState.proxOffset;
limit = termState.docFreq;
assert limit > 0;
ord = 0;
doc = -1;
accum = 0;
position = 0;
skipped = false;
posPendingCount = 0;
freqOffset = termState.freqOffset;
proxOffset = termState.proxOffset;
skipOffset = termState.skipOffset;
// if (DEBUG) System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset);
return this;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public int nextDoc() throws IOException {
// if (DEBUG) System.out.println("SPR.nextDoc seg=" + segment + " freqIn.fp=" + freqIn.getFilePointer());
while(true) {
if (ord == limit) {
// if (DEBUG) System.out.println(" return END");
return doc = NO_MORE_DOCS;
}
ord++;
// Decode next doc/freq pair
final int code = freqIn.readVInt();
accum += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
freq = freqIn.readVInt(); // else read freq
}
posPendingCount += freq;
if (liveDocs == null || liveDocs.get(accum)) {
break;
}
}
position = 0;
// if (DEBUG) System.out.println(" return doc=" + doc);
return (doc = accum);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public int freq() throws IOException {
return freq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public int advance(int target) throws IOException {
//System.out.println("StandardR.D&PE advance target=" + target);
if ((target - skipInterval) >= doc && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close
if (skipper == null) {
// This is the first time this enum has ever been used for skipping -- do lazy init
skipper = new Lucene40SkipListReader((IndexInput) freqIn.clone(), maxSkipLevels, skipInterval);
}
if (!skipped) {
// This is the first time this posting has
// skipped, since reset() was called, so now we
// load the skip data for this posting
skipper.init(freqOffset+skipOffset,
freqOffset, proxOffset,
limit, false, false);
skipped = true;
}
final int newOrd = skipper.skipTo(target);
if (newOrd > ord) {
// Skipper moved
ord = newOrd;
doc = accum = skipper.getDoc();
freqIn.seek(skipper.getFreqPointer());
lazyProxPointer = skipper.getProxPointer();
posPendingCount = 0;
position = 0;
}
}
// Now, linear scan for the rest:
do {
nextDoc();
} while (target > doc);
return doc;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public int nextPosition() throws IOException {
if (lazyProxPointer != -1) {
proxIn.seek(lazyProxPointer);
lazyProxPointer = -1;
}
// scan over any docs that were iterated without their positions
if (posPendingCount > freq) {
position = 0;
while(posPendingCount != freq) {
if ((proxIn.readByte() & 0x80) == 0) {
posPendingCount--;
}
}
}
position += proxIn.readVInt();
posPendingCount--;
assert posPendingCount >= 0: "nextPosition() was called too many times (more than freq() times) posPendingCount=" + posPendingCount;
return position;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public int startOffset() throws IOException {
return -1;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public int endOffset() throws IOException {
return -1;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public BytesRef getPayload() throws IOException {
throw new IOException("No payloads exist for this field!");
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
public SegmentFullPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
storeOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
storePayloads = fieldInfo.hasPayloads();
assert fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
assert storePayloads || storeOffsets;
if (payload == null) {
payload = new BytesRef();
payload.bytes = new byte[1];
}
this.liveDocs = liveDocs;
// TODO: for full enum case (eg segment merging) this
// seek is unnecessary; maybe we can avoid in such
// cases
freqIn.seek(termState.freqOffset);
lazyProxPointer = termState.proxOffset;
limit = termState.docFreq;
ord = 0;
doc = -1;
accum = 0;
position = 0;
startOffset = 0;
skipped = false;
posPendingCount = 0;
payloadPending = false;
freqOffset = termState.freqOffset;
proxOffset = termState.proxOffset;
skipOffset = termState.skipOffset;
//System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset + " this=" + this);
return this;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public int nextDoc() throws IOException {
while(true) {
if (ord == limit) {
//System.out.println("StandardR.D&PE seg=" + segment + " nextDoc return doc=END");
return doc = NO_MORE_DOCS;
}
ord++;
// Decode next doc/freq pair
final int code = freqIn.readVInt();
accum += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
freq = freqIn.readVInt(); // else read freq
}
posPendingCount += freq;
if (liveDocs == null || liveDocs.get(accum)) {
break;
}
}
position = 0;
startOffset = 0;
//System.out.println("StandardR.D&PE nextDoc seg=" + segment + " return doc=" + doc);
return (doc = accum);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public int freq() throws IOException {
return freq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public int advance(int target) throws IOException {
//System.out.println("StandardR.D&PE advance seg=" + segment + " target=" + target + " this=" + this);
if ((target - skipInterval) >= doc && limit >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it isn't too close
if (skipper == null) {
// This is the first time this enum has ever been used for skipping -- do lazy init
skipper = new Lucene40SkipListReader((IndexInput) freqIn.clone(), maxSkipLevels, skipInterval);
}
if (!skipped) {
// This is the first time this posting has
// skipped, since reset() was called, so now we
// load the skip data for this posting
//System.out.println(" init skipper freqOffset=" + freqOffset + " skipOffset=" + skipOffset + " vs len=" + freqIn.length());
skipper.init(freqOffset+skipOffset,
freqOffset, proxOffset,
limit, storePayloads, storeOffsets);
skipped = true;
}
final int newOrd = skipper.skipTo(target);
if (newOrd > ord) {
// Skipper moved
ord = newOrd;
doc = accum = skipper.getDoc();
freqIn.seek(skipper.getFreqPointer());
lazyProxPointer = skipper.getProxPointer();
posPendingCount = 0;
position = 0;
startOffset = 0;
payloadPending = false;
payloadLength = skipper.getPayloadLength();
offsetLength = skipper.getOffsetLength();
}
}
// Now, linear scan for the rest:
do {
nextDoc();
} while (target > doc);
return doc;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public int nextPosition() throws IOException {
if (lazyProxPointer != -1) {
proxIn.seek(lazyProxPointer);
lazyProxPointer = -1;
}
if (payloadPending && payloadLength > 0) {
// payload of last position was never retrieved -- skip it
proxIn.seek(proxIn.getFilePointer() + payloadLength);
payloadPending = false;
}
// scan over any docs that were iterated without their positions
while(posPendingCount > freq) {
final int code = proxIn.readVInt();
if (storePayloads) {
if ((code & 1) != 0) {
// new payload length
payloadLength = proxIn.readVInt();
assert payloadLength >= 0;
}
assert payloadLength != -1;
}
if (storeOffsets) {
if ((proxIn.readVInt() & 1) != 0) {
// new offset length
offsetLength = proxIn.readVInt();
}
}
if (storePayloads) {
proxIn.seek(proxIn.getFilePointer() + payloadLength);
}
posPendingCount--;
position = 0;
startOffset = 0;
payloadPending = false;
//System.out.println("StandardR.D&PE skipPos");
}
// read next position
if (payloadPending && payloadLength > 0) {
// payload wasn't retrieved for last position
proxIn.seek(proxIn.getFilePointer()+payloadLength);
}
int code = proxIn.readVInt();
if (storePayloads) {
if ((code & 1) != 0) {
// new payload length
payloadLength = proxIn.readVInt();
assert payloadLength >= 0;
}
assert payloadLength != -1;
payloadPending = true;
code >>>= 1;
}
position += code;
if (storeOffsets) {
int offsetCode = proxIn.readVInt();
if ((offsetCode & 1) != 0) {
// new offset length
offsetLength = proxIn.readVInt();
}
startOffset += offsetCode >>> 1;
}
posPendingCount--;
assert posPendingCount >= 0: "nextPosition() was called too many times (more than freq() times) posPendingCount=" + posPendingCount;
//System.out.println("StandardR.D&PE nextPos return pos=" + position);
return position;
}
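// Illustrative sketch (not from the Lucene sources): once nextDoc()/advance() has landed on a
// live document, nextPosition() may be called exactly freq() times; startOffset()/endOffset()
// return -1 unless offsets were indexed, and in the enum above getPayload() is only legal while
// a payload is pending for the current position. A typical consumer loop, with hypothetical names:
void visitPositions(DocsAndPositionsEnum postings) throws IOException {
  final int freq = postings.freq();
  for (int i = 0; i < freq; i++) {
    int position = postings.nextPosition();
    int start = postings.startOffset();   // -1 when offsets are not stored
    int end = postings.endOffset();       // -1 when offsets are not stored
    // ... use position/start/end; call getPayload() here (at most once) if a payload exists
  }
}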
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public int startOffset() throws IOException {
return storeOffsets ? startOffset : -1;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public int endOffset() throws IOException {
return storeOffsets ? startOffset + offsetLength : -1;
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
Override
public BytesRef getPayload() throws IOException {
if (storePayloads) {
assert lazyProxPointer == -1;
assert posPendingCount < freq;
if (!payloadPending) {
throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
}
if (payloadLength > payload.bytes.length) {
payload.grow(payloadLength);
}
proxIn.readBytes(payload.bytes, 0, payloadLength);
payload.length = payloadLength;
payloadPending = false;
return payload;
} else {
throw new IOException("No payloads exist for this field!");
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java
Override
public TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException {
return new Lucene40TermVectorsReader(directory, segmentInfo, fieldInfos, context);
}
// in lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsFormat.java
Override
public TermVectorsWriter vectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException {
return new Lucene40TermVectorsWriter(directory, segmentInfo.name, context);
}
// in lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java
public void bufferSkip(int df) throws IOException {
int numLevels;
// determine max level
for (numLevels = 0; (df % skipInterval) == 0 && numLevels < numberOfSkipLevels; df /= skipInterval) {
numLevels++;
}
// in lucene/core/src/java/org/apache/lucene/codecs/MultiLevelSkipListWriter.java
public long writeSkip(IndexOutput output) throws IOException {
long skipPointer = output.getFilePointer();
//System.out.println("skipper.writeSkip fp=" + skipPointer);
if (skipBuffer == null || skipBuffer.length == 0) return skipPointer;
for (int level = numberOfSkipLevels - 1; level > 0; level--) {
long length = skipBuffer[level].getFilePointer();
if (length > 0) {
output.writeVLong(length);
skipBuffer[level].writeTo(output);
}
}
skipBuffer[0].writeTo(output);
return skipPointer;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java
Override
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
return new SimpleTextNormsPerDocConsumer(state);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java
Override
public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
return new SimpleTextNormsPerDocProducer(state,
BytesRef.getUTF8SortedAsUnicodeComparator());
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextNormsFormat.java
Override
protected DocValues getDocValuesForMerge(AtomicReader reader, FieldInfo info)
throws IOException {
return reader.normValues(info.name);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesConsumer.java
Override
public void add(int docID, IndexableField value) throws IOException {
assert docID >= 0;
final int ord, vSize;
switch (type) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
vSize = value.binaryValue().length;
ord = hash.add(value.binaryValue());
break;
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
vSize = -1;
ord = hash.add(value.binaryValue());
break;
case FIXED_INTS_16:
vSize = 2;
scratch.grow(2);
DocValuesArraySource.copyShort(scratch, value.numericValue().shortValue());
ord = hash.add(scratch);
break;
case FIXED_INTS_32:
vSize = 4;
scratch.grow(4);
DocValuesArraySource.copyInt(scratch, value.numericValue().intValue());
ord = hash.add(scratch);
break;
case FIXED_INTS_8:
vSize = 1;
scratch.grow(1);
scratch.bytes[scratch.offset] = value.numericValue().byteValue();
scratch.length = 1;
ord = hash.add(scratch);
break;
case FIXED_INTS_64:
vSize = 8;
scratch.grow(8);
DocValuesArraySource.copyLong(scratch, value.numericValue().longValue());
ord = hash.add(scratch);
break;
case VAR_INTS:
vSize = -1;
scratch.grow(8);
DocValuesArraySource.copyLong(scratch, value.numericValue().longValue());
ord = hash.add(scratch);
break;
case FLOAT_32:
vSize = 4;
scratch.grow(4);
DocValuesArraySource.copyInt(scratch,
Float.floatToRawIntBits(value.numericValue().floatValue()));
ord = hash.add(scratch);
break;
case FLOAT_64:
vSize = 8;
scratch.grow(8);
DocValuesArraySource.copyLong(scratch,
Double.doubleToRawLongBits(value.numericValue().doubleValue()));
ord = hash.add(scratch);
break;
default:
throw new RuntimeException("should not reach this line");
}
if (fixedSize == Integer.MIN_VALUE) {
assert maxDocId == -1;
fixedSize = vSize;
} else {
if (fixedSize != vSize) {
throw new IllegalArgumentException("value size must be " + fixedSize + " but was: " + vSize);
}
}
maxDocId = Math.max(docID, maxDocId);
ords = grow(ords, docID);
ords[docID] = (ord < 0 ? (-ord)-1 : ord) + 1;
}
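// Restated for clarity (illustrative helpers, mirroring the last line of add() above and the
// "ords[docId] - 1" in writeDoc() below): BytesRefHash.add() returns the new ord for an unseen
// value and -(existingOrd)-1 for a duplicate, and the per-document array reserves 0 for
// "document has no value", hence the +1/-1 shifts:
static int encodeOrd(int rawAddResult) {
  int ord = rawAddResult < 0 ? (-rawAddResult) - 1 : rawAddResult;  // ord of the de-duplicated value
  return ord + 1;                                                   // +1 so that 0 can mean "no value"
}
static int decodeOrd(int stored) {
  return stored - 1;   // -1 means "no value"; otherwise it indexes back into the hash
}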
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesConsumer.java
Override
public void finish(int docCount) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segment, "",
segmentSuffix);
IndexOutput output = dir.createOutput(fileName, ctx);
boolean success = false;
BytesRef spare = new BytesRef();
try {
SimpleTextUtil.write(output, getHeader());
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, VALUE_SIZE);
SimpleTextUtil.write(output, Integer.toString(this.fixedSize), scratch);
SimpleTextUtil.writeNewline(output);
prepareFlush(docCount);
for (int i = 0; i < docCount; i++) {
SimpleTextUtil.write(output, DOC);
SimpleTextUtil.write(output, Integer.toString(i), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, VALUE);
writeDoc(output, i, spare);
SimpleTextUtil.writeNewline(output);
}
SimpleTextUtil.write(output, END);
SimpleTextUtil.writeNewline(output);
success = true;
} finally {
hash.close();
if (success) {
IOUtils.close(output);
} else {
IOUtils.closeWhileHandlingException(output);
}
}
}
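// Schematic only (not literal output): finish() above emits one plain-text file per field,
// structured by the header/VALUE_SIZE/DOC/VALUE/END markers used in the code. The exact marker
// strings are constants defined elsewhere, so the lines below are indicative:
//   <header>
//   <VALUE_SIZE> <fixedSize, or -1 for variable-sized values>
//   <DOC> 0
//   <VALUE> <value of document 0, or the type's zero/empty default when it has none>
//   <DOC> 1
//   <VALUE> <value of document 1>
//   ...
//   <END>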
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesConsumer.java
protected void writeDoc(IndexOutput output, int docId, BytesRef spare) throws IOException {
int ord = ords[docId] - 1;
if (ord != -1) {
assert ord >= 0;
hash.get(ord, spare);
switch (type) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
SimpleTextUtil.write(output, spare);
break;
case FIXED_INTS_16:
SimpleTextUtil.write(output,
Short.toString(DocValuesArraySource.asShort(spare)), scratch);
break;
case FIXED_INTS_32:
SimpleTextUtil.write(output,
Integer.toString(DocValuesArraySource.asInt(spare)), scratch);
break;
case VAR_INTS:
case FIXED_INTS_64:
SimpleTextUtil.write(output,
Long.toString(DocValuesArraySource.asLong(spare)), scratch);
break;
case FIXED_INTS_8:
assert spare.length == 1 : spare.length;
SimpleTextUtil.write(output,
Integer.toString(spare.bytes[spare.offset]), scratch);
break;
case FLOAT_32:
float valueFloat = Float.intBitsToFloat(DocValuesArraySource.asInt(spare));
SimpleTextUtil.write(output, Float.toString(valueFloat), scratch);
break;
case FLOAT_64:
double valueDouble = Double.longBitsToDouble(DocValuesArraySource
.asLong(spare));
SimpleTextUtil.write(output, Double.toString(valueDouble), scratch);
break;
default:
throw new IllegalArgumentException("unsupported type: " + type);
}
} else {
switch (type) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
if(zeroBytes == null) {
assert fixedSize > 0;
zeroBytes = new BytesRef(new byte[fixedSize]);
}
SimpleTextUtil.write(output, zeroBytes);
break;
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
scratch.length = 0;
SimpleTextUtil.write(output, scratch);
break;
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
SimpleTextUtil.write(output, ZERO_INT);
break;
case FLOAT_32:
case FLOAT_64:
SimpleTextUtil.write(output, ZERO_DOUBLE);
break;
default:
throw new IllegalArgumentException("unsupported type: " + type);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public String next() throws IOException {
while(true) {
SimpleTextUtil.readLine(in, scratch);
if (scratch.equals(END)) {
current = null;
return null;
}
if (StringHelper.startsWith(scratch, FIELD)) {
return current = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8");
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public Terms terms() throws IOException {
return SimpleTextFieldsReader.this.terms(current);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public boolean seekExact(BytesRef text, boolean useCache /* ignored */) throws IOException {
final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekExact(text);
if (result != null) {
PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
docsStart = pair1.output1;
docFreq = pair2.output1.intValue();
totalTermFreq = pair2.output2;
return true;
} else {
return false;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
//System.out.println("seek to text=" + text.utf8ToString());
final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.seekCeil(text);
if (result == null) {
//System.out.println(" end");
return SeekStatus.END;
} else {
//System.out.println(" got text=" + term.utf8ToString());
PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
docsStart = pair1.output1;
docFreq = pair2.output1.intValue();
totalTermFreq = pair2.output2;
if (result.input.equals(text)) {
//System.out.println(" match docsStart=" + docsStart);
return SeekStatus.FOUND;
} else {
//System.out.println(" not match docsStart=" + docsStart);
return SeekStatus.NOT_FOUND;
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public BytesRef next() throws IOException {
assert !ended;
final BytesRefFSTEnum.InputOutput<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> result = fstEnum.next();
if (result != null) {
PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>> pair1 = result.output;
PairOutputs.Pair<Long,Long> pair2 = pair1.output2;
docsStart = pair1.output1;
docFreq = pair2.output1.intValue();
totalTermFreq = pair2.output2;
return result.input;
} else {
return null;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public long ord() throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
if (needsFreqs && indexOptions == IndexOptions.DOCS_ONLY) {
return null;
}
SimpleTextDocsEnum docsEnum;
if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
docsEnum = (SimpleTextDocsEnum) reuse;
} else {
docsEnum = new SimpleTextDocsEnum();
}
return docsEnum.reset(docsStart, liveDocs, !needsFreqs);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// Positions were not indexed
return null;
}
if (needsOffsets &&
indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
// Offsets were not indexed
return null;
}
SimpleTextDocsAndPositionsEnum docsAndPositionsEnum;
if (reuse != null && reuse instanceof SimpleTextDocsAndPositionsEnum && ((SimpleTextDocsAndPositionsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
docsAndPositionsEnum = (SimpleTextDocsAndPositionsEnum) reuse;
} else {
docsAndPositionsEnum = new SimpleTextDocsAndPositionsEnum();
}
return docsAndPositionsEnum.reset(docsStart, liveDocs, indexOptions);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
public SimpleTextDocsEnum reset(long fp, Bits liveDocs, boolean omitTF) throws IOException {
this.liveDocs = liveDocs;
in.seek(fp);
this.omitTF = omitTF;
docID = -1;
return this;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public int freq() throws IOException {
assert !omitTF;
return tf;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public int nextDoc() throws IOException {
if (docID == NO_MORE_DOCS) {
return docID;
}
boolean first = true;
int termFreq = 0;
while(true) {
final long lineStart = in.getFilePointer();
SimpleTextUtil.readLine(in, scratch);
if (StringHelper.startsWith(scratch, DOC)) {
if (!first && (liveDocs == null || liveDocs.get(docID))) {
in.seek(lineStart);
if (!omitTF) {
tf = termFreq;
}
return docID;
}
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
termFreq = 0;
first = false;
} else if (StringHelper.startsWith(scratch, FREQ)) {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
termFreq = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
} else if (StringHelper.startsWith(scratch, POS)) {
// skip termFreq++;
} else if (StringHelper.startsWith(scratch, START_OFFSET)) {
// skip
} else if (StringHelper.startsWith(scratch, END_OFFSET)) {
// skip
} else if (StringHelper.startsWith(scratch, PAYLOAD)) {
// skip
} else {
assert StringHelper.startsWith(scratch, TERM) || StringHelper.startsWith(scratch, FIELD) || StringHelper.startsWith(scratch, END): "scratch=" + scratch.utf8ToString();
if (!first && (liveDocs == null || liveDocs.get(docID))) {
in.seek(lineStart);
if (!omitTF) {
tf = termFreq;
}
return docID;
}
return docID = NO_MORE_DOCS;
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public int advance(int target) throws IOException {
// Naive -- better to index skip data
while(nextDoc() < target);
return docID;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public int freq() throws IOException {
return tf;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public int nextDoc() throws IOException {
boolean first = true;
in.seek(nextDocStart);
long posStart = 0;
while(true) {
final long lineStart = in.getFilePointer();
SimpleTextUtil.readLine(in, scratch);
//System.out.println("NEXT DOC: " + scratch.utf8ToString());
if (StringHelper.startsWith(scratch, DOC)) {
if (!first && (liveDocs == null || liveDocs.get(docID))) {
nextDocStart = lineStart;
in.seek(posStart);
return docID;
}
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
tf = 0;
first = false;
} else if (StringHelper.startsWith(scratch, FREQ)) {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
tf = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
posStart = in.getFilePointer();
} else if (StringHelper.startsWith(scratch, POS)) {
// skip
} else if (StringHelper.startsWith(scratch, START_OFFSET)) {
// skip
} else if (StringHelper.startsWith(scratch, END_OFFSET)) {
// skip
} else if (StringHelper.startsWith(scratch, PAYLOAD)) {
// skip
} else {
assert StringHelper.startsWith(scratch, TERM) || StringHelper.startsWith(scratch, FIELD) || StringHelper.startsWith(scratch, END);
if (!first && (liveDocs == null || liveDocs.get(docID))) {
nextDocStart = lineStart;
in.seek(posStart);
return docID;
}
return docID = NO_MORE_DOCS;
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public int advance(int target) throws IOException {
// Naive -- better to index skip data
while(nextDoc() < target);
return docID;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public int nextPosition() throws IOException {
final int pos;
if (readPositions) {
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, POS): "got line=" + scratch.utf8ToString();
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+POS.length, scratch.length-POS.length, scratchUTF16_2);
pos = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
} else {
pos = -1;
}
if (readOffsets) {
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, START_OFFSET): "got line=" + scratch.utf8ToString();
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+START_OFFSET.length, scratch.length-START_OFFSET.length, scratchUTF16_2);
startOffset = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, END_OFFSET): "got line=" + scratch.utf8ToString();
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+END_OFFSET.length, scratch.length-END_OFFSET.length, scratchUTF16_2);
endOffset = ArrayUtil.parseInt(scratchUTF16_2.chars, 0, scratchUTF16_2.length);
}
final long fp = in.getFilePointer();
SimpleTextUtil.readLine(in, scratch);
if (StringHelper.startsWith(scratch, PAYLOAD)) {
final int len = scratch.length - PAYLOAD.length;
if (scratch2.bytes.length < len) {
scratch2.grow(len);
}
System.arraycopy(scratch.bytes, PAYLOAD.length, scratch2.bytes, 0, len);
scratch2.length = len;
payload = scratch2;
} else {
payload = null;
in.seek(fp);
}
return pos;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public int startOffset() throws IOException {
return startOffset;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public int endOffset() throws IOException {
return endOffset;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
private void loadTerms() throws IOException {
PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false);
final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
final PairOutputs<Long,Long> outputsInner = new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs);
final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
outputsInner);
b = new Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(FST.INPUT_TYPE.BYTE1, outputs);
IndexInput in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
in.seek(termsStart);
final BytesRef lastTerm = new BytesRef(10);
long lastDocsStart = -1;
int docFreq = 0;
long totalTermFreq = 0;
OpenBitSet visitedDocs = new OpenBitSet();
final IntsRef scratchIntsRef = new IntsRef();
while(true) {
SimpleTextUtil.readLine(in, scratch);
if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
if (lastDocsStart != -1) {
b.add(Util.toIntsRef(lastTerm, scratchIntsRef),
outputs.newPair(lastDocsStart,
outputsInner.newPair((long) docFreq, totalTermFreq)));
sumTotalTermFreq += totalTermFreq;
}
break;
} else if (StringHelper.startsWith(scratch, DOC)) {
docFreq++;
sumDocFreq++;
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+DOC.length, scratch.length-DOC.length, scratchUTF16);
int docID = ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
visitedDocs.set(docID);
} else if (StringHelper.startsWith(scratch, FREQ)) {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+FREQ.length, scratch.length-FREQ.length, scratchUTF16);
totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
} else if (StringHelper.startsWith(scratch, TERM)) {
if (lastDocsStart != -1) {
b.add(Util.toIntsRef(lastTerm, scratchIntsRef), outputs.newPair(lastDocsStart,
outputsInner.newPair((long) docFreq, totalTermFreq)));
}
lastDocsStart = in.getFilePointer();
final int len = scratch.length - TERM.length;
if (len > lastTerm.length) {
lastTerm.grow(len);
}
System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len);
lastTerm.length = len;
docFreq = 0;
sumTotalTermFreq += totalTermFreq;
totalTermFreq = 0;
termCount++;
}
}
docCount = (int) visitedDocs.cardinality();
fst = b.finish();
/*
PrintStream ps = new PrintStream("out.dot");
fst.toDot(ps);
ps.close();
System.out.println("SAVED out.dot");
*/
//System.out.println("FST " + fst.sizeInBytes());
}
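// Restated for clarity (illustrative): the FST built above maps each term to a nested pair of
// outputs. Given an InputOutput "result" from the BytesRefFSTEnum, as in the seek/next methods
// earlier in this file, the three stored statistics unpack like this:
PairOutputs.Pair<Long, PairOutputs.Pair<Long, Long>> output = result.output;
long docsStart = output.output1;                     // file pointer where this term's postings begin
int docFreq = output.output2.output1.intValue();     // number of documents containing the term
long totalTermFreq = output.output2.output2;         // total occurrences of the term in those documents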
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
if (fst != null) {
return new SimpleTextTermsEnum(fst, indexOptions);
} else {
return TermsEnum.EMPTY;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public long getSumDocFreq() throws IOException {
return sumDocFreq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public int getDocCount() throws IOException {
return docCount;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public FieldsEnum iterator() throws IOException {
return new SimpleTextFieldsEnum();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
synchronized public Terms terms(String field) throws IOException {
Terms terms = termsCache.get(field);
if (terms == null) {
SimpleTextFieldsEnum fe = (SimpleTextFieldsEnum) iterator();
String fieldUpto;
while((fieldUpto = fe.next()) != null) {
if (fieldUpto.equals(field)) {
terms = new SimpleTextTerms(field, fe.in.getFilePointer());
break;
}
}
termsCache.put(field, terms);
}
return terms;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
Override
public void close() throws IOException {
in.close();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java
protected DocValues loadDocValues(int docCount, Directory dir, String id,
DocValues.Type type, IOContext context) throws IOException {
return new SimpleTextDocValues(dir, context, type, id, docCount, comp, segmentSuffix);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java
Override
protected void closeInternal(Collection<? extends Closeable> closeables)
throws IOException {
IOUtils.close(closeables);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java
Override
public void close() throws IOException {
try {
super.close();
} finally {
IOUtils.close(input);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java
Override
public Source load() throws IOException {
boolean success = false;
IndexInput in = (IndexInput) input.clone();
try {
Source source = null;
switch (type) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
source = read(in, new ValueReader(type, docCount, comp));
break;
case FIXED_INTS_16:
case FIXED_INTS_32:
case VAR_INTS:
case FIXED_INTS_64:
case FIXED_INTS_8:
case FLOAT_32:
case FLOAT_64:
source = read(in, new ValueReader(type, docCount, null));
break;
default:
throw new IllegalArgumentException("unknown type: " + type);
}
assert source != null;
success = true;
return source;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(in);
} else {
IOUtils.close(in);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java
private int readHeader(IndexInput in) throws IOException {
BytesRef scratch = new BytesRef();
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, HEADER);
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, VALUE_SIZE);
return Integer.parseInt(readString(scratch.offset + VALUE_SIZE.length,
scratch));
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java
private Source read(IndexInput in, ValueReader reader) throws IOException {
BytesRef scratch = new BytesRef();
for (int i = 0; i < docCount; i++) {
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, DOC) : scratch.utf8ToString();
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, VALUE);
reader.fromString(i, scratch, scratch.offset + VALUE.length);
}
SimpleTextUtil.readLine(in, scratch);
assert scratch.equals(END);
return reader.getSource();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocProducer.java
Override
public Source getDirectSource() throws IOException {
return this.getSource();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java
Override
public FieldInfosReader getFieldInfosReader() throws IOException {
return reader;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosFormat.java
Override
public FieldInfosWriter getFieldInfosWriter() throws IOException {
return writer;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
Override
public void startDocument(int numVectorFields) throws IOException {
write(DOC);
write(Integer.toString(numDocsWritten));
newLine();
write(NUMFIELDS);
write(Integer.toString(numVectorFields));
newLine();
numDocsWritten++;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
Override
public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets) throws IOException {
write(FIELD);
write(Integer.toString(info.number));
newLine();
write(FIELDNAME);
write(info.name);
newLine();
write(FIELDPOSITIONS);
write(Boolean.toString(positions));
newLine();
write(FIELDOFFSETS);
write(Boolean.toString(offsets));
newLine();
write(FIELDTERMCOUNT);
write(Integer.toString(numTerms));
newLine();
this.positions = positions;
this.offsets = offsets;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
Override
public void startTerm(BytesRef term, int freq) throws IOException {
write(TERMTEXT);
write(term);
newLine();
write(TERMFREQ);
write(Integer.toString(freq));
newLine();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
Override
public void addPosition(int position, int startOffset, int endOffset) throws IOException {
assert positions || offsets;
if (positions) {
write(POSITION);
write(Integer.toString(position));
newLine();
}
if (offsets) {
write(STARTOFFSET);
write(Integer.toString(startOffset));
newLine();
write(ENDOFFSET);
write(Integer.toString(endOffset));
newLine();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
Override
public void finish(FieldInfos fis, int numDocs) throws IOException {
if (numDocsWritten != numDocs) {
throw new RuntimeException("mergeVectors produced an invalid result: mergedDocs is " + numDocs + " but vec numDocs is " + numDocsWritten + " file=" + out.toString() + "; now aborting this merge to prevent index corruption");
}
write(END);
newLine();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
Override
public void close() throws IOException {
try {
IOUtils.close(out);
} finally {
out = null;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
Override
public Comparator<BytesRef> getComparator() throws IOException {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
private void write(String s) throws IOException {
SimpleTextUtil.write(out, s, scratch);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
private void write(BytesRef bytes) throws IOException {
SimpleTextUtil.write(out, bytes);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsWriter.java
private void newLine() throws IOException {
SimpleTextUtil.writeNewline(out);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosReader.java
Override
public FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
IndexInput input = directory.openInput(fileName, iocontext);
BytesRef scratch = new BytesRef();
try {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, NUMFIELDS);
final int size = Integer.parseInt(readString(NUMFIELDS.length, scratch));
FieldInfo infos[] = new FieldInfo[size];
for (int i = 0; i < size; i++) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, NAME);
String name = readString(NAME.length, scratch);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, NUMBER);
int fieldNumber = Integer.parseInt(readString(NUMBER.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, ISINDEXED);
boolean isIndexed = Boolean.parseBoolean(readString(ISINDEXED.length, scratch));
final IndexOptions indexOptions;
if (isIndexed) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, INDEXOPTIONS);
indexOptions = IndexOptions.valueOf(readString(INDEXOPTIONS.length, scratch));
} else {
indexOptions = null;
}
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, STORETV);
boolean storeTermVector = Boolean.parseBoolean(readString(STORETV.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, PAYLOADS);
boolean storePayloads = Boolean.parseBoolean(readString(PAYLOADS.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, NORMS);
boolean omitNorms = !Boolean.parseBoolean(readString(NORMS.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, NORMS_TYPE);
String nrmType = readString(NORMS_TYPE.length, scratch);
final DocValues.Type normsType = docValuesType(nrmType);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, DOCVALUES);
String dvType = readString(DOCVALUES.length, scratch);
final DocValues.Type docValuesType = docValuesType(dvType);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, NUM_ATTS);
int numAtts = Integer.parseInt(readString(NUM_ATTS.length, scratch));
Map<String,String> atts = new HashMap<String,String>();
for (int j = 0; j < numAtts; j++) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, ATT_KEY);
String key = readString(ATT_KEY.length, scratch);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, ATT_VALUE);
String value = readString(ATT_VALUE.length, scratch);
atts.put(key, value);
}
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.unmodifiableMap(atts));
}
if (input.getFilePointer() != input.length()) {
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
}
return new FieldInfos(infos);
} finally {
input.close();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
private void write(String s) throws IOException {
SimpleTextUtil.write(out, s, scratch);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
private void write(BytesRef b) throws IOException {
SimpleTextUtil.write(out, b);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
private void newline() throws IOException {
SimpleTextUtil.writeNewline(out);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
Override
public TermsConsumer addField(FieldInfo field) throws IOException {
write(FIELD);
write(field.name);
newline();
return new SimpleTextTermsWriter(field);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
Override
public PostingsConsumer startTerm(BytesRef term) throws IOException {
return postingsWriter.reset(term);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
Override
public void finishTerm(BytesRef term, TermStats stats) throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
Override
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
Override
public void startDoc(int docID, int termDocFreq) throws IOException {
if (!wroteTerm) {
// we lazily do this, in case the term had zero docs
write(TERM);
write(term);
newline();
wroteTerm = true;
}
write(DOC);
write(Integer.toString(docID));
newline();
if (indexOptions != IndexOptions.DOCS_ONLY) {
write(FREQ);
write(Integer.toString(termDocFreq));
newline();
}
lastEndOffset = -1;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
Override
public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
if (writePositions) {
write(POS);
write(Integer.toString(position));
newline();
}
if (writeOffsets) {
assert endOffset >= startOffset;
assert startOffset >= lastEndOffset: "startOffset=" + startOffset + " lastEndOffset=" + lastEndOffset;
lastEndOffset = endOffset;
write(START_OFFSET);
write(Integer.toString(startOffset));
newline();
write(END_OFFSET);
write(Integer.toString(endOffset));
newline();
}
if (payload != null && payload.length > 0) {
assert payload.length != 0;
write(PAYLOAD);
write(payload);
newline();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsWriter.java
Override
public void close() throws IOException {
try {
write(END);
newline();
} finally {
out.close();
}
}
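// Schematic only (not literal output): taken together, the writer methods above produce a
// plain-text postings file whose structure follows the FIELD/TERM/DOC/FREQ/POS/START_OFFSET/
// END_OFFSET/PAYLOAD/END markers used in the code (exact marker strings defined elsewhere):
//   <FIELD> body
//   <TERM> lucene
//   <DOC> 0
//   <FREQ> 2                      (omitted for DOCS_ONLY fields)
//   <POS> 3                       (only when positions are indexed)
//   <START_OFFSET> 10             (only when offsets are indexed)
//   <END_OFFSET> 16
//   <PAYLOAD> ...                 (only when a non-empty payload was supplied)
//   ... further POS/DOC/TERM/FIELD entries ...
//   <END>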
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsFormat.java
Override
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
return new SimpleTextStoredFieldsReader(directory, si, fn, context);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsFormat.java
Override
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
return new SimpleTextStoredFieldsWriter(directory, si.name, context);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java
Override
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
return new SimpleTextPerDocConsumer(state, DOC_VALUES_SEG_SUFFIX);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java
Override
public PerDocProducer docsProducer(SegmentReadState state) throws IOException {
return new SimpleTextPerDocProducer(state, BytesRef.getUTF8SortedAsUnicodeComparator(), DOC_VALUES_SEG_SUFFIX);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoReader.java
Override
public SegmentInfo read(Directory directory, String segmentName, IOContext context) throws IOException {
BytesRef scratch = new BytesRef();
String segFileName = IndexFileNames.segmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
IndexInput input = directory.openInput(segFileName, context);
boolean success = false;
try {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, SI_VERSION);
final String version = readString(SI_VERSION.length, scratch);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, SI_DOCCOUNT);
final int docCount = Integer.parseInt(readString(SI_DOCCOUNT.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, SI_USECOMPOUND);
final boolean isCompoundFile = Boolean.parseBoolean(readString(SI_USECOMPOUND.length, scratch));
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, SI_NUM_DIAG);
int numDiag = Integer.parseInt(readString(SI_NUM_DIAG.length, scratch));
Map<String,String> diagnostics = new HashMap<String,String>();
for (int i = 0; i < numDiag; i++) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, SI_DIAG_KEY);
String key = readString(SI_DIAG_KEY.length, scratch);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, SI_DIAG_VALUE);
String value = readString(SI_DIAG_VALUE.length, scratch);
diagnostics.put(key, value);
}
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, SI_NUM_ATTS);
int numAtts = Integer.parseInt(readString(SI_NUM_ATTS.length, scratch));
Map<String,String> attributes = new HashMap<String,String>();
for (int i = 0; i < numAtts; i++) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, SI_ATT_KEY);
String key = readString(SI_ATT_KEY.length, scratch);
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, SI_ATT_VALUE);
String value = readString(SI_ATT_VALUE.length, scratch);
attributes.put(key, value);
}
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, SI_NUM_FILES);
int numFiles = Integer.parseInt(readString(SI_NUM_FILES.length, scratch));
Set<String> files = new HashSet<String>();
for (int i = 0; i < numFiles; i++) {
SimpleTextUtil.readLine(input, scratch);
assert StringHelper.startsWith(scratch, SI_FILE);
String fileName = readString(SI_FILE.length, scratch);
files.add(fileName);
}
SegmentInfo info = new SegmentInfo(directory, version, segmentName, docCount,
isCompoundFile, null, diagnostics, Collections.unmodifiableMap(attributes));
info.setFiles(files);
success = true;
return info;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(input);
} else {
input.close();
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocConsumer.java
Override
public void close() throws IOException {
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPerDocConsumer.java
Override
public DocValuesConsumer addValuesField(Type type, FieldInfo field)
throws IOException {
return new SimpleTextDocValuesConsumer(SimpleTextDocValuesFormat.docValuesId(state.segmentInfo.name,
field.number), state.directory, state.context, type, segmentSuffix);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java
Override
public TermVectorsReader vectorsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext context) throws IOException {
return new SimpleTextTermVectorsReader(directory, segmentInfo, context);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsFormat.java
Override
public TermVectorsWriter vectorsWriter(Directory directory, SegmentInfo segmentInfo, IOContext context) throws IOException {
return new SimpleTextTermVectorsWriter(directory, segmentInfo.name, context);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
private void readIndex() throws IOException {
offsets = new ArrayList<Long>();
while (!scratch.equals(END)) {
readLine();
if (StringHelper.startsWith(scratch, DOC)) {
offsets.add(in.getFilePointer());
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public Fields get(int doc) throws IOException {
// TestTV tests for this in testBadParams... but is this
// really guaranteed by the API?
if (doc < 0 || doc >= offsets.size()) {
throw new IllegalArgumentException("doc id out of range");
}
SortedMap<String,SimpleTVTerms> fields = new TreeMap<String,SimpleTVTerms>();
in.seek(offsets.get(doc));
readLine();
assert StringHelper.startsWith(scratch, NUMFIELDS);
int numFields = parseIntAt(NUMFIELDS.length);
if (numFields == 0) {
return null; // no vectors for this doc
}
for (int i = 0; i < numFields; i++) {
readLine();
assert StringHelper.startsWith(scratch, FIELD);
// skip fieldNumber:
parseIntAt(FIELD.length);
readLine();
assert StringHelper.startsWith(scratch, FIELDNAME);
String fieldName = readString(FIELDNAME.length, scratch);
readLine();
assert StringHelper.startsWith(scratch, FIELDPOSITIONS);
boolean positions = Boolean.parseBoolean(readString(FIELDPOSITIONS.length, scratch));
readLine();
assert StringHelper.startsWith(scratch, FIELDOFFSETS);
boolean offsets = Boolean.parseBoolean(readString(FIELDOFFSETS.length, scratch));
readLine();
assert StringHelper.startsWith(scratch, FIELDTERMCOUNT);
int termCount = parseIntAt(FIELDTERMCOUNT.length);
SimpleTVTerms terms = new SimpleTVTerms();
fields.put(fieldName, terms);
for (int j = 0; j < termCount; j++) {
readLine();
assert StringHelper.startsWith(scratch, TERMTEXT);
BytesRef term = new BytesRef();
int termLength = scratch.length - TERMTEXT.length;
term.grow(termLength);
term.length = termLength;
System.arraycopy(scratch.bytes, scratch.offset+TERMTEXT.length, term.bytes, term.offset, termLength);
SimpleTVPostings postings = new SimpleTVPostings();
terms.terms.put(term, postings);
readLine();
assert StringHelper.startsWith(scratch, TERMFREQ);
postings.freq = parseIntAt(TERMFREQ.length);
if (positions || offsets) {
if (positions) {
postings.positions = new int[postings.freq];
}
if (offsets) {
postings.startOffsets = new int[postings.freq];
postings.endOffsets = new int[postings.freq];
}
for (int k = 0; k < postings.freq; k++) {
if (positions) {
readLine();
assert StringHelper.startsWith(scratch, POSITION);
postings.positions[k] = parseIntAt(POSITION.length);
}
if (offsets) {
readLine();
assert StringHelper.startsWith(scratch, STARTOFFSET);
postings.startOffsets[k] = parseIntAt(STARTOFFSET.length);
readLine();
assert StringHelper.startsWith(scratch, ENDOFFSET);
postings.endOffsets[k] = parseIntAt(ENDOFFSET.length);
}
}
}
}
}
return new SimpleTVFields(fields);
}
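// Illustrative sketch (not from the Lucene sources): how a caller might walk the per-document
// vectors returned by get(). The reader/docID names are hypothetical; the API calls are the ones
// implemented in the surrounding excerpts (iterator(), terms(), totalTermFreq()):
static void dumpVectors(SimpleTextTermVectorsReader reader, int docID) throws IOException {
  Fields vectors = reader.get(docID);
  if (vectors == null) {
    return;   // the document has no term vectors
  }
  FieldsEnum fieldsEnum = vectors.iterator();
  String field;
  while ((field = fieldsEnum.next()) != null) {
    Terms terms = fieldsEnum.terms();
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef term;
    while ((term = termsEnum.next()) != null) {
      long freqInDoc = termsEnum.totalTermFreq();  // frequency of the term within this one document
    }
  }
}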
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public void close() throws IOException {
try {
IOUtils.close(in);
} finally {
in = null;
offsets = null;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
private void readLine() throws IOException {
SimpleTextUtil.readLine(in, scratch);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
private int parseIntAt(int offset) throws IOException {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+offset, scratch.length-offset, scratchUTF16);
return ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public FieldsEnum iterator() throws IOException {
return new FieldsEnum() {
private Iterator<Map.Entry<String,SimpleTVTerms>> iterator = fields.entrySet().iterator();
private Map.Entry<String,SimpleTVTerms> current = null;
@Override
public String next() throws IOException {
if (!iterator.hasNext()) {
return null;
} else {
current = iterator.next();
return current.getKey();
}
}
@Override
public Terms terms() throws IOException {
return current.getValue();
}
};
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public String next() throws IOException {
if (!iterator.hasNext()) {
return null;
} else {
current = iterator.next();
return current.getKey();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public Terms terms() throws IOException {
return current.getValue();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public Terms terms(String field) throws IOException {
return fields.get(field);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public int size() throws IOException {
return fields.size();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
// TODO: reuse
return new SimpleTVTermsEnum(terms);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public Comparator<BytesRef> getComparator() throws IOException {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public long size() throws IOException {
return terms.size();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public long getSumTotalTermFreq() throws IOException {
return -1;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public long getSumDocFreq() throws IOException {
return terms.size();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public int getDocCount() throws IOException {
return 1;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException {
iterator = terms.tailMap(text).entrySet().iterator();
if (!iterator.hasNext()) {
return SeekStatus.END;
} else {
return next().equals(text) ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public void seekExact(long ord) throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public BytesRef next() throws IOException {
if (!iterator.hasNext()) {
return null;
} else {
current = iterator.next();
return current.getKey();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public BytesRef term() throws IOException {
return current.getKey();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public long ord() throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public int docFreq() throws IOException {
return 1;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public long totalTermFreq() throws IOException {
return current.getValue().freq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
// TODO: reuse
SimpleTVDocsEnum e = new SimpleTVDocsEnum();
e.reset(liveDocs, needsFreqs ? current.getValue().freq : -1);
return e;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
SimpleTVPostings postings = current.getValue();
if (postings.positions == null && postings.startOffsets == null) {
return null;
}
if (needsOffsets && (postings.startOffsets == null || postings.endOffsets == null)) {
return null;
}
// TODO: reuse
SimpleTVDocsAndPositionsEnum e = new SimpleTVDocsAndPositionsEnum();
e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets);
return e;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public int freq() throws IOException {
assert freq != -1;
return freq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
Override
public int freq() throws IOException {
if (positions != null) {
return positions.length;
} else {
assert startOffsets != null;
return startOffsets.length;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPostingsFormat.java
Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
return new SimpleTextFieldsWriter(state);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPostingsFormat.java
Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
return new SimpleTextFieldsReader(state);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldInfosWriter.java
Override
public void write(Directory directory, String segmentName, FieldInfos infos, IOContext context) throws IOException {
final String fileName = IndexFileNames.segmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
IndexOutput out = directory.createOutput(fileName, context);
BytesRef scratch = new BytesRef();
try {
SimpleTextUtil.write(out, NUMFIELDS);
SimpleTextUtil.write(out, Integer.toString(infos.size()), scratch);
SimpleTextUtil.writeNewline(out);
for (FieldInfo fi : infos) {
SimpleTextUtil.write(out, NAME);
SimpleTextUtil.write(out, fi.name, scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, NUMBER);
SimpleTextUtil.write(out, Integer.toString(fi.number), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, ISINDEXED);
SimpleTextUtil.write(out, Boolean.toString(fi.isIndexed()), scratch);
SimpleTextUtil.writeNewline(out);
if (fi.isIndexed()) {
assert fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.hasPayloads();
SimpleTextUtil.write(out, INDEXOPTIONS);
SimpleTextUtil.write(out, fi.getIndexOptions().toString(), scratch);
SimpleTextUtil.writeNewline(out);
}
SimpleTextUtil.write(out, STORETV);
SimpleTextUtil.write(out, Boolean.toString(fi.hasVectors()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, PAYLOADS);
SimpleTextUtil.write(out, Boolean.toString(fi.hasPayloads()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, NORMS);
SimpleTextUtil.write(out, Boolean.toString(!fi.omitsNorms()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, NORMS_TYPE);
SimpleTextUtil.write(out, getDocValuesType(fi.getNormType()), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, DOCVALUES);
SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch);
SimpleTextUtil.writeNewline(out);
Map<String,String> atts = fi.attributes();
int numAtts = atts == null ? 0 : atts.size();
SimpleTextUtil.write(out, NUM_ATTS);
SimpleTextUtil.write(out, Integer.toString(numAtts), scratch);
SimpleTextUtil.writeNewline(out);
if (numAtts > 0) {
for (Map.Entry<String,String> entry : atts.entrySet()) {
SimpleTextUtil.write(out, ATT_KEY);
SimpleTextUtil.write(out, entry.getKey(), scratch);
SimpleTextUtil.writeNewline(out);
SimpleTextUtil.write(out, ATT_VALUE);
SimpleTextUtil.write(out, entry.getValue(), scratch);
SimpleTextUtil.writeNewline(out);
}
}
}
} finally {
out.close();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
private void readIndex() throws IOException {
offsets = new ArrayList<Long>();
while (!scratch.equals(END)) {
readLine();
if (StringHelper.startsWith(scratch, DOC)) {
offsets.add(in.getFilePointer());
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
Override
public void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
in.seek(offsets.get(n));
readLine();
assert StringHelper.startsWith(scratch, NUM);
int numFields = parseIntAt(NUM.length);
for (int i = 0; i < numFields; i++) {
readLine();
assert StringHelper.startsWith(scratch, FIELD);
int fieldNumber = parseIntAt(FIELD.length);
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
readLine();
assert StringHelper.startsWith(scratch, NAME);
readLine();
assert StringHelper.startsWith(scratch, TYPE);
final BytesRef type;
if (equalsAt(TYPE_STRING, scratch, TYPE.length)) {
type = TYPE_STRING;
} else if (equalsAt(TYPE_BINARY, scratch, TYPE.length)) {
type = TYPE_BINARY;
} else if (equalsAt(TYPE_INT, scratch, TYPE.length)) {
type = TYPE_INT;
} else if (equalsAt(TYPE_LONG, scratch, TYPE.length)) {
type = TYPE_LONG;
} else if (equalsAt(TYPE_FLOAT, scratch, TYPE.length)) {
type = TYPE_FLOAT;
} else if (equalsAt(TYPE_DOUBLE, scratch, TYPE.length)) {
type = TYPE_DOUBLE;
} else {
throw new RuntimeException("unknown field type");
}
switch (visitor.needsField(fieldInfo)) {
case YES:
readField(type, fieldInfo, visitor);
break;
case NO:
readLine();
assert StringHelper.startsWith(scratch, VALUE);
break;
case STOP: return;
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
private void readField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor) throws IOException {
readLine();
assert StringHelper.startsWith(scratch, VALUE);
if (type == TYPE_STRING) {
visitor.stringField(fieldInfo, new String(scratch.bytes, scratch.offset+VALUE.length, scratch.length-VALUE.length, "UTF-8"));
} else if (type == TYPE_BINARY) {
// TODO: who owns the bytes?
byte[] copy = new byte[scratch.length-VALUE.length];
System.arraycopy(scratch.bytes, scratch.offset+VALUE.length, copy, 0, copy.length);
visitor.binaryField(fieldInfo, copy, 0, copy.length);
} else if (type == TYPE_INT) {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+VALUE.length, scratch.length-VALUE.length, scratchUTF16);
visitor.intField(fieldInfo, Integer.parseInt(scratchUTF16.toString()));
} else if (type == TYPE_LONG) {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+VALUE.length, scratch.length-VALUE.length, scratchUTF16);
visitor.longField(fieldInfo, Long.parseLong(scratchUTF16.toString()));
} else if (type == TYPE_FLOAT) {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+VALUE.length, scratch.length-VALUE.length, scratchUTF16);
visitor.floatField(fieldInfo, Float.parseFloat(scratchUTF16.toString()));
} else if (type == TYPE_DOUBLE) {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+VALUE.length, scratch.length-VALUE.length, scratchUTF16);
visitor.doubleField(fieldInfo, Double.parseDouble(scratchUTF16.toString()));
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
Override
public void close() throws IOException {
try {
IOUtils.close(in);
} finally {
in = null;
offsets = null;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
private void readLine() throws IOException {
SimpleTextUtil.readLine(in, scratch);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsReader.java
private int parseIntAt(int offset) throws IOException {
UnicodeUtil.UTF8toUTF16(scratch.bytes, scratch.offset+offset, scratch.length-offset, scratchUTF16);
return ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java
@Override
public MutableBits newLiveDocs(int size) throws IOException {
return new SimpleTextMutableBits(size);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java
@Override
public MutableBits newLiveDocs(Bits existing) throws IOException {
final SimpleTextBits bits = (SimpleTextBits) existing;
return new SimpleTextMutableBits((BitSet)bits.bits.clone(), bits.size);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java
@Override
public Bits readLiveDocs(Directory dir, SegmentInfoPerCommit info, IOContext context) throws IOException {
assert info.hasDeletions();
BytesRef scratch = new BytesRef();
CharsRef scratchUTF16 = new CharsRef();
String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getDelGen());
IndexInput in = null;
boolean success = false;
try {
in = dir.openInput(fileName, context);
SimpleTextUtil.readLine(in, scratch);
assert StringHelper.startsWith(scratch, SIZE);
int size = parseIntAt(scratch, SIZE.length, scratchUTF16);
BitSet bits = new BitSet(size);
SimpleTextUtil.readLine(in, scratch);
while (!scratch.equals(END)) {
assert StringHelper.startsWith(scratch, DOC);
int docid = parseIntAt(scratch, DOC.length, scratchUTF16);
bits.set(docid);
SimpleTextUtil.readLine(in, scratch);
}
success = true;
return new SimpleTextBits(bits, size);
} finally {
if (success) {
IOUtils.close(in);
} else {
IOUtils.closeWhileHandlingException(in);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java
private int parseIntAt(BytesRef bytes, int offset, CharsRef scratch) throws IOException {
UnicodeUtil.UTF8toUTF16(bytes.bytes, bytes.offset+offset, bytes.length-offset, scratch);
return ArrayUtil.parseInt(scratch.chars, 0, scratch.length);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java
@Override
public void writeLiveDocs(MutableBits bits, Directory dir, SegmentInfoPerCommit info, int newDelCount, IOContext context) throws IOException {
BitSet set = ((SimpleTextBits) bits).bits;
int size = bits.length();
BytesRef scratch = new BytesRef();
String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getNextDelGen());
IndexOutput out = null;
boolean success = false;
try {
out = dir.createOutput(fileName, context);
SimpleTextUtil.write(out, SIZE);
SimpleTextUtil.write(out, Integer.toString(size), scratch);
SimpleTextUtil.writeNewline(out);
for (int i = set.nextSetBit(0); i >= 0; i=set.nextSetBit(i + 1)) {
SimpleTextUtil.write(out, DOC);
SimpleTextUtil.write(out, Integer.toString(i), scratch);
SimpleTextUtil.writeNewline(out);
}
SimpleTextUtil.write(out, END);
SimpleTextUtil.writeNewline(out);
success = true;
} finally {
if (success) {
IOUtils.close(out);
} else {
IOUtils.closeWhileHandlingException(out);
}
}
}
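// --- illustrative sketch (not from the Lucene sources) ---
// Minimal, self-contained sketch of the text layout that readLiveDocs()/writeLiveDocs()
// above parse and produce: one size line, one doc line per set bit, then an END line.
// The literal markers below are assumptions standing in for the SIZE/DOC/END constants
// defined elsewhere in SimpleTextLiveDocsFormat; java.util.BitSet stands in for the
// SimpleTextBits/SimpleTextMutableBits wrappers.
import java.util.BitSet;

public class SimpleTextLiveDocsLayoutSketch {
  static String write(BitSet bits, int size) {
    StringBuilder sb = new StringBuilder();
    sb.append("size ").append(size).append('\n');
    for (int i = bits.nextSetBit(0); i >= 0; i = bits.nextSetBit(i + 1)) {
      sb.append("  doc ").append(i).append('\n');   // one line per live doc
    }
    sb.append("END\n");
    return sb.toString();
  }

  static BitSet read(String text) {
    BitSet bits = new BitSet();
    for (String line : text.split("\n")) {
      if (line.startsWith("  doc ")) {
        bits.set(Integer.parseInt(line.substring("  doc ".length())));
      }
    }
    return bits;
  }

  public static void main(String[] args) {
    BitSet live = new BitSet(8);
    live.set(0); live.set(3); live.set(7);
    String file = write(live, 8);
    System.out.print(file);
    System.out.println("round-trip ok: " + read(file).equals(live)); // true
  }
}
// --- end of sketch ---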
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextLiveDocsFormat.java
@Override
public void files(SegmentInfoPerCommit info, Collection<String> files) throws IOException {
if (info.hasDeletions()) {
files.add(IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getDelGen()));
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java
@Override
public void startDocument(int numStoredFields) throws IOException {
write(DOC);
write(Integer.toString(numDocsWritten));
newLine();
write(NUM);
write(Integer.toString(numStoredFields));
newLine();
numDocsWritten++;
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java
@Override
public void writeField(FieldInfo info, IndexableField field) throws IOException {
write(FIELD);
write(Integer.toString(info.number));
newLine();
write(NAME);
write(field.name());
newLine();
write(TYPE);
final Number n = field.numericValue();
if (n != null) {
if (n instanceof Byte || n instanceof Short || n instanceof Integer) {
write(TYPE_INT);
newLine();
write(VALUE);
write(Integer.toString(n.intValue()));
newLine();
} else if (n instanceof Long) {
write(TYPE_LONG);
newLine();
write(VALUE);
write(Long.toString(n.longValue()));
newLine();
} else if (n instanceof Float) {
write(TYPE_FLOAT);
newLine();
write(VALUE);
write(Float.toString(n.floatValue()));
newLine();
} else if (n instanceof Double) {
write(TYPE_DOUBLE);
newLine();
write(VALUE);
write(Double.toString(n.doubleValue()));
newLine();
} else {
throw new IllegalArgumentException("cannot store numeric type " + n.getClass());
}
} else {
BytesRef bytes = field.binaryValue();
if (bytes != null) {
write(TYPE_BINARY);
newLine();
write(VALUE);
write(bytes);
newLine();
} else if (field.stringValue() == null) {
throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue");
} else {
write(TYPE_STRING);
newLine();
write(VALUE);
write(field.stringValue());
newLine();
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java
@Override
public void finish(FieldInfos fis, int numDocs) throws IOException {
if (numDocsWritten != numDocs) {
throw new RuntimeException("mergeFields produced an invalid result: docCount is " + numDocs
+ " but only saw " + numDocsWritten + " file=" + out.toString() + "; now aborting this merge to prevent index corruption");
}
write(END);
newLine();
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java
@Override
public void close() throws IOException {
try {
IOUtils.close(out);
} finally {
out = null;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java
private void write(String s) throws IOException {
SimpleTextUtil.write(out, s, scratch);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java
private void write(BytesRef bytes) throws IOException {
SimpleTextUtil.write(out, bytes);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextStoredFieldsWriter.java
private void newLine() throws IOException {
SimpleTextUtil.writeNewline(out);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextSegmentInfoWriter.java
@Override
public void write(Directory dir, SegmentInfo si, FieldInfos fis, IOContext ioContext) throws IOException {
String segFileName = IndexFileNames.segmentFileName(si.name, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
si.addFile(segFileName);
boolean success = false;
IndexOutput output = dir.createOutput(segFileName, ioContext);
try {
BytesRef scratch = new BytesRef();
SimpleTextUtil.write(output, SI_VERSION);
SimpleTextUtil.write(output, si.getVersion(), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_DOCCOUNT);
SimpleTextUtil.write(output, Integer.toString(si.getDocCount()), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_USECOMPOUND);
SimpleTextUtil.write(output, Boolean.toString(si.getUseCompoundFile()), scratch);
SimpleTextUtil.writeNewline(output);
Map<String,String> diagnostics = si.getDiagnostics();
int numDiagnostics = diagnostics == null ? 0 : diagnostics.size();
SimpleTextUtil.write(output, SI_NUM_DIAG);
SimpleTextUtil.write(output, Integer.toString(numDiagnostics), scratch);
SimpleTextUtil.writeNewline(output);
if (numDiagnostics > 0) {
for (Map.Entry<String,String> diagEntry : diagnostics.entrySet()) {
SimpleTextUtil.write(output, SI_DIAG_KEY);
SimpleTextUtil.write(output, diagEntry.getKey(), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_DIAG_VALUE);
SimpleTextUtil.write(output, diagEntry.getValue(), scratch);
SimpleTextUtil.writeNewline(output);
}
}
Map<String,String> atts = si.attributes();
int numAtts = atts == null ? 0 : atts.size();
SimpleTextUtil.write(output, SI_NUM_ATTS);
SimpleTextUtil.write(output, Integer.toString(numAtts), scratch);
SimpleTextUtil.writeNewline(output);
if (numAtts > 0) {
for (Map.Entry<String,String> entry : atts.entrySet()) {
SimpleTextUtil.write(output, SI_ATT_KEY);
SimpleTextUtil.write(output, entry.getKey(), scratch);
SimpleTextUtil.writeNewline(output);
SimpleTextUtil.write(output, SI_ATT_VALUE);
SimpleTextUtil.write(output, entry.getValue(), scratch);
SimpleTextUtil.writeNewline(output);
}
}
Set<String> files = si.files();
int numFiles = files == null ? 0 : files.size();
SimpleTextUtil.write(output, SI_NUM_FILES);
SimpleTextUtil.write(output, Integer.toString(numFiles), scratch);
SimpleTextUtil.writeNewline(output);
if (numFiles > 0) {
for(String fileName : files) {
SimpleTextUtil.write(output, SI_FILE);
SimpleTextUtil.write(output, fileName, scratch);
SimpleTextUtil.writeNewline(output);
}
}
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(output);
} else {
output.close();
}
}
}
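// --- illustrative sketch (not from the Lucene sources) ---
// write() above emits the segment info as labelled key/value lines: version, doc
// count, compound-file flag, then counted lists of diagnostics, attributes and file
// names. The literal labels below are placeholders for the SI_* constants defined
// elsewhere in the SimpleText segment-info classes; they only show the shape of the
// output for a made-up segment.
import java.util.LinkedHashMap;
import java.util.Map;

public class SimpleTextSegmentInfoLayoutSketch {
  public static void main(String[] args) {
    Map<String,String> diagnostics = new LinkedHashMap<>();
    diagnostics.put("source", "flush");
    String[] files = {"_0.si", "_0.fld", "_0.pst"};

    StringBuilder sb = new StringBuilder();
    sb.append("    version 4.0\n");
    sb.append("    number of documents 42\n");
    sb.append("    uses compound file false\n");
    sb.append("    diagnostics ").append(diagnostics.size()).append('\n');
    for (Map.Entry<String,String> e : diagnostics.entrySet()) {
      sb.append("      key ").append(e.getKey()).append('\n');
      sb.append("      value ").append(e.getValue()).append('\n');
    }
    sb.append("    attributes 0\n");
    sb.append("    files ").append(files.length).append('\n');
    for (String f : files) {
      sb.append("      file ").append(f).append('\n');
    }
    System.out.print(sb);
  }
}
// --- end of sketch ---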
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextUtil.java
public static void write(DataOutput out, String s, BytesRef scratch) throws IOException {
UnicodeUtil.UTF16toUTF8(s, 0, s.length(), scratch);
write(out, scratch);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextUtil.java
public static void write(DataOutput out, BytesRef b) throws IOException {
for(int i=0;i<b.length;i++) {
final byte bx = b.bytes[b.offset+i];
if (bx == NEWLINE || bx == ESCAPE) {
out.writeByte(ESCAPE);
}
out.writeByte(bx);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextUtil.java
public static void writeNewline(DataOutput out) throws IOException {
out.writeByte(NEWLINE);
}
// in lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextUtil.java
public static void readLine(DataInput in, BytesRef scratch) throws IOException {
int upto = 0;
while(true) {
byte b = in.readByte();
if (scratch.bytes.length == upto) {
scratch.grow(1+upto);
}
if (b == ESCAPE) {
scratch.bytes[upto++] = in.readByte();
} else {
if (b == NEWLINE) {
break;
} else {
scratch.bytes[upto++] = b;
}
}
}
scratch.offset = 0;
scratch.length = upto;
}
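// --- illustrative sketch (not from the Lucene sources) ---
// The write()/readLine() pair above escape any NEWLINE or ESCAPE byte by prefixing
// it with ESCAPE, so a single unescaped NEWLINE always terminates a line. This
// standalone sketch mirrors that scheme with plain byte arrays; the concrete values
// chosen for NEWLINE and ESCAPE are assumptions made for the example only.
import java.io.ByteArrayOutputStream;

public class SimpleTextEscapingSketch {
  static final byte NEWLINE = '\n';
  static final byte ESCAPE  = '\\';

  static byte[] writeLine(byte[] payload) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    for (byte b : payload) {
      if (b == NEWLINE || b == ESCAPE) {
        out.write(ESCAPE);          // escape the special byte
      }
      out.write(b);
    }
    out.write(NEWLINE);             // an unescaped newline ends the line
    return out.toByteArray();
  }

  static byte[] readLine(byte[] data) {
    ByteArrayOutputStream line = new ByteArrayOutputStream();
    for (int i = 0; i < data.length; i++) {
      byte b = data[i];
      if (b == ESCAPE) {
        line.write(data[++i]);      // take the next byte literally
      } else if (b == NEWLINE) {
        break;                      // end of line
      } else {
        line.write(b);
      }
    }
    return line.toByteArray();
  }

  public static void main(String[] args) {
    byte[] payload = "a\\b\nc".getBytes();
    byte[] encoded = writeLine(payload);
    System.out.println(new String(readLine(encoded)).equals(new String(payload))); // true
  }
}
// --- end of sketch ---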
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
protected void writeHeader(IndexOutput out) throws IOException {
CodecUtil.writeHeader(out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT);
out.writeLong(0); // leave space for end index pointer
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
protected void writeIndexHeader(IndexOutput out) throws IOException {
CodecUtil.writeHeader(out, TERMS_INDEX_CODEC_NAME, TERMS_INDEX_VERSION_CURRENT);
out.writeLong(0); // leave space for end index pointer
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
protected void writeTrailer(IndexOutput out, long dirStart) throws IOException {
out.seek(CodecUtil.headerLength(TERMS_CODEC_NAME));
out.writeLong(dirStart);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
protected void writeIndexTrailer(IndexOutput indexOut, long dirStart) throws IOException {
indexOut.seek(CodecUtil.headerLength(TERMS_INDEX_CODEC_NAME));
indexOut.writeLong(dirStart);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
@Override
public TermsConsumer addField(FieldInfo field) throws IOException {
//DEBUG = field.name.equals("id");
//if (DEBUG) System.out.println("\nBTTW.addField seg=" + segment + " field=" + field.name);
assert currentField == null || currentField.name.compareTo(field.name) < 0;
currentField = field;
final TermsWriter terms = new TermsWriter(field);
fields.add(terms);
return terms;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
public void compileIndex(List<PendingBlock> floorBlocks, RAMOutputStream scratchBytes) throws IOException {
assert (isFloor && floorBlocks != null && floorBlocks.size() != 0) || (!isFloor && floorBlocks == null): "isFloor=" + isFloor + " floorBlocks=" + floorBlocks;
assert scratchBytes.getFilePointer() == 0;
// TODO: try writing the leading vLong in MSB order
// (opposite of what Lucene does today), for better
// outputs sharing in the FST
scratchBytes.writeVLong(encodeOutput(fp, hasTerms, isFloor));
if (isFloor) {
scratchBytes.writeVInt(floorBlocks.size());
for (PendingBlock sub : floorBlocks) {
assert sub.floorLeadByte != -1;
//if (DEBUG) {
// System.out.println(" write floorLeadByte=" + Integer.toHexString(sub.floorLeadByte&0xff));
//}
scratchBytes.writeByte((byte) sub.floorLeadByte);
assert sub.fp > fp;
scratchBytes.writeVLong((sub.fp - fp) << 1 | (sub.hasTerms ? 1 : 0));
}
}
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
final Builder<BytesRef> indexBuilder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1,
0, 0, true, false, Integer.MAX_VALUE,
outputs, null, false);
//if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix);
//}
//indexBuilder.DEBUG = false;
final byte[] bytes = new byte[(int) scratchBytes.getFilePointer()];
assert bytes.length > 0;
scratchBytes.writeTo(bytes, 0);
indexBuilder.add(Util.toIntsRef(prefix, scratchIntsRef), new BytesRef(bytes, 0, bytes.length));
scratchBytes.reset();
// Copy over index for all sub-blocks
if (subIndices != null) {
for(FST<BytesRef> subIndex : subIndices) {
append(indexBuilder, subIndex);
}
}
if (floorBlocks != null) {
for (PendingBlock sub : floorBlocks) {
if (sub.subIndices != null) {
for(FST<BytesRef> subIndex : sub.subIndices) {
append(indexBuilder, subIndex);
}
}
sub.subIndices = null;
}
}
index = indexBuilder.finish();
subIndices = null;
/*
Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"));
Util.toDot(index, w, false, false);
System.out.println("SAVED to out.dot");
w.close();
*/
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex) throws IOException {
final BytesRefFSTEnum<BytesRef> subIndexEnum = new BytesRefFSTEnum<BytesRef>(subIndex);
BytesRefFSTEnum.InputOutput<BytesRef> indexEnt;
while((indexEnt = subIndexEnum.next()) != null) {
//if (DEBUG) {
// System.out.println(" add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
//}
builder.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), indexEnt.output);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
@Override
public void freeze(final Builder.UnCompiledNode<Object>[] frontier, int prefixLenPlus1, final IntsRef lastInput) throws IOException {
//if (DEBUG) System.out.println(" freeze prefixLenPlus1=" + prefixLenPlus1);
for(int idx=lastInput.length; idx >= prefixLenPlus1; idx--) {
final Builder.UnCompiledNode<Object> node = frontier[idx];
long totCount = 0;
if (node.isFinal) {
totCount++;
}
for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
@SuppressWarnings("unchecked") final Builder.UnCompiledNode<Object> target = (Builder.UnCompiledNode<Object>) node.arcs[arcIdx].target;
totCount += target.inputCount;
target.clear();
node.arcs[arcIdx].target = null;
}
node.numArcs = 0;
if (totCount >= minItemsInBlock || idx == 0) {
// We are on a prefix node that has enough
// entries (terms or sub-blocks) under it to let
// us write a new block or multiple blocks (main
// block + follow on floor blocks):
//if (DEBUG) {
// if (totCount < minItemsInBlock && idx != 0) {
// System.out.println(" force block has terms");
// }
//}
writeBlocks(lastInput, idx, (int) totCount);
node.inputCount = 1;
} else {
// stragglers! carry count upwards
node.inputCount = totCount;
}
frontier[idx] = new Builder.UnCompiledNode<Object>(blockBuilder, idx);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
void writeBlocks(IntsRef prevTerm, int prefixLength, int count) throws IOException {
if (prefixLength == 0 || count <= maxItemsInBlock) {
// Easy case: not floor block. Eg, prefix is "foo",
// and we found 30 terms/sub-blocks starting w/ that
// prefix, and minItemsInBlock <= 30 <=
// maxItemsInBlock.
final PendingBlock nonFloorBlock = writeBlock(prevTerm, prefixLength, prefixLength, count, count, 0, false, -1, true);
nonFloorBlock.compileIndex(null, scratchBytes);
pending.add(nonFloorBlock);
} else {
// Floor block case. Eg, prefix is "foo" but we
// have 100 terms/sub-blocks starting w/ that
// prefix. We segment the entries into a primary
// block and following floor blocks using the first
// label in the suffix to assign to floor blocks.
// TODO: we could store min & max suffix start byte
// in each block, to make floor blocks authoritative
//if (DEBUG) {
// final BytesRef prefix = new BytesRef(prefixLength);
// for(int m=0;m<prefixLength;m++) {
// prefix.bytes[m] = (byte) prevTerm.ints[m];
// }
// prefix.length = prefixLength;
// //System.out.println("\nWBS count=" + count + " prefix=" + prefix.utf8ToString() + " " + prefix);
// System.out.println("writeBlocks: prefix=" + prefix + " " + prefix + " count=" + count + " pending.size()=" + pending.size());
//}
//System.out.println("\nwbs count=" + count);
final int savLabel = prevTerm.ints[prevTerm.offset + prefixLength];
// Count up how many items fall under
// each unique label after the prefix.
// TODO: this is wasteful since the builder had
// already done this (partitioned these sub-terms
// according to their leading prefix byte)
final List<PendingEntry> slice = pending.subList(pending.size()-count, pending.size());
int lastSuffixLeadLabel = -1;
int termCount = 0;
int subCount = 0;
int numSubs = 0;
for(PendingEntry ent : slice) {
// First byte in the suffix of this term
final int suffixLeadLabel;
if (ent.isTerm) {
PendingTerm term = (PendingTerm) ent;
if (term.term.length == prefixLength) {
// Suffix is 0, ie prefix 'foo' and term is
// 'foo' so the term has empty string suffix
// in this block
assert lastSuffixLeadLabel == -1;
assert numSubs == 0;
suffixLeadLabel = -1;
} else {
suffixLeadLabel = term.term.bytes[term.term.offset + prefixLength] & 0xff;
}
} else {
PendingBlock block = (PendingBlock) ent;
assert block.prefix.length > prefixLength;
suffixLeadLabel = block.prefix.bytes[block.prefix.offset + prefixLength] & 0xff;
}
if (suffixLeadLabel != lastSuffixLeadLabel && (termCount + subCount) != 0) {
if (subBytes.length == numSubs) {
subBytes = ArrayUtil.grow(subBytes);
subTermCounts = ArrayUtil.grow(subTermCounts);
subSubCounts = ArrayUtil.grow(subSubCounts);
}
subBytes[numSubs] = lastSuffixLeadLabel;
lastSuffixLeadLabel = suffixLeadLabel;
subTermCounts[numSubs] = termCount;
subSubCounts[numSubs] = subCount;
/*
if (suffixLeadLabel == -1) {
System.out.println(" sub " + -1 + " termCount=" + termCount + " subCount=" + subCount);
} else {
System.out.println(" sub " + Integer.toHexString(suffixLeadLabel) + " termCount=" + termCount + " subCount=" + subCount);
}
*/
termCount = subCount = 0;
numSubs++;
}
if (ent.isTerm) {
termCount++;
} else {
subCount++;
}
}
if (subBytes.length == numSubs) {
subBytes = ArrayUtil.grow(subBytes);
subTermCounts = ArrayUtil.grow(subTermCounts);
subSubCounts = ArrayUtil.grow(subSubCounts);
}
subBytes[numSubs] = lastSuffixLeadLabel;
subTermCounts[numSubs] = termCount;
subSubCounts[numSubs] = subCount;
numSubs++;
/*
if (lastSuffixLeadLabel == -1) {
System.out.println(" sub " + -1 + " termCount=" + termCount + " subCount=" + subCount);
} else {
System.out.println(" sub " + Integer.toHexString(lastSuffixLeadLabel) + " termCount=" + termCount + " subCount=" + subCount);
}
*/
if (subTermCountSums.length < numSubs) {
subTermCountSums = ArrayUtil.grow(subTermCountSums, numSubs);
}
// Roll up (backwards) the termCounts; postings impl
// needs this to know where to pull the term slice
// from its pending terms stack:
int sum = 0;
for(int idx=numSubs-1;idx>=0;idx--) {
sum += subTermCounts[idx];
subTermCountSums[idx] = sum;
}
// TODO: make a better segmenter? It'd have to
// absorb the too-small end blocks backwards into
// the previous blocks
// Naive greedy segmentation; this is not always
// best (it can produce a too-small block as the
// last block):
int pendingCount = 0;
int startLabel = subBytes[0];
int curStart = count;
subCount = 0;
final List<PendingBlock> floorBlocks = new ArrayList<PendingBlock>();
PendingBlock firstBlock = null;
for(int sub=0;sub<numSubs;sub++) {
pendingCount += subTermCounts[sub] + subSubCounts[sub];
//System.out.println(" " + (subTermCounts[sub] + subSubCounts[sub]));
subCount++;
// Greedily make a floor block as soon as we've
// crossed the min count
if (pendingCount >= minItemsInBlock) {
final int curPrefixLength;
if (startLabel == -1) {
curPrefixLength = prefixLength;
} else {
curPrefixLength = 1+prefixLength;
// floor term:
prevTerm.ints[prevTerm.offset + prefixLength] = startLabel;
}
//System.out.println(" " + subCount + " subs");
final PendingBlock floorBlock = writeBlock(prevTerm, prefixLength, curPrefixLength, curStart, pendingCount, subTermCountSums[1+sub], true, startLabel, curStart == pendingCount);
if (firstBlock == null) {
firstBlock = floorBlock;
} else {
floorBlocks.add(floorBlock);
}
curStart -= pendingCount;
//System.out.println(" = " + pendingCount);
pendingCount = 0;
assert minItemsInBlock == 1 || subCount > 1: "minItemsInBlock=" + minItemsInBlock + " subCount=" + subCount + " sub=" + sub + " of " + numSubs + " subTermCount=" + subTermCountSums[sub] + " subSubCount=" + subSubCounts[sub] + " depth=" + prefixLength;
subCount = 0;
startLabel = subBytes[sub+1];
if (curStart == 0) {
break;
}
if (curStart <= maxItemsInBlock) {
// remainder is small enough to fit into a
// block. NOTE that this may be too small (<
// minItemsInBlock); need a true segmenter
// here
assert startLabel != -1;
assert firstBlock != null;
prevTerm.ints[prevTerm.offset + prefixLength] = startLabel;
//System.out.println(" final " + (numSubs-sub-1) + " subs");
/*
for(sub++;sub < numSubs;sub++) {
System.out.println(" " + (subTermCounts[sub] + subSubCounts[sub]));
}
System.out.println(" = " + curStart);
if (curStart < minItemsInBlock) {
System.out.println(" **");
}
*/
floorBlocks.add(writeBlock(prevTerm, prefixLength, prefixLength+1, curStart, curStart, 0, true, startLabel, true));
break;
}
}
}
prevTerm.ints[prevTerm.offset + prefixLength] = savLabel;
assert firstBlock != null;
firstBlock.compileIndex(floorBlocks, scratchBytes);
pending.add(firstBlock);
//if (DEBUG) System.out.println(" done pending.size()=" + pending.size());
}
lastBlockIndex = pending.size()-1;
}
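// --- illustrative sketch (not from the Lucene sources) ---
// Minimal sketch of the greedy floor-block segmentation the loop above performs for
// a prefix with too many entries: entries are bucketed by the next label byte, labels
// are accumulated until at least minItemsInBlock items are pending, a block is cut,
// and a remainder small enough to fit (<= maxItemsInBlock) becomes one final block
// (which, as the comments above note, may end up smaller than minItemsInBlock).
// The counts and thresholds here are made up for the example.
import java.util.ArrayList;
import java.util.List;

public class FloorBlockSegmentationSketch {
  // returns the number of items placed in each emitted block
  static List<Integer> segment(int[] countPerLabel, int minItemsInBlock, int maxItemsInBlock) {
    List<Integer> blocks = new ArrayList<>();
    int remaining = 0;
    for (int c : countPerLabel) remaining += c;
    int pending = 0;
    for (int i = 0; i < countPerLabel.length; i++) {
      pending += countPerLabel[i];
      if (pending >= minItemsInBlock) {
        blocks.add(pending);          // greedily cut a floor block
        remaining -= pending;
        pending = 0;
        if (remaining == 0) break;
        if (remaining <= maxItemsInBlock) {
          blocks.add(remaining);      // remainder fits in one (possibly small) block
          break;
        }
      }
    }
    if (pending > 0) blocks.add(pending); // trailing stragglers become a final block
    return blocks;
  }

  public static void main(String[] args) {
    // 100 entries under one prefix, bucketed by the next byte; min=25, max=48
    int[] counts = {10, 12, 9, 20, 5, 8, 16, 20};
    System.out.println(segment(counts, 25, 48)); // [31, 25, 44]
  }
}
// --- end of sketch ---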
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
private PendingBlock writeBlock(IntsRef prevTerm, int prefixLength, int indexPrefixLength, int startBackwards, int length,
int futureTermCount, boolean isFloor, int floorLeadByte, boolean isLastInFloor) throws IOException {
assert length > 0;
final int start = pending.size()-startBackwards;
assert start >= 0: "pending.size()=" + pending.size() + " startBackwards=" + startBackwards + " length=" + length;
final List<PendingEntry> slice = pending.subList(start, start + length);
final long startFP = out.getFilePointer();
final BytesRef prefix = new BytesRef(indexPrefixLength);
for(int m=0;m<indexPrefixLength;m++) {
prefix.bytes[m] = (byte) prevTerm.ints[m];
}
prefix.length = indexPrefixLength;
// Write block header:
out.writeVInt((length<<1)|(isLastInFloor ? 1:0));
// if (DEBUG) {
// System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + toString(prefix) + " entCount=" + length + " startFP=" + startFP + " futureTermCount=" + futureTermCount + (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + isLastInFloor);
// }
// 1st pass: pack term suffix bytes into byte[] blob
// TODO: cutover to bulk int codec... simple64?
final boolean isLeafBlock;
if (lastBlockIndex < start) {
// This block definitely does not contain sub-blocks:
isLeafBlock = true;
//System.out.println("no scan true isFloor=" + isFloor);
} else if (!isFloor) {
// This block definitely does contain at least one sub-block:
isLeafBlock = false;
//System.out.println("no scan false " + lastBlockIndex + " vs start=" + start + " len=" + length);
} else {
// Must scan up-front to see if there is a sub-block
boolean v = true;
//System.out.println("scan " + lastBlockIndex + " vs start=" + start + " len=" + length);
for (PendingEntry ent : slice) {
if (!ent.isTerm) {
v = false;
break;
}
}
isLeafBlock = v;
}
final List<FST<BytesRef>> subIndices;
int termCount;
if (isLeafBlock) {
subIndices = null;
for (PendingEntry ent : slice) {
assert ent.isTerm;
PendingTerm term = (PendingTerm) ent;
final int suffix = term.term.length - prefixLength;
// if (DEBUG) {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + suffixBytes);
// }
// For leaf block we write suffix straight
bytesWriter.writeVInt(suffix);
bytesWriter.writeBytes(term.term.bytes, prefixLength, suffix);
// Write term stats, to separate byte[] blob:
bytesWriter2.writeVInt(term.stats.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
assert term.stats.totalTermFreq >= term.stats.docFreq;
bytesWriter2.writeVLong(term.stats.totalTermFreq - term.stats.docFreq);
}
}
termCount = length;
} else {
subIndices = new ArrayList<FST<BytesRef>>();
termCount = 0;
for (PendingEntry ent : slice) {
if (ent.isTerm) {
PendingTerm term = (PendingTerm) ent;
final int suffix = term.term.length - prefixLength;
// if (DEBUG) {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write term suffix=" + suffixBytes);
// }
// For non-leaf block we borrow 1 bit to record
// if entry is term or sub-block
bytesWriter.writeVInt(suffix<<1);
bytesWriter.writeBytes(term.term.bytes, prefixLength, suffix);
// Write term stats, to separate byte[] blob:
bytesWriter2.writeVInt(term.stats.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
assert term.stats.totalTermFreq >= term.stats.docFreq;
bytesWriter2.writeVLong(term.stats.totalTermFreq - term.stats.docFreq);
}
termCount++;
} else {
PendingBlock block = (PendingBlock) ent;
final int suffix = block.prefix.length - prefixLength;
assert suffix > 0;
// For non-leaf block we borrow 1 bit to record
// if entry is term or sub-block
bytesWriter.writeVInt((suffix<<1)|1);
bytesWriter.writeBytes(block.prefix.bytes, prefixLength, suffix);
assert block.fp < startFP;
// if (DEBUG) {
// BytesRef suffixBytes = new BytesRef(suffix);
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
// suffixBytes.length = suffix;
// System.out.println(" write sub-block suffix=" + toString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
// }
bytesWriter.writeVLong(startFP - block.fp);
subIndices.add(block.index);
}
}
assert subIndices.size() != 0;
}
// TODO: we could block-write the term suffix pointers;
// this would take more space but would enable binary
// search on lookup
// Write suffixes byte[] blob to terms dict output:
out.writeVInt((int) (bytesWriter.getFilePointer() << 1) | (isLeafBlock ? 1:0));
bytesWriter.writeTo(out);
bytesWriter.reset();
// Write term stats byte[] blob
out.writeVInt((int) bytesWriter2.getFilePointer());
bytesWriter2.writeTo(out);
bytesWriter2.reset();
// Have postings writer write block
postingsWriter.flushTermsBlock(futureTermCount+termCount, termCount);
// Remove slice replaced by block:
slice.clear();
if (lastBlockIndex >= start) {
if (lastBlockIndex < start+length) {
lastBlockIndex = start;
} else {
lastBlockIndex -= length;
}
}
// if (DEBUG) {
// System.out.println(" fpEnd=" + out.getFilePointer());
// }
return new PendingBlock(prefix, startFP, termCount != 0, isFloor, floorLeadByte, subIndices);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
@Override
public PostingsConsumer startTerm(BytesRef text) throws IOException {
//if (DEBUG) System.out.println("\nBTTW.startTerm term=" + fieldInfo.name + ":" + toString(text) + " seg=" + segment);
postingsWriter.startTerm();
/*
if (fieldInfo.name.equals("id")) {
postingsWriter.termID = Integer.parseInt(text.utf8ToString());
} else {
postingsWriter.termID = -1;
}
*/
return postingsWriter;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
assert stats.docFreq > 0;
//if (DEBUG) System.out.println("BTTW.finishTerm term=" + fieldInfo.name + ":" + toString(text) + " seg=" + segment + " df=" + stats.docFreq);
blockBuilder.add(Util.toIntsRef(text, scratchIntsRef), noOutputs.getNoOutput());
pending.add(new PendingTerm(BytesRef.deepCopyOf(text), stats));
postingsWriter.finishTerm(stats);
numTerms++;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
@Override
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
if (numTerms > 0) {
blockBuilder.finish();
// We better have one final "root" block:
assert pending.size() == 1 && !pending.get(0).isTerm: "pending.size()=" + pending.size() + " pending=" + pending;
final PendingBlock root = (PendingBlock) pending.get(0);
assert root.prefix.length == 0;
assert root.index.getEmptyOutput() != null;
this.sumTotalTermFreq = sumTotalTermFreq;
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
// Write FST to index
indexStartFP = indexOut.getFilePointer();
root.index.save(indexOut);
//System.out.println(" write FST " + indexStartFP + " field=" + fieldInfo.name);
// if (SAVE_DOT_FILES || DEBUG) {
// final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
// Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
// Util.toDot(root.index, w, false, false);
// System.out.println("SAVED to " + dotFileName);
// w.close();
// }
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
@Override
public void close() throws IOException {
IOException ioe = null;
try {
int nonZeroCount = 0;
for(TermsWriter field : fields) {
if (field.numTerms > 0) {
nonZeroCount++;
}
}
final long dirStart = out.getFilePointer();
final long indexDirStart = indexOut.getFilePointer();
out.writeVInt(nonZeroCount);
for(TermsWriter field : fields) {
if (field.numTerms > 0) {
//System.out.println(" field " + field.fieldInfo.name + " " + field.numTerms + " terms");
out.writeVInt(field.fieldInfo.number);
out.writeVLong(field.numTerms);
final BytesRef rootCode = ((PendingBlock) field.pending.get(0)).index.getEmptyOutput();
assert rootCode != null: "field=" + field.fieldInfo.name + " numTerms=" + field.numTerms;
out.writeVInt(rootCode.length);
out.writeBytes(rootCode.bytes, rootCode.offset, rootCode.length);
if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
out.writeVLong(field.sumTotalTermFreq);
}
out.writeVLong(field.sumDocFreq);
out.writeVInt(field.docCount);
indexOut.writeVLong(field.indexStartFP);
}
}
writeTrailer(out, dirStart);
writeIndexTrailer(indexOut, indexDirStart);
} catch (IOException ioe2) {
ioe = ioe2;
} finally {
IOUtils.closeWhileHandlingException(ioe, out, indexOut, postingsWriter);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase docsWriter = wrappedPostingsBaseFormat.postingsWriterBase(state);
// Terms that have <= freqCutoff number of docs are
// "pulsed" (inlined):
PostingsWriterBase pulsingWriter = new PulsingPostingsWriter(freqCutoff, docsWriter);
// Terms dict
boolean success = false;
try {
FieldsConsumer ret = new BlockTreeTermsWriter(state, pulsingWriter, minBlockSize, maxBlockSize);
success = true;
return ret;
} finally {
if (!success) {
pulsingWriter.close();
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase docsReader = wrappedPostingsBaseFormat.postingsReaderBase(state);
PostingsReaderBase pulsingReader = new PulsingPostingsReader(docsReader);
boolean success = false;
try {
FieldsProducer ret = new BlockTreeTermsReader(
state.dir, state.fieldInfos, state.segmentInfo.name,
pulsingReader,
state.context,
state.segmentSuffix,
state.termsIndexDivisor);
success = true;
return ret;
} finally {
if (!success) {
pulsingReader.close();
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java
@Override
public void start(IndexOutput termsOut) throws IOException {
this.termsOut = termsOut;
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
termsOut.writeVInt(pending.length); // encode maxPositions in header
wrappedPostingsWriter.start(termsOut);
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
assert docID >= 0: "got docID=" + docID;
/*
if (termID != -1) {
if (docID == 0) {
baseDocID = termID;
} else if (baseDocID + docID != termID) {
throw new RuntimeException("WRITE: baseDocID=" + baseDocID + " docID=" + docID + " termID=" + termID);
}
}
*/
if (DEBUG) System.out.println("PW doc=" + docID);
if (pendingCount == pending.length) {
push();
if (DEBUG) System.out.println("PW: wrapped.finishDoc");
wrappedPostingsWriter.finishDoc();
}
if (pendingCount != -1) {
assert pendingCount < pending.length;
currentDoc = pending[pendingCount];
currentDoc.docID = docID;
if (indexOptions == IndexOptions.DOCS_ONLY) {
pendingCount++;
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
pendingCount++;
currentDoc.termFreq = termDocFreq;
} else {
currentDoc.termFreq = termDocFreq;
}
} else {
// We've already seen too many docs for this term --
// just forward to our fallback writer
wrappedPostingsWriter.startDoc(docID, termDocFreq);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java
@Override
public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
if (DEBUG) System.out.println("PW pos=" + position + " payload=" + (payload == null ? "null" : payload.length + " bytes"));
if (pendingCount == pending.length) {
push();
}
if (pendingCount == -1) {
// We've already seen too many docs for this term --
// just forward to our fallback writer
wrappedPostingsWriter.addPosition(position, payload, startOffset, endOffset);
} else {
// buffer up
final Position pos = pending[pendingCount++];
pos.pos = position;
pos.startOffset = startOffset;
pos.endOffset = endOffset;
pos.docID = currentDoc.docID;
if (payload != null && payload.length > 0) {
if (pos.payload == null) {
pos.payload = BytesRef.deepCopyOf(payload);
} else {
pos.payload.copyBytes(payload);
}
} else if (pos.payload != null) {
pos.payload.length = 0;
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java
@Override
public void finishDoc() throws IOException {
if (DEBUG) System.out.println("PW finishDoc");
if (pendingCount == -1) {
wrappedPostingsWriter.finishDoc();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java
@Override
public void finishTerm(TermStats stats) throws IOException {
if (DEBUG) System.out.println("PW finishTerm docCount=" + stats.docFreq + " pendingCount=" + pendingCount + " pendingTerms.size()=" + pendingTerms.size());
assert pendingCount > 0 || pendingCount == -1;
if (pendingCount == -1) {
wrappedPostingsWriter.finishTerm(stats);
// Must add null entry to record terms that our
// wrapped postings impl added
pendingTerms.add(null);
} else {
// There were few enough total occurrences for this
// term, so we fully inline our postings data into
// terms dict, now:
// TODO: it'd be better to share this encoding logic
// in some inner codec that knows how to write a
// single doc / single position, etc. This way if a
// given codec wants to store other interesting
// stuff, it could use this pulsing codec to do so
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
int lastDocID = 0;
int pendingIDX = 0;
int lastPayloadLength = -1;
int lastOffsetLength = -1;
while(pendingIDX < pendingCount) {
final Position doc = pending[pendingIDX];
final int delta = doc.docID - lastDocID;
lastDocID = doc.docID;
if (DEBUG) System.out.println(" write doc=" + doc.docID + " freq=" + doc.termFreq);
if (doc.termFreq == 1) {
buffer.writeVInt((delta<<1)|1);
} else {
buffer.writeVInt(delta<<1);
buffer.writeVInt(doc.termFreq);
}
int lastPos = 0;
int lastOffset = 0;
for(int posIDX=0;posIDX<doc.termFreq;posIDX++) {
final Position pos = pending[pendingIDX++];
assert pos.docID == doc.docID;
final int posDelta = pos.pos - lastPos;
lastPos = pos.pos;
if (DEBUG) System.out.println(" write pos=" + pos.pos);
final int payloadLength = pos.payload == null ? 0 : pos.payload.length;
if (storePayloads) {
if (payloadLength != lastPayloadLength) {
buffer.writeVInt((posDelta << 1)|1);
buffer.writeVInt(payloadLength);
lastPayloadLength = payloadLength;
} else {
buffer.writeVInt(posDelta << 1);
}
} else {
buffer.writeVInt(posDelta);
}
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
//System.out.println("write=" + pos.startOffset + "," + pos.endOffset);
int offsetDelta = pos.startOffset - lastOffset;
int offsetLength = pos.endOffset - pos.startOffset;
if (offsetLength != lastOffsetLength) {
buffer.writeVInt(offsetDelta << 1 | 1);
buffer.writeVInt(offsetLength);
} else {
buffer.writeVInt(offsetDelta << 1);
}
lastOffset = pos.startOffset;
lastOffsetLength = offsetLength;
}
if (payloadLength > 0) {
assert storePayloads;
buffer.writeBytes(pos.payload.bytes, 0, pos.payload.length);
}
}
}
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
int lastDocID = 0;
for(int posIDX=0;posIDX<pendingCount;posIDX++) {
final Position doc = pending[posIDX];
final int delta = doc.docID - lastDocID;
assert doc.termFreq != 0;
if (doc.termFreq == 1) {
buffer.writeVInt((delta<<1)|1);
} else {
buffer.writeVInt(delta<<1);
buffer.writeVInt(doc.termFreq);
}
lastDocID = doc.docID;
}
} else if (indexOptions == IndexOptions.DOCS_ONLY) {
int lastDocID = 0;
for(int posIDX=0;posIDX<pendingCount;posIDX++) {
final Position doc = pending[posIDX];
buffer.writeVInt(doc.docID - lastDocID);
lastDocID = doc.docID;
}
}
final byte[] bytes = new byte[(int) buffer.getFilePointer()];
buffer.writeTo(bytes, 0);
pendingTerms.add(new PendingTerm(bytes));
buffer.reset();
}
pendingCount = 0;
}
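// --- illustrative sketch (not from the Lucene sources) ---
// The DOCS_AND_FREQS branch above encodes each inlined posting as a doc-id delta
// shifted left by one, with the low bit set when the term frequency is exactly 1
// (so the common freq==1 case costs no extra integer). This sketch shows that bit
// trick on plain int lists; the real writer additionally packs the values as VInts
// into the byte[] blob stored in the terms dictionary.
import java.util.ArrayList;
import java.util.List;

public class PulsingDocFreqEncodingSketch {
  static List<Integer> encode(int[] docIDs, int[] freqs) {
    List<Integer> out = new ArrayList<>();
    int lastDocID = 0;
    for (int i = 0; i < docIDs.length; i++) {
      int delta = docIDs[i] - lastDocID;
      lastDocID = docIDs[i];
      if (freqs[i] == 1) {
        out.add((delta << 1) | 1);   // low bit set: freq is implicitly 1
      } else {
        out.add(delta << 1);         // low bit clear: freq follows
        out.add(freqs[i]);
      }
    }
    return out;
  }

  static void decode(List<Integer> codes) {
    int docID = 0;
    for (int i = 0; i < codes.size(); i++) {
      int code = codes.get(i);
      docID += code >>> 1;
      int freq = (code & 1) != 0 ? 1 : codes.get(++i);
      System.out.println("doc=" + docID + " freq=" + freq);
    }
  }

  public static void main(String[] args) {
    decode(encode(new int[] {2, 5, 9}, new int[] {1, 3, 1}));
    // doc=2 freq=1 / doc=5 freq=3 / doc=9 freq=1
  }
}
// --- end of sketch ---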
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java
@Override
public void close() throws IOException {
wrappedPostingsWriter.close();
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java
@Override
public void flushTermsBlock(int start, int count) throws IOException {
if (DEBUG) System.out.println("PW: flushTermsBlock start=" + start + " count=" + count + " pendingTerms.size()=" + pendingTerms.size());
int wrappedCount = 0;
assert buffer.getFilePointer() == 0;
assert start >= count;
final int limit = pendingTerms.size() - start + count;
for(int idx=pendingTerms.size()-start; idx<limit; idx++) {
final PendingTerm term = pendingTerms.get(idx);
if (term == null) {
wrappedCount++;
} else {
buffer.writeVInt(term.bytes.length);
buffer.writeBytes(term.bytes, 0, term.bytes.length);
}
}
termsOut.writeVInt((int) buffer.getFilePointer());
buffer.writeTo(termsOut);
buffer.reset();
// TODO: this could be somewhat costly since
// pendingTerms.size() could be biggish?
int futureWrappedCount = 0;
final int limit2 = pendingTerms.size();
for(int idx=limit;idx<limit2;idx++) {
if (pendingTerms.get(idx) == null) {
futureWrappedCount++;
}
}
// Remove the terms we just wrote:
pendingTerms.subList(pendingTerms.size()-start, limit).clear();
if (DEBUG) System.out.println("PW: len=" + buffer.getFilePointer() + " fp=" + termsOut.getFilePointer() + " futureWrappedCount=" + futureWrappedCount + " wrappedCount=" + wrappedCount);
// TODO: can we avoid calling this if all terms
// were inlined...? Eg for a "primary key" field, the
// wrapped codec is never invoked...
wrappedPostingsWriter.flushTermsBlock(futureWrappedCount+wrappedCount, wrappedCount);
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsWriter.java
private void push() throws IOException {
if (DEBUG) System.out.println("PW now push @ " + pendingCount + " wrapped=" + wrappedPostingsWriter);
assert pendingCount == pending.length;
wrappedPostingsWriter.startTerm();
// Flush all buffered docs
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
Position doc = null;
for(Position pos : pending) {
if (doc == null) {
doc = pos;
if (DEBUG) System.out.println("PW: wrapped.startDoc docID=" + doc.docID + " tf=" + doc.termFreq);
wrappedPostingsWriter.startDoc(doc.docID, doc.termFreq);
} else if (doc.docID != pos.docID) {
assert pos.docID > doc.docID;
if (DEBUG) System.out.println("PW: wrapped.finishDoc");
wrappedPostingsWriter.finishDoc();
doc = pos;
if (DEBUG) System.out.println("PW: wrapped.startDoc docID=" + doc.docID + " tf=" + doc.termFreq);
wrappedPostingsWriter.startDoc(doc.docID, doc.termFreq);
}
if (DEBUG) System.out.println("PW: wrapped.addPos pos=" + pos.pos);
wrappedPostingsWriter.addPosition(pos.pos, pos.payload, pos.startOffset, pos.endOffset);
}
//wrappedPostingsWriter.finishDoc();
} else {
for(Position doc : pending) {
wrappedPostingsWriter.startDoc(doc.docID, indexOptions == IndexOptions.DOCS_ONLY ? 0 : doc.termFreq);
}
}
pendingCount = -1;
}
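// --- illustrative sketch (not from the Lucene sources) ---
// push() above replays everything buffered so far into the wrapped (on-disk)
// postings writer once a term exceeds the pulsing cutoff, and afterwards
// startDoc()/addPosition() forward directly (pendingCount == -1). This standalone
// sketch captures that buffer-then-spill pattern with a toy sink; the Sink
// interface, class and method names are invented for the example.
import java.util.ArrayList;
import java.util.List;

public class PulsingSpillSketch {
  interface Sink { void accept(int docID); }

  static class BufferingWriter {
    private final int cutoff;
    private final Sink spillTo;
    private final List<Integer> pending = new ArrayList<>();
    private boolean spilled = false;

    BufferingWriter(int cutoff, Sink spillTo) {
      this.cutoff = cutoff;
      this.spillTo = spillTo;
    }

    void startDoc(int docID) {
      if (spilled) {                 // already past the cutoff: forward directly
        spillTo.accept(docID);
      } else {
        pending.add(docID);
        if (pending.size() > cutoff) {
          for (int buffered : pending) spillTo.accept(buffered); // replay the buffer
          pending.clear();
          spilled = true;
        }
      }
    }

    boolean inlined() { return !spilled; } // small terms stay inlined in the terms dict
  }

  public static void main(String[] args) {
    BufferingWriter w = new BufferingWriter(2, docID -> System.out.println("spill doc=" + docID));
    w.startDoc(1); w.startDoc(4);                  // still buffered
    System.out.println("inlined=" + w.inlined());  // true
    w.startDoc(9);                                 // crosses the cutoff: spills 1, 4, 9
    System.out.println("inlined=" + w.inlined());  // false
  }
}
// --- end of sketch ---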
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
@Override
public void init(IndexInput termsIn) throws IOException {
CodecUtil.checkHeader(termsIn, PulsingPostingsWriter.CODEC,
PulsingPostingsWriter.VERSION_START, PulsingPostingsWriter.VERSION_START);
maxPositions = termsIn.readVInt();
wrappedPostingsReader.init(termsIn);
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
@Override
public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
//System.out.println("PR.readTermsBlock state=" + _termState);
final PulsingTermState termState = (PulsingTermState) _termState;
if (termState.inlinedBytes == null) {
termState.inlinedBytes = new byte[128];
termState.inlinedBytesReader = new ByteArrayDataInput();
}
int len = termsIn.readVInt();
//System.out.println(" len=" + len + " fp=" + termsIn.getFilePointer());
if (termState.inlinedBytes.length < len) {
termState.inlinedBytes = new byte[ArrayUtil.oversize(len, 1)];
}
termsIn.readBytes(termState.inlinedBytes, 0, len);
termState.inlinedBytesReader.reset(termState.inlinedBytes);
termState.wrappedTermState.termBlockOrd = 0;
wrappedPostingsReader.readTermsBlock(termsIn, fieldInfo, termState.wrappedTermState);
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
@Override
public BlockTermState newTermState() throws IOException {
PulsingTermState state = new PulsingTermState();
state.wrappedTermState = wrappedPostingsReader.newTermState();
return state;
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
@Override
public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
//System.out.println("PR nextTerm");
PulsingTermState termState = (PulsingTermState) _termState;
// if we have positions, its total TF, otherwise its computed based on docFreq.
long count = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 ? termState.totalTermFreq : termState.docFreq;
//System.out.println(" count=" + count + " threshold=" + maxPositions);
if (count <= maxPositions) {
// Inlined into terms dict -- just read the byte[] blob in,
// but don't decode it now (we only decode when a DocsEnum
// or D&PEnum is pulled):
termState.postingsSize = termState.inlinedBytesReader.readVInt();
if (termState.postings == null || termState.postings.length < termState.postingsSize) {
termState.postings = new byte[ArrayUtil.oversize(termState.postingsSize, 1)];
}
// TODO: sort of silly to copy from one big byte[]
// (the blob holding all inlined terms' blobs for
// current term block) into another byte[] (just the
// blob for this term)...
termState.inlinedBytesReader.readBytes(termState.postings, 0, termState.postingsSize);
//System.out.println(" inlined bytes=" + termState.postingsSize);
} else {
//System.out.println(" not inlined");
termState.postingsSize = -1;
// TODO: should we do full copyFrom? much heavier...?
termState.wrappedTermState.docFreq = termState.docFreq;
termState.wrappedTermState.totalTermFreq = termState.totalTermFreq;
wrappedPostingsReader.nextTerm(fieldInfo, termState.wrappedTermState);
termState.wrappedTermState.termBlockOrd++;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
@Override
public DocsEnum docs(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
if (needsFreqs && field.getIndexOptions() == IndexOptions.DOCS_ONLY) {
return null;
}
PulsingTermState termState = (PulsingTermState) _termState;
if (termState.postingsSize != -1) {
PulsingDocsEnum postings;
if (reuse instanceof PulsingDocsEnum) {
postings = (PulsingDocsEnum) reuse;
if (!postings.canReuse(field)) {
postings = new PulsingDocsEnum(field);
}
} else {
// the 'reuse' is actually the wrapped enum
PulsingDocsEnum previous = (PulsingDocsEnum) getOther(reuse);
if (previous != null && previous.canReuse(field)) {
postings = previous;
} else {
postings = new PulsingDocsEnum(field);
}
}
if (reuse != postings) {
setOther(postings, reuse); // postings.other = reuse
}
return postings.reset(liveDocs, termState);
} else {
if (reuse instanceof PulsingDocsEnum) {
DocsEnum wrapped = wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, getOther(reuse), needsFreqs);
setOther(wrapped, reuse); // wrapped.other = reuse
return wrapped;
} else {
return wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, reuse, needsFreqs);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
@Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse,
boolean needsOffsets) throws IOException {
if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
return null;
} else if (needsOffsets && field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
return null;
}
final PulsingTermState termState = (PulsingTermState) _termState;
if (termState.postingsSize != -1) {
PulsingDocsAndPositionsEnum postings;
if (reuse instanceof PulsingDocsAndPositionsEnum) {
postings = (PulsingDocsAndPositionsEnum) reuse;
if (!postings.canReuse(field)) {
postings = new PulsingDocsAndPositionsEnum(field);
}
} else {
// the 'reuse' is actually the wrapped enum
PulsingDocsAndPositionsEnum previous = (PulsingDocsAndPositionsEnum) getOther(reuse);
if (previous != null && previous.canReuse(field)) {
postings = previous;
} else {
postings = new PulsingDocsAndPositionsEnum(field);
}
}
if (reuse != postings) {
setOther(postings, reuse); // postings.other = reuse
}
return postings.reset(liveDocs, termState);
} else {
if (reuse instanceof PulsingDocsAndPositionsEnum) {
DocsAndPositionsEnum wrapped = wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, (DocsAndPositionsEnum) getOther(reuse),
needsOffsets);
setOther(wrapped, reuse); // wrapped.other = reuse
return wrapped;
} else {
return wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, reuse, needsOffsets);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
@Override
public int nextDoc() throws IOException {
//System.out.println("PR nextDoc this= "+ this);
while(true) {
if (postings.eof()) {
//System.out.println("PR END");
return docID = NO_MORE_DOCS;
}
final int code = postings.readVInt();
//System.out.println(" read code=" + code);
if (indexOptions == IndexOptions.DOCS_ONLY) {
accum += code;
} else {
accum += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
freq = postings.readVInt(); // else read freq
}
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
// Skip positions
if (storePayloads) {
for(int pos=0;pos<freq;pos++) {
final int posCode = postings.readVInt();
if ((posCode & 1) != 0) {
payloadLength = postings.readVInt();
}
if (storeOffsets && (postings.readVInt() & 1) != 0) {
// new offset length
postings.readVInt();
}
if (payloadLength != 0) {
postings.skipBytes(payloadLength);
}
}
} else {
for(int pos=0;pos<freq;pos++) {
// TODO: skipVInt
postings.readVInt();
if (storeOffsets && (postings.readVInt() & 1) != 0) {
// new offset length
postings.readVInt();
}
}
}
}
}
if (liveDocs == null || liveDocs.get(accum)) {
return (docID = accum);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
@Override
public int freq() throws IOException {
assert indexOptions != IndexOptions.DOCS_ONLY;
return freq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
@Override
public int advance(int target) throws IOException {
int doc;
while((doc=nextDoc()) != NO_MORE_DOCS) {
if (doc >= target)
return doc;
}
return docID = NO_MORE_DOCS;
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
@Override
public int nextDoc() throws IOException {
//System.out.println("PR d&p nextDoc this=" + this);
while(true) {
//System.out.println(" cycle skip posPending=" + posPending);
skipPositions();
if (postings.eof()) {
//System.out.println("PR END");
return docID = NO_MORE_DOCS;
}
final int code = postings.readVInt();
accum += code >>> 1; // shift off low bit
if ((code & 1) != 0) { // if low bit is set
freq = 1; // freq is one
} else {
freq = postings.readVInt(); // else read freq
}
posPending = freq;
startOffset = storeOffsets ? 0 : -1; // always return -1 if no offsets are stored
if (liveDocs == null || liveDocs.get(accum)) {
//System.out.println(" return docID=" + docID + " freq=" + freq);
position = 0;
return (docID = accum);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
@Override
public int freq() throws IOException {
return freq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
@Override
public int advance(int target) throws IOException {
int doc;
while((doc=nextDoc()) != NO_MORE_DOCS) {
if (doc >= target) {
return docID = doc;
}
}
return docID = NO_MORE_DOCS;
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
@Override
public int nextPosition() throws IOException {
//System.out.println("PR d&p nextPosition posPending=" + posPending + " vs freq=" + freq);
assert posPending > 0;
posPending--;
if (storePayloads) {
if (!payloadRetrieved) {
//System.out.println("PR skip payload=" + payloadLength);
postings.skipBytes(payloadLength);
}
final int code = postings.readVInt();
//System.out.println("PR code=" + code);
if ((code & 1) != 0) {
payloadLength = postings.readVInt();
//System.out.println("PR new payload len=" + payloadLength);
}
position += code >>> 1;
payloadRetrieved = false;
} else {
position += postings.readVInt();
}
if (storeOffsets) {
int offsetCode = postings.readVInt();
if ((offsetCode & 1) != 0) {
// new offset length
offsetLength = postings.readVInt();
}
startOffset += offsetCode >>> 1;
}
//System.out.println("PR d&p nextPos return pos=" + position + " this=" + this);
return position;
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
private void skipPositions() throws IOException {
while(posPending != 0) {
nextPosition();
}
if (storePayloads && !payloadRetrieved) {
//System.out.println(" skip payload len=" + payloadLength);
postings.skipBytes(payloadLength);
payloadRetrieved = true;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
@Override
public BytesRef getPayload() throws IOException {
//System.out.println("PR getPayload payloadLength=" + payloadLength + " this=" + this);
if (payloadRetrieved) {
throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
}
payloadRetrieved = true;
if (payloadLength > 0) {
if (payload == null) {
payload = new BytesRef(payloadLength);
} else {
payload.grow(payloadLength);
}
postings.readBytes(payload.bytes, 0, payloadLength);
payload.length = payloadLength;
return payload;
} else {
return null;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java
@Override
public void close() throws IOException {
wrappedPostingsReader.close();
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public void startDoc(int docID, int termDocFreq) throws IOException {
//System.out.println(" startDoc docID=" + docID + " freq=" + termDocFreq);
final int delta = docID - lastDocID;
assert docID == 0 || delta > 0;
lastDocID = docID;
docCount++;
if (field.getIndexOptions() == IndexOptions.DOCS_ONLY) {
buffer.writeVInt(delta);
} else if (termDocFreq == 1) {
buffer.writeVInt((delta<<1) | 1);
} else {
buffer.writeVInt(delta<<1);
assert termDocFreq > 0;
buffer.writeVInt(termDocFreq);
}
lastPos = 0;
lastOffset = 0;
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public void addPosition(int pos, BytesRef payload, int startOffset, int endOffset) throws IOException {
assert payload == null || field.hasPayloads();
//System.out.println(" addPos pos=" + pos + " payload=" + payload);
final int delta = pos - lastPos;
assert delta >= 0;
lastPos = pos;
int payloadLen = 0;
if (field.hasPayloads()) {
payloadLen = payload == null ? 0 : payload.length;
if (payloadLen != lastPayloadLen) {
lastPayloadLen = payloadLen;
buffer.writeVInt((delta<<1)|1);
buffer.writeVInt(payloadLen);
} else {
buffer.writeVInt(delta<<1);
}
} else {
buffer.writeVInt(delta);
}
if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
// don't use startOffset - lastEndOffset, because this creates lots of negative vints for synonyms,
// and the numbers aren't that much smaller anyways.
int offsetDelta = startOffset - lastOffset;
int offsetLength = endOffset - startOffset;
if (offsetLength != lastOffsetLength) {
buffer.writeVInt(offsetDelta << 1 | 1);
buffer.writeVInt(offsetLength);
} else {
buffer.writeVInt(offsetDelta << 1);
}
lastOffset = startOffset;
lastOffsetLength = offsetLength;
}
if (payloadLen > 0) {
buffer.writeBytes(payload.bytes, payload.offset, payloadLen);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
assert postingsWriter.docCount == stats.docFreq;
assert buffer2.getFilePointer() == 0;
buffer2.writeVInt(stats.docFreq);
if (field.getIndexOptions() != IndexOptions.DOCS_ONLY) {
buffer2.writeVLong(stats.totalTermFreq-stats.docFreq);
}
int pos = (int) buffer2.getFilePointer();
buffer2.writeTo(finalBuffer, 0);
buffer2.reset();
final int totalBytes = pos + (int) postingsWriter.buffer.getFilePointer();
if (totalBytes > finalBuffer.length) {
finalBuffer = ArrayUtil.grow(finalBuffer, totalBytes);
}
postingsWriter.buffer.writeTo(finalBuffer, pos);
postingsWriter.buffer.reset();
spare.bytes = finalBuffer;
spare.length = totalBytes;
//System.out.println(" finishTerm term=" + text.utf8ToString() + " " + totalBytes + " bytes totalTF=" + stats.totalTermFreq);
//for(int i=0;i<totalBytes;i++) {
// System.out.println(" " + Integer.toHexString(finalBuffer[i]&0xFF));
//}
builder.add(Util.toIntsRef(text, scratchIntsRef), BytesRef.deepCopyOf(spare));
termCount++;
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
if (termCount > 0) {
out.writeVInt(termCount);
out.writeVInt(field.number);
if (field.getIndexOptions() != IndexOptions.DOCS_ONLY) {
out.writeVLong(sumTotalTermFreq);
}
out.writeVLong(sumDocFreq);
out.writeVInt(docCount);
FST<BytesRef> fst = builder.finish();
if (doPackFST) {
fst = fst.pack(3, Math.max(10, fst.getNodeCount()/4));
}
fst.save(out);
//System.out.println("finish field=" + field.name + " fp=" + out.getFilePointer());
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
final String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
final IndexOutput out = state.directory.createOutput(fileName, state.context);
return new FieldsConsumer() {
@Override
public TermsConsumer addField(FieldInfo field) {
//System.out.println("\naddField field=" + field.name);
return new TermsWriter(out, field, doPackFST);
}
@Override
public void close() throws IOException {
// EOF marker:
try {
out.writeVInt(0);
} finally {
out.close();
}
}
};
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public void close() throws IOException {
// EOF marker:
try {
out.writeVInt(0);
} finally {
out.close();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public int freq() throws IOException {
assert indexOptions != IndexOptions.DOCS_ONLY;
return freq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public int freq() throws IOException {
return freq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
private void decodeMetaData() throws IOException {
if (!didDecode) {
buffer.reset(current.output.bytes, 0, current.output.length);
docFreq = buffer.readVInt();
if (field.getIndexOptions() != IndexOptions.DOCS_ONLY) {
totalTermFreq = docFreq + buffer.readVLong();
} else {
totalTermFreq = -1;
}
current.output.offset = buffer.getPosition();
//System.out.println(" df=" + docFreq + " totTF=" + totalTermFreq + " offset=" + buffer.getPosition() + " len=" + current.output.length);
didDecode = true;
}
}
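decodeMetaData is the reading side of finishTerm: the FST output blob starts with docFreq as a VInt, then (unless the field is DOCS_ONLY) totalTermFreq stored as a VLong delta over docFreq, and only then the postings bytes. A rough sketch of that stats-prefix decode; the StatsIn reader here is a hypothetical stand-in for the byte-array reader used above:
// Illustrative only: lazy decode of the stats prefix of the per-term blob.
class TermStatsSketch {
  interface StatsIn { int readVInt(); long readVLong(); }  // hypothetical reader

  int docFreq;
  long totalTermFreq;

  // hasFreqs is false for DOCS_ONLY fields, where totalTermFreq stays -1.
  void decode(StatsIn in, boolean hasFreqs) {
    docFreq = in.readVInt();
    totalTermFreq = hasFreqs ? docFreq + in.readVLong() : -1;
  }
}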
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public boolean seekExact(BytesRef text, boolean useCache /* ignored */) throws IOException {
//System.out.println("te.seekExact text=" + field.name + ":" + text.utf8ToString() + " this=" + this);
current = fstEnum.seekExact(text);
didDecode = false;
return current != null;
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
//System.out.println("te.seek text=" + field.name + ":" + text.utf8ToString() + " this=" + this);
current = fstEnum.seekCeil(text);
if (current == null) {
return SeekStatus.END;
} else {
// System.out.println(" got term=" + current.input.utf8ToString());
// for(int i=0;i<current.output.length;i++) {
// System.out.println(" " + Integer.toHexString(current.output.bytes[i]&0xFF));
// }
didDecode = false;
if (text.equals(current.input)) {
//System.out.println(" found!");
return SeekStatus.FOUND;
} else {
//System.out.println(" not found: " + current.input.utf8ToString());
return SeekStatus.NOT_FOUND;
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
decodeMetaData();
FSTDocsEnum docsEnum;
if (needsFreqs && field.getIndexOptions() == IndexOptions.DOCS_ONLY) {
return null;
} else if (reuse == null || !(reuse instanceof FSTDocsEnum)) {
docsEnum = new FSTDocsEnum(field.getIndexOptions(), field.hasPayloads());
} else {
docsEnum = (FSTDocsEnum) reuse;
if (!docsEnum.canReuse(field.getIndexOptions(), field.hasPayloads())) {
docsEnum = new FSTDocsEnum(field.getIndexOptions(), field.hasPayloads());
}
}
return docsEnum.reset(current.output, liveDocs, docFreq);
}
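The reuse dance in docs() is a common Lucene pattern: accept any previously returned enum, but recycle it only when it is the expected concrete type and was built with compatible options, otherwise allocate a fresh one. A generic sketch of that pattern (the Cursor types here are illustrative, not the Lucene classes):
// Illustrative only: the reuse-if-compatible pattern.
class ReuseSketch {
  interface Cursor { boolean canReuse(boolean withPayloads); }
  static final class MyCursor implements Cursor {
    final boolean withPayloads;
    MyCursor(boolean withPayloads) { this.withPayloads = withPayloads; }
    @Override public boolean canReuse(boolean withPayloads) { return this.withPayloads == withPayloads; }
  }
  // Recycle `reuse` only when it is the expected type and was built compatibly.
  static MyCursor obtain(Cursor reuse, boolean withPayloads) {
    if (reuse instanceof MyCursor && reuse.canReuse(withPayloads)) {
      return (MyCursor) reuse;
    }
    return new MyCursor(withPayloads);
  }
}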
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
boolean hasOffsets = field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (needsOffsets && !hasOffsets) {
return null; // not available
}
if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
return null;
}
decodeMetaData();
FSTDocsAndPositionsEnum docsAndPositionsEnum;
if (reuse == null || !(reuse instanceof FSTDocsAndPositionsEnum)) {
docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.hasPayloads(), hasOffsets);
} else {
docsAndPositionsEnum = (FSTDocsAndPositionsEnum) reuse;
if (!docsAndPositionsEnum.canReuse(field.hasPayloads(), hasOffsets)) {
docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.hasPayloads(), hasOffsets);
}
}
//System.out.println("D&P reset this=" + this);
return docsAndPositionsEnum.reset(current.output, liveDocs, docFreq);
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public BytesRef next() throws IOException {
//System.out.println("te.next");
current = fstEnum.next();
if (current == null) {
//System.out.println(" END");
return null;
}
didDecode = false;
//System.out.println(" term=" + field.name + ":" + current.input.utf8ToString());
return current.input;
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public int docFreq() throws IOException {
decodeMetaData();
return docFreq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public long totalTermFreq() throws IOException {
decodeMetaData();
return totalTermFreq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public long getSumDocFreq() throws IOException {
return sumDocFreq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public int getDocCount() throws IOException {
return docCount;
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public long size() throws IOException {
return termCount;
}
// in lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
final String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
final IndexInput in = state.dir.openInput(fileName, IOContext.READONCE);
final SortedMap<String,TermsReader> fields = new TreeMap<String,TermsReader>();
try {
while(true) {
final int termCount = in.readVInt();
if (termCount == 0) {
break;
}
final TermsReader termsReader = new TermsReader(state.fieldInfos, in, termCount);
// System.out.println("load field=" + termsReader.field.name);
fields.put(termsReader.field.name, termsReader);
}
} finally {
in.close();
}
return new FieldsProducer() {
@Override
public FieldsEnum iterator() {
final Iterator<TermsReader> iter = fields.values().iterator();
return new FieldsEnum() {
private TermsReader current;
@Override
public String next() {
current = iter.next();
return current.field.name;
}
@Override
public Terms terms() {
return current;
}
};
}
@Override
public Terms terms(String field) {
return fields.get(field);
}
@Override
public int size() {
return fields.size();
}
@Override
public void close() {
// Drop ref to FST:
for(TermsReader termsReader : fields.values()) {
termsReader.fst = null;
}
}
};
}
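fieldsProducer keeps reading per-field sections until it hits the VInt 0 that the writer's close() appended as an EOF marker. That sentinel-terminated layout can be sketched generically; the readVInt supplier and loadField consumer below are hypothetical stand-ins:
// Illustrative only: reads field sections until the 0 sentinel written at close().
class SentinelLoopSketch {
  static void readAll(java.util.function.IntSupplier readVInt, java.util.function.IntConsumer loadField) {
    while (true) {
      int termCount = readVInt.getAsInt();
      if (termCount == 0) {
        break;                     // EOF marker: no more fields
      }
      loadField.accept(termCount); // hand the field section off to a loader
    }
  }
}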
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
protected void readHeader(IndexInput input) throws IOException {
CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_CODEC_NAME,
BlockTreeTermsWriter.TERMS_VERSION_START,
BlockTreeTermsWriter.TERMS_VERSION_CURRENT);
dirOffset = input.readLong();
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
protected void readIndexHeader(IndexInput input) throws IOException {
CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME,
BlockTreeTermsWriter.TERMS_INDEX_VERSION_START,
BlockTreeTermsWriter.TERMS_INDEX_VERSION_CURRENT);
indexDirOffset = input.readLong();
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
protected void seekDir(IndexInput input, long dirOffset)
throws IOException {
input.seek(dirOffset);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public void close() throws IOException {
try {
IOUtils.close(in, postingsReader);
} finally {
// Clear so refs to the terms index are GC'able even if
// the app hangs onto us:
fields.clear();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public Terms terms(String field) throws IOException {
assert field != null;
return fields.get(field);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public Terms terms() throws IOException {
return current;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
public Stats computeStats() throws IOException {
return new SegmentTermsEnum().computeBlockStats();
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
return new SegmentTermsEnum();
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public int getDocCount() throws IOException {
return docCount;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
}
return new IntersectEnum(compiled, startTerm);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
void loadNextFloorBlock() throws IOException {
assert numFollowFloorBlocks > 0;
//if (DEBUG) System.out.println("  loadNextFloorBlock trans=" + transitions[transitionIndex]);
do {
fp = fpOrig + (floorDataReader.readVLong() >>> 1);
numFollowFloorBlocks--;
// if (DEBUG) System.out.println(" skip floor block2! nextFloorLabel=" + (char) nextFloorLabel + " vs target=" + (char) transitions[transitionIndex].getMin() + " newFP=" + fp + " numFollowFloorBlocks=" + numFollowFloorBlocks);
if (numFollowFloorBlocks != 0) {
nextFloorLabel = floorDataReader.readByte() & 0xff;
} else {
nextFloorLabel = 256;
}
// if (DEBUG) System.out.println(" nextFloorLabel=" + (char) nextFloorLabel);
} while (numFollowFloorBlocks != 0 && nextFloorLabel <= transitions[transitionIndex].getMin());
load(null);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
void load(BytesRef frameIndexData) throws IOException {
// if (DEBUG) System.out.println(" load fp=" + fp + " fpOrig=" + fpOrig + " frameIndexData=" + frameIndexData + " trans=" + (transitions.length != 0 ? transitions[0] : "n/a" + " state=" + state));
if (frameIndexData != null && transitions.length != 0) {
// Floor frame
if (floorData.length < frameIndexData.length) {
this.floorData = new byte[ArrayUtil.oversize(frameIndexData.length, 1)];
}
System.arraycopy(frameIndexData.bytes, frameIndexData.offset, floorData, 0, frameIndexData.length);
floorDataReader.reset(floorData, 0, frameIndexData.length);
// Skip first long -- has redundant fp, hasTerms
// flag, isFloor flag
final long code = floorDataReader.readVLong();
if ((code & BlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0) {
numFollowFloorBlocks = floorDataReader.readVInt();
nextFloorLabel = floorDataReader.readByte() & 0xff;
// if (DEBUG) System.out.println(" numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + nextFloorLabel);
// If current state is accept, we must process
// first block in case it has empty suffix:
if (!runAutomaton.isAccept(state)) {
// Maybe skip floor blocks:
while (numFollowFloorBlocks != 0 && nextFloorLabel <= transitions[0].getMin()) {
fp = fpOrig + (floorDataReader.readVLong() >>> 1);
numFollowFloorBlocks--;
// if (DEBUG) System.out.println(" skip floor block! nextFloorLabel=" + (char) nextFloorLabel + " vs target=" + (char) transitions[0].getMin() + " newFP=" + fp + " numFollowFloorBlocks=" + numFollowFloorBlocks);
if (numFollowFloorBlocks != 0) {
nextFloorLabel = floorDataReader.readByte() & 0xff;
} else {
nextFloorLabel = 256;
}
}
}
}
}
in.seek(fp);
int code = in.readVInt();
entCount = code >>> 1;
assert entCount > 0;
isLastInFloor = (code & 1) != 0;
// term suffixes:
code = in.readVInt();
isLeafBlock = (code & 1) != 0;
int numBytes = code >>> 1;
// if (DEBUG) System.out.println(" entCount=" + entCount + " lastInFloor?=" + isLastInFloor + " leafBlock?=" + isLeafBlock + " numSuffixBytes=" + numBytes);
if (suffixBytes.length < numBytes) {
suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
}
in.readBytes(suffixBytes, 0, numBytes);
suffixesReader.reset(suffixBytes, 0, numBytes);
// stats
numBytes = in.readVInt();
if (statBytes.length < numBytes) {
statBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
}
in.readBytes(statBytes, 0, numBytes);
statsReader.reset(statBytes, 0, numBytes);
metaDataUpto = 0;
termState.termBlockOrd = 0;
nextEnt = 0;
postingsReader.readTermsBlock(in, fieldInfo, termState);
if (!isLastInFloor) {
// Sub-blocks of a single floor block are always
// written one after another -- tail recurse:
fpEnd = in.getFilePointer();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
public void decodeMetaData() throws IOException {
// lazily catch up on metadata decode:
final int limit = getTermBlockOrd();
assert limit > 0;
// We must set/incr state.termCount because
// postings impl can look at this
termState.termBlockOrd = metaDataUpto;
// TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) {
// TODO: we could make "tiers" of metadata, ie,
// decode docFreq/totalTF but don't decode postings
// metadata; this way caller could get
// docFreq/totalTF w/o paying decode cost for
// postings
// TODO: if docFreq were bulk decoded we could
// just skipN here:
termState.docFreq = statsReader.readVInt();
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
}
postingsReader.nextTerm(fieldInfo, termState);
metaDataUpto++;
termState.termBlockOrd++;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public TermState termState() throws IOException {
currentFrame.decodeMetaData();
return currentFrame.termState.clone();
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
private Frame getFrame(int ord) throws IOException {
if (ord >= stack.length) {
final Frame[] next = new Frame[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
System.arraycopy(stack, 0, next, 0, stack.length);
for(int stackOrd=stack.length;stackOrd<next.length;stackOrd++) {
next[stackOrd] = new Frame(stackOrd);
}
stack = next;
}
assert stack[ord].ord == ord;
return stack[ord];
}
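getFrame grows the frame stack on demand, over-allocating (via ArrayUtil.oversize) so repeated pushes don't reallocate every time, and eagerly filling the new slots so stack[ord] is always non-null. A simplified sketch of that grow-and-fill idiom without the Lucene helpers:
// Illustrative only: grow-with-headroom and pre-fill, as in getFrame().
class FrameStackSketch {
  static final class Frame {
    final int ord;
    Frame(int ord) { this.ord = ord; }
  }
  private Frame[] stack = new Frame[0];

  Frame getFrame(int ord) {
    if (ord >= stack.length) {
      Frame[] next = new Frame[Math.max(ord + 1, stack.length * 2)];  // headroom, like ArrayUtil.oversize
      System.arraycopy(stack, 0, next, 0, stack.length);
      for (int i = stack.length; i < next.length; i++) {
        next[i] = new Frame(i);    // pre-fill so every slot is usable
      }
      stack = next;
    }
    return stack[ord];
  }
}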
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
private Frame pushFrame(int state) throws IOException {
final Frame f = getFrame(currentFrame == null ? 0 : 1+currentFrame.ord);
f.fp = f.fpOrig = currentFrame.lastSubFP;
f.prefix = currentFrame.prefix + currentFrame.suffix;
// if (DEBUG) System.out.println(" pushFrame state=" + state + " prefix=" + f.prefix);
f.setState(state);
// Walk the arc through the index -- we only
// "bother" with this so we can get the floor data
// from the index and skip floor blocks when
// possible:
FST.Arc<BytesRef> arc = currentFrame.arc;
int idx = currentFrame.prefix;
assert currentFrame.suffix > 0;
BytesRef output = currentFrame.outputPrefix;
while (idx < f.prefix) {
final int target = term.bytes[idx] & 0xff;
// TODO: we could be more efficient for the next()
// case by using current arc as starting point,
// passed to findTargetArc
arc = index.findTargetArc(target, arc, getArc(1+idx), fstReader);
assert arc != null;
output = fstOutputs.add(output, arc.output);
idx++;
}
f.arc = arc;
f.outputPrefix = output;
assert arc.isFinal();
f.load(fstOutputs.add(output, arc.nextFinalOutput));
return f;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public BytesRef term() throws IOException {
return term;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public int docFreq() throws IOException {
//if (DEBUG) System.out.println("BTIR.docFreq");
currentFrame.decodeMetaData();
//if (DEBUG) System.out.println(" return " + currentFrame.termState.docFreq);
return currentFrame.termState.docFreq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public long totalTermFreq() throws IOException {
currentFrame.decodeMetaData();
return currentFrame.termState.totalTermFreq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
currentFrame.decodeMetaData();
return postingsReader.docs(fieldInfo, currentFrame.termState, skipDocs, reuse, needsFreqs);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// Positions were not indexed:
return null;
}
if (needsOffsets &&
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
// Offsets were not indexed:
return null;
}
currentFrame.decodeMetaData();
return postingsReader.docsAndPositions(fieldInfo, currentFrame.termState, skipDocs, reuse, needsOffsets);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
private void seekToStartTerm(BytesRef target) throws IOException {
//if (DEBUG) System.out.println("seek to startTerm=" + target.utf8ToString());
assert currentFrame.ord == 0;
if (term.length < target.length) {
term.bytes = ArrayUtil.grow(term.bytes, target.length);
}
FST.Arc<BytesRef> arc = arcs[0];
assert arc == currentFrame.arc;
for(int idx=0;idx<=target.length;idx++) {
while (true) {
final int savePos = currentFrame.suffixesReader.getPosition();
final int saveStartBytePos = currentFrame.startBytePos;
final int saveSuffix = currentFrame.suffix;
final long saveLastSubFP = currentFrame.lastSubFP;
final int saveTermBlockOrd = currentFrame.termState.termBlockOrd;
final boolean isSubBlock = currentFrame.next();
//if (DEBUG) System.out.println(" cycle ent=" + currentFrame.nextEnt + " (of " + currentFrame.entCount + ") prefix=" + currentFrame.prefix + " suffix=" + currentFrame.suffix + " isBlock=" + isSubBlock + " firstLabel=" + (currentFrame.suffix == 0 ? "" : (currentFrame.suffixBytes[currentFrame.startBytePos])&0xff));
term.length = currentFrame.prefix + currentFrame.suffix;
if (term.bytes.length < term.length) {
term.bytes = ArrayUtil.grow(term.bytes, term.length);
}
System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix);
if (isSubBlock && StringHelper.startsWith(target, term)) {
// Recurse
//if (DEBUG) System.out.println(" recurse!");
currentFrame = pushFrame(getState());
break;
} else {
final int cmp = term.compareTo(target);
if (cmp < 0) {
if (currentFrame.nextEnt == currentFrame.entCount) {
if (!currentFrame.isLastInFloor) {
//if (DEBUG) System.out.println(" load floorBlock");
currentFrame.loadNextFloorBlock();
continue;
} else {
//if (DEBUG) System.out.println(" return term=" + brToString(term));
return;
}
}
continue;
} else if (cmp == 0) {
//if (DEBUG) System.out.println(" return term=" + brToString(term));
return;
} else {
// Fall back to the prior entry: the semantics of
// this method are that the first call to
// next() will return the term after the
// requested term
currentFrame.nextEnt--;
currentFrame.lastSubFP = saveLastSubFP;
currentFrame.startBytePos = saveStartBytePos;
currentFrame.suffix = saveSuffix;
currentFrame.suffixesReader.setPosition(savePos);
currentFrame.termState.termBlockOrd = saveTermBlockOrd;
System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix);
term.length = currentFrame.prefix + currentFrame.suffix;
// If the last entry was a block we don't
// need to bother recursing and pushing to
// the last term under it because the first
// next() will simply skip the frame anyway
return;
}
}
}
}
assert false;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public BytesRef next() throws IOException {
// if (DEBUG) {
// System.out.println("\nintEnum.next seg=" + segment);
// System.out.println(" frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
// }
nextTerm:
while(true) {
// Pop finished frames
while (currentFrame.nextEnt == currentFrame.entCount) {
if (!currentFrame.isLastInFloor) {
//if (DEBUG) System.out.println(" next-floor-block");
currentFrame.loadNextFloorBlock();
//if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
} else {
//if (DEBUG) System.out.println(" pop frame");
if (currentFrame.ord == 0) {
return null;
}
final long lastFP = currentFrame.fpOrig;
currentFrame = stack[currentFrame.ord-1];
assert currentFrame.lastSubFP == lastFP;
//if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
}
}
final boolean isSubBlock = currentFrame.next();
// if (DEBUG) {
// final BytesRef suffixRef = new BytesRef();
// suffixRef.bytes = currentFrame.suffixBytes;
// suffixRef.offset = currentFrame.startBytePos;
// suffixRef.length = currentFrame.suffix;
// System.out.println(" " + (isSubBlock ? "sub-block" : "term") + " " + currentFrame.nextEnt + " (of " + currentFrame.entCount + ") suffix=" + brToString(suffixRef));
// }
if (currentFrame.suffix != 0) {
final int label = currentFrame.suffixBytes[currentFrame.startBytePos] & 0xff;
while (label > currentFrame.curTransitionMax) {
if (currentFrame.transitionIndex >= currentFrame.transitions.length-1) {
// Stop processing this frame -- no further
// matches are possible because we've moved
// beyond what the max transition will allow
//if (DEBUG) System.out.println(" break: trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]));
// sneaky! forces a pop above
currentFrame.isLastInFloor = true;
currentFrame.nextEnt = currentFrame.entCount;
continue nextTerm;
}
currentFrame.transitionIndex++;
currentFrame.curTransitionMax = currentFrame.transitions[currentFrame.transitionIndex].getMax();
//if (DEBUG) System.out.println(" next trans=" + currentFrame.transitions[currentFrame.transitionIndex]);
}
}
// First test the common suffix, if set:
if (compiledAutomaton.commonSuffixRef != null && !isSubBlock) {
final int termLen = currentFrame.prefix + currentFrame.suffix;
if (termLen < compiledAutomaton.commonSuffixRef.length) {
// No match
// if (DEBUG) {
// System.out.println(" skip: common suffix length");
// }
continue nextTerm;
}
final byte[] suffixBytes = currentFrame.suffixBytes;
final byte[] commonSuffixBytes = compiledAutomaton.commonSuffixRef.bytes;
final int lenInPrefix = compiledAutomaton.commonSuffixRef.length - currentFrame.suffix;
assert compiledAutomaton.commonSuffixRef.offset == 0;
int suffixBytesPos;
int commonSuffixBytesPos = 0;
if (lenInPrefix > 0) {
// A prefix of the common suffix overlaps with
// the suffix of the block prefix so we first
// test whether the prefix part matches:
final byte[] termBytes = term.bytes;
int termBytesPos = currentFrame.prefix - lenInPrefix;
assert termBytesPos >= 0;
final int termBytesPosEnd = currentFrame.prefix;
while (termBytesPos < termBytesPosEnd) {
if (termBytes[termBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) {
// if (DEBUG) {
// System.out.println(" skip: common suffix mismatch (in prefix)");
// }
continue nextTerm;
}
}
suffixBytesPos = currentFrame.startBytePos;
} else {
suffixBytesPos = currentFrame.startBytePos + currentFrame.suffix - compiledAutomaton.commonSuffixRef.length;
}
// Test overlapping suffix part:
final int commonSuffixBytesPosEnd = compiledAutomaton.commonSuffixRef.length;
while (commonSuffixBytesPos < commonSuffixBytesPosEnd) {
if (suffixBytes[suffixBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) {
// if (DEBUG) {
// System.out.println(" skip: common suffix mismatch");
// }
continue nextTerm;
}
}
}
// TODO: maybe we should do the same linear test
// that AutomatonTermsEnum does, so that if we
// reach a part of the automaton where .* is
// "temporarily" accepted, we just blindly .next()
// until the limit
// See if the term prefix matches the automaton:
int state = currentFrame.state;
for (int idx=0;idx<currentFrame.suffix;idx++) {
state = runAutomaton.step(state, currentFrame.suffixBytes[currentFrame.startBytePos+idx] & 0xff);
if (state == -1) {
// No match
//System.out.println(" no s=" + state);
continue nextTerm;
} else {
//System.out.println(" c s=" + state);
}
}
if (isSubBlock) {
// Match! Recurse:
//if (DEBUG) System.out.println(" sub-block match to state=" + state + "; recurse fp=" + currentFrame.lastSubFP);
copyTerm();
currentFrame = pushFrame(state);
//if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
} else if (runAutomaton.isAccept(state)) {
copyTerm();
//if (DEBUG) System.out.println(" term match to state=" + state + "; return term=" + brToString(term));
assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0: "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString();
return term;
} else {
//System.out.println(" no s=" + state);
}
}
}
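Before running the automaton over a candidate suffix, the loop above first checks the automaton's required common suffix (when one exists) byte by byte, since that rejects most non-matching terms far more cheaply than stepping the automaton. A simplified, stand-alone version of that check, ignoring the prefix/suffix split the real code has to handle:
// Illustrative only: cheap common-suffix pre-filter before automaton steps.
class CommonSuffixFilterSketch {
  // Returns true only if term[0..termLen) ends with the given suffix bytes.
  static boolean endsWith(byte[] term, int termLen, byte[] suffix) {
    if (termLen < suffix.length) {
      return false;                        // term too short: cannot match
    }
    int start = termLen - suffix.length;
    for (int i = 0; i < suffix.length; i++) {
      if (term[start + i] != suffix[i]) {
        return false;                      // cheap reject, no automaton needed
      }
    }
    return true;
  }
}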
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public void seekExact(long ord) throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public long ord() throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public SeekStatus seekCeil(BytesRef text, boolean useCache) throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
public Stats computeBlockStats() throws IOException {
Stats stats = new Stats(segment, fieldInfo.name);
if (index != null) {
stats.indexNodeCount = index.getNodeCount();
stats.indexArcCount = index.getArcCount();
stats.indexNumBytes = index.sizeInBytes();
}
currentFrame = staticFrame;
FST.Arc<BytesRef> arc;
if (index != null) {
arc = index.getFirstArc(arcs[0]);
// Empty string prefix must have an output in the index!
assert arc.isFinal();
} else {
arc = null;
}
// Empty string prefix must have an output in the
// index!
currentFrame = pushFrame(arc, rootCode, 0);
currentFrame.fpOrig = currentFrame.fp;
currentFrame.loadBlock();
validIndexPrefix = 0;
stats.startBlock(currentFrame, !currentFrame.isLastInFloor);
allTerms:
while (true) {
// Pop finished blocks
while (currentFrame.nextEnt == currentFrame.entCount) {
stats.endBlock(currentFrame);
if (!currentFrame.isLastInFloor) {
currentFrame.loadNextFloorBlock();
stats.startBlock(currentFrame, true);
} else {
if (currentFrame.ord == 0) {
break allTerms;
}
final long lastFP = currentFrame.fpOrig;
currentFrame = stack[currentFrame.ord-1];
assert lastFP == currentFrame.lastSubFP;
// if (DEBUG) {
// System.out.println(" reset validIndexPrefix=" + validIndexPrefix);
// }
}
}
while(true) {
if (currentFrame.next()) {
// Push to new block:
currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length);
currentFrame.fpOrig = currentFrame.fp;
// This is a "next" frame -- even if it's
// floor'd we must pretend it isn't so we don't
// try to scan to the right floor frame:
currentFrame.isFloor = false;
//currentFrame.hasTerms = true;
currentFrame.loadBlock();
stats.startBlock(currentFrame, !currentFrame.isLastInFloor);
} else {
stats.term(term);
break;
}
}
}
stats.finish();
// Put root frame back:
currentFrame = staticFrame;
if (index != null) {
arc = index.getFirstArc(arcs[0]);
// Empty string prefix must have an output in the index!
assert arc.isFinal();
} else {
arc = null;
}
currentFrame = pushFrame(arc, rootCode, 0);
currentFrame.rewind();
currentFrame.loadBlock();
validIndexPrefix = 0;
term.length = 0;
return stats;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
private Frame getFrame(int ord) throws IOException {
if (ord >= stack.length) {
final Frame[] next = new Frame[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
System.arraycopy(stack, 0, next, 0, stack.length);
for(int stackOrd=stack.length;stackOrd<next.length;stackOrd++) {
next[stackOrd] = new Frame(stackOrd);
}
stack = next;
}
assert stack[ord].ord == ord;
return stack[ord];
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
Frame pushFrame(FST.Arc<BytesRef> arc, BytesRef frameData, int length) throws IOException {
scratchReader.reset(frameData.bytes, frameData.offset, frameData.length);
final long code = scratchReader.readVLong();
final long fpSeek = code >>> BlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
final Frame f = getFrame(1+currentFrame.ord);
f.hasTerms = (code & BlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0;
f.hasTermsOrig = f.hasTerms;
f.isFloor = (code & BlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0;
if (f.isFloor) {
f.setFloorData(scratchReader, frameData);
}
pushFrame(arc, fpSeek, length);
return f;
}
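The frameData blob starts with a VLong whose low bits are flags and whose remaining bits are the block's file pointer; pushFrame peels those apart before seeking. A sketch of that bit packing, assuming the writer's layout of two flag bits (hasTerms, isFloor) below the file pointer; the constant names and values here are for illustration only:
// Illustrative only: pack/unpack of the fp + flags output code.
class OutputCodeSketch {
  static final int FLAG_BITS = 2;           // assumed: fp is shifted above two flag bits
  static final long FLAG_HAS_TERMS = 0x2;   // assumed flag values
  static final long FLAG_IS_FLOOR  = 0x1;

  static long pack(long fp, boolean hasTerms, boolean isFloor) {
    return (fp << FLAG_BITS) | (hasTerms ? FLAG_HAS_TERMS : 0) | (isFloor ? FLAG_IS_FLOOR : 0);
  }
  static long fp(long code)          { return code >>> FLAG_BITS; }
  static boolean hasTerms(long code) { return (code & FLAG_HAS_TERMS) != 0; }
  static boolean isFloor(long code)  { return (code & FLAG_IS_FLOOR) != 0; }
}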
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
Frame pushFrame(FST.Arc<BytesRef> arc, long fp, int length) throws IOException {
final Frame f = getFrame(1+currentFrame.ord);
f.arc = arc;
if (f.fpOrig == fp && f.nextEnt != -1) {
//if (DEBUG) System.out.println(" push reused frame ord=" + f.ord + " fp=" + f.fp + " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + term.length + " vs prefix=" + f.prefix);
if (f.prefix > targetBeforeCurrentLength) {
f.rewind();
} else {
// if (DEBUG) {
// System.out.println(" skip rewind!");
// }
}
assert length == f.prefix;
} else {
f.nextEnt = -1;
f.prefix = length;
f.state.termBlockOrd = 0;
f.fpOrig = f.fp = fp;
f.lastSubFP = -1;
// if (DEBUG) {
// final int sav = term.length;
// term.length = length;
// System.out.println(" push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
// term.length = sav;
// }
}
return f;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public boolean seekExact(final BytesRef target, final boolean useCache) throws IOException {
if (index == null) {
throw new IllegalStateException("terms index was not loaded");
}
if (term.bytes.length <= target.length) {
term.bytes = ArrayUtil.grow(term.bytes, 1+target.length);
}
assert clearEOF();
// if (DEBUG) {
// System.out.println("\nBTTR.seekExact seg=" + segment + " target=" + fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
// printSeekState();
// }
FST.Arc<BytesRef> arc;
int targetUpto;
BytesRef output;
targetBeforeCurrentLength = currentFrame.ord;
if (currentFrame != staticFrame) {
// We are already seek'd; find the common
// prefix of new seek term vs current term and
// re-use the corresponding seek state. For
// example, if app first seeks to foobar, then
// seeks to foobaz, we can re-use the seek state
// for the first 5 bytes.
// if (DEBUG) {
// System.out.println(" re-use current seek state validIndexPrefix=" + validIndexPrefix);
// }
arc = arcs[0];
assert arc.isFinal();
output = arc.output;
targetUpto = 0;
Frame lastFrame = stack[0];
assert validIndexPrefix <= term.length;
final int targetLimit = Math.min(target.length, validIndexPrefix);
int cmp = 0;
// TODO: reverse vLong byte order for better FST
// prefix output sharing
// First compare up to valid seek frames:
while (targetUpto < targetLimit) {
cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
// if (DEBUG) {
// System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + " output=" + output);
// }
if (cmp != 0) {
break;
}
arc = arcs[1+targetUpto];
//if (arc.label != (target.bytes[target.offset + targetUpto] & 0xFF)) {
//System.out.println("FAIL: arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF));
//}
assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
if (arc.output != NO_OUTPUT) {
output = fstOutputs.add(output, arc.output);
}
if (arc.isFinal()) {
lastFrame = stack[1+lastFrame.ord];
}
targetUpto++;
}
if (cmp == 0) {
final int targetUptoMid = targetUpto;
// Second compare the rest of the term, but
// don't save arc/output/frame; we only do this
// to find out if the target term is before,
// equal or after the current term
final int targetLimit2 = Math.min(target.length, term.length);
while (targetUpto < targetLimit2) {
cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
// if (DEBUG) {
// System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
// }
if (cmp != 0) {
break;
}
targetUpto++;
}
if (cmp == 0) {
cmp = term.length - target.length;
}
targetUpto = targetUptoMid;
}
if (cmp < 0) {
// Common case: target term is after current
// term, ie, app is seeking multiple terms
// in sorted order
// if (DEBUG) {
// System.out.println(" target is after current (shares prefixLen=" + targetUpto + "); frame.ord=" + lastFrame.ord);
// }
currentFrame = lastFrame;
} else if (cmp > 0) {
// Uncommon case: target term
// is before current term; this means we can
// keep the currentFrame but we must rewind it
// (so we scan from the start)
targetBeforeCurrentLength = 0;
// if (DEBUG) {
// System.out.println(" target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
// }
currentFrame = lastFrame;
currentFrame.rewind();
} else {
// Target is exactly the same as current term
assert term.length == target.length;
if (termExists) {
// if (DEBUG) {
// System.out.println(" target is same as current; return true");
// }
return true;
} else {
// if (DEBUG) {
// System.out.println(" target is same as current but term doesn't exist");
// }
}
//validIndexPrefix = currentFrame.depth;
//term.length = target.length;
//return termExists;
}
} else {
targetBeforeCurrentLength = -1;
arc = index.getFirstArc(arcs[0]);
// Empty string prefix must have an output (block) in the index!
assert arc.isFinal();
assert arc.output != null;
// if (DEBUG) {
// System.out.println(" no seek state; push root frame");
// }
output = arc.output;
currentFrame = staticFrame;
//term.length = 0;
targetUpto = 0;
currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), 0);
}
// if (DEBUG) {
// System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
// }
while (targetUpto < target.length) {
final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
final FST.Arc<BytesRef> nextArc = index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);
if (nextArc == null) {
// Index is exhausted
// if (DEBUG) {
// System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + toHex(targetLabel));
// }
validIndexPrefix = currentFrame.prefix;
//validIndexPrefix = targetUpto;
currentFrame.scanToFloorFrame(target);
if (!currentFrame.hasTerms) {
termExists = false;
term.bytes[targetUpto] = (byte) targetLabel;
term.length = 1+targetUpto;
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// }
return false;
}
currentFrame.loadBlock();
final SeekStatus result = currentFrame.scanToTerm(target, true);
if (result == SeekStatus.FOUND) {
// if (DEBUG) {
// System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
// }
return true;
} else {
// if (DEBUG) {
// System.out.println(" got " + result + "; return NOT_FOUND term=" + brToString(term));
// }
return false;
}
} else {
// Follow this arc
arc = nextArc;
term.bytes[targetUpto] = (byte) targetLabel;
// Aggregate output as we go:
assert arc.output != null;
if (arc.output != NO_OUTPUT) {
output = fstOutputs.add(output, arc.output);
}
// if (DEBUG) {
// System.out.println(" index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
// }
targetUpto++;
if (arc.isFinal()) {
//if (DEBUG) System.out.println(" arc is final!");
currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), targetUpto);
//if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
}
}
}
//validIndexPrefix = targetUpto;
validIndexPrefix = currentFrame.prefix;
currentFrame.scanToFloorFrame(target);
// Target term is entirely contained in the index:
if (!currentFrame.hasTerms) {
termExists = false;
term.length = targetUpto;
// if (DEBUG) {
// System.out.println(" FAST NOT_FOUND term=" + brToString(term));
// }
return false;
}
currentFrame.loadBlock();
final SeekStatus result = currentFrame.scanToTerm(target, true);
if (result == SeekStatus.FOUND) {
// if (DEBUG) {
// System.out.println(" return FOUND term=" + term.utf8ToString() + " " + term);
// }
return true;
} else {
// if (DEBUG) {
// System.out.println(" got result " + result + "; return NOT_FOUND term=" + term.utf8ToString());
// }
return false;
}
}
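The heart of seekExact's "already seek'd" branch is computing how many leading bytes the new target shares with the previous term, so the corresponding frames and FST outputs can be reused instead of re-walking the index from the root. On its own, that shared-prefix computation looks roughly like this (plain byte comparison, no Lucene types):
// Illustrative only: shared-prefix length between previous term and new target.
class SharedPrefixSketch {
  static int sharedPrefix(byte[] prev, int prevLen, byte[] target, int targetLen) {
    int limit = Math.min(prevLen, targetLen);
    int upto = 0;
    while (upto < limit && prev[upto] == target[upto]) {
      upto++;                    // frames covering this prefix can be reused
    }
    return upto;
  }
}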
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public SeekStatus seekCeil(final BytesRef target, final boolean useCache) throws IOException {
if (index == null) {
throw new IllegalStateException("terms index was not loaded");
}
if (term.bytes.length <= target.length) {
term.bytes = ArrayUtil.grow(term.bytes, 1+target.length);
}
assert clearEOF();
//if (DEBUG) {
//System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
//printSeekState();
//}
FST.Arc<BytesRef> arc;
int targetUpto;
BytesRef output;
targetBeforeCurrentLength = currentFrame.ord;
if (currentFrame != staticFrame) {
// We are already seek'd; find the common
// prefix of new seek term vs current term and
// re-use the corresponding seek state. For
// example, if app first seeks to foobar, then
// seeks to foobaz, we can re-use the seek state
// for the first 5 bytes.
//if (DEBUG) {
//System.out.println(" re-use current seek state validIndexPrefix=" + validIndexPrefix);
//}
arc = arcs[0];
assert arc.isFinal();
output = arc.output;
targetUpto = 0;
Frame lastFrame = stack[0];
assert validIndexPrefix <= term.length;
final int targetLimit = Math.min(target.length, validIndexPrefix);
int cmp = 0;
// TODO: we should write our vLong backwards (MSB
// first) to get better sharing from the FST
// First compare up to valid seek frames:
while (targetUpto < targetLimit) {
cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
//if (DEBUG) {
//System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + " output=" + output);
//}
if (cmp != 0) {
break;
}
arc = arcs[1+targetUpto];
assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
// TODO: we could save the outputs in a local
// byte[][] instead of making new objects every
// seek; but often the FST doesn't have any
// shared bytes (but this could change if we
// reverse vLong byte order)
if (arc.output != NO_OUTPUT) {
output = fstOutputs.add(output, arc.output);
}
if (arc.isFinal()) {
lastFrame = stack[1+lastFrame.ord];
}
targetUpto++;
}
if (cmp == 0) {
final int targetUptoMid = targetUpto;
// Second compare the rest of the term, but
// don't save arc/output/frame:
final int targetLimit2 = Math.min(target.length, term.length);
while (targetUpto < targetLimit2) {
cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
//if (DEBUG) {
//System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
//}
if (cmp != 0) {
break;
}
targetUpto++;
}
if (cmp == 0) {
cmp = term.length - target.length;
}
targetUpto = targetUptoMid;
}
if (cmp < 0) {
// Common case: target term is after current
// term, ie, app is seeking multiple terms
// in sorted order
//if (DEBUG) {
//System.out.println(" target is after current (shares prefixLen=" + targetUpto + "); clear frame.scanned ord=" + lastFrame.ord);
//}
currentFrame = lastFrame;
} else if (cmp > 0) {
// Uncommon case: target term
// is before current term; this means we can
// keep the currentFrame but we must rewind it
// (so we scan from the start)
targetBeforeCurrentLength = 0;
//if (DEBUG) {
//System.out.println(" target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
//}
currentFrame = lastFrame;
currentFrame.rewind();
} else {
// Target is exactly the same as current term
assert term.length == target.length;
if (termExists) {
//if (DEBUG) {
//System.out.println(" target is same as current; return FOUND");
//}
return SeekStatus.FOUND;
} else {
//if (DEBUG) {
//System.out.println(" target is same as current but term doesn't exist");
//}
}
}
} else {
targetBeforeCurrentLength = -1;
arc = index.getFirstArc(arcs[0]);
// Empty string prefix must have an output (block) in the index!
assert arc.isFinal();
assert arc.output != null;
//if (DEBUG) {
//System.out.println(" no seek state; push root frame");
//}
output = arc.output;
currentFrame = staticFrame;
//term.length = 0;
targetUpto = 0;
currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), 0);
}
//if (DEBUG) {
//System.out.println(" start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
//}
while (targetUpto < target.length) {
final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
final FST.Arc<BytesRef> nextArc = index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);
if (nextArc == null) {
// Index is exhausted
// if (DEBUG) {
// System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + toHex(targetLabel));
// }
validIndexPrefix = currentFrame.prefix;
//validIndexPrefix = targetUpto;
currentFrame.scanToFloorFrame(target);
currentFrame.loadBlock();
final SeekStatus result = currentFrame.scanToTerm(target, false);
if (result == SeekStatus.END) {
term.copyBytes(target);
termExists = false;
if (next() != null) {
//if (DEBUG) {
//System.out.println(" return NOT_FOUND term=" + brToString(term) + " " + term);
//}
return SeekStatus.NOT_FOUND;
} else {
//if (DEBUG) {
//System.out.println(" return END");
//}
return SeekStatus.END;
}
} else {
//if (DEBUG) {
//System.out.println(" return " + result + " term=" + brToString(term) + " " + term);
//}
return result;
}
} else {
// Follow this arc
term.bytes[targetUpto] = (byte) targetLabel;
arc = nextArc;
// Aggregate output as we go:
assert arc.output != null;
if (arc.output != NO_OUTPUT) {
output = fstOutputs.add(output, arc.output);
}
//if (DEBUG) {
//System.out.println(" index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
//}
targetUpto++;
if (arc.isFinal()) {
//if (DEBUG) System.out.println(" arc is final!");
currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), targetUpto);
//if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
}
}
}
//validIndexPrefix = targetUpto;
validIndexPrefix = currentFrame.prefix;
currentFrame.scanToFloorFrame(target);
currentFrame.loadBlock();
final SeekStatus result = currentFrame.scanToTerm(target, false);
if (result == SeekStatus.END) {
term.copyBytes(target);
termExists = false;
if (next() != null) {
//if (DEBUG) {
//System.out.println(" return NOT_FOUND term=" + term.utf8ToString() + " " + term);
//}
return SeekStatus.NOT_FOUND;
} else {
//if (DEBUG) {
//System.out.println(" return END");
//}
return SeekStatus.END;
}
} else {
return result;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public BytesRef next() throws IOException {
if (in == null) {
// Fresh TermsEnum; seek to first term:
final FST.Arc<BytesRef> arc;
if (index != null) {
arc = index.getFirstArc(arcs[0]);
// Empty string prefix must have an output in the index!
assert arc.isFinal();
} else {
arc = null;
}
currentFrame = pushFrame(arc, rootCode, 0);
currentFrame.loadBlock();
}
targetBeforeCurrentLength = currentFrame.ord;
assert !eof;
//if (DEBUG) {
//System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + " termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
//printSeekState();
//}
if (currentFrame == staticFrame) {
// If seek was previously called and the term was
// cached, or seek(TermState) was called, usually
// the caller is just going to pull a Docs/DocsAndPositionsEnum or get
// docFreq, etc. But, if they then call next(),
// this method catches up all internal state so next()
// works properly:
//if (DEBUG) System.out.println(" re-seek to pending term=" + term.utf8ToString() + " " + term);
final boolean result = seekExact(term, false);
assert result;
}
// Pop finished blocks
while (currentFrame.nextEnt == currentFrame.entCount) {
if (!currentFrame.isLastInFloor) {
currentFrame.loadNextFloorBlock();
} else {
//if (DEBUG) System.out.println(" pop frame");
if (currentFrame.ord == 0) {
//if (DEBUG) System.out.println(" return null");
assert setEOF();
term.length = 0;
validIndexPrefix = 0;
currentFrame.rewind();
termExists = false;
return null;
}
final long lastFP = currentFrame.fpOrig;
currentFrame = stack[currentFrame.ord-1];
if (currentFrame.nextEnt == -1 || currentFrame.lastSubFP != lastFP) {
// We popped into a frame that's not loaded
// yet or not scan'd to the right entry
currentFrame.scanToFloorFrame(term);
currentFrame.loadBlock();
currentFrame.scanToSubBlock(lastFP);
}
// Note that the seek state (last seek) has been
// invalidated beyond this depth
validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefix);
//if (DEBUG) {
//System.out.println(" reset validIndexPrefix=" + validIndexPrefix);
//}
}
}
while(true) {
if (currentFrame.next()) {
// Push to new block:
//if (DEBUG) System.out.println(" push frame");
currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length);
// This is a "next" frame -- even if it's
// floor'd we must pretend it isn't so we don't
// try to scan to the right floor frame:
currentFrame.isFloor = false;
//currentFrame.hasTerms = true;
currentFrame.loadBlock();
} else {
//if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + " currentFrame.ord=" + currentFrame.ord);
return term;
}
}
}
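next() is effectively a depth-first walk over nested blocks driven by an explicit stack of frames: pop frames that are exhausted, push a frame whenever the current entry is a sub-block, and return whenever it is a term. Stripped of all on-disk details, that control flow is just the following; the List-of-lists block model is purely illustrative:
// Illustrative only: the pop/push frame traversal as a generic explicit-stack DFS.
class FrameWalkSketch {
  // Entries are either terms (String) or nested blocks (java.util.List).
  static void walk(java.util.List<?> rootBlock, java.util.function.Consumer<String> onTerm) {
    java.util.ArrayDeque<java.util.Iterator<?>> stack = new java.util.ArrayDeque<>();
    stack.push(rootBlock.iterator());
    while (!stack.isEmpty()) {
      java.util.Iterator<?> frame = stack.peek();
      if (!frame.hasNext()) {
        stack.pop();                                          // pop finished frames
        continue;
      }
      Object entry = frame.next();
      if (entry instanceof java.util.List) {
        stack.push(((java.util.List<?>) entry).iterator());   // push to new block
      } else {
        onTerm.accept((String) entry);                        // emit a term
      }
    }
  }
}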
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public int docFreq() throws IOException {
assert !eof;
//if (DEBUG) System.out.println("BTR.docFreq");
currentFrame.decodeMetaData();
//if (DEBUG) System.out.println(" return " + currentFrame.state.docFreq);
return currentFrame.state.docFreq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public long totalTermFreq() throws IOException {
assert !eof;
currentFrame.decodeMetaData();
return currentFrame.state.totalTermFreq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
assert !eof;
//if (DEBUG) {
//System.out.println("BTTR.docs seg=" + segment);
//}
currentFrame.decodeMetaData();
//if (DEBUG) {
//System.out.println(" state=" + currentFrame.state);
//}
return postingsReader.docs(fieldInfo, currentFrame.state, skipDocs, reuse, needsFreqs);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// Positions were not indexed:
return null;
}
if (needsOffsets &&
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
// Offsets were not indexed:
return null;
}
assert !eof;
currentFrame.decodeMetaData();
return postingsReader.docsAndPositions(fieldInfo, currentFrame.state, skipDocs, reuse, needsOffsets);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public void seekExact(BytesRef target, TermState otherState) throws IOException {
// if (DEBUG) {
// System.out.println("BTTR.seekExact termState seg=" + segment + " target=" + target.utf8ToString() + " " + target + " state=" + otherState);
// }
assert clearEOF();
if (target.compareTo(term) != 0 || !termExists) {
assert otherState != null && otherState instanceof BlockTermState;
currentFrame = staticFrame;
currentFrame.state.copyFrom(otherState);
term.copyBytes(target);
currentFrame.metaDataUpto = currentFrame.getTermBlockOrd();
assert currentFrame.metaDataUpto > 0;
validIndexPrefix = 0;
} else {
// if (DEBUG) {
// System.out.println(" skip seek: already on target state=" + currentFrame.state);
// }
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public TermState termState() throws IOException {
assert !eof;
currentFrame.decodeMetaData();
TermState ts = currentFrame.state.clone();
//if (DEBUG) System.out.println("BTTR.termState seg=" + segment + " state=" + ts);
return ts;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@Override
public void seekExact(long ord) throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
void loadNextFloorBlock() throws IOException {
//if (DEBUG) {
//System.out.println(" loadNextFloorBlock fp=" + fp + " fpEnd=" + fpEnd);
//}
assert arc == null || isFloor: "arc=" + arc + " isFloor=" + isFloor;
fp = fpEnd;
nextEnt = -1;
loadBlock();
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
void loadBlock() throws IOException {
// Clone the IndexInput lazily, so that consumers
// that just pull a TermsEnum to
// seekExact(TermState) don't pay this cost:
initIndexInput();
if (nextEnt != -1) {
// Already loaded
return;
}
//System.out.println("blc=" + blockLoadCount);
in.seek(fp);
int code = in.readVInt();
entCount = code >>> 1;
assert entCount > 0;
isLastInFloor = (code & 1) != 0;
assert arc == null || (isLastInFloor || isFloor);
// TODO: if suffixes were stored in random-access
// array structure, then we could do binary search
// instead of linear scan to find target term; eg
// we could have simple array of offsets
// term suffixes:
code = in.readVInt();
isLeafBlock = (code & 1) != 0;
int numBytes = code >>> 1;
if (suffixBytes.length < numBytes) {
suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
}
in.readBytes(suffixBytes, 0, numBytes);
suffixesReader.reset(suffixBytes, 0, numBytes);
/*if (DEBUG) {
if (arc == null) {
System.out.println(" loadBlock (next) fp=" + fp + " entCount=" + entCount + " prefixLen=" + prefix + " isLastInFloor=" + isLastInFloor + " leaf?=" + isLeafBlock);
} else {
System.out.println(" loadBlock (seek) fp=" + fp + " entCount=" + entCount + " prefixLen=" + prefix + " hasTerms?=" + hasTerms + " isFloor?=" + isFloor + " isLastInFloor=" + isLastInFloor + " leaf?=" + isLeafBlock);
}
}*/
// stats
numBytes = in.readVInt();
if (statBytes.length < numBytes) {
statBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
}
in.readBytes(statBytes, 0, numBytes);
statsReader.reset(statBytes, 0, numBytes);
metaDataUpto = 0;
state.termBlockOrd = 0;
nextEnt = 0;
lastSubFP = -1;
// TODO: we could skip this if !hasTerms; but
// that's rare so won't help much
postingsReader.readTermsBlock(in, fieldInfo, state);
// Sub-blocks of a single floor block are always
// written one after another -- tail recurse:
fpEnd = in.getFilePointer();
// if (DEBUG) {
// System.out.println(" fpEnd=" + fpEnd);
// }
}
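loadBlock's on-disk block header follows the same "count shifted left, flag in the low bit" convention seen elsewhere: one VInt packs entCount with isLastInFloor, the next packs the suffix-byte length with isLeafBlock, and the suffix and stats byte arrays follow. A hedged sketch of decoding just those two header VInts; readVInt here is a hypothetical supplier standing in for the IndexInput:
// Illustrative only: decode of the two packed block-header VInts.
class BlockHeaderSketch {
  int entCount;
  boolean isLastInFloor;
  int suffixNumBytes;
  boolean isLeafBlock;

  void readHeader(java.util.function.IntSupplier readVInt) {
    int code = readVInt.getAsInt();
    entCount = code >>> 1;              // entry count in the upper bits
    isLastInFloor = (code & 1) != 0;    // low bit: last block in the floor chain
    code = readVInt.getAsInt();
    suffixNumBytes = code >>> 1;        // length of the suffix bytes that follow
    isLeafBlock = (code & 1) != 0;      // low bit: leaf block (terms only)
  }
}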
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
void rewind() throws IOException {
// Force reload:
fp = fpOrig;
nextEnt = -1;
hasTerms = hasTermsOrig;
if (isFloor) {
floorDataReader.rewind();
numFollowFloorBlocks = floorDataReader.readVInt();
nextFloorLabel = floorDataReader.readByte() & 0xff;
}
/*
//System.out.println("rewind");
// Keeps the block loaded, but rewinds its state:
if (nextEnt > 0 || fp != fpOrig) {
if (DEBUG) {
System.out.println(" rewind frame ord=" + ord + " fpOrig=" + fpOrig + " fp=" + fp + " hasTerms?=" + hasTerms + " isFloor?=" + isFloor + " nextEnt=" + nextEnt + " prefixLen=" + prefix);
}
if (fp != fpOrig) {
fp = fpOrig;
nextEnt = -1;
} else {
nextEnt = 0;
}
hasTerms = hasTermsOrig;
if (isFloor) {
floorDataReader.rewind();
numFollowFloorBlocks = floorDataReader.readVInt();
nextFloorLabel = floorDataReader.readByte() & 0xff;
}
assert suffixBytes != null;
suffixesReader.rewind();
assert statBytes != null;
statsReader.rewind();
metaDataUpto = 0;
state.termBlockOrd = 0;
// TODO: skip this if !hasTerms? Then postings
// impl wouldn't have to write useless 0 byte
postingsReader.resetTermsBlock(fieldInfo, state);
lastSubFP = -1;
} else if (DEBUG) {
System.out.println(" skip rewind fp=" + fp + " fpOrig=" + fpOrig + " nextEnt=" + nextEnt + " ord=" + ord);
}
*/
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
public void decodeMetaData() throws IOException {
//if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
// lazily catch up on metadata decode:
final int limit = getTermBlockOrd();
assert limit > 0;
// We must set/incr state.termCount because
// postings impl can look at this
state.termBlockOrd = metaDataUpto;
// TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) {
// TODO: we could make "tiers" of metadata, ie,
// decode docFreq/totalTF but don't decode postings
// metadata; this way caller could get
// docFreq/totalTF w/o paying decode cost for
// postings
// TODO: if docFreq were bulk decoded we could
// just skipN here:
state.docFreq = statsReader.readVInt();
//if (DEBUG) System.out.println(" dF=" + state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
state.totalTermFreq = state.docFreq + statsReader.readVLong();
//if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
}
postingsReader.nextTerm(fieldInfo, state);
metaDataUpto++;
state.termBlockOrd++;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
public SeekStatus scanToTerm(BytesRef target, boolean exactOnly) throws IOException {
return isLeafBlock ? scanToTermLeaf(target, exactOnly) : scanToTermNonLeaf(target, exactOnly);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
// if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + brToString(term));
assert nextEnt != -1;
termExists = true;
subCode = 0;
if (nextEnt == entCount) {
if (exactOnly) {
fillTerm();
}
return SeekStatus.END;
}
assert prefixMatches(target);
// Loop over each entry (term or sub-block) in this block:
//nextTerm: while(nextEnt < entCount) {
nextTerm: while (true) {
nextEnt++;
suffix = suffixesReader.readVInt();
// if (DEBUG) {
// BytesRef suffixBytesRef = new BytesRef();
// suffixBytesRef.bytes = suffixBytes;
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
// }
final int termLen = prefix + suffix;
startBytePos = suffixesReader.getPosition();
suffixesReader.skipBytes(suffix);
final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen);
int targetPos = target.offset + prefix;
// Loop over bytes in the suffix, comparing to
// the target
int bytePos = startBytePos;
while(true) {
final int cmp;
final boolean stop;
if (targetPos < targetLimit) {
cmp = (suffixBytes[bytePos++]&0xFF) - (target.bytes[targetPos++]&0xFF);
stop = false;
} else {
assert targetPos == targetLimit;
cmp = termLen - target.length;
stop = true;
}
if (cmp < 0) {
// Current entry is still before the target;
// keep scanning
if (nextEnt == entCount) {
if (exactOnly) {
fillTerm();
}
// We are done scanning this block
break nextTerm;
} else {
continue nextTerm;
}
} else if (cmp > 0) {
// Done! Current entry is after target --
// return NOT_FOUND:
fillTerm();
if (!exactOnly && !termExists) {
// We are on a sub-block, and caller wants
// us to position to the next term after
// the target, so we must recurse into the
// sub-frame(s):
currentFrame = pushFrame(null, currentFrame.lastSubFP, termLen);
currentFrame.loadBlock();
while (currentFrame.next()) {
currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length);
currentFrame.loadBlock();
}
}
//if (DEBUG) System.out.println(" not found");
return SeekStatus.NOT_FOUND;
} else if (stop) {
// Exact match!
// This cannot be a sub-block because we
// would have followed the index to this
// sub-block from the start:
assert termExists;
fillTerm();
//if (DEBUG) System.out.println(" found!");
return SeekStatus.FOUND;
}
}
}
// It is possible (and OK) that terms index pointed us
// at this block, but, we scanned the entire block and
// did not find the term to position to. This happens
// when the target is after the last term in the block
// (but, before the next term in the index). EG
// target could be foozzz, and terms index pointed us
// to the foo* block, but the last term in this block
// was fooz (and, eg, first term in the next block will
// be fop).
//if (DEBUG) System.out.println(" block end");
if (exactOnly) {
fillTerm();
}
// TODO: not consistent that in the
// not-exact case we don't next() into the next
// frame here
return SeekStatus.END;
}
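
The inner loop above compares the stored suffix byte-by-byte against the target, treating bytes as unsigned and falling back to a length comparison once the shared bytes are exhausted. A JDK-only sketch of that comparison rule (an assumed helper, not from the Lucene sources):

class SuffixCompareSketch {
  /** @return <0 if term sorts before target, 0 if equal, >0 if after */
  static int compare(byte[] term, byte[] target) {
    int limit = Math.min(term.length, target.length);
    for (int i = 0; i < limit; i++) {
      int cmp = (term[i] & 0xFF) - (target[i] & 0xFF); // unsigned byte compare
      if (cmp != 0) {
        return cmp;
      }
    }
    return term.length - target.length; // tie-break: shorter term sorts first
  }

  public static void main(String[] args) {
    System.out.println(compare("foo".getBytes(), "foozzz".getBytes()));  // < 0
    System.out.println(compare("fop".getBytes(), "foozzz".getBytes()));  // > 0
    System.out.println(compare("foo".getBytes(), "foo".getBytes()));     // 0
  }
}
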
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
//if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + brToString(term));
assert nextEnt != -1;
if (nextEnt == entCount) {
if (exactOnly) {
fillTerm();
termExists = subCode == 0;
}
return SeekStatus.END;
}
assert prefixMatches(target);
// Loop over each entry (term or sub-block) in this block:
//nextTerm: while(nextEnt < entCount) {
nextTerm: while (true) {
nextEnt++;
final int code = suffixesReader.readVInt();
suffix = code >>> 1;
// if (DEBUG) {
// BytesRef suffixBytesRef = new BytesRef();
// suffixBytesRef.bytes = suffixBytes;
// suffixBytesRef.offset = suffixesReader.getPosition();
// suffixBytesRef.length = suffix;
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
// }
termExists = (code & 1) == 0;
final int termLen = prefix + suffix;
startBytePos = suffixesReader.getPosition();
suffixesReader.skipBytes(suffix);
if (termExists) {
state.termBlockOrd++;
subCode = 0;
} else {
subCode = suffixesReader.readVLong();
lastSubFP = fp - subCode;
}
final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen);
int targetPos = target.offset + prefix;
// Loop over bytes in the suffix, comparing to
// the target
int bytePos = startBytePos;
while(true) {
final int cmp;
final boolean stop;
if (targetPos < targetLimit) {
cmp = (suffixBytes[bytePos++]&0xFF) - (target.bytes[targetPos++]&0xFF);
stop = false;
} else {
assert targetPos == targetLimit;
cmp = termLen - target.length;
stop = true;
}
if (cmp < 0) {
// Current entry is still before the target;
// keep scanning
if (nextEnt == entCount) {
if (exactOnly) {
fillTerm();
//termExists = true;
}
// We are done scanning this block
break nextTerm;
} else {
continue nextTerm;
}
} else if (cmp > 0) {
// Done! Current entry is after target --
// return NOT_FOUND:
fillTerm();
if (!exactOnly && !termExists) {
// We are on a sub-block, and caller wants
// us to position to the next term after
// the target, so we must recurse into the
// sub-frame(s):
currentFrame = pushFrame(null, currentFrame.lastSubFP, termLen);
currentFrame.loadBlock();
while (currentFrame.next()) {
currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length);
currentFrame.loadBlock();
}
}
//if (DEBUG) System.out.println(" not found");
return SeekStatus.NOT_FOUND;
} else if (stop) {
// Exact match!
// This cannot be a sub-block because we
// would have followed the index to this
// sub-block from the start:
assert termExists;
fillTerm();
//if (DEBUG) System.out.println(" found!");
return SeekStatus.FOUND;
}
}
}
// It is possible (and OK) that terms index pointed us
// at this block, but, we scanned the entire block and
// did not find the term to position to. This happens
// when the target is after the last term in the block
// (but, before the next term in the index). EG
// target could be foozzz, and terms index pointed us
// to the foo* block, but the last term in this block
// was fooz (and, eg, first term in the next block will
// be fop).
//if (DEBUG) System.out.println(" block end");
if (exactOnly) {
fillTerm();
}
// TODO: not consistent that in the
// not-exact case we don't next() into the next
// frame here
return SeekStatus.END;
}
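
In the non-leaf variant, the per-entry VInt stores the suffix length shifted left by one, with the low bit marking a sub-block; a sub-block entry is followed by a VLong holding its file pointer as a backwards delta from the current block's fp (lastSubFP = fp - subCode above). A small illustrative decoder, not the actual Lucene code:

class NonLeafEntrySketch {
  static void decodeEntry(long blockFP, int code, long subCodeIfSubBlock) {
    int suffixLength = code >>> 1;
    boolean isTerm = (code & 1) == 0;
    if (isTerm) {
      System.out.println("term entry, suffix length=" + suffixLength);
    } else {
      long subBlockFP = blockFP - subCodeIfSubBlock; // backwards delta from this block's fp
      System.out.println("sub-block entry, suffix length=" + suffixLength
          + " sub-block fp=" + subBlockFP);
    }
  }

  public static void main(String[] args) {
    decodeEntry(1000L, 5 << 1, 0);        // a term with a 5-byte suffix
    decodeEntry(1000L, (3 << 1) | 1, 40); // a sub-block whose block starts at fp 960
  }
}
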
// in lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
public void addProx(int numProx, DataInput positions, DataInput offsets) throws IOException {
int position = 0;
int lastOffset = 0;
for (int i = 0; i < numProx; i++) {
final int startOffset;
final int endOffset;
if (positions == null) {
position = -1;
} else {
position += positions.readVInt();
}
if (offsets == null) {
startOffset = endOffset = -1;
} else {
startOffset = lastOffset + offsets.readVInt();
endOffset = startOffset + offsets.readVInt();
lastOffset = endOffset;
}
addPosition(position, startOffset, endOffset);
}
}
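
addProx() consumes positions as VInt deltas from the previous position and offsets as (start-delta-from-last-end, length) pairs. A minimal sketch of that delta decoding, using plain int arrays in place of DataInput (the concrete values here are only assumed for illustration):

class ProxDeltaSketch {
  public static void main(String[] args) {
    int[] positionDeltas = {3, 2, 7};          // positions 3, 5, 12
    int[] offsetDeltas = {0, 3, 2, 3, 5, 4};   // (start, end) pairs: (0,3) (5,8) (13,17)
    int position = 0, lastOffset = 0, o = 0;
    for (int i = 0; i < positionDeltas.length; i++) {
      position += positionDeltas[i];                    // delta from previous position
      int startOffset = lastOffset + offsetDeltas[o++]; // delta from previous end offset
      int endOffset = startOffset + offsetDeltas[o++];  // stored as a length
      lastOffset = endOffset;
      System.out.println("pos=" + position + " offsets=[" + startOffset + "," + endOffset + ")");
    }
  }
}
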
// in lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
public int merge(MergeState mergeState) throws IOException {
int docCount = 0;
for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
final int maxDoc = reader.reader.maxDoc();
final Bits liveDocs = reader.liveDocs;
for (int docID = 0; docID < maxDoc; docID++) {
if (liveDocs != null && !liveDocs.get(docID)) {
// skip deleted docs
continue;
}
// NOTE: it's very important to first assign to vectors then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
Fields vectors = reader.reader.getTermVectors(docID);
addAllDocVectors(vectors, mergeState.fieldInfos);
docCount++;
mergeState.checkAbort.work(300);
}
}
finish(mergeState.fieldInfos, docCount);
return docCount;
}
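
The merge loop walks every docID of every source reader, skips documents marked deleted in liveDocs, and periodically reports work so the merge can be aborted. A JDK-only sketch of the skip-deleted-docs shape, with java.util.BitSet standing in for Lucene's Bits:

import java.util.BitSet;

class LiveDocsMergeSketch {
  public static void main(String[] args) {
    int maxDoc = 5;
    BitSet liveDocs = new BitSet(maxDoc);
    liveDocs.set(0, maxDoc);
    liveDocs.clear(2);             // doc 2 is deleted
    int docCount = 0;
    for (int docID = 0; docID < maxDoc; docID++) {
      if (!liveDocs.get(docID)) {
        continue;                  // skip deleted docs
      }
      docCount++;                  // "copy" the live document
    }
    System.out.println("merged " + docCount + " of " + maxDoc + " docs"); // 4 of 5
  }
}
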
// in lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java
protected final void addAllDocVectors(Fields vectors, FieldInfos fieldInfos) throws IOException {
if (vectors == null) {
startDocument(0);
return;
}
final int numFields = vectors.size();
if (numFields == -1) {
throw new IllegalStateException("vectors.size() must be implemented (it returned -1)");
}
startDocument(numFields);
final FieldsEnum fieldsEnum = vectors.iterator();
String fieldName;
String lastFieldName = null;
while((fieldName = fieldsEnum.next()) != null) {
final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);
assert lastFieldName == null || fieldName.compareTo(lastFieldName) > 0: "lastFieldName=" + lastFieldName + " fieldName=" + fieldName;
lastFieldName = fieldName;
final Terms terms = fieldsEnum.terms();
if (terms == null) {
// FieldsEnum shouldn't lie...
continue;
}
final int numTerms = (int) terms.size();
if (numTerms == -1) {
throw new IllegalStateException("terms.size() must be implemented (it returned -1)");
}
final TermsEnum termsEnum = terms.iterator(null);
DocsAndPositionsEnum docsAndPositionsEnum = null;
boolean startedField = false;
// NOTE: this is tricky, because TermVectors allow
// indexing offsets but NOT positions. So we must
// lazily init the field by checking whether first
// position we see is -1 or not.
int termCount = 0;
while(termsEnum.next() != null) {
termCount++;
final int freq = (int) termsEnum.totalTermFreq();
if (startedField) {
startTerm(termsEnum.term(), freq);
}
// TODO: we need a "query" API where we can ask (via
// flex API) what this term was indexed with...
// Both positions & offsets:
docsAndPositionsEnum = termsEnum.docsAndPositions(null, null, true);
final boolean hasOffsets;
boolean hasPositions = false;
if (docsAndPositionsEnum == null) {
// Fallback: no offsets
docsAndPositionsEnum = termsEnum.docsAndPositions(null, null, false);
hasOffsets = false;
} else {
hasOffsets = true;
}
if (docsAndPositionsEnum != null) {
final int docID = docsAndPositionsEnum.nextDoc();
assert docID != DocIdSetIterator.NO_MORE_DOCS;
assert docsAndPositionsEnum.freq() == freq;
for(int posUpto=0; posUpto<freq; posUpto++) {
final int pos = docsAndPositionsEnum.nextPosition();
if (!startedField) {
assert numTerms > 0;
hasPositions = pos != -1;
startField(fieldInfo, numTerms, hasPositions, hasOffsets);
startTerm(termsEnum.term(), freq);
startedField = true;
}
final int startOffset;
final int endOffset;
if (hasOffsets) {
startOffset = docsAndPositionsEnum.startOffset();
endOffset = docsAndPositionsEnum.endOffset();
assert startOffset != -1;
assert endOffset != -1;
} else {
startOffset = -1;
endOffset = -1;
}
assert !hasPositions || pos >= 0;
addPosition(pos, startOffset, endOffset);
}
} else {
if (!startedField) {
assert numTerms > 0;
startField(fieldInfo, numTerms, hasPositions, hasOffsets);
startTerm(termsEnum.term(), freq);
startedField = true;
}
}
}
assert termCount == numTerms;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java
public void merge(MergeState mergeState, TermsEnum termsEnum) throws IOException {
BytesRef term;
assert termsEnum != null;
long sumTotalTermFreq = 0;
long sumDocFreq = 0;
long sumDFsinceLastAbortCheck = 0;
FixedBitSet visitedDocs = new FixedBitSet(mergeState.segmentInfo.getDocCount());
IndexOptions indexOptions = mergeState.fieldInfo.getIndexOptions();
if (indexOptions == IndexOptions.DOCS_ONLY) {
if (docsEnum == null) {
docsEnum = new MappingMultiDocsEnum();
}
docsEnum.setMergeState(mergeState);
MultiDocsEnum docsEnumIn = null;
while((term = termsEnum.next()) != null) {
// We can pass null for liveDocs, because the
// mapping enum will skip the non-live docs:
docsEnumIn = (MultiDocsEnum) termsEnum.docs(null, docsEnumIn, false);
if (docsEnumIn != null) {
docsEnum.reset(docsEnumIn);
final PostingsConsumer postingsConsumer = startTerm(term);
final TermStats stats = postingsConsumer.merge(mergeState, docsEnum, visitedDocs);
if (stats.docFreq > 0) {
finishTerm(term, stats);
sumTotalTermFreq += stats.docFreq;
sumDFsinceLastAbortCheck += stats.docFreq;
sumDocFreq += stats.docFreq;
if (sumDFsinceLastAbortCheck > 60000) {
mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
sumDFsinceLastAbortCheck = 0;
}
}
}
}
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
if (docsAndFreqsEnum == null) {
docsAndFreqsEnum = new MappingMultiDocsEnum();
}
docsAndFreqsEnum.setMergeState(mergeState);
MultiDocsEnum docsAndFreqsEnumIn = null;
while((term = termsEnum.next()) != null) {
// We can pass null for liveDocs, because the
// mapping enum will skip the non-live docs:
docsAndFreqsEnumIn = (MultiDocsEnum) termsEnum.docs(null, docsAndFreqsEnumIn, true);
assert docsAndFreqsEnumIn != null;
docsAndFreqsEnum.reset(docsAndFreqsEnumIn);
final PostingsConsumer postingsConsumer = startTerm(term);
final TermStats stats = postingsConsumer.merge(mergeState, docsAndFreqsEnum, visitedDocs);
if (stats.docFreq > 0) {
finishTerm(term, stats);
sumTotalTermFreq += stats.totalTermFreq;
sumDFsinceLastAbortCheck += stats.docFreq;
sumDocFreq += stats.docFreq;
if (sumDFsinceLastAbortCheck > 60000) {
mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
sumDFsinceLastAbortCheck = 0;
}
}
}
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
if (postingsEnum == null) {
postingsEnum = new MappingMultiDocsAndPositionsEnum();
}
postingsEnum.setMergeState(mergeState);
MultiDocsAndPositionsEnum postingsEnumIn = null;
while((term = termsEnum.next()) != null) {
// We can pass null for liveDocs, because the
// mapping enum will skip the non-live docs:
postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn, false);
assert postingsEnumIn != null;
postingsEnum.reset(postingsEnumIn);
// set PayloadProcessor
if (mergeState.payloadProcessorProvider != null) {
for (int i = 0; i < mergeState.readers.size(); i++) {
if (mergeState.readerPayloadProcessor[i] != null) {
mergeState.currentPayloadProcessor[i] = mergeState.readerPayloadProcessor[i].getProcessor(mergeState.fieldInfo.name, term);
}
}
}
final PostingsConsumer postingsConsumer = startTerm(term);
final TermStats stats = postingsConsumer.merge(mergeState, postingsEnum, visitedDocs);
if (stats.docFreq > 0) {
finishTerm(term, stats);
sumTotalTermFreq += stats.totalTermFreq;
sumDFsinceLastAbortCheck += stats.docFreq;
sumDocFreq += stats.docFreq;
if (sumDFsinceLastAbortCheck > 60000) {
mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
sumDFsinceLastAbortCheck = 0;
}
}
}
} else {
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
if (postingsEnum == null) {
postingsEnum = new MappingMultiDocsAndPositionsEnum();
}
postingsEnum.setMergeState(mergeState);
MultiDocsAndPositionsEnum postingsEnumIn = null;
while((term = termsEnum.next()) != null) {
// We can pass null for liveDocs, because the
// mapping enum will skip the non-live docs:
postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn, true);
assert postingsEnumIn != null;
postingsEnum.reset(postingsEnumIn);
// set PayloadProcessor
if (mergeState.payloadProcessorProvider != null) {
for (int i = 0; i < mergeState.readers.size(); i++) {
if (mergeState.readerPayloadProcessor[i] != null) {
mergeState.currentPayloadProcessor[i] = mergeState.readerPayloadProcessor[i].getProcessor(mergeState.fieldInfo.name, term);
}
}
}
final PostingsConsumer postingsConsumer = startTerm(term);
final TermStats stats = postingsConsumer.merge(mergeState, postingsEnum, visitedDocs);
if (stats.docFreq > 0) {
finishTerm(term, stats);
sumTotalTermFreq += stats.totalTermFreq;
sumDFsinceLastAbortCheck += stats.docFreq;
sumDocFreq += stats.docFreq;
if (sumDFsinceLastAbortCheck > 60000) {
mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
sumDFsinceLastAbortCheck = 0;
}
}
}
}
finish(sumTotalTermFreq, sumDocFreq, visitedDocs.cardinality());
}
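
Rather than checking for an aborted merge on every term, the code above accumulates docFreq and only calls checkAbort.work() once roughly 60000 units have piled up. A standalone sketch of that cadence (the CheckAbort interface and the /5.0 scaling echo the pattern above; everything else is illustrative):

class AbortCheckSketch {
  interface CheckAbort { void work(double units); }

  static void mergeTerms(int[] docFreqs, CheckAbort checkAbort) {
    long sinceLastCheck = 0;
    for (int df : docFreqs) {
      sinceLastCheck += df;                    // cheap per-term accounting
      if (sinceLastCheck > 60000) {
        checkAbort.work(sinceLastCheck / 5.0); // coarse-grained progress/abort check
        sinceLastCheck = 0;
      }
    }
  }

  public static void main(String[] args) {
    int[] docFreqs = new int[100];
    java.util.Arrays.fill(docFreqs, 1500);     // 150000 docFreq in total
    mergeTerms(docFreqs, units -> System.out.println("checkAbort.work(" + units + ")"));
  }
}
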
// in lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state)
throws IOException {
return new FieldsWriter(state);
}
// in lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
Override
public void close() throws IOException {
consumer.close();
}
// in lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
Override
public TermsConsumer addField(FieldInfo field) throws IOException {
final PostingsFormat format = getPostingsFormatForField(field.name);
if (format == null) {
throw new IllegalStateException("invalid null PostingsFormat for field=\"" + field.name + "\"");
}
final String formatName = format.getName();
String previousValue = field.putAttribute(PER_FIELD_FORMAT_KEY, formatName);
assert previousValue == null;
Integer suffix;
FieldsConsumerAndSuffix consumer = formats.get(format);
if (consumer == null) {
// First time we are seeing this format; create a new instance
// bump the suffix
suffix = suffixes.get(formatName);
if (suffix == null) {
suffix = 0;
} else {
suffix = suffix + 1;
}
suffixes.put(formatName, suffix);
final String segmentSuffix = getFullSegmentSuffix(field.name,
segmentWriteState.segmentSuffix,
getSuffix(formatName, Integer.toString(suffix)));
consumer = new FieldsConsumerAndSuffix();
consumer.consumer = format.fieldsConsumer(new SegmentWriteState(segmentWriteState, segmentSuffix));
consumer.suffix = suffix;
formats.put(format, consumer);
} else {
// we've already seen this format, so just grab its suffix
assert suffixes.containsKey(formatName);
suffix = consumer.suffix;
}
previousValue = field.putAttribute(PER_FIELD_SUFFIX_KEY, Integer.toString(suffix));
assert previousValue == null;
// TODO: we should only provide the "slice" of FIS
// that this PF actually sees ... then stuff like
// .hasProx could work correctly?
// NOTE: .hasProx is already broken in the same way for the non-perfield case,
// if there is a fieldinfo with prox that has no postings, you get a 0 byte file.
return consumer.consumer.addField(field);
}
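
addField() gives every distinct per-field format instance a numeric suffix: fields sharing a format reuse one consumer, while another instance of the same format name gets the next suffix, and that suffix becomes part of the segment suffix passed to the delegate format. A simplified sketch of the bookkeeping with hypothetical names (the segmentSuffix layout shown is assumed, not the real one):

import java.util.HashMap;
import java.util.Map;

class FormatSuffixSketch {
  private final Map<String, Integer> suffixes = new HashMap<>();

  /** Returns the suffix to use for a newly created consumer of this format name. */
  int nextSuffix(String formatName) {
    Integer suffix = suffixes.get(formatName);
    suffix = (suffix == null) ? 0 : suffix + 1;
    suffixes.put(formatName, suffix);
    return suffix;
  }

  static String segmentSuffix(String formatName, int suffix) {
    return formatName + "_" + suffix;   // assumed layout, for illustration only
  }

  public static void main(String[] args) {
    FormatSuffixSketch s = new FormatSuffixSketch();
    System.out.println(segmentSuffix("Lucene40", s.nextSuffix("Lucene40"))); // Lucene40_0
    System.out.println(segmentSuffix("Lucene40", s.nextSuffix("Lucene40"))); // Lucene40_1
    System.out.println(segmentSuffix("Pulsing", s.nextSuffix("Pulsing")));   // Pulsing_0
  }
}
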
// in lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
Override
public void close() throws IOException {
// Close all subs
IOUtils.close(formats.values());
}
// in lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
Override
public String next() throws IOException {
if (it.hasNext()) {
current = it.next();
} else {
current = null;
}
return current;
}
// in lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
Override
public Terms terms() throws IOException {
return fields.get(current).terms(current);
}
// in lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
Override
public FieldsEnum iterator() throws IOException {
return new FieldsIterator();
}
// in lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
Override
public Terms terms(String field) throws IOException {
FieldsProducer fieldsProducer = fields.get(field);
return fieldsProducer == null ? null : fieldsProducer.terms(field);
}
// in lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
Override
public void close() throws IOException {
IOUtils.close(formats.values());
}
// in lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
Override
public FieldsProducer fieldsProducer(SegmentReadState state)
throws IOException {
return new FieldsReader(state);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
protected void readHeader(IndexInput input) throws IOException {
CodecUtil.checkHeader(input, BlockTermsWriter.CODEC_NAME,
BlockTermsWriter.VERSION_START,
BlockTermsWriter.VERSION_CURRENT);
dirOffset = input.readLong();
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
protected void seekDir(IndexInput input, long dirOffset)
throws IOException {
input.seek(dirOffset);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
Override
public void close() throws IOException {
try {
try {
if (indexReader != null) {
indexReader.close();
}
} finally {
// null so if an app hangs on to us (ie, we are not
// GCable, despite being closed) we still free most
// ram
indexReader = null;
if (in != null) {
in.close();
}
}
} finally {
if (postingsReader != null) {
postingsReader.close();
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
Override
public Terms terms(String field) throws IOException {
assert field != null;
return fields.get(field);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
Override
public Terms terms() throws IOException {
return current;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
return new SegmentTermsEnum();
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
Override
public long getSumDocFreq() throws IOException {
return sumDocFreq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
Override
public int getDocCount() throws IOException {
return docCount;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
Override
public SeekStatus seekCeil(final BytesRef target, final boolean useCache) throws IOException {
if (indexEnum == null) {
throw new IllegalStateException("terms index was not loaded");
}
//System.out.println("BTR.seek seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + term().utf8ToString() + " " + term() + " useCache=" + useCache + " indexIsCurrent=" + indexIsCurrent + " didIndexNext=" + didIndexNext + " seekPending=" + seekPending + " divisor=" + indexReader.getDivisor() + " this=" + this);
if (didIndexNext) {
if (nextIndexTerm == null) {
//System.out.println(" nextIndexTerm=null");
} else {
//System.out.println(" nextIndexTerm=" + nextIndexTerm.utf8ToString());
}
}
// Check cache
if (useCache) {
fieldTerm.term = target;
// TODO: should we differentiate "frozen"
// TermState (ie one that was cloned and
// cached/returned by termState()) from the
// malleable (primary) one?
final TermState cachedState = termsCache.get(fieldTerm);
if (cachedState != null) {
seekPending = true;
//System.out.println(" cached!");
seekExact(target, cachedState);
//System.out.println(" term=" + term.utf8ToString());
return SeekStatus.FOUND;
}
}
boolean doSeek = true;
// See if we can avoid seeking, because target term
// is after current term but before next index term:
if (indexIsCurrent) {
final int cmp = BytesRef.getUTF8SortedAsUnicodeComparator().compare(term, target);
if (cmp == 0) {
// Already at the requested term
return SeekStatus.FOUND;
} else if (cmp < 0) {
// Target term is after current term
if (!didIndexNext) {
if (indexEnum.next() == -1) {
nextIndexTerm = null;
} else {
nextIndexTerm = indexEnum.term();
}
//System.out.println(" now do index next() nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString()));
didIndexNext = true;
}
if (nextIndexTerm == null || BytesRef.getUTF8SortedAsUnicodeComparator().compare(target, nextIndexTerm) < 0) {
// Optimization: requested term is within the
// same term block we are now in; skip seeking
// (but do scanning):
doSeek = false;
//System.out.println(" skip seek: nextIndexTerm=" + (nextIndexTerm == null ? "null" : nextIndexTerm.utf8ToString()));
}
}
}
if (doSeek) {
//System.out.println(" seek");
// Ask terms index to find biggest indexed term (=
// first term in a block) that's <= our text:
in.seek(indexEnum.seek(target));
boolean result = nextBlock();
// Block must exist since, at least, the indexed term
// is in the block:
assert result;
indexIsCurrent = true;
didIndexNext = false;
blocksSinceSeek = 0;
if (doOrd) {
state.ord = indexEnum.ord()-1;
}
term.copyBytes(indexEnum.term());
//System.out.println(" seek: term=" + term.utf8ToString());
} else {
//System.out.println(" skip seek");
if (state.termBlockOrd == blockTermCount && !nextBlock()) {
indexIsCurrent = false;
return SeekStatus.END;
}
}
seekPending = false;
int common = 0;
// Scan within block. We could do this by calling
// _next() and testing the resulting term, but this
// is wasteful. Instead, we first confirm the
// target matches the common prefix of this block,
// and then we scan the term bytes directly from the
// termSuffixesReader's byte[], saving a copy into
// the BytesRef term per term. Only when we return
// do we then copy the bytes into the term.
while(true) {
// First, see if target term matches common prefix
// in this block:
if (common < termBlockPrefix) {
final int cmp = (term.bytes[common]&0xFF) - (target.bytes[target.offset + common]&0xFF);
if (cmp < 0) {
// TODO: maybe we should store common prefix
// in block header? (instead of relying on
// last term of previous block)
// Target's prefix is after the common block
// prefix, so term cannot be in this block
// but it could be in next block. We
// must scan to end-of-block to set common
// prefix for next block:
if (state.termBlockOrd < blockTermCount) {
while(state.termBlockOrd < blockTermCount-1) {
state.termBlockOrd++;
state.ord++;
termSuffixesReader.skipBytes(termSuffixesReader.readVInt());
}
final int suffix = termSuffixesReader.readVInt();
term.length = termBlockPrefix + suffix;
if (term.bytes.length < term.length) {
term.grow(term.length);
}
termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
}
state.ord++;
if (!nextBlock()) {
indexIsCurrent = false;
return SeekStatus.END;
}
common = 0;
} else if (cmp > 0) {
// Target's prefix is before the common prefix
// of this block, so we position to start of
// block and return NOT_FOUND:
assert state.termBlockOrd == 0;
final int suffix = termSuffixesReader.readVInt();
term.length = termBlockPrefix + suffix;
if (term.bytes.length < term.length) {
term.grow(term.length);
}
termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
return SeekStatus.NOT_FOUND;
} else {
common++;
}
continue;
}
// Test every term in this block
while (true) {
state.termBlockOrd++;
state.ord++;
final int suffix = termSuffixesReader.readVInt();
// We know the prefix matches, so just compare the new suffix:
final int termLen = termBlockPrefix + suffix;
int bytePos = termSuffixesReader.getPosition();
boolean next = false;
final int limit = target.offset + (termLen < target.length ? termLen : target.length);
int targetPos = target.offset + termBlockPrefix;
while(targetPos < limit) {
final int cmp = (termSuffixes[bytePos++]&0xFF) - (target.bytes[targetPos++]&0xFF);
if (cmp < 0) {
// Current term is still before the target;
// keep scanning
next = true;
break;
} else if (cmp > 0) {
// Done! Current term is after target. Stop
// here, fill in real term, return NOT_FOUND.
term.length = termBlockPrefix + suffix;
if (term.bytes.length < term.length) {
term.grow(term.length);
}
termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
//System.out.println(" NOT_FOUND");
return SeekStatus.NOT_FOUND;
}
}
if (!next && target.length <= termLen) {
term.length = termBlockPrefix + suffix;
if (term.bytes.length < term.length) {
term.grow(term.length);
}
termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
if (target.length == termLen) {
// Done! Exact match. Stop here, fill in
// real term, return FOUND.
//System.out.println(" FOUND");
if (useCache) {
// Store in cache
decodeMetaData();
//System.out.println(" cache! state=" + state);
termsCache.put(new FieldAndTerm(fieldTerm), (BlockTermState) state.clone());
}
return SeekStatus.FOUND;
} else {
//System.out.println(" NOT_FOUND");
return SeekStatus.NOT_FOUND;
}
}
if (state.termBlockOrd == blockTermCount) {
// Must pre-fill term for next block's common prefix
term.length = termBlockPrefix + suffix;
if (term.bytes.length < term.length) {
term.grow(term.length);
}
termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
break;
} else {
termSuffixesReader.skipBytes(suffix);
}
}
// The purpose of the terms dict index is to seek
// the enum to the closest index term before the
// term we are looking for. So, we should never
// cross another index term (besides the first
// one) while we are scanning:
assert indexIsCurrent;
if (!nextBlock()) {
//System.out.println(" END");
indexIsCurrent = false;
return SeekStatus.END;
}
common = 0;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
Override
public BytesRef next() throws IOException {
//System.out.println("BTR.next() seekPending=" + seekPending + " pendingSeekCount=" + state.termBlockOrd);
// If seek was previously called and the term was cached,
// usually the caller is just going to pull a DocsEnum/DocsAndPositionsEnum or get
// docFreq, etc. But, if they then call next(),
// this method catches up all internal state so next()
// works properly:
if (seekPending) {
assert !indexIsCurrent;
in.seek(state.blockFilePointer);
final int pendingSeekCount = state.termBlockOrd;
boolean result = nextBlock();
final long savOrd = state.ord;
// Block must exist since seek(TermState) was called w/ a
// TermState previously returned by this enum when positioned
// on a real term:
assert result;
while(state.termBlockOrd < pendingSeekCount) {
BytesRef nextResult = _next();
assert nextResult != null;
}
seekPending = false;
state.ord = savOrd;
}
return _next();
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
private BytesRef _next() throws IOException {
//System.out.println("BTR._next seg=" + segment + " this=" + this + " termCount=" + state.termBlockOrd + " (vs " + blockTermCount + ")");
if (state.termBlockOrd == blockTermCount && !nextBlock()) {
//System.out.println(" eof");
indexIsCurrent = false;
return null;
}
// TODO: cutover to something better for these ints! simple64?
final int suffix = termSuffixesReader.readVInt();
//System.out.println(" suffix=" + suffix);
term.length = termBlockPrefix + suffix;
if (term.bytes.length < term.length) {
term.grow(term.length);
}
termSuffixesReader.readBytes(term.bytes, termBlockPrefix, suffix);
state.termBlockOrd++;
// NOTE: meaningless in the non-ord case
state.ord++;
//System.out.println(" return term=" + fieldInfo.name + ":" + term.utf8ToString() + " " + term + " tbOrd=" + state.termBlockOrd);
return term;
}
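
_next() rebuilds each term from the block's shared prefix plus the stored suffix, growing the reusable term buffer only when needed. A JDK-only sketch of that prefix coding:

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

class PrefixCodedTermSketch {
  public static void main(String[] args) {
    byte[] blockPrefix = "foo".getBytes(StandardCharsets.UTF_8);
    byte[][] suffixes = {
        "bar".getBytes(StandardCharsets.UTF_8),
        "zz".getBytes(StandardCharsets.UTF_8),
    };
    byte[] term = Arrays.copyOf(blockPrefix, blockPrefix.length); // reusable buffer holding the prefix
    for (byte[] suffix : suffixes) {
      int termLen = blockPrefix.length + suffix.length;
      if (term.length < termLen) {
        term = Arrays.copyOf(term, termLen);                      // grow, keeping the prefix bytes
      }
      System.arraycopy(suffix, 0, term, blockPrefix.length, suffix.length);
      System.out.println(new String(term, 0, termLen, StandardCharsets.UTF_8)); // foobar, foozz
    }
  }
}
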
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
Override
public int docFreq() throws IOException {
//System.out.println("BTR.docFreq");
decodeMetaData();
//System.out.println(" return " + state.docFreq);
return state.docFreq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
Override
public long totalTermFreq() throws IOException {
decodeMetaData();
return state.totalTermFreq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
//System.out.println("BTR.docs this=" + this);
decodeMetaData();
//System.out.println("BTR.docs: state.docFreq=" + state.docFreq);
return postingsReader.docs(fieldInfo, state, liveDocs, reuse, needsFreqs);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException {
if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// Positions were not indexed:
return null;
}
if (needsOffsets &&
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
// Offsets were not indexed:
return null;
}
decodeMetaData();
return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, needsOffsets);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
Override
public void seekExact(BytesRef target, TermState otherState) throws IOException {
//System.out.println("BTR.seekExact termState target=" + target.utf8ToString() + " " + target + " this=" + this);
assert otherState != null && otherState instanceof BlockTermState;
assert !doOrd || ((BlockTermState) otherState).ord < numTerms;
state.copyFrom(otherState);
seekPending = true;
indexIsCurrent = false;
term.copyBytes(target);
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
Override
public TermState termState() throws IOException {
//System.out.println("BTR.termState this=" + this);
decodeMetaData();
TermState ts = state.clone();
//System.out.println(" return ts=" + ts);
return ts;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
Override
public void seekExact(long ord) throws IOException {
//System.out.println("BTR.seek by ord ord=" + ord);
if (indexEnum == null) {
throw new IllegalStateException("terms index was not loaded");
}
assert ord < numTerms;
// TODO: if ord is in same terms block and
// after current ord, we should avoid this seek just
// like we do in the seek(BytesRef) case
in.seek(indexEnum.seek(ord));
boolean result = nextBlock();
// Block must exist since ord < numTerms:
assert result;
indexIsCurrent = true;
didIndexNext = false;
blocksSinceSeek = 0;
seekPending = false;
state.ord = indexEnum.ord()-1;
assert state.ord >= -1: "ord=" + state.ord;
term.copyBytes(indexEnum.term());
// Now, scan:
int left = (int) (ord - state.ord);
while(left > 0) {
final BytesRef term = _next();
assert term != null;
left--;
assert indexIsCurrent;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
private boolean nextBlock() throws IOException {
// TODO: we still lazy-decode the byte[] for each
// term (the suffix), but, if we decoded
// all N terms up front then seeking could do a fast
// bsearch w/in the block...
//System.out.println("BTR.nextBlock() fp=" + in.getFilePointer() + " this=" + this);
state.blockFilePointer = in.getFilePointer();
blockTermCount = in.readVInt();
//System.out.println(" blockTermCount=" + blockTermCount);
if (blockTermCount == 0) {
return false;
}
termBlockPrefix = in.readVInt();
// term suffixes:
int len = in.readVInt();
if (termSuffixes.length < len) {
termSuffixes = new byte[ArrayUtil.oversize(len, 1)];
}
//System.out.println(" termSuffixes len=" + len);
in.readBytes(termSuffixes, 0, len);
termSuffixesReader.reset(termSuffixes, 0, len);
// docFreq, totalTermFreq
len = in.readVInt();
if (docFreqBytes.length < len) {
docFreqBytes = new byte[ArrayUtil.oversize(len, 1)];
}
//System.out.println(" freq bytes len=" + len);
in.readBytes(docFreqBytes, 0, len);
freqReader.reset(docFreqBytes, 0, len);
metaDataUpto = 0;
state.termBlockOrd = 0;
postingsReader.readTermsBlock(in, fieldInfo, state);
blocksSinceSeek++;
indexIsCurrent = indexIsCurrent && (blocksSinceSeek < indexReader.getDivisor());
//System.out.println(" indexIsCurrent=" + indexIsCurrent);
return true;
}
// in lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java
private void decodeMetaData() throws IOException {
//System.out.println("BTR.decodeMetadata mdUpto=" + metaDataUpto + " vs termCount=" + state.termBlockOrd + " state=" + state);
if (!seekPending) {
// TODO: cutover to random-access API
// here.... really stupid that we have to decode N
// wasted term metadata just to get to the N+1th
// that we really need...
// lazily catch up on metadata decode:
final int limit = state.termBlockOrd;
// We must set/incr state.termCount because
// postings impl can look at this
state.termBlockOrd = metaDataUpto;
// TODO: better API would be "jump straight to term=N"???
while (metaDataUpto < limit) {
//System.out.println(" decode mdUpto=" + metaDataUpto);
// TODO: we could make "tiers" of metadata, ie,
// decode docFreq/totalTF but don't decode postings
// metadata; this way caller could get
// docFreq/totalTF w/o paying decode cost for
// postings
// TODO: if docFreq were bulk decoded we could
// just skipN here:
state.docFreq = freqReader.readVInt();
//System.out.println(" dF=" + state.docFreq);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
state.totalTermFreq = state.docFreq + freqReader.readVLong();
//System.out.println(" totTF=" + state.totalTermFreq);
}
postingsReader.nextTerm(fieldInfo, state);
metaDataUpto++;
state.termBlockOrd++;
}
} else {
//System.out.println(" skip! seekPending");
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java
MappingMultiDocsEnum reset(MultiDocsEnum docsEnum) throws IOException {
this.numSubs = docsEnum.getNumSubs();
this.subs = docsEnum.getSubs();
upto = -1;
current = null;
return this;
}
// in lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java
Override
public int freq() throws IOException {
return current.freq();
}
// in lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java
Override
public int advance(int target) throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsEnum.java
Override
public int nextDoc() throws IOException {
while(true) {
if (current == null) {
if (upto == numSubs-1) {
return this.doc = NO_MORE_DOCS;
} else {
upto++;
final int reader = subs[upto].slice.readerIndex;
current = subs[upto].docsEnum;
currentBase = mergeState.docBase[reader];
currentMap = mergeState.docMaps[reader];
assert currentMap.maxDoc() == subs[upto].slice.length: "readerIndex=" + reader + " subs.len=" + subs.length + " len1=" + currentMap.maxDoc() + " vs " + subs[upto].slice.length;
}
}
int doc = current.nextDoc();
if (doc != NO_MORE_DOCS) {
// compact deletions
doc = currentMap.get(doc);
if (doc == -1) {
continue;
}
return this.doc = currentBase + doc;
} else {
current = null;
}
}
}
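
nextDoc() maps each sub-reader's docID into the merged segment in two steps: the reader's docMap compacts around deletions (returning -1 for deleted docs), and docBase shifts the result to the reader's slot in the merged doc space. A plain-array sketch of that remapping, not the real MergeState types:

class DocRemapSketch {
  public static void main(String[] args) {
    // docMap for one reader with 5 docs where doc 2 was deleted:
    int[] docMap = {0, 1, -1, 2, 3};
    int docBase = 100;              // this reader's first docID in the merged segment
    for (int oldDoc = 0; oldDoc < docMap.length; oldDoc++) {
      int mapped = docMap[oldDoc];
      if (mapped == -1) {
        continue;                   // deleted in the source segment
      }
      System.out.println("old doc " + oldDoc + " -> merged doc " + (docBase + mapped));
    }
  }
}
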
// in lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java
protected void writeHeader(IndexOutput out) throws IOException {
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
// Placeholder for dir offset
out.writeLong(0);
}
// in lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java
Override
public FieldWriter addField(FieldInfo field, long termsFilePointer) throws IOException {
////System.out.println("VGW: field=" + field.name);
policy.newField(field);
FSTFieldWriter writer = new FSTFieldWriter(field, termsFilePointer);
fields.add(writer);
return writer;
}
// in lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java
Override
public boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException {
//System.out.println("VGW: index term=" + text.utf8ToString());
// NOTE: we must force the first term per field to be
// indexed, in case policy doesn't:
if (policy.isIndexTerm(text, stats) || first) {
first = false;
//System.out.println(" YES");
return true;
} else {
lastTerm.copyBytes(text);
return false;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java
Override
public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
if (text.length == 0) {
// We already added empty string in ctor
assert termsFilePointer == startTermsFilePointer;
return;
}
final int lengthSave = text.length;
text.length = indexedTermPrefixLength(lastTerm, text);
try {
fstBuilder.add(Util.toIntsRef(text, scratchIntsRef), termsFilePointer);
} finally {
text.length = lengthSave;
}
lastTerm.copyBytes(text);
}
// in lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java
Override
public void finish(long termsFilePointer) throws IOException {
fst = fstBuilder.finish();
if (fst != null) {
fst.save(out);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java
public void close() throws IOException {
try {
final long dirStart = out.getFilePointer();
final int fieldCount = fields.size();
int nonNullFieldCount = 0;
for(int i=0;i<fieldCount;i++) {
FSTFieldWriter field = fields.get(i);
if (field.fst != null) {
nonNullFieldCount++;
}
}
out.writeVInt(nonNullFieldCount);
for(int i=0;i<fieldCount;i++) {
FSTFieldWriter field = fields.get(i);
if (field.fst != null) {
out.writeVInt(field.fieldInfo.number);
out.writeVLong(field.indexStart);
}
}
writeTrailer(dirStart);
} finally {
out.close();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java
protected void writeTrailer(long dirStart) throws IOException {
out.seek(CodecUtil.headerLength(CODEC_NAME));
out.writeLong(dirStart);
}
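
writeHeader() reserves a placeholder long for the directory offset right after the codec header, and writeTrailer() later seeks back and overwrites it with the real dirStart. A JDK-only sketch of that placeholder-and-backfill pattern, with RandomAccessFile standing in for IndexOutput:

import java.io.IOException;
import java.io.RandomAccessFile;

class DirOffsetBackfillSketch {
  public static void main(String[] args) throws IOException {
    try (RandomAccessFile out = new RandomAccessFile("sketch.idx", "rw")) {
      out.writeInt(0xCAFEBABE);          // pretend codec header
      long dirOffsetSlot = out.getFilePointer();
      out.writeLong(0L);                 // placeholder for the directory offset
      out.writeUTF("...index body...");  // body of unknown length
      long dirStart = out.getFilePointer();
      out.writeUTF("...directory...");   // directory written last
      out.seek(dirOffsetSlot);
      out.writeLong(dirStart);           // backfill the real offset
    }
  }
}
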
// in lucene/core/src/java/org/apache/lucene/codecs/appending/AppendingTermsWriter.java
Override
protected void writeHeader(IndexOutput out) throws IOException {
CodecUtil.writeHeader(out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT);
}
// in lucene/core/src/java/org/apache/lucene/codecs/appending/AppendingTermsWriter.java
Override
protected void writeIndexHeader(IndexOutput out) throws IOException {
CodecUtil.writeHeader(out, TERMS_INDEX_CODEC_NAME, TERMS_INDEX_VERSION_CURRENT);
}
// in lucene/core/src/java/org/apache/lucene/codecs/appending/AppendingTermsWriter.java
Override
protected void writeTrailer(IndexOutput out, long dirStart) throws IOException {
out.writeLong(dirStart);
}
// in lucene/core/src/java/org/apache/lucene/codecs/appending/AppendingTermsWriter.java
Override
protected void writeIndexTrailer(IndexOutput indexOut, long dirStart) throws IOException {
indexOut.writeLong(dirStart);
}
// in lucene/core/src/java/org/apache/lucene/codecs/appending/AppendingPostingsFormat.java
Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
PostingsWriterBase docsWriter = new Lucene40PostingsWriter(state);
boolean success = false;
try {
FieldsConsumer ret = new AppendingTermsWriter(state, docsWriter, BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
success = true;
return ret;
} finally {
if (!success) {
docsWriter.close();
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/appending/AppendingPostingsFormat.java
Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
PostingsReaderBase postings = new Lucene40PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
boolean success = false;
try {
FieldsProducer ret = new AppendingTermsReader(
state.dir,
state.fieldInfos,
state.segmentInfo.name,
postings,
state.context,
state.segmentSuffix,
state.termsIndexDivisor);
success = true;
return ret;
} finally {
if (!success) {
postings.close();
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/appending/AppendingTermsReader.java
Override
protected void readHeader(IndexInput input) throws IOException {
CodecUtil.checkHeader(input, AppendingTermsWriter.TERMS_CODEC_NAME,
AppendingTermsWriter.TERMS_VERSION_START,
AppendingTermsWriter.TERMS_VERSION_CURRENT);
}
// in lucene/core/src/java/org/apache/lucene/codecs/appending/AppendingTermsReader.java
Override
protected void readIndexHeader(IndexInput input) throws IOException {
CodecUtil.checkHeader(input, AppendingTermsWriter.TERMS_INDEX_CODEC_NAME,
AppendingTermsWriter.TERMS_INDEX_VERSION_START,
AppendingTermsWriter.TERMS_INDEX_VERSION_CURRENT);
}
// in lucene/core/src/java/org/apache/lucene/codecs/appending/AppendingTermsReader.java
Override
protected void seekDir(IndexInput input, long dirOffset) throws IOException {
input.seek(input.length() - Long.SIZE / 8);
long offset = input.readLong();
input.seek(offset);
}
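
The appending reader cannot seek back and patch a header slot, so its seekDir() ignores the passed offset and instead reads the directory offset from the last eight bytes of the file, which the writer appended as a trailer. A small RandomAccessFile sketch of that append-only layout (illustrative, not the actual file format):

import java.io.IOException;
import java.io.RandomAccessFile;

class AppendOnlySeekDirSketch {
  static long readDirStart(RandomAccessFile in) throws IOException {
    in.seek(in.length() - Long.SIZE / 8);  // the last 8 bytes hold the directory offset
    return in.readLong();
  }

  public static void main(String[] args) throws IOException {
    try (RandomAccessFile f = new RandomAccessFile("appending.idx", "rw")) {
      f.writeUTF("...body...");
      long dirStart = f.getFilePointer();
      f.writeUTF("...directory...");
      f.writeLong(dirStart);               // appended trailer; never seek backwards to patch
      System.out.println("dirStart=" + readDirStart(f));
    }
  }
}
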
// in lucene/core/src/java/org/apache/lucene/codecs/PostingsConsumer.java
public TermStats merge(final MergeState mergeState, final DocsEnum postings, final FixedBitSet visitedDocs) throws IOException {
int df = 0;
long totTF = 0;
IndexOptions indexOptions = mergeState.fieldInfo.getIndexOptions();
if (indexOptions == IndexOptions.DOCS_ONLY) {
while(true) {
final int doc = postings.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
visitedDocs.set(doc);
this.startDoc(doc, 0);
this.finishDoc();
df++;
}
totTF = -1;
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS) {
while(true) {
final int doc = postings.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
visitedDocs.set(doc);
final int freq = postings.freq();
this.startDoc(doc, freq);
this.finishDoc();
df++;
totTF += freq;
}
} else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
final DocsAndPositionsEnum postingsEnum = (DocsAndPositionsEnum) postings;
while(true) {
final int doc = postingsEnum.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
visitedDocs.set(doc);
final int freq = postingsEnum.freq();
this.startDoc(doc, freq);
totTF += freq;
for(int i=0;i<freq;i++) {
final int position = postingsEnum.nextPosition();
final BytesRef payload;
if (postingsEnum.hasPayload()) {
payload = postingsEnum.getPayload();
} else {
payload = null;
}
this.addPosition(position, payload, -1, -1);
}
this.finishDoc();
df++;
}
} else {
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
final DocsAndPositionsEnum postingsEnum = (DocsAndPositionsEnum) postings;
while(true) {
final int doc = postingsEnum.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
visitedDocs.set(doc);
final int freq = postingsEnum.freq();
this.startDoc(doc, freq);
totTF += freq;
for(int i=0;i<freq;i++) {
final int position = postingsEnum.nextPosition();
final BytesRef payload;
if (postingsEnum.hasPayload()) {
payload = postingsEnum.getPayload();
} else {
payload = null;
}
this.addPosition(position, payload, postingsEnum.startOffset(), postingsEnum.endOffset());
}
this.finishDoc();
df++;
}
}
return new TermStats(df, totTF);
}
// in lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
public int merge(MergeState mergeState) throws IOException {
int docCount = 0;
for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
final int maxDoc = reader.reader.maxDoc();
final Bits liveDocs = reader.liveDocs;
for (int i = 0; i < maxDoc; i++) {
if (liveDocs != null && !liveDocs.get(i)) {
// skip deleted docs
continue;
}
// TODO: this could be more efficient using
// FieldVisitor instead of loading/writing entire
// doc; ie we just have to renumber the field number
// on the fly?
// NOTE: it's very important to first assign to doc then pass it to
// fieldsWriter.addDocument; see LUCENE-1282
Document doc = reader.reader.document(i);
addDocument(doc, mergeState.fieldInfos);
docCount++;
mergeState.checkAbort.work(300);
}
}
finish(mergeState.fieldInfos, docCount);
return docCount;
}
// in lucene/core/src/java/org/apache/lucene/codecs/StoredFieldsWriter.java
protected final void addDocument(Iterable<? extends IndexableField> doc, FieldInfos fieldInfos) throws IOException {
int storedCount = 0;
for (IndexableField field : doc) {
if (field.fieldType().stored()) {
storedCount++;
}
}
startDocument(storedCount);
for (IndexableField field : doc) {
if (field.fieldType().stored()) {
writeField(fieldInfos.fieldInfo(field.name()), field);
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java
protected void readHeader(IndexInput input) throws IOException {
CodecUtil.checkHeader(input, FixedGapTermsIndexWriter.CODEC_NAME,
FixedGapTermsIndexWriter.VERSION_START, FixedGapTermsIndexWriter.VERSION_START);
dirOffset = input.readLong();
}
// in lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java
private void loadTermsIndex() throws IOException {
if (coreIndex == null) {
coreIndex = new CoreFieldIndex(indexStart, termsStart, packedIndexStart, packedOffsetsStart, numIndexTerms);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java
Override
public void close() throws IOException {
if (in != null && !indexLoaded) {
in.close();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/FixedGapTermsIndexReader.java
protected void seekDir(IndexInput input, long dirOffset) throws IOException {
input.seek(dirOffset);
}
// in lucene/core/src/java/org/apache/lucene/codecs/FieldsConsumer.java
public void merge(MergeState mergeState, Fields fields) throws IOException {
FieldsEnum fieldsEnum = fields.iterator();
assert fieldsEnum != null;
String field;
while((field = fieldsEnum.next()) != null) {
mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field);
assert mergeState.fieldInfo != null : "FieldInfo for field is null: "+ field;
Terms terms = fieldsEnum.terms();
if (terms != null) {
final TermsConsumer termsConsumer = addField(mergeState.fieldInfo);
termsConsumer.merge(mergeState, terms.iterator(null));
}
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java
protected void readHeader(IndexInput input) throws IOException {
CodecUtil.checkHeader(input, VariableGapTermsIndexWriter.CODEC_NAME,
VariableGapTermsIndexWriter.VERSION_START, VariableGapTermsIndexWriter.VERSION_START);
dirOffset = input.readLong();
}
// in lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java
Override
public long seek(BytesRef target) throws IOException {
//System.out.println("VGR: seek field=" + fieldInfo.name + " target=" + target);
current = fstEnum.seekFloor(target);
//System.out.println(" got input=" + current.input + " output=" + current.output);
return current.output;
}
// in lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java
Override
public long next() throws IOException {
//System.out.println("VGR: next field=" + fieldInfo.name);
current = fstEnum.next();
if (current == null) {
//System.out.println(" eof");
return -1;
} else {
return current.output;
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java
private void loadTermsIndex() throws IOException {
if (fst == null) {
IndexInput clone = (IndexInput) in.clone();
clone.seek(indexStart);
fst = new FST<Long>(clone, fstOutputs);
clone.close();
/*
final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
Util.toDot(fst, w, false, false);
System.out.println("FST INDEX: SAVED to " + dotFileName);
w.close();
*/
if (indexDivisor > 1) {
// subsample
final IntsRef scratchIntsRef = new IntsRef();
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
BytesRefFSTEnum.InputOutput<Long> result;
int count = indexDivisor;
while((result = fstEnum.next()) != null) {
if (count == indexDivisor) {
builder.add(Util.toIntsRef(result.input, scratchIntsRef), result.output);
count = 0;
}
count++;
}
fst = builder.finish();
}
}
}
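
When indexDivisor > 1, loadTermsIndex() rebuilds the in-memory FST keeping only every Nth indexed term, trading seek granularity for memory. A plain-Java sketch of the same subsampling loop, over a list of strings instead of an FST:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

class IndexDivisorSketch {
  static List<String> subsample(List<String> indexedTerms, int indexDivisor) {
    List<String> kept = new ArrayList<>();
    int count = indexDivisor;                  // forces the very first indexed term to be kept
    for (String term : indexedTerms) {
      if (count == indexDivisor) {
        kept.add(term);
        count = 0;
      }
      count++;
    }
    return kept;
  }

  public static void main(String[] args) {
    List<String> indexed = Arrays.asList("a", "b", "c", "d", "e", "f", "g");
    System.out.println(subsample(indexed, 2)); // [a, c, e, g]
  }
}
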
// in lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java
Override
public void close() throws IOException {
if (in != null && !indexLoaded) {
in.close();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java
protected void seekDir(IndexInput input, long dirOffset) throws IOException {
input.seek(dirOffset);
}
// in lucene/core/src/java/org/apache/lucene/codecs/PerDocProducerBase.java
Override
public void close() throws IOException {
closeInternal(docValues().values());
}
// in lucene/core/src/java/org/apache/lucene/codecs/PerDocProducerBase.java
Override
public DocValues docValues(String field) throws IOException {
return docValues().get(field);
}
// in lucene/core/src/java/org/apache/lucene/codecs/PerDocProducerBase.java
public Comparator<BytesRef> getComparator() throws IOException {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
// in lucene/core/src/java/org/apache/lucene/codecs/PerDocProducerBase.java
protected TreeMap<String, DocValues> load(FieldInfos fieldInfos,
String segment, int docCount, Directory dir, IOContext context)
throws IOException {
TreeMap<String, DocValues> values = new TreeMap<String, DocValues>();
boolean success = false;
try {
for (FieldInfo fieldInfo : fieldInfos) {
if (canLoad(fieldInfo)) {
final String field = fieldInfo.name;
final String id = docValuesId(segment,
fieldInfo.number);
values.put(field,
loadDocValues(docCount, dir, id, getDocValuesType(fieldInfo), context));
}
}
success = true;
} finally {
if (!success) {
// if we fail we must close all opened resources if there are any
closeInternal(values.values());
}
}
return values;
}
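
load() uses the open-all-or-close-all idiom: a success flag set just before returning, and a finally block that closes whatever was opened so far only on the failure path, so ownership of all resources passes to the caller on success. A JDK-only sketch of that idiom:

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

class OpenAllOrCloseAllSketch {
  static List<Closeable> openAll(List<String> names) throws IOException {
    List<Closeable> opened = new ArrayList<>();
    boolean success = false;
    try {
      for (String name : names) {
        opened.add(open(name));
      }
      success = true;
      return opened;
    } finally {
      if (!success) {
        for (Closeable c : opened) {
          c.close();            // undo partial work only when we are failing
        }
      }
    }
  }

  static Closeable open(String name) {
    return () -> System.out.println("closed " + name);
  }

  public static void main(String[] args) throws IOException {
    List<Closeable> resources = openAll(Arrays.asList("a", "b", "c"));
    System.out.println("opened " + resources.size() + " resources"); // caller now owns them
  }
}
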
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java
Override
public void start(IndexOutput termsOut) throws IOException {
this.termsOut = termsOut;
CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
// TODO: -- just ask skipper to "start" here
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
termsOut.writeInt(skipMinimum); // write skipMinimum
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java
Override
public void startTerm() throws IOException {
docIndex.mark();
//System.out.println("SEPW: startTerm docIndex=" + docIndex);
if (indexOptions != IndexOptions.DOCS_ONLY) {
freqIndex.mark();
}
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
posIndex.mark();
payloadStart = payloadOut.getFilePointer();
lastPayloadLength = -1;
}
skipListWriter.resetSkip(docIndex, freqIndex, posIndex);
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java
Override
public void startDoc(int docID, int termDocFreq) throws IOException {
final int delta = docID - lastDocID;
//System.out.println("SEPW: startDoc: write doc=" + docID + " delta=" + delta + " out.fp=" + docOut);
if (docID < 0 || (df > 0 && delta <= 0)) {
throw new CorruptIndexException("docs out of order (" + docID + " <= " + lastDocID + " ) (docOut: " + docOut + ")");
}
if ((++df % skipInterval) == 0) {
// TODO: -- awkward we have to make these two
// separate calls to skipper
//System.out.println(" buffer skip lastDocID=" + lastDocID);
skipListWriter.setSkipData(lastDocID, storePayloads, lastPayloadLength);
skipListWriter.bufferSkip(df);
}
lastDocID = docID;
docOut.write(delta);
if (indexOptions != IndexOptions.DOCS_ONLY) {
//System.out.println(" sepw startDoc: write freq=" + termDocFreq);
freqOut.write(termDocFreq);
}
}
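
startDoc() writes each docID as a delta from the previous one and, every skipInterval documents, buffers a skip entry recorded against the previous docID. A standalone sketch of those two steps, printing instead of writing to real outputs:

class DocDeltaSkipSketch {
  public static void main(String[] args) {
    int[] docIDs = {3, 7, 8, 15, 21, 40};
    int skipInterval = 2;
    int lastDocID = 0, df = 0;
    for (int docID : docIDs) {
      int delta = docID - lastDocID;       // strictly positive for in-order docs
      if ((++df % skipInterval) == 0) {
        System.out.println("buffer skip entry at lastDocID=" + lastDocID + " df=" + df);
      }
      System.out.println("write delta=" + delta + " (doc " + docID + ")");
      lastDocID = docID;
    }
  }
}
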
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java
Override
public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
final int delta = position - lastPosition;
assert delta >= 0: "position=" + position + " lastPosition=" + lastPosition; // not quite right (if pos=0 is repeated twice we don't catch it)
lastPosition = position;
if (storePayloads) {
final int payloadLength = payload == null ? 0 : payload.length;
if (payloadLength != lastPayloadLength) {
lastPayloadLength = payloadLength;
// TODO: explore whether we get better compression
// by not storing payloadLength into prox stream?
posOut.write((delta<<1)|1);
posOut.write(payloadLength);
} else {
posOut.write(delta << 1);
}
if (payloadLength > 0) {
payloadOut.writeBytes(payload.bytes, payload.offset, payloadLength);
}
} else {
posOut.write(delta);
}
lastPosition = position;
}
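
When payloads are stored, addPosition() shifts the position delta left by one and sets the low bit only when the payload length differs from the previous one, in which case the new length follows; runs of equal-length payloads therefore cost a single VInt per position. An illustrative encoder for that scheme (plain lists instead of IntIndexOutput):

import java.util.ArrayList;
import java.util.List;

class PositionPayloadEncodeSketch {
  public static void main(String[] args) {
    int[] positions = {2, 5, 9};
    int[] payloadLengths = {4, 4, 7};   // length changes at the first and last position
    int lastPosition = 0, lastPayloadLength = -1;
    List<Integer> stream = new ArrayList<>();
    for (int i = 0; i < positions.length; i++) {
      int delta = positions[i] - lastPosition;
      lastPosition = positions[i];
      if (payloadLengths[i] != lastPayloadLength) {
        lastPayloadLength = payloadLengths[i];
        stream.add((delta << 1) | 1);   // low bit set: "payload length follows"
        stream.add(payloadLengths[i]);
      } else {
        stream.add(delta << 1);         // same payload length as before
      }
    }
    System.out.println(stream);         // [5, 4, 6, 9, 7]
  }
}
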
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java
Override
public void finishTerm(TermStats stats) throws IOException {
// TODO: -- wasteful we are counting this in two places?
assert stats.docFreq > 0;
assert stats.docFreq == df;
final IntIndexOutput.Index docIndexCopy = docOut.index();
docIndexCopy.copyFrom(docIndex, false);
final IntIndexOutput.Index freqIndexCopy;
final IntIndexOutput.Index posIndexCopy;
if (indexOptions != IndexOptions.DOCS_ONLY) {
freqIndexCopy = freqOut.index();
freqIndexCopy.copyFrom(freqIndex, false);
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
posIndexCopy = posOut.index();
posIndexCopy.copyFrom(posIndex, false);
} else {
posIndexCopy = null;
}
} else {
freqIndexCopy = null;
posIndexCopy = null;
}
final long skipFP;
if (df >= skipMinimum) {
skipFP = skipOut.getFilePointer();
//System.out.println(" skipFP=" + skipFP);
skipListWriter.writeSkip(skipOut);
//System.out.println(" numBytes=" + (skipOut.getFilePointer()-skipFP));
} else {
skipFP = -1;
}
lastDocID = 0;
df = 0;
pendingTerms.add(new PendingTerm(docIndexCopy,
freqIndexCopy,
posIndexCopy,
payloadStart,
skipFP));
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java
@Override
public void flushTermsBlock(int start, int count) throws IOException {
//System.out.println("SEPW: flushTermsBlock: start=" + start + " count=" + count + " pendingTerms.size()=" + pendingTerms.size() + " termsOut.fp=" + termsOut.getFilePointer());
assert indexBytesWriter.getFilePointer() == 0;
final int absStart = pendingTerms.size() - start;
final List<PendingTerm> slice = pendingTerms.subList(absStart, absStart+count);
long lastPayloadFP = 0;
long lastSkipFP = 0;
if (count == 0) {
termsOut.writeByte((byte) 0);
return;
}
final PendingTerm firstTerm = slice.get(0);
final IntIndexOutput.Index docIndexFlush = firstTerm.docIndex;
final IntIndexOutput.Index freqIndexFlush = firstTerm.freqIndex;
final IntIndexOutput.Index posIndexFlush = firstTerm.posIndex;
for(int idx=0;idx<slice.size();idx++) {
final boolean isFirstTerm = idx == 0;
final PendingTerm t = slice.get(idx);
//System.out.println(" write idx=" + idx + " docIndex=" + t.docIndex);
docIndexFlush.copyFrom(t.docIndex, false);
docIndexFlush.write(indexBytesWriter, isFirstTerm);
if (indexOptions != IndexOptions.DOCS_ONLY) {
freqIndexFlush.copyFrom(t.freqIndex, false);
freqIndexFlush.write(indexBytesWriter, isFirstTerm);
//System.out.println(" freqIndex=" + t.freqIndex);
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
posIndexFlush.copyFrom(t.posIndex, false);
posIndexFlush.write(indexBytesWriter, isFirstTerm);
//System.out.println(" posIndex=" + t.posIndex);
if (storePayloads) {
//System.out.println(" payloadFP=" + t.payloadFP);
if (isFirstTerm) {
indexBytesWriter.writeVLong(t.payloadFP);
} else {
indexBytesWriter.writeVLong(t.payloadFP - lastPayloadFP);
}
lastPayloadFP = t.payloadFP;
}
}
}
if (t.skipFP != -1) {
if (isFirstTerm) {
indexBytesWriter.writeVLong(t.skipFP);
} else {
indexBytesWriter.writeVLong(t.skipFP - lastSkipFP);
}
lastSkipFP = t.skipFP;
//System.out.println(" skipFP=" + t.skipFP);
}
}
//System.out.println(" numBytes=" + indexBytesWriter.getFilePointer());
termsOut.writeVLong((int) indexBytesWriter.getFilePointer());
indexBytesWriter.writeTo(termsOut);
indexBytesWriter.reset();
slice.clear();
}
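Within one terms block, per-term file pointers (payloadFP, skipFP) are written absolutely for the first term and as deltas for the rest, which is what the isFirstTerm branches do. A minimal sketch of that pattern over a plain array of pointers:
import java.util.ArrayList;
import java.util.List;

// Illustrative only: absolute value for the first entry of a block, deltas for the rest.
class BlockFpSketch {
  static List<Long> encode(long[] filePointers) {
    List<Long> out = new ArrayList<>();
    long last = 0;
    for (int i = 0; i < filePointers.length; i++) {
      out.add(i == 0 ? filePointers[i] : filePointers[i] - last);
      last = filePointers[i];
    }
    return out;
  }

  public static void main(String[] args) {
    // e.g. skip file pointers of four terms in one block (hypothetical values)
    System.out.println(encode(new long[] {1024, 1100, 1100, 2048}));
    // prints [1024, 76, 0, 948]
  }
}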
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsWriter.java
@Override
public void close() throws IOException {
IOUtils.close(docOut, skipOut, freqOut, posOut, payloadOut);
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepSkipListWriter.java
void setPosOutput(IntIndexOutput posOutput) throws IOException {
this.posOutput = posOutput;
for(int i=0;i<numberOfSkipLevels;i++) {
posIndex[i] = posOutput.index();
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepSkipListWriter.java
protected void resetSkip(IntIndexOutput.Index topDocIndex, IntIndexOutput.Index topFreqIndex, IntIndexOutput.Index topPosIndex)
throws IOException {
super.resetSkip();
Arrays.fill(lastSkipDoc, 0);
Arrays.fill(lastSkipPayloadLength, -1); // we don't have to write the first length in the skip list
for(int i=0;i<numberOfSkipLevels;i++) {
docIndex[i].copyFrom(topDocIndex, true);
if (freqOutput != null) {
freqIndex[i].copyFrom(topFreqIndex, true);
}
if (posOutput != null) {
posIndex[i].copyFrom(topPosIndex, true);
}
}
if (payloadOutput != null) {
Arrays.fill(lastSkipPayloadPointer, payloadOutput.getFilePointer());
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepSkipListWriter.java
@Override
protected void writeSkipData(int level, IndexOutput skipBuffer) throws IOException {
// To efficiently store payloads in the posting lists we do not store the length of
// every payload. Instead we omit the length for a payload if the previous payload had
// the same length.
// However, in order to support skipping, the payload length at every skip point must be known.
// So we use the same length encoding that we use for the posting lists for the skip data as well:
// Case 1: current field does not store payloads
// SkipDatum --> DocSkip, FreqSkip, ProxSkip
// DocSkip,FreqSkip,ProxSkip --> VInt
// DocSkip records the document number before every SkipInterval-th document in TermFreqs.
// Document numbers are represented as differences from the previous value in the sequence.
// Case 2: current field stores payloads
// SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip
// DocSkip,FreqSkip,ProxSkip --> VInt
// PayloadLength --> VInt
// In this case DocSkip/2 is the difference between
// the current and the previous value. If DocSkip
// is odd, then a PayloadLength encoded as VInt follows,
// if DocSkip is even, then it is assumed that the
// current payload length equals the length at the previous
// skip point
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !curStorePayloads;
if (curStorePayloads) {
int delta = curDoc - lastSkipDoc[level];
if (curPayloadLength == lastSkipPayloadLength[level]) {
// the current payload length equals the length at the previous skip point,
// so we don't store the length again
skipBuffer.writeVInt(delta << 1);
} else {
// the payload length is different from the previous one. We shift the DocSkip,
// set the lowest bit and store the current payload length as VInt.
skipBuffer.writeVInt(delta << 1 | 1);
skipBuffer.writeVInt(curPayloadLength);
lastSkipPayloadLength[level] = curPayloadLength;
}
} else {
// current field does not store payloads
skipBuffer.writeVInt(curDoc - lastSkipDoc[level]);
}
if (indexOptions != IndexOptions.DOCS_ONLY) {
freqIndex[level].mark();
freqIndex[level].write(skipBuffer, false);
}
docIndex[level].mark();
docIndex[level].write(skipBuffer, false);
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
posIndex[level].mark();
posIndex[level].write(skipBuffer, false);
if (curStorePayloads) {
skipBuffer.writeVInt((int) (curPayloadPointer - lastSkipPayloadPointer[level]));
}
}
lastSkipDoc[level] = curDoc;
lastSkipPayloadPointer[level] = curPayloadPointer;
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/IntIndexInput.java
public IntsRef read(int count) throws IOException {
if (bulkResult == null) {
bulkResult = new IntsRef();
bulkResult.ints = new int[count];
} else {
bulkResult.grow(count);
}
for(int i=0;i<count;i++) {
bulkResult.ints[i] = next();
}
bulkResult.length = count;
return bulkResult;
}
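read(count) reuses a single lazily-grown buffer across calls rather than allocating a new array per bulk read. A small standalone sketch of the same reuse pattern with a plain int[] (hypothetical names; the real method fills the shared IntsRef from next()):
import java.util.Arrays;

// Illustrative only: one lazily-grown buffer shared by every bulk read.
class BulkReadSketch {
  private int[] buffer;
  private int next = 0;                   // stand-in for the underlying int stream

  int[] read(int count) {
    if (buffer == null || buffer.length < count) {
      buffer = new int[count];            // grow only when a larger request arrives
    }
    for (int i = 0; i < count; i++) {
      buffer[i] = next++;                 // the real code reads next() from the input
    }
    return Arrays.copyOf(buffer, count);  // the real code returns the shared IntsRef with length = count
  }

  public static void main(String[] args) {
    BulkReadSketch r = new BulkReadSketch();
    System.out.println(Arrays.toString(r.read(3)));  // [0, 1, 2]
    System.out.println(Arrays.toString(r.read(5)));  // [3, 4, 5, 6, 7]
  }
}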
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
@Override
public void init(IndexInput termsIn) throws IOException {
// Make sure we are talking to the matching past writer
CodecUtil.checkHeader(termsIn, SepPostingsWriter.CODEC,
SepPostingsWriter.VERSION_START, SepPostingsWriter.VERSION_START);
skipInterval = termsIn.readInt();
maxSkipLevels = termsIn.readInt();
skipMinimum = termsIn.readInt();
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
@Override
public void close() throws IOException {
try {
if (freqIn != null)
freqIn.close();
} finally {
try {
if (docIn != null)
docIn.close();
} finally {
try {
if (skipIn != null)
skipIn.close();
} finally {
try {
if (posIn != null) {
posIn.close();
}
} finally {
if (payloadIn != null) {
payloadIn.close();
}
}
}
}
}
}
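The nested try/finally ladder guarantees that every stream gets a close attempt even when an earlier close throws. Elsewhere in these sources the same intent is expressed with IOUtils.close; the standalone sketch below shows the underlying "close everything, remember the first failure" idea without Lucene classes (names are made up, and the exception semantics only approximate the nested-finally version above):
import java.io.Closeable;
import java.io.IOException;

// Standalone sketch of closing many resources while preserving the first failure.
class CloseAllSketch {
  static void closeAll(Closeable... resources) throws IOException {
    IOException first = null;
    for (Closeable c : resources) {
      if (c == null) continue;          // tolerate not-yet-opened streams, like the null checks above
      try {
        c.close();
      } catch (IOException e) {
        if (first == null) first = e;   // remember the first failure, keep closing the rest
      }
    }
    if (first != null) throw first;
  }

  public static void main(String[] args) throws IOException {
    closeAll(null,
        () -> System.out.println("closing docIn"),
        () -> System.out.println("closing skipIn"));
  }
}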
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
@Override
public BlockTermState newTermState() throws IOException {
final SepTermState state = new SepTermState();
state.docIndex = docIn.index();
if (freqIn != null) {
state.freqIndex = freqIn.index();
}
if (posIn != null) {
state.posIndex = posIn.index();
}
return state;
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
@Override
public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
final SepTermState termState = (SepTermState) _termState;
//System.out.println("SEPR: readTermsBlock termsIn.fp=" + termsIn.getFilePointer());
final int len = termsIn.readVInt();
//System.out.println(" numBytes=" + len);
if (termState.bytes == null) {
termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
termState.bytesReader = new ByteArrayDataInput(termState.bytes);
} else if (termState.bytes.length < len) {
termState.bytes = new byte[ArrayUtil.oversize(len, 1)];
}
termState.bytesReader.reset(termState.bytes, 0, len);
termsIn.readBytes(termState.bytes, 0, len);
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
@Override
public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
final SepTermState termState = (SepTermState) _termState;
final boolean isFirstTerm = termState.termBlockOrd == 0;
//System.out.println("SEPR.nextTerm termCount=" + termState.termBlockOrd + " isFirstTerm=" + isFirstTerm + " bytesReader.pos=" + termState.bytesReader.getPosition());
//System.out.println(" docFreq=" + termState.docFreq);
termState.docIndex.read(termState.bytesReader, isFirstTerm);
//System.out.println(" docIndex=" + termState.docIndex);
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
termState.freqIndex.read(termState.bytesReader, isFirstTerm);
if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
//System.out.println(" freqIndex=" + termState.freqIndex);
termState.posIndex.read(termState.bytesReader, isFirstTerm);
//System.out.println(" posIndex=" + termState.posIndex);
if (fieldInfo.hasPayloads()) {
if (isFirstTerm) {
termState.payloadFP = termState.bytesReader.readVLong();
} else {
termState.payloadFP += termState.bytesReader.readVLong();
}
//System.out.println(" payloadFP=" + termState.payloadFP);
}
}
}
if (termState.docFreq >= skipMinimum) {
//System.out.println(" readSkip @ " + termState.bytesReader.getPosition());
if (isFirstTerm) {
termState.skipFP = termState.bytesReader.readVLong();
} else {
termState.skipFP += termState.bytesReader.readVLong();
}
//System.out.println(" skipFP=" + termState.skipFP);
} else if (isFirstTerm) {
termState.skipFP = 0;
}
}
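nextTerm() mirrors flushTermsBlock(): the first term of a block reads absolute file pointers, later terms add a delta onto the running value. A hedged sketch of that decode, fed with the values the writer-side sketch earlier would produce:
// Illustrative decode of the per-block pointer scheme read in nextTerm above.
class BlockFpDecodeSketch {
  public static void main(String[] args) {
    long[] encoded = {1024, 76, 0, 948}; // hypothetical block: absolute first, deltas afterwards
    long fp = 0;
    for (int termBlockOrd = 0; termBlockOrd < encoded.length; termBlockOrd++) {
      boolean isFirstTerm = termBlockOrd == 0;
      fp = isFirstTerm ? encoded[termBlockOrd] : fp + encoded[termBlockOrd];
      System.out.println("term " + termBlockOrd + " -> fp=" + fp);
    }
    // prints 1024, 1100, 1100, 2048: the original absolute pointers
  }
}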
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
@Override
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
if (needsFreqs && fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY) {
return null;
}
final SepTermState termState = (SepTermState) _termState;
SepDocsEnum docsEnum;
if (reuse == null || !(reuse instanceof SepDocsEnum)) {
docsEnum = new SepDocsEnum();
} else {
docsEnum = (SepDocsEnum) reuse;
if (docsEnum.startDocIn != docIn) {
// If you are using ParallelReader, and pass in a
// reused DocsAndPositionsEnum, it could have come
// from another reader also using sep codec
docsEnum = new SepDocsEnum();
}
}
return docsEnum.init(fieldInfo, termState, liveDocs);
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
@Override
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs,
DocsAndPositionsEnum reuse, boolean needsOffsets)
throws IOException {
if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
return null;
}
if (needsOffsets) {
return null;
}
assert fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
final SepTermState termState = (SepTermState) _termState;
SepDocsAndPositionsEnum postingsEnum;
if (reuse == null || !(reuse instanceof SepDocsAndPositionsEnum)) {
postingsEnum = new SepDocsAndPositionsEnum();
} else {
postingsEnum = (SepDocsAndPositionsEnum) reuse;
if (postingsEnum.startDocIn != docIn) {
// If you are using ParallelReader, and pass in a
// reused DocsAndPositionsEnum, it could have come
// from another reader also using sep codec
postingsEnum = new SepDocsAndPositionsEnum();
}
}
return postingsEnum.init(fieldInfo, termState, liveDocs);
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
SepDocsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits liveDocs) throws IOException {
this.liveDocs = liveDocs;
this.indexOptions = fieldInfo.getIndexOptions();
omitTF = indexOptions == IndexOptions.DOCS_ONLY;
storePayloads = fieldInfo.hasPayloads();
// TODO: can't we only do this if consumer
// skipped consuming the previous docs?
docIndex.set(termState.docIndex);
docIndex.seek(docReader);
if (!omitTF) {
freqIndex.set(termState.freqIndex);
freqIndex.seek(freqReader);
}
docFreq = termState.docFreq;
// NOTE: unused if docFreq < skipMinimum:
skipFP = termState.skipFP;
count = 0;
doc = -1;
accum = 0;
skipped = false;
return this;
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
@Override
public int nextDoc() throws IOException {
while(true) {
if (count == docFreq) {
return doc = NO_MORE_DOCS;
}
count++;
// Decode next doc
//System.out.println("decode docDelta:");
accum += docReader.next();
if (!omitTF) {
//System.out.println("decode freq:");
freq = freqReader.next();
}
if (liveDocs == null || liveDocs.get(accum)) {
break;
}
}
return (doc = accum);
}
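nextDoc() accumulates doc deltas and silently skips documents that liveDocs marks as deleted. A short standalone sketch of that loop, with a boolean[] standing in for Bits and a plain array for the delta stream:
// Illustrative: decode doc deltas, filter deleted docs, report the rest.
class NextDocSketch {
  public static void main(String[] args) {
    int[] docDeltas = {2, 3, 4, 5};          // docs 2, 5, 9, 14
    boolean[] liveDocs = new boolean[20];
    java.util.Arrays.fill(liveDocs, true);
    liveDocs[5] = false;                      // doc 5 is deleted

    int accum = 0;
    for (int delta : docDeltas) {
      accum += delta;
      if (!liveDocs[accum]) continue;         // mirrors: liveDocs == null || liveDocs.get(accum)
      System.out.println("doc " + accum);     // prints 2, 9, 14
    }
  }
}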
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
@Override
public int freq() throws IOException {
assert !omitTF;
return freq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
@Override
public int advance(int target) throws IOException {
if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it's not too close
if (skipper == null) {
// This DocsEnum has never done any skipping
skipper = new SepSkipListReader((IndexInput) skipIn.clone(),
freqIn,
docIn,
posIn,
maxSkipLevels, skipInterval);
}
if (!skipped) {
// We haven't yet skipped for this posting
skipper.init(skipFP,
docIndex,
freqIndex,
posIndex,
0,
docFreq,
storePayloads);
skipper.setIndexOptions(indexOptions);
skipped = true;
}
final int newCount = skipper.skipTo(target);
if (newCount > count) {
// Skipper did move
if (!omitTF) {
skipper.getFreqIndex().seek(freqReader);
}
skipper.getDocIndex().seek(docReader);
count = newCount;
doc = accum = skipper.getDoc();
}
}
// Now, linear scan for the rest:
do {
if (nextDoc() == NO_MORE_DOCS) {
return NO_MORE_DOCS;
}
} while (target > doc);
return doc;
}
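advance() first consults the skip list to jump near the target and then falls back to linear nextDoc() scanning. A single-level, purely illustrative sketch of that skip-then-scan pattern over a sorted doc list (no Lucene types; the real skip list is multi-level):
// Illustrative: one skip entry per skipInterval docs; jump to the last skip point
// before the target, then scan linearly, mirroring the do/while above.
class SkipThenScanSketch {
  static int advance(int[] docs, int skipInterval, int target) {
    int start = 0;
    // "skip data": every skipInterval-th position is a candidate restart point
    for (int i = skipInterval - 1; i < docs.length; i += skipInterval) {
      if (docs[i] < target) start = i + 1; else break;
    }
    for (int i = start; i < docs.length; i++) {   // linear scan for the rest
      if (docs[i] >= target) return docs[i];
    }
    return Integer.MAX_VALUE;                     // stand-in for NO_MORE_DOCS
  }

  public static void main(String[] args) {
    int[] docs = {2, 5, 9, 14, 20, 27, 40, 44, 51};
    System.out.println(advance(docs, 3, 22));     // 27: skipped past 2..9, scanned from 14
    System.out.println(advance(docs, 3, 60));     // 2147483647: exhausted
  }
}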
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
SepDocsAndPositionsEnum init(FieldInfo fieldInfo, SepTermState termState, Bits liveDocs) throws IOException {
this.liveDocs = liveDocs;
storePayloads = fieldInfo.hasPayloads();
//System.out.println("Sep D&P init");
// TODO: can't we only do this if consumer
// skipped consuming the previous docs?
docIndex.set(termState.docIndex);
docIndex.seek(docReader);
//System.out.println(" docIndex=" + docIndex);
freqIndex.set(termState.freqIndex);
freqIndex.seek(freqReader);
//System.out.println(" freqIndex=" + freqIndex);
posIndex.set(termState.posIndex);
//System.out.println(" posIndex=" + posIndex);
posSeekPending = true;
payloadPending = false;
payloadFP = termState.payloadFP;
skipFP = termState.skipFP;
//System.out.println(" skipFP=" + skipFP);
docFreq = termState.docFreq;
count = 0;
doc = -1;
accum = 0;
pendingPosCount = 0;
pendingPayloadBytes = 0;
skipped = false;
return this;
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
@Override
public int nextDoc() throws IOException {
while(true) {
if (count == docFreq) {
return doc = NO_MORE_DOCS;
}
count++;
// TODO: maybe we should do the 1-bit trick for encoding
// freq=1 case?
// Decode next doc
//System.out.println(" sep d&p read doc");
accum += docReader.next();
//System.out.println(" sep d&p read freq");
freq = freqReader.next();
pendingPosCount += freq;
if (liveDocs == null || liveDocs.get(accum)) {
break;
}
}
position = 0;
return (doc = accum);
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
@Override
public int freq() throws IOException {
return freq;
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
@Override
public int advance(int target) throws IOException {
//System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this);
if ((target - skipInterval) >= doc && docFreq >= skipMinimum) {
// There are enough docs in the posting to have
// skip data, and it's not too close
if (skipper == null) {
//System.out.println(" create skipper");
// This DocsEnum has never done any skipping
skipper = new SepSkipListReader((IndexInput) skipIn.clone(),
freqIn,
docIn,
posIn,
maxSkipLevels, skipInterval);
}
if (!skipped) {
//System.out.println(" init skip data skipFP=" + skipFP);
// We haven't yet skipped for this posting
skipper.init(skipFP,
docIndex,
freqIndex,
posIndex,
payloadFP,
docFreq,
storePayloads);
skipper.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
skipped = true;
}
final int newCount = skipper.skipTo(target);
//System.out.println(" skip newCount=" + newCount + " vs " + count);
if (newCount > count) {
// Skipper did move
skipper.getFreqIndex().seek(freqReader);
skipper.getDocIndex().seek(docReader);
//System.out.println(" doc seek'd to " + skipper.getDocIndex());
// NOTE: don't seek pos here; do it lazily
// instead. Eg a PhraseQuery may skip to many
// docs before finally asking for positions...
posIndex.set(skipper.getPosIndex());
posSeekPending = true;
count = newCount;
doc = accum = skipper.getDoc();
//System.out.println(" moved to doc=" + doc);
//payloadIn.seek(skipper.getPayloadPointer());
payloadFP = skipper.getPayloadPointer();
pendingPosCount = 0;
pendingPayloadBytes = 0;
payloadPending = false;
payloadLength = skipper.getPayloadLength();
//System.out.println(" move payloadLen=" + payloadLength);
}
}
// Now, linear scan for the rest:
do {
if (nextDoc() == NO_MORE_DOCS) {
//System.out.println(" advance nextDoc=END");
return NO_MORE_DOCS;
}
//System.out.println(" advance nextDoc=" + doc);
} while (target > doc);
//System.out.println(" return doc=" + doc);
return doc;
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
@Override
public int nextPosition() throws IOException {
if (posSeekPending) {
posIndex.seek(posReader);
payloadIn.seek(payloadFP);
posSeekPending = false;
}
// scan over any docs that were iterated without their
// positions
while (pendingPosCount > freq) {
final int code = posReader.next();
if (storePayloads && (code & 1) != 0) {
// Payload length has changed
payloadLength = posReader.next();
assert payloadLength >= 0;
}
pendingPosCount--;
position = 0;
pendingPayloadBytes += payloadLength;
}
final int code = posReader.next();
if (storePayloads) {
if ((code & 1) != 0) {
// Payload length has changed
payloadLength = posReader.next();
assert payloadLength >= 0;
}
position += code >>> 1;
pendingPayloadBytes += payloadLength;
payloadPending = payloadLength > 0;
} else {
position += code;
}
pendingPosCount--;
assert pendingPosCount >= 0;
return position;
}
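Positions are consumed lazily: nextDoc() only adds to pendingPosCount, and the catch-up loop above discards position data for docs whose positions were never requested. A standalone sketch of that bookkeeping (hypothetical names, a flat array standing in for the position stream, payloads omitted):
// Illustrative: posStream holds the positions of every occurrence, doc after doc.
// Docs are "visited" without reading positions; we catch up when positions are wanted.
class LazyPositionsSketch {
  int[] posStream;            // concatenated per-doc positions
  int posUpto = 0;
  int pendingPosCount = 0;    // occurrences seen in docs so far, not yet consumed
  int freq;                   // freq of the current doc

  void nextDoc(int docFreq) { // stand-in for the real nextDoc(): just book-keep
    freq = docFreq;
    pendingPosCount += docFreq;
  }

  int nextPosition() {
    while (pendingPosCount > freq) {  // skip positions of earlier docs whose positions were never read
      posUpto++;
      pendingPosCount--;
    }
    pendingPosCount--;
    return posStream[posUpto++];
  }

  public static void main(String[] args) {
    LazyPositionsSketch e = new LazyPositionsSketch();
    e.posStream = new int[] {1, 4, 7, 2, 9};  // doc A: {1,4}, doc B: {7,2}, doc C: {9}
    e.nextDoc(2);                             // doc A, positions never read
    e.nextDoc(2);                             // doc B, positions never read
    e.nextDoc(1);                             // doc C
    System.out.println(e.nextPosition());     // 9: the first four entries were skipped over
  }
}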
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java
@Override
public BytesRef getPayload() throws IOException {
if (!payloadPending) {
throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
}
assert pendingPayloadBytes >= payloadLength;
if (pendingPayloadBytes > payloadLength) {
payloadIn.seek(payloadIn.getFilePointer() + (pendingPayloadBytes - payloadLength));
}
if (payload == null) {
payload = new BytesRef();
payload.bytes = new byte[payloadLength];
} else if (payload.bytes.length < payloadLength) {
payload.grow(payloadLength);
}
payloadIn.readBytes(payload.bytes, 0, payloadLength);
payloadPending = false;
payload.length = payloadLength;
pendingPayloadBytes = 0;
return payload;
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepSkipListReader.java
@Override
protected void seekChild(int level) throws IOException {
super.seekChild(level);
payloadPointer[level] = lastPayloadPointer;
payloadLength[level] = lastPayloadLength;
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepSkipListReader.java
@Override
protected int readSkipData(int level, IndexInput skipStream) throws IOException {
int delta;
assert indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !currentFieldStoresPayloads;
if (currentFieldStoresPayloads) {
// the current field stores payloads.
// if the doc delta is odd then we have
// to read the current payload length
// because it differs from the length of the
// previous payload
delta = skipStream.readVInt();
if ((delta & 1) != 0) {
payloadLength[level] = skipStream.readVInt();
}
delta >>>= 1;
} else {
delta = skipStream.readVInt();
}
if (indexOptions != IndexOptions.DOCS_ONLY) {
freqIndex[level].read(skipStream, false);
}
docIndex[level].read(skipStream, false);
if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
posIndex[level].read(skipStream, false);
if (currentFieldStoresPayloads) {
payloadPointer[level] += skipStream.readVInt();
}
}
return delta;
}
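readSkipData() reverses the encoding documented in SepSkipListWriter.writeSkipData: the low bit of the doc delta says whether a new payload length follows. A self-contained decode sketch over a plain int array standing in for the skip stream (hypothetical values; index copies and payload pointers omitted):
// Illustrative decode of a payload-carrying skip stream: [delta<<1 | changed, (length)?, ...]
class SkipDecodeSketch {
  public static void main(String[] args) {
    int[] skipStream = {7, 4, 8, 11, 2};  // doc deltas 3, 4, 5 with payload lengths 4, 4, 2
    int pos = 0, doc = 0, payloadLength = -1;
    while (pos < skipStream.length) {
      int delta = skipStream[pos++];
      if ((delta & 1) != 0) {             // payload length changed at this skip point
        payloadLength = skipStream[pos++];
      }
      doc += delta >>> 1;
      System.out.println("skip point: doc=" + doc + " payloadLength=" + payloadLength);
    }
    // prints doc=3 len=4, doc=7 len=4, doc=12 len=2
  }
}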
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepDocValuesProducer.java
@Override
protected void closeInternal(Collection<? extends Closeable> closeables) throws IOException {
IOUtils.close(closeables);
}
// in lucene/core/src/java/org/apache/lucene/codecs/sep/SepDocValuesProducer.java
@Override
protected DocValues loadDocValues(int docCount, Directory dir, String id,
Type type, IOContext context) throws IOException {
switch (type) {
case FIXED_INTS_16:
case FIXED_INTS_32:
case FIXED_INTS_64:
case FIXED_INTS_8:
case VAR_INTS:
return Ints.getValues(dir, id, docCount, type, context);
case FLOAT_32:
return Floats.getValues(dir, id, docCount, context, type);
case FLOAT_64:
return Floats.getValues(dir, id, docCount, context, type);
case BYTES_FIXED_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, true, docCount, getComparator(), context);
case BYTES_FIXED_DEREF:
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, true, docCount, getComparator(), context);
case BYTES_FIXED_SORTED:
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, true, docCount, getComparator(), context);
case BYTES_VAR_STRAIGHT:
return Bytes.getValues(dir, id, Bytes.Mode.STRAIGHT, false, docCount, getComparator(), context);
case BYTES_VAR_DEREF:
return Bytes.getValues(dir, id, Bytes.Mode.DEREF, false, docCount, getComparator(), context);
case BYTES_VAR_SORTED:
return Bytes.getValues(dir, id, Bytes.Mode.SORTED, false, docCount, getComparator(), context);
default:
throw new IllegalStateException("unrecognized index values mode " + type);
}
}
// in lucene/core/src/java/org/apache/lucene/codecs/DocValuesArraySource.java
@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new ByteValues(input, numDocs);
}
// in lucene/core/src/java/org/apache/lucene/codecs/DocValuesArraySource.java
@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new ShortValues(input, numDocs);
}
// in lucene/core/src/java/org/apache/lucene/codecs/DocValuesArraySource.java
@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new IntValues(input, numDocs);
}
// in lucene/core/src/java/org/apache/lucene/codecs/DocValuesArraySource.java
@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new LongValues(input, numDocs);
}
// in lucene/core/src/java/org/apache/lucene/codecs/DocValuesArraySource.java
@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new FloatValues(input, numDocs);
}
// in lucene/core/src/java/org/apache/lucene/codecs/DocValuesArraySource.java
@Override
public DocValuesArraySource newFromInput(IndexInput input, int numDocs)
throws IOException {
return new DoubleValues(input, numDocs);
}
// in lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java
MappingMultiDocsAndPositionsEnum reset(MultiDocsAndPositionsEnum postingsEnum) throws IOException {
this.numSubs = postingsEnum.getNumSubs();
this.subs = postingsEnum.getSubs();
upto = -1;
current = null;
return this;
}
// in lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java
@Override
public int freq() throws IOException {
return current.freq();
}
// in lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java
@Override
public int advance(int target) throws IOException {
throw new UnsupportedOperationException();
}
// in lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java
@Override
public int nextDoc() throws IOException {
while(true) {
if (current == null) {
if (upto == numSubs-1) {
return this.doc = NO_MORE_DOCS;
} else {
upto++;
final int reader = subs[upto].slice.readerIndex;
current = subs[upto].docsAndPositionsEnum;
currentBase = mergeState.docBase[reader];
currentMap = mergeState.docMaps[reader];
}
}
int doc = current.nextDoc();
if (doc != NO_MORE_DOCS) {
// compact deletions
doc = currentMap.get(doc);
if (doc == -1) {
continue;
}
return this.doc = currentBase + doc;
} else {
current = null;
}
}
}
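When postings are merged, each sub-reader's doc IDs are remapped: the per-reader docMap compacts away deleted docs (returning -1 for them) and docBase offsets the survivors into the merged segment, which is what the loop above does. A self-contained sketch with hypothetical arrays in place of MergeState:
// Illustrative: two sub-readers, reader 1 has doc 1 deleted.
// docMaps[r][d] is the compacted ordinal of doc d in reader r, or -1 if deleted.
class MergeDocMapSketch {
  public static void main(String[] args) {
    int[][] docMaps = { {0, 1, 2}, {0, -1, 1} };
    int[] docBases = {0, 3};            // reader 0 contributes 3 live docs
    for (int reader = 0; reader < docMaps.length; reader++) {
      for (int doc = 0; doc < docMaps[reader].length; doc++) {
        int mapped = docMaps[reader][doc];
        if (mapped == -1) continue;     // deleted: skipped, like the `continue` above
        System.out.println("reader " + reader + " doc " + doc
            + " -> merged doc " + (docBases[reader] + mapped));
      }
    }
  }
}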
// in lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java
@Override
public int nextPosition() throws IOException {
return current.nextPosition();
}
// in lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java
@Override
public int startOffset() throws IOException {
return current.startOffset();
}
// in lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java
@Override
public int endOffset() throws IOException {
return current.endOffset();
}
// in lucene/core/src/java/org/apache/lucene/codecs/MappingMultiDocsAndPositionsEnum.java
@Override
public BytesRef getPayload() throws IOException {
BytesRef payload = current.getPayload();
if (mergeState.currentPayloadProcessor[upto] != null) {
mergeState.currentPayloadProcessor[upto].processPayload(payload);
}
return payload;
}