001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.collections4.bloomfilter; 018 019import java.util.Objects; 020import java.util.TreeSet; 021import java.util.function.IntPredicate; 022import java.util.function.LongPredicate; 023 024/** 025 * A bloom filter using a TreeSet of integers to track enabled bits. This is a standard 026 * implementation and should work well for most low cardinality Bloom filters. 027 * 028 * @since 4.5.0-M1 029 */ 030public final class SparseBloomFilter implements BloomFilter<SparseBloomFilter> { 031 032 /** 033 * The bitSet that defines this BloomFilter. 034 */ 035 private final TreeSet<Integer> indices; 036 037 /** 038 * The shape of this BloomFilter. 039 */ 040 private final Shape shape; 041 042 /** 043 * Constructs an empty BitSetBloomFilter. 044 * 045 * @param shape The shape of the filter. 046 */ 047 public SparseBloomFilter(final Shape shape) { 048 Objects.requireNonNull(shape, "shape"); 049 this.shape = shape; 050 this.indices = new TreeSet<>(); 051 } 052 053 private SparseBloomFilter(final SparseBloomFilter source) { 054 shape = source.shape; 055 indices = new TreeSet<>(source.indices); 056 } 057 058 /** 059 * Adds the index to the indices. 060 * 061 * @param idx the index to add. 062 * @return {@code true} always 063 */ 064 private boolean add(final int idx) { 065 indices.add(idx); 066 return true; 067 } 068 069 @Override 070 public long[] asBitMapArray() { 071 final long[] result = BitMaps.newBitMap(shape); 072 for (final int i : indices) { 073 BitMaps.set(result, i); 074 } 075 return result; 076 } 077 078 @Override 079 public int cardinality() { 080 return indices.size(); 081 } 082 083 @Override 084 public int characteristics() { 085 return SPARSE; 086 } 087 088 @Override 089 public void clear() { 090 indices.clear(); 091 } 092 093 @Override 094 public boolean contains(final BitMapExtractor bitMapExtractor) { 095 return contains(IndexExtractor.fromBitMapExtractor(bitMapExtractor)); 096 } 097 098 @Override 099 public boolean contains(final IndexExtractor indexExtractor) { 100 return indexExtractor.processIndices(indices::contains); 101 } 102 103 /** 104 * Creates a new instance of this {@link SparseBloomFilter} with the same properties as the current one. 105 * 106 * @return a copy of this {@link SparseBloomFilter}. 107 */ 108 @Override 109 public SparseBloomFilter copy() { 110 return new SparseBloomFilter(this); 111 } 112 113 @Override 114 public Shape getShape() { 115 return shape; 116 } 117 118 @Override 119 public boolean isEmpty() { 120 return indices.isEmpty(); 121 } 122 123 @Override 124 public boolean merge(final BitMapExtractor bitMapExtractor) { 125 Objects.requireNonNull(bitMapExtractor, "bitMapExtractor"); 126 return this.merge(IndexExtractor.fromBitMapExtractor(bitMapExtractor)); 127 } 128 129 @Override 130 public boolean merge(final BloomFilter<?> other) { 131 Objects.requireNonNull(other, "other"); 132 final IndexExtractor indexExtractor = (other.characteristics() & SPARSE) != 0 ? (IndexExtractor) other : IndexExtractor.fromBitMapExtractor(other); 133 merge(indexExtractor); 134 return true; 135 } 136 137 @Override 138 public boolean merge(final Hasher hasher) { 139 Objects.requireNonNull(hasher, "hasher"); 140 merge(hasher.indices(shape)); 141 return true; 142 } 143 144 @Override 145 public boolean merge(final IndexExtractor indexExtractor) { 146 Objects.requireNonNull(indexExtractor, "indexExtractor"); 147 indexExtractor.processIndices(this::add); 148 if (!indices.isEmpty()) { 149 if (indices.last() >= shape.getNumberOfBits()) { 150 throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)", 151 indices.last(), shape.getNumberOfBits() - 1)); 152 } 153 if (indices.first() < 0) { 154 throw new IllegalArgumentException( 155 String.format("Value in list %s is less than 0", indices.first())); 156 } 157 } 158 return true; 159 } 160 161 @Override 162 public boolean processBitMaps(final LongPredicate consumer) { 163 Objects.requireNonNull(consumer, "consumer"); 164 final int limit = BitMaps.numberOfBitMaps(shape); 165 // 166 // because our indices are always in order we can shorten the time necessary to 167 // create the longs for the consumer 168 // 169 // the currently constructed bitMap 170 long bitMap = 0; 171 // the bitmap we are working on 172 int idx = 0; 173 for (final int i : indices) { 174 while (BitMaps.getLongIndex(i) != idx) { 175 if (!consumer.test(bitMap)) { 176 return false; 177 } 178 bitMap = 0; 179 idx++; 180 } 181 bitMap |= BitMaps.getLongBit(i); 182 } 183 // we fall through with data in the bitMap 184 if (!consumer.test(bitMap)) { 185 return false; 186 } 187 // account for hte bitMap in the previous block + the next one 188 idx++; 189 // while there are more blocks to generate send zero to the consumer. 190 while (idx < limit) { 191 if (!consumer.test(0L)) { 192 return false; 193 } 194 idx++; 195 } 196 return true; 197 } 198 199 @Override 200 public boolean processIndices(final IntPredicate consumer) { 201 Objects.requireNonNull(consumer, "consumer"); 202 return indices.stream().allMatch(consumer::test); 203 } 204}