@PublicationReference(title="Feature Hashing for Large Scale Multitask Learning", author={"Kilian Weinberger","Anirban Dasgupta","Josh Attenberg","John Langford","Alex Smola"}, year=2009, type=Conference, publication="Proceedings of the 26th Annual International Conference on Machine Learning (ICML)", url="http://arxiv.org/pdf/0902.2206.pdf") public class FeatureHashing extends AbstractCloneableSerializable implements VectorFunction, VectorOutputEvaluator<Vector,Vector>, VectorFactoryContainer
Modifier and Type | Field and Description |
---|---|
static int |
DEFAULT_OUTPUT_DIMENSIONALITY
The default output dimensionality is 100.
|
protected HashFunction |
hashFunction
The hashing function to use.
|
protected int |
outputDimensionality
The output size of the hash.
|
protected VectorFactory<?> |
vectorFactory
Vector factory to use.
|
Constructor and Description |
---|
FeatureHashing()
Creates a new
FeatureHashing . |
FeatureHashing(int outputDimensionality)
Creates a new
FeatureHashing with the given output size. |
FeatureHashing(int outputDimensionality,
HashFunction hashFunction,
VectorFactory<?> vectorFactory)
Creates a new
FeatureHashing with the given parameters. |
Modifier and Type | Method and Description |
---|---|
Vector |
evaluate(Vector input)
Evaluates the function on the given input and returns the output.
|
HashFunction |
getHashFunction()
Gets the hash function to use.
|
int |
getOutputDimensionality()
Gets the expected dimensionality of the output vector of the evaluator,
if it is known.
|
VectorFactory<?> |
getVectorFactory()
Gets the vector factory that the object uses to create new vectors.
|
protected int |
hash(int index)
Applies the hashing function to the index.
|
void |
setHashFunction(HashFunction hashFunction)
Sets the hash function to use.
|
void |
setOutputDimensionality(int outputDimensionality)
Sets the output dimensionality, which is the size of the output vector
that the input is hashed into.
|
void |
setVectorFactory(VectorFactory<?> vectorFactory)
Sets the vector factory to use.
|
clone
public static final int DEFAULT_OUTPUT_DIMENSIONALITY
protected int outputDimensionality
protected HashFunction hashFunction
protected VectorFactory<?> vectorFactory
public FeatureHashing()
Creates a new FeatureHashing.
public FeatureHashing(int outputDimensionality)
Creates a new FeatureHashing with the given output size.
outputDimensionality - The output dimensionality. Cannot be negative.
public FeatureHashing(int outputDimensionality, HashFunction hashFunction, VectorFactory<?> vectorFactory)
Creates a new FeatureHashing with the given parameters.
outputDimensionality - The output dimensionality. Cannot be negative.
hashFunction - The hash function to use.
vectorFactory - The vector factory to use.
public Vector evaluate(Vector input)
Evaluator
protected int hash(int index)
index - The index to hash.
public int getOutputDimensionality()
VectorOutputEvaluator
getOutputDimensionality
in interface VectorOutputEvaluator<Vector,Vector>
public void setOutputDimensionality(int outputDimensionality)
outputDimensionality - The output dimensionality. Cannot be negative.
public HashFunction getHashFunction()
public void setHashFunction(HashFunction hashFunction)
hashFunction - The hash function to use.
public VectorFactory<?> getVectorFactory()
VectorFactoryContainer
getVectorFactory
in interface VectorFactoryContainer
public void setVectorFactory(VectorFactory<?> vectorFactory)
vectorFactory - The vector factory.