@PublicationReference(title="Feature Hashing for Large Scale Multitask Learning", author={"Kilian Weinberger","Anirban Dasgupta","Josh Attenberg","John Langford","Alex Smola"}, year=2009, type=Conference, publication="Proceedings of the 26th Annual International Conference on Machine Learning (ICML)", url="http://arxiv.org/pdf/0902.2206.pdf") public class FeatureHashing extends AbstractCloneableSerializable implements VectorFunction, VectorOutputEvaluator<Vector,Vector>, VectorFactoryContainer
| Modifier and Type | Field and Description |
|---|---|
static int |
DEFAULT_OUTPUT_DIMENSIONALITY
The default output dimensionality is 100.
|
protected HashFunction |
hashFunction
The hashing function to use.
|
protected int |
outputDimensionality
The output size of the hash.
|
protected VectorFactory<?> |
vectorFactory
Vector factory to use.
|
| Constructor and Description |
|---|
FeatureHashing()
Creates a new
FeatureHashing. |
FeatureHashing(int outputDimensionality)
Creates a new
FeatureHashing with the given output size. |
FeatureHashing(int outputDimensionality,
HashFunction hashFunction,
VectorFactory<?> vectorFactory)
Creates a new
FeatureHashing with the given parameters. |
| Modifier and Type | Method and Description |
|---|---|
Vector |
evaluate(Vector input)
Evaluates the function on the given input and returns the output.
|
HashFunction |
getHashFunction()
Gets the hash function to use.
|
int |
getOutputDimensionality()
Gets the expected dimensionality of the output vector of the evaluator,
if it is known.
|
VectorFactory<?> |
getVectorFactory()
Gets the vector factory the object uses to create new vectors.
|
protected int |
hash(int index)
Applies the hashing function to the index.
|
void |
setHashFunction(HashFunction hashFunction)
Sets the hash function to use.
|
void |
setOutputDimensionality(int outputDimensionality)
Sets the output dimensionality, which is the size of the output vector
that the input is hashed into.
|
void |
setVectorFactory(VectorFactory<?> vectorFactory)
Sets the vector factory to use.
|
clone
public static final int DEFAULT_OUTPUT_DIMENSIONALITY
protected int outputDimensionality
protected HashFunction hashFunction
protected VectorFactory<?> vectorFactory
public FeatureHashing()
Creates a new FeatureHashing.
public FeatureHashing(int outputDimensionality)
Creates a new FeatureHashing with the given output size.
outputDimensionality - The output dimensionality. Cannot be negative.
public FeatureHashing(int outputDimensionality,
HashFunction hashFunction,
VectorFactory<?> vectorFactory)
Creates a new FeatureHashing with the given parameters.
outputDimensionality - The output dimensionality. Cannot be negative.
hashFunction - The hash function to use.
vectorFactory - The vector factory to use.
public Vector evaluate(Vector input)
Evaluator
protected int hash(int index)
index - The index to hash.
public int getOutputDimensionality()
VectorOutputEvaluator
getOutputDimensionality in interface VectorOutputEvaluator<Vector,Vector>
public void setOutputDimensionality(int outputDimensionality)
outputDimensionality - The output dimensionality. Cannot be negative.
public HashFunction getHashFunction()
public void setHashFunction(HashFunction hashFunction)
hashFunction - The hash function to use.
public VectorFactory<?> getVectorFactory()
VectorFactoryContainer
getVectorFactory in interface VectorFactoryContainer
public void setVectorFactory(VectorFactory<?> vectorFactory)
vectorFactory - The vector factory.