SimilarityMeasures.h
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #ifndef STRING_SIMILARITY_H
00012 #define STRING_SIMILARITY_H
00013
00014
00015 #include "Word.h"
00016 #include <vector>
00017
00018
00019
00021
/// Left-tail confidence value of a sampled distribution.
///
/// NOTE(review): only the declaration is visible in this header, so the exact
/// semantics must be confirmed against the implementation file. "Tale" is
/// presumably a misspelling of "tail"; the name is kept because callers
/// depend on it.
///
/// @param dist  Sampled values of the distribution (presumably something like
///              the result of WordPairComparison::distrEstimate() - confirm).
///              Passed by const reference; it is not modified.
/// @param p     Presumably the probability mass of the left tail - confirm
///              the expected range with the implementation.
/// @return      Presumably the value of @p dist that cuts off a left tail of
///              mass @p p - confirm.
double getLeftTaleConfidenceValue(const std::vector<double>& dist, double p);
00029
00030 class StringSimilarityMeasure;
00031
00032
00034
00037 class WordPairComparison
00038 {
00039 public:
00041
00045 WordPairComparison( int rank, const StringSimilarityMeasure* sm): theRank( rank ), pSSM( sm ) {}
00046
00048
00055 vector<double> distrEstimate(int minLen, int maxLen, int nSamples = 1000)const;
00056
00058
00064 double comparePair( const Word& w1, const Word& w2, const vector<double>& measureDistr)const;
00065
00067
00072 double comparePair( const Word& w1, const Word& w2)const;
00073 private:
00074 WordPairComparison( const WordPairComparison& );
00075 WordPairComparison& operator = (const WordPairComparison&);
00076 const StringSimilarityMeasure* pSSM;
00077 int theRank;
00078 };
00079
00081
00082
00083
00085
00087 class StringSimilarityMeasure
00088 {
00089 public:
00091
00096 virtual double measure(const Word& w1, const Word& w2)const = 0;
00097 };
00098
00099
00101
00102
00103
00105
00106
00108 class HammingDistance : public StringSimilarityMeasure
00109 {
00110 public:
00111 HammingDistance() {}
00112
00114
00119 double measure(const Word& w1, const Word& w2) const;
00120 };
00121
00123 class HammingDistanceCyclic : public StringSimilarityMeasure
00124 {
00125 public:
00126 HammingDistanceCyclic() {}
00128
00133 double measure(const Word& w1, const Word& w2) const;
00134 };
00135
00137
00141 class SubwordHammingDistanceCyclic : public StringSimilarityMeasure
00142 {
00143 public:
00144 SubwordHammingDistanceCyclic() {}
00146
00151 double measure(const Word& w1, const Word& w2) const;
00152 };
00153
00154
00156
00157
00158
00160
00162 class EditingDistance : public StringSimilarityMeasure
00163 {
00164 public:
00165 EditingDistance() {}
00166
00168
00173 double measure(const Word& w1, const Word& w2) const;
00174 };
00175
00177
00180 class SubwordEditingDistanceCyclic : public StringSimilarityMeasure
00181 {
00182 public:
00183 SubwordEditingDistanceCyclic() {}
00185
00190 double measure(const Word& w1, const Word& w2) const;
00191 };
00192
00193
00194
00195
00197
00198
00199
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00212
00213
00214
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228 #endif