verificationhashtable.h

Go to the documentation of this file.
00001 // This file is part of par2cmdline (a PAR 2.0 compatible file verification and
00002 // repair tool). See https://parchive.sourceforge.net for details of PAR 2.0.
00003 //
00004 // Copyright (c) 2003 Peter Brian Clements
00005 //
00006 // par2cmdline is free software; you can redistribute it and/or modify
00007 // it under the terms of the GNU General Public License as published by
00008 // the Free Software Foundation; either version 2 of the License, or
00009 // (at your option) any later version.
00010 //
00011 // par2cmdline is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00014 // GNU General Public License for more details.
00015 //
00016 // You should have received a copy of the GNU General Public License
00017 // along with this program; if not, write to the Free Software
00018 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00019 
00020 #ifndef __VERIFICATIONHASHTABLE_H__
00021 #define __VERIFICATIONHASHTABLE_H__
00022 
00023 class Par2RepairerSourceFile;
00024 class VerificationHashTable;
00025 
00026 // The VerificationHashEntry objects form the nodes of a binary trees stored
00027 // in a VerificationHashTable object.
00028 
00029 // There is one VerificationHashEntry object for each data block in the original
00030 // source files.
00031 
00032 class VerificationHashEntry
00033 {
00034 public:
00035   VerificationHashEntry(Par2RepairerSourceFile *_sourcefile,
00036                         DataBlock *_datablock,
00037                         bool _firstblock,
00038                         const FILEVERIFICATIONENTRY *_verificationentry)
00039   {
00040     sourcefile = _sourcefile;
00041     datablock = _datablock;
00042     firstblock = _firstblock;
00043 
00044     crc = _verificationentry->crc;
00045     hash = _verificationentry->hash;
00046 
00047     left = right = same = next = 0;
00048   }
00049 
00050   ~VerificationHashEntry(void)
00051   {
00052     delete left;
00053     delete right;
00054     delete same;
00055   }
00056 
00057   // Insert the current object is a child of the specified parent
00058   void Insert(VerificationHashEntry **parent);
00059 
00060   // Search (starting at the specified parent) for an object with a matching crc
00061   static const VerificationHashEntry* Search(const VerificationHashEntry *entry, u32 crc);
00062 
00063   // Search (starting at the specified parent) for an object with a matching hash
00064   static const VerificationHashEntry* Search(const VerificationHashEntry *entry, const MD5Hash &hash);
00065 
00066   // Comparison operators for searching
00067   bool operator const VerificationHashEntry &r) const 
00068  {
00069     return crc 00070   }
00071   bool operator >(const VerificationHashEntry &r) const 
00072  {
00073     return crc > r.crc || crc == r.crc && hash > r.hash;
00074   }
00075   bool operator ==(const VerificationHashEntry &r) const 
00076  {
00077     return crc == r.crc && hash == r.hash;
00078   }
00079   bool operator const VerificationHashEntry &r) const {return !operator>(r);}
00080   bool operator >=(const VerificationHashEntry &r) const {return !operator00081   bool operator !=(const VerificationHashEntry &r) const {return !operator==(r);}
00082 
00083   // Data
00084   Par2RepairerSourceFile* SourceFile(void) const {return sourcefile;}
00085   const DataBlock* GetDataBlock(void) const {return datablock;}
00086   bool FirstBlock(void) const {return firstblock;}
00087   
00088   // Set/Check the associated datablock
00089   void SetBlock(DiskFile *diskfile, u64 offset) const;
00090   bool IsSet(void) const;
00091 
00092   u32 Checksum(void) const {return crc;}
00093   const MD5Hash& Hash(void) const {return hash;}
00094 
00095   VerificationHashEntry* Same(void) const {return same;}
00096   VerificationHashEntry* Next(void) const {return next;}
00097   void Next(VerificationHashEntry *_next) {next = _next;}
00098 
00099 protected:
00100   // Data
00101   Par2RepairerSourceFile       *sourcefile;
00102   DataBlock                    *datablock;
00103   bool                          firstblock;
00104 
00105   u32                           crc;
00106   MD5Hash                       hash;
00107 
00108 protected:
00109   // Binary tree
00110   VerificationHashEntry *left;
00111   VerificationHashEntry *right;
00112 
00113   // Linked list of entries with the same crc and hash
00114   VerificationHashEntry *same;
00115 
00116   // Linked list of entries in sequence for same file
00117   VerificationHashEntry *next;
00118 };
00119 
00120 inline void VerificationHashEntry::SetBlock(DiskFile *diskfile, u64 offset) const
00121 {
00122   datablock->SetLocation(diskfile, offset);
00123 }
00124 
00125 inline bool VerificationHashEntry::IsSet(void) const
00126 {
00127   return datablock->IsSet();
00128 }
00129 
00130 // Insert a new entry in the tree
00131 inline void VerificationHashEntry::Insert(VerificationHashEntry **parent)
00132 {
00133   while (*parent)
00134   {
00135     if (**parent this)
00136     {
00137       parent = &(*parent)->right;
00138     }
00139     else if (**parent > *this)
00140     {
00141       parent = &(*parent)->left;
00142     }
00143     else
00144     {
00145       break;
00146     }
00147   }
00148 
00149   while (*parent)
00150   {
00151     parent = &(*parent)->same;
00152   }
00153 
00154   *parent = this;
00155 }
00156 
00157 // Search the tree for an entry with the correct crc
00158 inline const VerificationHashEntry* VerificationHashEntry::Search(const VerificationHashEntry *entry, u32 crc)
00159 {
00160   while (entry)
00161   {
00162     if (entry->crc 00163     {
00164       entry = entry->right;
00165     }
00166     else if (entry->crc > crc)
00167     {
00168       entry = entry->left;
00169     }
00170     else
00171     {
00172       break;
00173     }
00174   }
00175 
00176   return entry;
00177 }
00178 
00179 // Search the tree for an entry with the correct hash
00180 inline const VerificationHashEntry* VerificationHashEntry::Search(const VerificationHashEntry *entry, const MD5Hash &hash)
00181 {
00182   u32 crc = entry->crc;
00183 
00184   while (entry)
00185   {
00186     if (entry->crc crc == crc && entry->hash 00187     {
00188       entry = entry->right;
00189     }
00190     else if (entry->crc > crc || entry->crc == crc && entry->hash > hash)
00191     {
00192       entry = entry->left;
00193     }
00194     else
00195     {
00196       break;
00197     }
00198   }
00199 
00200   return entry;
00201 }
00202 
00203 // The VerificationHashTable object contains all of the VerificationHashEntry objects
00204 // and is used to find matches for blocks of data in a target file that is being
00205 // scanned.
00206 
00207 // It is initialised by loading data from all available verification packets for the
00208 // source files.
00209 
00210 class VerificationHashTable
00211 {
00212 public:
00213   VerificationHashTable(void);
00214   ~VerificationHashTable(void);
00215 
00216   void SetLimit(u32 limit);
00217 
00218   // Load the data from the verification packet
00219   void Load(Par2RepairerSourceFile *sourcefile, u64 blocksize);
00220 
00221   // Try to find a match.
00222   // nextentry - The entry which we expect to find next. This is used
00223   // when a sequence of matches are found.
00224   // sourcefile - Which source file we would prefer to find a match for
00225   // if there are more than one possible match (with the
00226   // same crc and hash).
00227   // checksummer - Provides the crc and hash values being tested.
00228   // duplicate - Set on return if the match would have been valid except
00229   // for the fact that the block has already been found.
00230   const VerificationHashEntry* FindMatch(const VerificationHashEntry *nextentry,
00231                                          const Par2RepairerSourceFile *sourcefile,
00232                                          FileCheckSummer &checksummer,
00233                                          bool &duplicate) const;
00234 
00235   // Look up based on the block crc
00236   const VerificationHashEntry* Lookup(u32 crc) const;
00237 
00238   // Continue lookup based on the block hash
00239   const VerificationHashEntry* Lookup(const VerificationHashEntry *entry,
00240                                       const MD5Hash &hash) const;
00241 
00242 protected:
00243   VerificationHashEntry **hashtable;
00244   unsigned int hashmask;
00245 };
00246 
00247 // Search for an entry with the specified crc
00248 inline const VerificationHashEntry* VerificationHashTable::Lookup(u32 crc) const
00249 {
00250   if (hashmask)
00251   {
00252     return VerificationHashEntry::Search(hashtable[crc & hashmask], crc);
00253   }
00254 
00255   return 0;
00256 }
00257 
00258 // Search for an entry with the specified hash
00259 inline const VerificationHashEntry* VerificationHashTable::Lookup(const VerificationHashEntry *entry,
00260                                                                   const MD5Hash &hash) const
00261 {
00262   return VerificationHashEntry::Search(entry, hash);
00263 }
00264 
00265 inline const VerificationHashEntry* VerificationHashTable::FindMatch(const VerificationHashEntry *suggestedentry,
00266                                                                      const Par2RepairerSourceFile *sourcefile,
00267                                                                      FileCheckSummer &checksummer,
00268                                                                      bool &duplicate) const
00269 {
00270   duplicate = false;
00271 
00272   // Get the current checksum from the checksummer
00273   u32 crc = checksummer.Checksum();
00274 
00275   MD5Hash hash;
00276   bool havehash = false;
00277 
00278   // Do we know what the next entry should be
00279   if (0 != suggestedentry)
00280   {
00281     // Is the suggested match supposed to be the last one in the file
00282     if (suggestedentry->Next() == 0)
00283     {
00284       // How long should the last block be
00285       u64 length = suggestedentry->GetDataBlock()->GetLength();
00286 
00287       // Get a short checksum from the checksummer
00288       u32 checksum = checksummer.ShortChecksum(length);
00289 
00290       // Is the checksum correct
00291       if (checksum == suggestedentry->Checksum())
00292       {
00293         // Get a short hash from the checksummer
00294         hash = checksummer.ShortHash(length);
00295 
00296         // If the hash matches as well, then return it
00297         if (hash == suggestedentry->Hash())
00298         {
00299           return suggestedentry;
00300         }
00301       }
00302     }
00303     // If the suggested entry has not already been found, compare the checksum
00304     else if (!suggestedentry->IsSet() && suggestedentry->Checksum() == crc)
00305     {
00306       // Get the hash value from the checksummer
00307       havehash = true;
00308       hash = checksummer.Hash();
00309 
00310       // If the hash value matches, then return it.
00311       if (hash == suggestedentry->Hash())
00312       {
00313         return suggestedentry;
00314       }
00315     }
00316   }
00317 
00318   // Look for other possible matches for the checksum
00319   const VerificationHashEntry *nextentry = VerificationHashEntry::Search(hashtable[crc & hashmask], crc);
00320   if (0 == nextentry)
00321     return 0;
00322 
00323   // If we don't have the hash yet, get it
00324   if (!havehash)
00325   {
00326     hash = checksummer.Hash();
00327   }
00328 
00329   // Look for an entry with a matching hash
00330   nextentry = VerificationHashEntry::Search(nextentry, hash);
00331   if (0 == nextentry)
00332     return 0;
00333 
00334   // Is there one match with the same checksum and hash, or many
00335   if (nextentry->Same() == 0)
00336   {
00337     // If the match is for a block that is part of a target file
00338     // for which we already have a complete match, then don't
00339     // return it.
00340     if (nextentry->SourceFile()->GetCompleteFile() != 0)
00341     {
00342       duplicate = true;
00343       return 0;
00344     }
00345 
00346     // If we are close to the end of the file and the block
00347     // length is wrong, don't return it because it is an
00348     // invalid match
00349     if (checksummer.ShortBlock() && checksummer.BlockLength() != nextentry->GetDataBlock()->GetLength())
00350     {
00351       return 0;
00352     }
00353 
00354     // If the match was at the start of the file and it is the first
00355     // block for a target file, then return it.
00356     if (nextentry->FirstBlock() && checksummer.Offset() == 0)
00357     {
00358       return nextentry;
00359     }
00360 
00361     // Was this match actually the one which had originally been suggested
00362     // but which has presumably already been found
00363     if (nextentry == suggestedentry)
00364     {
00365       // Was the existing match in the same file as the new match
00366       if (nextentry->IsSet() && 
00367           nextentry->GetDataBlock()->GetDiskFile() == checksummer.GetDiskFile())
00368       {
00369         // Yes. Don't return it
00370         duplicate = true;
00371         return 0;
00372       }
00373       else
00374       {
00375         // No, it was in a different file. Return it.
00376         // This ensures that we can find a perfect match for a target
00377         // file even if some of the blocks had already been found
00378         // in a different file.
00379         return nextentry;
00380       }
00381     }
00382     else
00383     {
00384       // return it only if it has not already been used
00385       if (nextentry->IsSet())
00386       {
00387         duplicate = true;
00388         return 0;
00389       }
00390 
00391       return nextentry;
00392     }
00393   }
00394 
00395   // Do we prefer to match entries for a particular source file
00396   if (0 != sourcefile)
00397   {
00398     const VerificationHashEntry *currententry = nextentry;
00399     nextentry = 0;
00400 
00401     // We don't want entries for the wrong source file, ones that
00402     // have already been matched, or ones that are the wrong length
00403     while (currententry && (currententry->SourceFile() != sourcefile || 
00404                             currententry->IsSet() ||
00405                             checksummer.ShortBlock() && checksummer.BlockLength() != currententry->GetDataBlock()->GetLength()
00406                            )
00407           )
00408     {
00409       // If we found an unused entry (which was presumably for the wrong 
00410       // source file) remember it (providing it is the correct length).
00411       if (0 == nextentry && !(currententry->IsSet() || 
00412                               checksummer.ShortBlock() && checksummer.BlockLength() != currententry->GetDataBlock()->GetLength()
00413                              )
00414          )
00415       {
00416         nextentry = currententry;
00417       }
00418 
00419       currententry = currententry->Same();
00420     }
00421 
00422     // If we found an unused entry for the source file we want, return it
00423     if (0 != currententry)
00424       return currententry;
00425   }
00426 
00427   // Check for an unused entry which is the correct length
00428   while (nextentry && (nextentry->IsSet() ||
00429                        checksummer.ShortBlock() && checksummer.BlockLength() != nextentry->GetDataBlock()->GetLength()
00430                       )
00431         )
00432   {
00433     nextentry = nextentry->Same();
00434   }
00435 
00436   // Return what we have found
00437   if (nextentry == 0)
00438   {
00439     duplicate = true;
00440   }
00441 
00442   return nextentry;
00443 }
00444 
00445 #endif // __VERIFICATIONHASHTABLE_H__

Generated on Sun Oct 12 01:45:31 2008 for NNTPGrab by  1.5.4