par2repairer.h

Go to the documentation of this file.
00001 // This file is part of par2cmdline (a PAR 2.0 compatible file verification and
00002 // repair tool). See https://parchive.sourceforge.net for details of PAR 2.0.
00003 //
00004 // Copyright (c) 2003 Peter Brian Clements
00005 //
00006 // par2cmdline is free software; you can redistribute it and/or modify
00007 // it under the terms of the GNU General Public License as published by
00008 // the Free Software Foundation; either version 2 of the License, or
00009 // (at your option) any later version.
00010 //
00011 // par2cmdline is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00014 // GNU General Public License for more details.
00015 //
00016 // You should have received a copy of the GNU General Public License
00017 // along with this program; if not, write to the Free Software
00018 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
00019 //
00020 // Modifications for concurrent processing, Unicode support, and hierarchical
00021 // directory support are Copyright (c) 2007-2008 Vincent Tan.
00022 // Search for "#if WANT_CONCURRENT" for concurrent code.
00023 // Concurrent processing utilises Intel Thread Building Blocks 2.0,
00024 // Copyright (c) 2007 Intel Corp.
00025 
00026 #ifndef __PAR2REPAIRER_H__
00027 #define __PAR2REPAIRER_H__
00028 
00029 #include 
00030 #include "parheaders.h"
00031 
00032 #if WANT_CONCURRENT
00033 
00034  #include 
00035 
00036   struct u32_hasher {
00037     static  size_t  hash(u32 i) { return static_castsize_t> (i); }
00038     static  bool  equal( u32 x, u32 y ) { return x == y; }
00039   };
00040 
// Case-sensitive hash/compare policy for std::string keys.
struct string_hasher {
  // Folds the characters of x, up to its first NUL, into a running value:
  // multiply by 17, then XOR in the next character. An empty string
  // hashes to 0.
  static size_t hash(const std::string& x) {
    const char* cursor = x.c_str();
    size_t digest = 0;
    while (*cursor) {
      digest = (digest * 17) ^ *cursor;
      ++cursor;
    }
    return digest;
  }

  // Exact string equality.
  static bool equal(const std::string& x, const std::string& y) {
    return x.compare(y) == 0;
  }
};
00050 
// Case-insensitive hash/compare policy for std::string keys.
// hash() and equal() fold case through the same helper, so two keys that
// compare equal always produce the same hash.
struct istring_hasher {
  // Folds the lower-cased characters of x, up to its first NUL, into a
  // running value: multiply by 17, then XOR in the next character.
  // An empty string hashes to 0.
  static size_t hash(const std::string& x) {
    size_t h = 0;
    for (const char* s = x.c_str(); *s; ++s)
      h = (h * 17) ^ static_cast<size_t>(fold(*s));
    return h;
  }

  // Returns true when x and y have the same length and every pair of
  // corresponding characters is equal after lower-casing.
  static bool equal(const std::string& x, const std::string& y) {
    if (x.length() != y.length())
      return false;
    for (std::string::size_type i = 0; i < x.length(); ++i) {
      if (fold(x[i]) != fold(y[i]))
        return false;
    }
    return true;
  }

private:
  // Lower-cases one character. The value goes through unsigned char first
  // because tolower() is only defined for EOF and unsigned char values;
  // a plain (possibly negative) char is outside that range.
  static int fold(char c) { return tolower(static_cast<unsigned char>(c)); }
};
00061 
00062   template typename T>
00063   struct atomic_ptr : tbb::atomic {
00064     // wow - C++ sometimes really is ugly...
00065     T  operator->(void) { return tbb::atomic::operator typename tbb::atomic::value_type(); }
00066     atomic_ptr&  operator=(T t) { tbb::atomic::operator=(t); return *this; }
00067   };
00068 
00069   class ConcurrentDiskFileMap {
00070   public:
00071  #if defined(WIN32) || defined(__APPLE_CC__)
00072     typedef tbb::concurrent_hash_map  map_type;
00073  #else
00074     typedef tbb::concurrent_hash_map  map_type;
00075  #endif
00076     ConcurrentDiskFileMap(void) {}
00077     ~ConcurrentDiskFileMap(void) {
00078       map_type::iterator fi;
00079       for (fi = _diskfilemap.begin(); fi != _diskfilemap.end(); ++fi)
00080         delete (*fi).second;
00081     }
00082 
00083     bool  Insert(DiskFile *diskfile) {
00084       assert(!diskfile->FileName().empty());
00085       map_type::accessor  a;
00086       (bool) _diskfilemap.insert(a, diskfile->FileName());
00087       a->second = diskfile;
00088       return true;
00089     }
00090     void Remove(DiskFile *diskfile) {
00091       assert(!diskfile->FileName().empty());
00092       (bool) _diskfilemap.erase(diskfile->FileName());
00093     }
00094     DiskFile* Find(string filename) const {
00095       assert(!filename.empty());
00096       map_type::const_accessor  a;
00097       return _diskfilemap.find(a, filename) ?  a->second : NULL;
00098     }
00099 
00100   protected:
00101     map_type _diskfilemap;             // Map from filename to DiskFile
00102   };
00103 
00104 #endif
00105 
00106 class Par2Repairer
00107 {
00108 public:
00109   Par2Repairer(void);
00110   ~Par2Repairer(void);
00111 
00112   Result Process(const CommandLine &commandline, bool dorepair);
00113 
00114   sigc::signal sig_filename;
00115   sigc::signal sig_progress;
00116   sigc::signal sig_headers;
00117   sigc::signal sig_done;
00118 
00119 protected:
00120   // Steps in verifying and repairing files:
00121 
00122 #if WANT_CONCURRENT
00123 public:
00124  #if WANT_CONCURRENT_SOURCE_VERIFICATION
00125   void VerifyOneSourceFile(Par2RepairerSourceFile *sourcefile, bool& finalresult);
00126  #endif
00127   void ProcessDataForOutputIndex(u32 outputstartindex, u32 outputendindex, size_t blocklength, u32 inputindex);
00128 #endif
00129   // Load packets from the specified file
00130   bool LoadPacketsFromFile(string filename);
00131 #if WANT_CONCURRENT
00132 protected:
00133 #endif
00134   // Finish loading a recovery packet
00135   bool LoadRecoveryPacket(DiskFile *diskfile, u64 offset, PACKET_HEADER &header);
00136   // Finish loading a file description packet
00137   bool LoadDescriptionPacket(DiskFile *diskfile, u64 offset, PACKET_HEADER &header);
00138   // Finish loading a file verification packet
00139   bool LoadVerificationPacket(DiskFile *diskfile, u64 offset, PACKET_HEADER &header);
00140   // Finish loading the main packet
00141   bool LoadMainPacket(DiskFile *diskfile, u64 offset, PACKET_HEADER &header);
00142   // Finish loading the creator packet
00143   bool LoadCreatorPacket(DiskFile *diskfile, u64 offset, PACKET_HEADER &header);
00144 
00145   // Load packets from other PAR2 files with names based on the original PAR2 file
00146   bool LoadPacketsFromOtherFiles(string filename);
00147 
00148   // Load packets from any other PAR2 files whose names are given on the command line
00149   bool LoadPacketsFromExtraFiles(const list<:extrafile> &extrafiles);
00150 
00151   // Check that the packets are consistent and discard any that are not
00152   bool CheckPacketConsistency(void);
00153 
00154   // Use the information in the main packet to get the source files
00155   // into the correct order and determine their filenames
00156   bool CreateSourceFileList(void);
00157 
00158   // Determine the total number of DataBlocks for the recoverable source files
00159   // The allocate the DataBlocks and assign them to each source file
00160   bool AllocateSourceBlocks(void);
00161 
00162   // Create a verification hash table for all files for which we have not
00163   // found a complete version of the file and for which we have
00164   // a verification packet
00165   bool PrepareVerificationHashTable(void);
00166 
00167   // Compute the table for the sliding CRC computation
00168   bool ComputeWindowTable(void);
00169 
00170   // Attempt to verify all of the source files
00171   bool VerifySourceFiles(void);
00172 
00173   // Scan any extra files specified on the command line
00174   bool VerifyExtraFiles(const list<:extrafile> &extrafiles);
00175 
00176   // Attempt to match the data in the DiskFile with the source file
00177   bool VerifyDataFile(DiskFile *diskfile, Par2RepairerSourceFile *sourcefile);
00178 
00179   // Perform a sliding window scan of the DiskFile looking for blocks of data that 
00180   // might belong to any of the source files (for which a verification packet was
00181   // available). If a block of data might be from more than one source file, prefer
00182   // the one specified by the "sourcefile" parameter. If the first data block
00183   // found is for a different source file then "sourcefile" is changed accordingly.
00184   bool ScanDataFile(DiskFile                *diskfile,   // [in] The file being scanned
00185                     Par2RepairerSourceFile* &sourcefile, // [in/out] The source file matched
00186                     MatchType               &matchtype,  // [out] The type of match
00187                     MD5Hash                 &hashfull,   // [out] The full hash of the file
00188                     MD5Hash                 &hash16k,    // [out] The hash of the first 16k
00189                     u32                     &count);     // [out] The number of blocks found
00190 
00191   // Find out how much data we have found
00192   void UpdateVerificationResults(void);
00193 
00194   // Check the verification results and report the results 
00195   bool CheckVerificationResults(void);
00196 
00197   // Rename any damaged or missnamed target files.
00198   bool RenameTargetFiles(void);
00199 
00200   // Work out which files are being repaired, create them, and allocate
00201   // target DataBlocks to them, and remember them for later verification.
00202   bool CreateTargetFiles(void);
00203 
00204   // Work out which data blocks are available, which need to be copied
00205   // directly to the output, and which need to be recreated, and compute
00206   // the appropriate Reed Solomon matrix.
00207   bool ComputeRSmatrix(void);
00208 
00209   // Allocate memory buffers for reading and writing data to disk.
00210   bool AllocateBuffers(size_t memorylimit);
00211 
00212   // Read source data, process it through the RS matrix and write it to disk.
00213   bool ProcessData(u64 blockoffset, size_t blocklength);
00214 
00215   // Verify that all of the reconstructed target files are now correct
00216   bool VerifyTargetFiles(void);
00217 
00218   // Delete all of the partly reconstructed files
00219   bool DeleteIncompleteTargetFiles(void);
00220 
00221 protected:
00222   ParHeaders*               headers;                 // Headers
00223   CommandLine::NoiseLevel   noiselevel;              // OnScreen display
00224 
00225   string                    searchpath;              // Where to find files on disk
00226 
00227   bool                      firstpacket;             // Whether or not a valid packet has been found.
00228   MD5Hash                   setid;                   // The SetId extracted from the first packet.
00229 #if WANT_CONCURRENT
00230   tbb::concurrent_hash_map recoverypacketmap;       // One recovery packet for each exponent value.
00231   ::atomic_ptr    mainpacket;           // One copy of the main packet.
00232   ::atomic_ptr creatorpacket;        // One copy of the creator packet.
00233 
00234   ConcurrentDiskFileMap     diskFileMap;
00235 #else
00236   map recoverypacketmap;       // One recovery packet for each exponent value.
00237   MainPacket               *mainpacket;              // One copy of the main packet.
00238   CreatorPacket            *creatorpacket;           // One copy of the creator packet.
00239 
00240   DiskFileMap               diskFileMap;
00241 #endif
00242 
00243   map sourcefilemap;// Map from FileId to SourceFile
00244   vector      sourcefiles;  // The source files
00245   vector      verifylist;   // Those source files that are being repaired
00246 
00247   u64                       blocksize;               // The block size.
00248   u64                       chunksize;               // How much of a block can be processed.
00249   u32                       sourceblockcount;        // The total number of blocks
00250   u32                       availableblockcount;     // How many undamaged blocks have been found
00251   u32                       missingblockcount;       // How many blocks are missing
00252 
00253   bool                      blocksallocated;         // Whether or not the DataBlocks have been allocated
00254   vector         sourceblocks;            // The DataBlocks that will be read from disk
00255   vector         targetblocks;            // The DataBlocks that will be written to disk
00256 
00257   u32                       windowtable[256];        // Table for sliding CRCs
00258   u32                       windowmask;              // Maks for sliding CRCs
00259 
00260   bool                            blockverifiable;         // Whether and files can be verified at the block level
00261   VerificationHashTable           verificationhashtable;   // Hash table for block verification
00262   list   unverifiablesourcefiles; // Files that are not block verifiable
00263 
00264   u32                       completefilecount;       // How many files are fully verified
00265   u32                       renamedfilecount;        // How many files are verified but have the wrong name
00266   u32                       damagedfilecount;        // How many files exist but are damaged
00267   u32                       missingfilecount;        // How many files are completely missing
00268 
00269   vector        inputblocks;             // Which DataBlocks will be read from disk
00270   vector        copyblocks;              // Which DataBlocks will copied back to disk
00271   vector        outputblocks;            // Which DataBlocks have to calculated using RS
00272 
00273   ReedSolomon     rs;                      // The Reed Solomon matrix.
00274 
00275   void                     *inputbuffer;             // Buffer for reading DataBlocks (chunksize)
00276   void                     *outputbuffer;            // Buffer for writing DataBlocks (chunksize * missingblockcount)
00277 
00278 #if WANT_CONCURRENT
00279   tbb::atomic          progress;                // How much data has been processed.
00280 #else
00281   u64                       progress;                // How much data has been processed.
00282 #endif
00283   u64                       totaldata;               // Total amount of data to be processed.
00284   u64                       totalsize;               // Total data size
00285 
00286 #if WANT_CONCURRENT
00287   unsigned                  concurrent_processing_level;
00288   tbb::mutex                cout_mutex;
00289   tbb::atomic          cout_in_use;             // when repairing, this is used to display % done w/o blocking a thread
00290   tbb::tick_count           last_cout;   // when cout was used for output
00291 #endif
00292 };
00293 
00294 #endif // __PAR2REPAIRER_H__

Generated on Sun Oct 12 01:45:30 2008 for NNTPGrab by Doxygen 1.5.4