If you're doing prototyping, it might be helpful to use an interface that allows you to read a bit at a time. C++ makes this easy, but it's not necessarily the most efficient way of doing it. However, for testing purposes, it's probably adequate.
Here's an example that maps a given file into memory on MS Windows and allows one to flip bits one by one:
cpp
// bitvector_impl_win.hpp
#ifndef __BITVECTOR_IMPL_WIN_HPP__
#define __BITVECTOR_IMPL_WIN_HPP__
#include <windows.h>
#include <stdexcept>
// Size of one mapped view, counted in BITS (not bytes): correct_mapping()
// aligns bit indices to this value and shifts right by 3 to obtain byte
// offsets and lengths, so one view spans (1 << 30) bits = 128 MiB of the file.
#define MMAP_PAGE_SIZE (1 << 30)
namespace util {
/// Bit set stored in a memory-mapped file (Win32 implementation).
///
/// Bits are addressed with a 64-bit index. At most one view of the file is
/// mapped at any time; correct_mapping() replaces the view when a bit outside
/// the current one is requested.
///
/// The object owns a file handle, a file-mapping handle and the current view,
/// all released by destroy_mapping() from the destructor. Copying is disabled
/// because a copy would release the very same handles a second time.
class mmbitset_impl
{
public:
    typedef mmbitset_impl self_type;
    typedef unsigned long long size_type;

    /// Opens `filename` and creates a file mapping sized for `len` bits.
    /// Throws std::runtime_error if the file or the mapping cannot be created.
    mmbitset_impl(char* filename, size_type len);
    ~mmbitset_impl();

    // Non-copyable: the handles and the view have exactly one owner.
    mmbitset_impl(const mmbitset_impl&) = delete;
    mmbitset_impl& operator=(const mmbitset_impl&) = delete;

    /// Returns whether `bit` is set. Throws std::range_error if bit >= size().
    bool isset(size_type bit);

    /// Sets (`value == true`) or clears `bit`.
    /// Throws std::range_error if bit >= size().
    void set(size_type bit, bool value);

    /// Number of addressable bits, as passed to the constructor.
    size_type size() const { return size_; }

protected:
    /// Opens the file and creates the mapping object; throws on failure.
    void create_mapping(char* filename);

    /// Unmaps the current view (if any) and closes both handles.
    void destroy_mapping();

    /// Ensures the mapped view contains `bit` and returns the index of `bit`
    /// relative to the start of that view.
    size_type correct_mapping(size_type bit);

    /// Bounds check shared by isset() and set().
    void guard_bit_operation(size_type bit)
    {
        if (bit >= size_)
            throw std::range_error("bit >= size_");
    }

private:
    size_type size_;        // number of bits in the set
    size_type bytes_;       // size in bytes requested for the file mapping
    HANDLE fh_;             // file handle from CreateFileA
    HANDLE mh_;             // handle from CreateFileMapping
    void* memmap_;          // base address of the current view, or NULL
    size_type view_start_;  // first bit index covered by the current view
    size_type view_end_;    // one past the last bit index covered by the view
};
// Builds a bit set of `len` bits backed by `filename`.
//
// The byte size is rounded UP to a whole byte: with a plain `len >> 3` a
// length that is not a multiple of 8 would leave its last bits outside the
// file mapping. Written as shift-plus-carry so it cannot overflow for lengths
// close to the maximum of size_type.
//
// fh_ starts as INVALID_HANDLE_VALUE because that is the "no file" sentinel
// destroy_mapping() tests against.
//
// Throws std::runtime_error (from create_mapping) if the file cannot be
// opened or the mapping cannot be created.
mmbitset_impl::mmbitset_impl(char* filename, mmbitset_impl::size_type len)
    : size_(len),
      bytes_((len >> 3) + ((len & 0x7) != 0 ? 1 : 0)),
      fh_(INVALID_HANDLE_VALUE),
      mh_(NULL),
      memmap_(NULL),
      view_start_(0),
      view_end_(0)
{
    create_mapping(filename);
}
// Releases the view and both handles through destroy_mapping().
mmbitset_impl::~mmbitset_impl()
{
    destroy_mapping();
}
void mmbitset_impl::create_mapping(char* filename)
{
fh_ = CreateFileA(filename, GENERIC_READ | GENERIC_WRITE,
FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
if(fh_ == INVALID_HANDLE_VALUE)
throw std::runtime_error("Error opening file");
mh_ = CreateFileMapping(fh_, NULL, PAGE_READWRITE, bytes_ >> 32, bytes_ & 0xFFFFFFFF, NULL);
if(mh_ == NULL) {
destroy_mapping();
throw std::runtime_error("Error creating file mapping.");
}
}
// Releases, in reverse order of acquisition, the current view, the mapping
// handle and the file handle.
//
// Every member is reset to its "empty" value right after it is released, so
// the function is idempotent: a second call cannot unmap or close anything
// twice. A NULL file handle is treated as "no file" as well, so CloseHandle is
// never given a handle that was never opened.
void mmbitset_impl::destroy_mapping()
{
    if (NULL != memmap_) {
        UnmapViewOfFile(memmap_);
        memmap_ = NULL;
    }
    if (NULL != mh_) {
        CloseHandle(mh_);
        mh_ = NULL;
    }
    if (INVALID_HANDLE_VALUE != fh_ && NULL != fh_) {
        CloseHandle(fh_);
        fh_ = INVALID_HANDLE_VALUE;
    }
}
// Makes sure the mapped view contains `bit`, remapping when necessary, and
// returns the index of `bit` relative to the start of that view.
//
// Views are aligned windows of MMAP_PAGE_SIZE bits; the last window is cut off
// at size_. Callers are expected to have range-checked `bit` beforehand.
//
// Throws std::runtime_error if the view cannot be mapped.
mmbitset_impl::size_type mmbitset_impl::correct_mapping(mmbitset_impl::size_type bit)
{
    const size_type window_bits = MMAP_PAGE_SIZE;

    // Drop the current view if it does not cover the requested bit.
    if (NULL != memmap_ && (bit < view_start_ || bit >= view_end_)) {
        UnmapViewOfFile(memmap_);
        memmap_ = NULL;
    }

    if (NULL == memmap_) {
        // Lower bound of the window containing `bit`.
        view_start_ = bit & ~(window_bits - 1);

        // Upper bound: end of the window, clipped to the size of the set.
        // Spelled out by hand on purpose: std::min would need <algorithm>,
        // and <windows.h> defines a `min` macro (unless NOMINMAX is set) that
        // breaks a plain std::min(...) call.
        const size_type window_end = view_start_ + window_bits;
        view_end_ = (size_ < window_end) ? size_ : window_end;

        // Size the view from bytes_, the byte size the mapping object was
        // created with: the view then covers every byte of the mapping that
        // falls inside this window (including a final, partially used byte)
        // and never asks for more than the mapping holds.
        const size_type byte_offset = view_start_ >> 3;
        const size_type window_bytes = window_bits >> 3;
        const size_type bytes_left = bytes_ - byte_offset;
        const size_type view_bytes = (bytes_left < window_bytes) ? bytes_left : window_bytes;

        memmap_ = MapViewOfFile(mh_, FILE_MAP_ALL_ACCESS,
                                static_cast<DWORD>(byte_offset >> 32),
                                static_cast<DWORD>(byte_offset & 0xFFFFFFFF),
                                static_cast<SIZE_T>(view_bytes));
        if (memmap_ == NULL)
            throw std::runtime_error("Failed to create view of memory mapped file.");
    }
    return bit & (window_bits - 1);
}
// Sets (`value == true`) or clears the bit at index `bit`.
// Throws std::range_error when `bit` is out of range, and whatever
// correct_mapping() throws when the view cannot be mapped.
void mmbitset_impl::set(mmbitset_impl::size_type bit, bool value)
{
    guard_bit_operation(bit);

    // correct_mapping() may replace memmap_, so it must run before the view
    // base address is read.
    const size_type offset = correct_mapping(bit);
    unsigned char& target = static_cast<unsigned char*>(memmap_)[offset >> 3];
    const int mask = 1 << (offset & 0x7);

    if (value) {
        target |= mask;
    } else {
        target &= ~mask;
    }
}
// Reports whether the bit at index `bit` is set.
// Throws std::range_error when `bit` is out of range, and whatever
// correct_mapping() throws when the view cannot be mapped.
bool mmbitset_impl::isset(mmbitset_impl::size_type bit)
{
    guard_bit_operation(bit);

    // correct_mapping() may replace memmap_, so it must run before the view
    // base address is read.
    const size_type offset = correct_mapping(bit);
    const unsigned char* bytes = static_cast<const unsigned char*>(memmap_);
    const int mask = 1 << (offset & 0x7);

    return (bytes[offset >> 3] & mask) != 0;
}
}
#endif
// bitvector.hpp
#ifndef __BITVECTOR_HPP__
#define __BITVECTOR_HPP__
namespace util {

class mmbitset_impl;

/// Public, platform-independent face of the file-backed bit set.
///
/// All work is forwarded to an mmbitset_impl object that is created in the
/// constructor and deleted in the destructor. Because this class is the sole
/// owner of that object, copying is disabled: two copies would otherwise
/// delete the same implementation object.
class mmbitset
{
public:
    typedef mmbitset self_type;
    typedef unsigned long long size_type;

    /// Creates a bit set of `len` bits backed by the file `filename`.
    mmbitset(char* filename, size_type len);
    ~mmbitset();

    // Non-copyable: pimpl_ has exactly one owner.
    mmbitset(const mmbitset&) = delete;
    mmbitset& operator=(const mmbitset&) = delete;

    /// Returns whether `bit` is set.
    bool isset(size_type bit);
    /// Sets `bit` to 1.
    void set(size_type bit);
    /// Clears `bit` to 0.
    void reset(size_type bit);
    /// Sets `bit` to `value`.
    void set(size_type bit, bool value);
    /// Number of bits in the set.
    size_type size();

private:
    mmbitset_impl* pimpl_;  // owned; deleted in the destructor
};

}
#endif
// bitvector.cpp
#include "bitvector.hpp"
#include "bitvector_impl_win.hpp"
namespace util {

// Allocates the implementation object that does the actual work.
mmbitset::mmbitset(char* filename, size_type len)
    : pimpl_(new mmbitset_impl(filename, len))
{}

// Frees the implementation object.
mmbitset::~mmbitset()
{
    delete pimpl_;
}

// Everything below simply forwards to the implementation object.

bool mmbitset::isset(size_type bit)
{
    return pimpl_->isset(bit);
}

void mmbitset::set(size_type bit)
{
    pimpl_->set(bit, true);
}

void mmbitset::reset(size_type bit)
{
    pimpl_->set(bit, false);
}

void mmbitset::set(size_type bit, bool value)
{
    pimpl_->set(bit, value);
}

mmbitset::size_type mmbitset::size()
{
    return pimpl_->size();
}

}
Here is a piece that actually uses it:
cpp
#include <iostream>
#include <stdexcept>
#include "bitvector.hpp"
int doit()
{
using namespace util;
mmbitset bs("g:\\hugeset.bin", 0x8000000ull);
mmbitset::size_type n = 0;
for(mmbitset::size_type x = 2; x != bs.size(); x++) {
if(!bs.isset(x)) n++;
}
std::cout << n << " primes found." << std::endl;
return 0;
}
// Runs doit() and returns its result; if a std::exception escapes, reports it
// on standard output and returns 1 instead.
int main()
{
    int status = 1;
    try {
        status = doit();
    }
    catch (const std::exception& e) {
        std::cout << "Exception caught: " << e.what() << std::endl;
    }
    return status;
}
That basically reads the first 16MB of hugeset.bin (which is 512MB total) and counts every bit in that range, from bit 2 upward, that is 0. The file holds a sieve of Eratosthenes in which composite numbers are marked with a 1 bit, so each 0 bit indicates a prime number; for example, bit 23 in the file will be 0 because the number 23 is prime. So, the result is the number of primes below 134,217,728.
Here is the time it took:
$ time ./countprimes.exe
7603553 primes found.
real 0m21.718s
user 0m0.031s
sys 0m0.000s
Using mmbitset_impl directly will shave a second or two off this timing.
Of course, for your functionality, you'd need to make a wrapper that cares about how large a file already is (wasn't necessary for my goals).