diff --git a/Makefile b/Makefile index deecb01..aa92a49 100644 --- a/Makefile +++ b/Makefile @@ -66,7 +66,7 @@ config.mk: config.mk.in libchecktestdata.o: config.mk libchecktestdata.o: $(PARSER_GEN) -libchecktestdata.o: %.o: %.cc %.hpp databuffer.hpp parser.h +libchecktestdata.o: %.o: %.cc %.hpp databuffer.hpp bigint.hpp parser.h checktestdata: CPPFLAGS += $(BOOST_CPPFLAGS) checktestdata: LDFLAGS += $(BOOST_LDFLAGS) $(STATIC_LINK_START) $(LIBGMPXX) $(STATIC_LINK_END) diff --git a/bigint.hpp b/bigint.hpp new file mode 100644 index 0000000..eb717bf --- /dev/null +++ b/bigint.hpp @@ -0,0 +1,186 @@ +#ifndef BIGINT_HPP +#define BIGINT_HPP + +#if !defined __has_builtin || !__has_builtin(__builtin_smull_overflow) +#error "This code requires the __builtin_*_overflow compiler builtin functions." +#endif + +#include + +/** + * Wrapper around gmp's mpz_class, with a fast path for values that fit in a long. + */ +class bigint { +private: + void assign_from(const mpz_class& x) const { + large.reset(new mpz_class(x)); + small = LONG_MIN; + } + +public: + // Invariant: if 'large' is set, 'small' must be set to LONG_MIN and not used. + // ('small' is allowed to be LONG_MIN if 'large' is null however.) + mutable long small; + mutable std::unique_ptr large; + + bigint(): small(0) {} + bigint(long x): small(x) {} + bigint(const mpz_class& x) { assign_from(x); } + bigint(bigint&& other): small(other.small), large(std::move(other.large)) {} + bigint(const bigint& other): small(other.small) { + if (other.large) { + assign_from(*other.large); + } + } + bigint(const std::string& str) { + bool neg = false; + size_t i = 0; + if (str[0] == '-') { + neg = true; + i = 1; + } + long val = 0; + for (; i < str.size(); i++) { + int dig = str[i] - '0'; + if (__builtin_smull_overflow(val, 10, &val) || + __builtin_saddl_overflow(val, dig, &val)) { + assign_from(mpz_class(str)); + return; + } + } + small = neg ? -val : val; + } + mpz_class to_mpz() const { + if (!large) { + assign_from(mpz_class(small)); + } + return *large; + }; + void shrink() const { + if (large && large->fits_slong_p()) { + small = large->get_si(); + if (small != LONG_MIN) { + large.reset(); + } + } + } + bigint& operator=(const bigint& other) { + small = other.small; + if (other.large) { + assign_from(*other.large); + } else { + large = nullptr; + } + return *this; + } + bigint& operator=(bigint&& other) { + small = other.small; + large = std::move(other.large); + return *this; + } + std::string get_str() const { + return large ? large->get_str() : std::to_string(small); + } + bool fits_ulong_p() const { + return large ? large->fits_ulong_p() : (0 <= small && (unsigned long) small < ULONG_MAX); + } + unsigned long get_ui() const { + return large ? large->get_ui() : (unsigned long) small; + } + long get_si() const { + return large ? large->get_si() : (long) small; + } +}; + +inline bigint operator+(const bigint& a, const bigint& b) { + long res; + if (!a.large && !b.large && !__builtin_saddl_overflow(a.small, b.small, &res)) { + return {res}; + } + return {a.to_mpz() + b.to_mpz()}; +} + +inline bigint operator-(const bigint& a, const bigint& b) { + long res; + if (!a.large && !b.large && !__builtin_ssubl_overflow(a.small, b.small, &res)) { + return {res}; + } + return {a.to_mpz() - b.to_mpz()}; +} + +inline bigint operator-(const bigint& a) { + if (!a.large && a.small != LONG_MIN) { + return {-a.small}; + } + return {-*a.large}; +} + +inline bigint operator*(const bigint& a, const bigint& b) { + long res; + if (!a.large && !b.large && !__builtin_smull_overflow(a.small, b.small, &res)) { + return {res}; + } + return {a.to_mpz() * b.to_mpz()}; +} + +inline bigint operator/(const bigint& a, const bigint& b) { + if (!a.large && !b.large && a.small != LONG_MIN) { + return {a.small / b.small}; + } + return {a.to_mpz() / b.to_mpz()}; +} + +inline bigint operator%(const bigint& a, const bigint& b) { + if (!a.large && !b.large && a.small != LONG_MIN) { + return {a.small % b.small}; + } + return {a.to_mpz() % b.to_mpz()}; +} + +inline bigint& operator%=(bigint& a, const bigint& b) { a = a % b; return a; } + +#define BINOP(op, opeq) \ + inline bigint operator op(const bigint& a, int b) { return a op bigint(b); } \ + inline bigint operator op(int a, const bigint& b) { return bigint(a) op b; } \ + inline mpf_class operator op(const bigint& a, const mpf_class& b) { return a.to_mpz() op b; } \ + inline mpf_class operator op(const mpf_class& a, const bigint& b) { return a op b.to_mpz(); } \ + inline bigint& operator opeq(bigint& a, const bigint& b) { a = a op b; return a; } + +#define RELOP(op) \ + inline bool operator op(const bigint& a, const mpf_class& b) { return a.to_mpz() op b; } \ + inline bool operator op(const mpf_class& a, const bigint& b) { return a op b.to_mpz(); } \ + inline bool operator op(const bigint& a, const bigint& b) { \ + return !a.large && !b.large ? a.small op b.small : a.to_mpz() op b.to_mpz(); \ + } \ + inline bool operator op(const bigint& a, int b) { \ + return !a.large ? a.small op b : a.to_mpz() op b; \ + } \ + inline bool operator op(int a, const bigint& b) { \ + return !b.large ? a op b.small : a op b.to_mpz(); \ + } + + +BINOP(+, +=) +BINOP(-, -=) +BINOP(*, *=) +BINOP(/, /=) + +RELOP(<) +RELOP(>) +RELOP(<=) +RELOP(>=) +RELOP(==) +RELOP(!=) + +#undef BINOP +#undef RELOP + +inline std::ostream& operator<<(std::ostream& os, const bigint& x) { + if (x.large) { + return os << *x.large; + } else { + return os << x.small; + } +} + +#endif /* BIGINT_HPP */ diff --git a/checktestdata.cc b/checktestdata.cc index 91fea2f..9708352 100644 --- a/checktestdata.cc +++ b/checktestdata.cc @@ -148,7 +148,7 @@ int main(int argc, char **argv) fstream fdata; if ( argc>optind+1 ) { char *datafile = argv[optind+1]; - ios_base::openmode mode = generate ? ios_base::out|ios_base::trunc : ios_base::in; + ios_base::openmode mode = generate ? ios_base::out|ios_base::trunc|ios_base::binary : ios_base::in|ios_base::binary; fdata.open(datafile, mode); if ( fdata.fail() ) { cerr << "Error opening '" << datafile << "'.\n"; diff --git a/databuffer.hpp b/databuffer.hpp index 4b8fdb6..fa6fff9 100644 --- a/databuffer.hpp +++ b/databuffer.hpp @@ -16,11 +16,12 @@ int isspace_notnewline(char c) { return isspace(c) && c!='\n'; } class databuffer { private: std::string data; - size_t _pos, _line, _lpos; + size_t _pos = 0, _line = 0, _lpos = 0; public: databuffer() {} - databuffer(const std::string &_data): data(_data), _pos(0), _line(0), _lpos(0) {} + databuffer(const std::string &_data): data(_data) {} + databuffer(std::string&& _data): data(std::move(_data)) {} bool eof() const { return _pos >= data.size(); } diff --git a/libchecktestdata.cc b/libchecktestdata.cc index 0f6e787..ac06278 100644 --- a/libchecktestdata.cc +++ b/libchecktestdata.cc @@ -31,6 +31,7 @@ #include "parser.h" #include "libchecktestdata.hpp" #include "databuffer.hpp" +#include "bigint.hpp" using namespace std; @@ -62,8 +63,8 @@ vector program; // This stores array-type variables like x[i,j] as string "x" and // vector of the indices. Plain variables are stored using an index // vector of zero length. -typedef map,value_t> indexmap; -typedef map>> valuemap; +typedef map,value_t> indexmap; +typedef map>> valuemap; map variable, preset; map rev_variable, rev_preset; @@ -151,14 +152,22 @@ void readtestdata(istream &in) { debug("reading testdata..."); - stringstream ss; - ss << in.rdbuf(); - if ( in.fail() ) { - cerr << "error reading testdata" << endl; - exit(exit_failure); + in.seekg(0, std::ios::end); + auto size = in.tellg(); + if (size != -1) { + std::string buffer(size, '\0'); + in.seekg(0); + if (size != 0) { + in.read(&buffer[0], size); + } + if (!in.fail()) { + data = databuffer(std::move(buffer)); + return; + } } - data = databuffer(ss.str()); + cerr << "error reading testdata" << endl; + exit(exit_failure); } void error(const string &msg = "") @@ -197,15 +206,17 @@ long string2int(const string &s) return res; } -value_t eval(const expr&); // forward declaration +// forward declarations +value_t eval(const expr&); +bigint eval_as_int(const expr& e); value_t getvar(const expr& var, int use_preset = 0) { - // Construct index array. The cast to mpz_class automatically - // verifies that the index value is of type mpz_class. - vector ind; + // Construct index array. The cast to bigint automatically + // verifies that the index value is of type bigint. + vector ind; for(size_t i=0; i ind; + // Construct index array. The cast to bigint automatically + // verifies that the index value is of type bigint. + vector ind; for(size_t i=0; i *varlist = &variable; @@ -283,7 +294,12 @@ value_t value(const expr& x) mpz_class intval; mpf_class fltval; - if ( intval.set_str(x.val,0)==0 ) return x.cache = value_t(intval); + if ( intval.set_str(x.val,0)==0 ) { + bigint c = bigint(intval); + c.shrink(); + x.cached_long = c.small; + return x.cache = value_t(c); + } else if ( fltval.set_str(x.val,0)==0 ) { // Set sufficient precision: if ( fltval.get_prec()<4*x.val.length() ) { @@ -299,15 +315,15 @@ value_t value(const expr& x) template struct arith_result { typedef typename conditional< - is_same::value && is_same::value, - mpz_class, + is_same::value && is_same::value, + bigint, mpf_class >::type type; }; template struct arith_compatible { - constexpr static bool value = (is_same::value || is_same::value) && - (is_same::value || is_same::value); + constexpr static bool value = (is_same::value || is_same::value) && + (is_same::value || is_same::value); }; template struct is_comparable { @@ -368,13 +384,13 @@ DECL_VALUE_CMPOP(!=,ne) value_t operator -(const value_t &x) { - return value_t(mpz_class(0)) - x; + return value_t(bigint(0)) - x; } value_t operator %(const value_t &x, const value_t &y) { - const mpz_class *xp, *yp; - if ( (xp = boost::get(&x.val)) && (yp = boost::get(&y.val))) { + const bigint *xp, *yp; + if ( (xp = boost::get(&x.val)) && (yp = boost::get(&y.val))) { auto res = *xp; res %= *yp; return value_t(res); @@ -385,31 +401,34 @@ value_t operator %(const value_t &x, const value_t &y) struct pow_visitor : public boost::static_visitor { template - value_t operator()(const B& b, const E& e) const { + value_t operator()(const B&, const E&) const { cerr << "only integer exponents allowed in " << program[prognr] << endl; exit(exit_failure); } template - value_t operator()(const B& b, const mpz_class& e) const { + value_t operator()(const B& b, const bigint& e) const { if(!e.fits_ulong_p()) { cerr << "integer exponent " << e << " does not fit in unsigned long in " << program[prognr] << endl; exit(exit_failure); } - return pow(b, e); + unsigned long f = e.get_ui(); + return pow(b, f); } - value_t pow(const mpz_class& b, const mpz_class& e) const { + value_t pow(const bigint& b, unsigned long e) const { mpz_class res; - mpz_pow_ui(res.get_mpz_t(), b.get_mpz_t(), e.get_ui()); - return value_t(res); + mpz_pow_ui(res.get_mpz_t(), b.to_mpz().get_mpz_t(), e); + bigint res2(res); + res2.shrink(); + return value_t(res2); } - value_t pow(const mpf_class& b, const mpz_class& e) const { + value_t pow(const mpf_class& b, unsigned long e) const { mpf_class res; - mpf_pow_ui(res.get_mpf_t(), b.get_mpf_t(), e.get_ui()); + mpf_pow_ui(res.get_mpf_t(), b.get_mpf_t(), e); return value_t(res); } template - value_t pow(const B&, const mpz_class&) const { + value_t pow(const B&, unsigned long) const { cerr << "exponentiation base must be of arithmetic type in " << program[prognr] << endl; exit(exit_failure); @@ -426,7 +445,7 @@ value_t evalfun(args_t funargs) string fun = funargs[0].val; if ( fun=="STRLEN" ) { string str = eval(funargs[1]).getstr(); - return value_t(mpz_class(str.length())); + return value_t(bigint(str.length())); } cerr << "unknown function '" << fun << "' in " @@ -486,10 +505,24 @@ value_t eval(const expr& e) << program[prognr] << endl; exit(exit_failure); } - if ( cachable(e) ) e.cache = res; + if ( cachable(e) ) { + e.cache = res; + if ( res.val.which()==value_int ) { + bigint x = res; + e.cached_long = x.small; + } + } return res; } +bigint eval_as_int(const expr& e) +{ + if ( e.cached_long != LONG_MIN ) { + return bigint(e.cached_long); + } + return eval(e); +} + bool compare(const expr& cmp) { string op = cmp.val; @@ -537,7 +570,7 @@ bool unique(const args_t& varlist) vector,const indexmap::key_type*>> tuples; for(indexmap::iterator it=vars[0]->begin(); it!=vars[0]->end(); ++it) { - const vector &index = it->first; + const vector &index = it->first; vector tuple; for(size_t i=0; ifind(index); @@ -754,17 +787,17 @@ string genregex(const string &exp) possible.insert(exp[i]); } } - vector possibleVec; + vector possible_vec; if ( inverted ) { for (char c = ' '; c <= '~'; c++) { - if ( !possible.count(c) ) possibleVec.push_back(c); + if ( !possible.count(c) ) possible_vec.push_back(c); } } else { - copy(possible.begin(), possible.end(), std::back_inserter(possibleVec)); + copy(possible.begin(), possible.end(), std::back_inserter(possible_vec)); } int mult = getmult(exp, i); for (int cnt = 0; cnt < mult; cnt++) { - res += possibleVec[get_random(possibleVec.size())]; + res += possible_vec[get_random(possible.size())]; } } break; @@ -820,7 +853,7 @@ void getdecrange(const command& cmd, int *decrange) if ( arg.val.which()!=value_int ) { error((i==0 ? "min":"max")+string("decimal is not an integer")); } - mpz_class val = arg; + bigint val = arg; if ( val<0 || val>=INT_MAX ) { error(string("the value of ")+(i==0 ? "min":"max")+"decimal is out of range"); } @@ -839,9 +872,9 @@ void gentoken(command cmd, ostream &datastream) else if ( cmd.name()=="NEWLINE" ) datastream << '\n'; else if ( cmd.name()=="INT" ) { - mpz_class lo = eval(cmd.args[0]); - mpz_class hi = eval(cmd.args[1]); - mpz_class x(lo + gmp_rnd.get_z_range(hi - lo + 1)); + bigint lo = eval(cmd.args[0]); + bigint hi = eval(cmd.args[1]); + bigint x(lo.to_mpz() + gmp_rnd.get_z_range((hi - lo + 1).to_mpz())); if ( cmd.nargs()>=3 ) { // Check if we have a preset value, then override the @@ -950,12 +983,16 @@ void checktoken(const command& cmd) // Accepts format (0|-?[1-9][0-9]*), i.e. no leading zero's // and no '-0' accepted. string num; - while ( isdigit(data.peek()) || (num.empty() && data.peek()=='-') ) { + if ( data.peek()=='-' ) { + data.readchar(); + num += '-'; + } + while ( isdigit(data.peek()) ) { num += data.readchar(); } - mpz_class lo = eval(cmd.args[0]); - mpz_class hi = eval(cmd.args[1]); + bigint lo = eval_as_int(cmd.args[0]); + bigint hi = eval_as_int(cmd.args[1]); // debug("%s <= %s <= %s",lo.get_str().c_str(),num.c_str(),hi.get_str().c_str()); if ( cmd.nargs()>=3 ) debug("'%s' = '%s'", @@ -967,7 +1004,7 @@ void checktoken(const command& cmd) if ( num.size()>=1 && num[0]=='-' && (num.size()==1 || num[1]=='0') ) error("invalid minus sign (-0 not allowed)"); - mpz_class x(num); + bigint x(num); if ( xhi ) error("value out of range"); if ( cmd.nargs()>=3 ) setvar(cmd.args[2],value_t(x)); @@ -1136,11 +1173,11 @@ void checktestdata(ostream &datastream) if ( cmd.name()=="REPI" || cmd.name()=="WHILEI" ) { loopvar = 1; - setvar(cmd.args[0],value_t(mpz_class(i))); + setvar(cmd.args[0],value_t(bigint(i))); } if ( cmd.name()=="REP" || cmd.name()=="REPI" ) { - mpz_class n = eval(cmd.args[loopvar]); + bigint n = eval(cmd.args[loopvar]); if ( !n.fits_ulong_p() ) { cerr << "'" << n << "' does not fit in an unsigned long in " << program[prognr] << endl; @@ -1179,7 +1216,7 @@ void checktestdata(ostream &datastream) } checktestdata(datastream); i++; - if ( loopvar ) setvar(cmd.args[0],value_t(mpz_class(i))); + if ( loopvar ) setvar(cmd.args[0],value_t(bigint(i))); } // And skip to end of loop diff --git a/parsetype.cc b/parsetype.cc index 67707f3..36c0e13 100644 --- a/parsetype.cc +++ b/parsetype.cc @@ -60,15 +60,15 @@ std::ostream& operator<<(std::ostream& os, const none_t&) { return os << ""; } -value_t::operator mpz_class() const +value_t::operator bigint() const { - return boost::get(val); + return boost::get(val); } value_t::operator mpf_class() const { - if(const mpz_class* p = boost::get(&val)) - return *p; + if(const bigint* p = boost::get(&val)) + return (*p).to_mpz(); return boost::get(val); } diff --git a/parsetype.hpp b/parsetype.hpp index b84e207..15ea44c 100644 --- a/parsetype.hpp +++ b/parsetype.hpp @@ -5,9 +5,12 @@ #include #include #include +#include #include #include +#include "bigint.hpp" + struct parse_t; typedef std::string val_t; @@ -24,14 +27,14 @@ struct none_t {}; std::ostream& operator<<(std::ostream&, const none_t&); struct value_t { - boost::variant val; + boost::variant val; value_t(): val(none_t()) {} - explicit value_t(mpz_class x): val(x) {} + explicit value_t(bigint x): val(x) {} explicit value_t(mpf_class x): val(x) {} explicit value_t(std::string x): val(x) {} - operator mpz_class() const; + operator bigint() const; operator mpf_class() const; // This is a member function instead of a casting operator, since @@ -78,6 +81,7 @@ struct parse_t { ~ uninitialized object, to detect unset default arguments */ + mutable long cached_long = LONG_MIN; mutable checktestdata::value_t cache; parse_t(): val(), args(), op('~') {} diff --git a/tests/test_34_data.in b/tests/test_34_data.in new file mode 100644 index 0000000..947ac3e --- /dev/null +++ b/tests/test_34_data.in @@ -0,0 +1,29 @@ +5 +9223372036854775807 1 9223372036854775808 +2 9223372036854775807 9223372036854775809 +-9223372036854775808 -1 -9223372036854775809 +-9223372036854775809 9223372036854775809 0 +-10000000000000000000 20000000000000000000 10000000000000000000 +2 +-9223372036854775809 -1 -9223372036854775808 +9223372036854775808 1 9223372036854775807 +3 +-9223372036854775808 -1 9223372036854775808 +9223372036854775808 -1 -9223372036854775808 +100000000000000 100000000000000 10000000000000000000000000000 +7 +-9223372036854775808 -1 9223372036854775808 +9223372036854775808 -1 -9223372036854775808 +9223372036854775808 10 922337203685477580 +-9223372036854775808 -10 922337203685477580 +9223372036854775808 -10 -922337203685477580 +123456 10000000000000000000000000000 0 +10000000000000000000000000000 -100 -100000000000000000000000000 +7 +-9223372036854775808 -1 0 +9223372036854775808 -1 0 +9223372036854775808 10 8 +-9223372036854775808 -10 -8 +9223372036854775808 -10 8 +123456 10000000000000000000000000000 123456 +10000000000000000000000000000 -100 0 diff --git a/tests/test_34_prog.in b/tests/test_34_prog.in new file mode 100644 index 0000000..130097e --- /dev/null +++ b/tests/test_34_prog.in @@ -0,0 +1,16 @@ +# IGNORE GENERATE TESTING +SET(lim=10^100) +INT(1,1000,n) NEWLINE +REP(n) INT(-lim,lim,a) SPACE INT(-lim,lim,b) SPACE INT(-lim,lim,c) ASSERT(a+b==c) NEWLINE END +INT(1,1000,n) NEWLINE +REP(n) INT(-lim,lim,a) SPACE INT(-lim,lim,b) SPACE INT(-lim,lim,c) ASSERT(a-b==c) NEWLINE END +INT(1,1000,n) NEWLINE +REP(n) INT(-lim,lim,a) SPACE INT(-lim,lim,b) SPACE INT(-lim,lim,c) ASSERT(a*b==c) NEWLINE END +INT(1,1000,n) NEWLINE +REP(n) INT(-lim,lim,a) SPACE INT(-lim,lim,b) SPACE INT(-lim,lim,c) ASSERT(a/b==c) NEWLINE END +INT(1,1000,n) NEWLINE +REP(n) INT(-lim,lim,a) SPACE INT(-lim,lim,b) SPACE INT(-lim,lim,c) ASSERT(a%b==c) NEWLINE END + +ASSERT( (9223372036854775807+1)-1 == 9223372036854775807 ) +ASSERT( (-9223372036854775808*-1)*-1 == -9223372036854775808 ) +ASSERT( 10^100 == 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 )