Both APIs are "thread-safe", that is the implementations do all necessary locking so that concurrent calls by multiple threads function correctly.
Note: We assume that the target platform's mutual exclusion primitives are fast relative to the time for a call on the prediction procedures.
typedef int ee_bool;
class ee_person { // an opaque uid for a person
public:
int uid;
ee_person(int uu = 0): uid(uu) {};
};
inline int operator==(const ee_person& x, const ee_person& xx)
{ return x.uid == xx.uid; }
const ee_person ee_person_null(0); // unused "null" value
class ee_item { // an opaque uid for an item
public:
int uid;
ee_item(int uu = 0): uid(uu) {};
};
inline int operator==(const ee_item& y, const ee_item& yy)
{ return y.uid == yy.uid; }
const ee_item ee_item_null(0); // unused "null" value
class ee_vote { // a vote
public:
float s; // score
float w; // weight
ee_vote(float ss = 0.0f, float ww = 0.0f): s(ss), w(ww) {};
};
const ee_vote ee_vote_null(0.0f, 0.0f); // unused "null" value
const ee_model_ints = 32;
class ee_model { // a model
public:
int a[ee_model_ints];
};
const ee_model ee_model_null = { { 0 } }; // initial "null" value
Here we define the logical schema as if this information is stored in a relational database with five tables, Persons, Items, Votes, VotesLog, and Generations. Of course, the application programmer is free to merge these tables with existing tables, or to implement them in a different storage medium such as a file system, subject to performance considerations.
The Persons table contains records of the form:
ee_person person; // unique key
ee_model model;
The
Items table contains records of the form:
ee_item item; // unique key
ee_model model;
The
Votes table contains records of the form:
ee_person person; // unique key: person, item
ee_item item;
ee_vote vote;
The
VotesLog table contains records of the form:
ee_person person; // unique key: person, item, time
ee_item item;
ee_vote vote;
timestamp time; // (e.g., DATE) indexed by time
The
VotesLog table helps the solver process detect updates to the
Votes table made by the predictor process as it records new votes.
Every transaction that modifies (inserts, updates, or deletes a record in)
Votes must also insert a corresponding record in VotesLog. A
deletion to Votes is indicated by a record in VotesLog with
v equal to ee_vote_null. The time field determines
the order in which the solver processes the entries, and so must have enough
precision to resolve successive updates to a vote, e.g., when a person decides
to change or delete a vote.The Generations table contains a single record of the form:
int id; // always equal to one
int generation; // initially equal to zero
The
Generations table helps the predictor process notice updates to the
Items table made by the solver process. Each time the solver has
completed a set of updates to the Items table, it must update the
record in the Generations table by increasing the generation
field by one.
1. The models in the Persons and Items tables don't have to be updated atomically; they may be from different runs of the solver.
2. If there is a vote for a person or item without a model, a model will be generated internally and will be available from get_*_model, etc. If there is a person or item without any votes, its model will be very near the null model.
3. The VotesLog table, if applied to the Votes table (doing the insertions, modifications, deletions), gives the "truth" from which the system works. It is therefore okay for the VotesLog table to contain votes that have already been applied to the Votes table.
#include "eetypes.h"
class ee_predict_impl; // private implementation class
class ee_predict {
public:
ee_predict(); // constructor
/* Multiple distributed predictors can increase performance and
availability; changes to the person/item/votes model database
should be propagated via reset_vote() to each instance. */
virtual ~ee_predict(); // destructor
ee_vote predict_item(const ee_person x, const ee_item y);
/* Return person x's predicted vote (score, weight) for item y. */
ee_vote predict_person(const ee_person x, const ee_person xx);
/* Return person x's predicted correlation (score, weight) with
person xx. predict_person is not necessarily symmetric with
respect to x and xx; it expresses the interest of person x in
person xx. */
void reset_vote(const ee_person x, const ee_item y, const ee_vote v);
/* Note v as person x's new or revised vote for item y, or delete
if v is null. To delete a whole person or item, delete all the
votes for that person or item. This call should follow a
change in the nonvolatile votes table; it resets any cache the
predictor may have kept. */
void reset_all_models(void);
/* Reset the internal state to include no knowledge of any item's
model. This should be called each time new models are available
from the solver. This call should follow any mass changes to
the nonvolatile model data and resets any cache the predictor
might have kept. */
void reset_all(void);
/* An extension of reset_all_models that also clears any
cache of votes. */
/* Pure virtual functions, to be implemented by the client in a class
derived from ee_predict: */
virtual void get_person_votes(
const ee_person x,
void (*setvote)(
const ee_person x, const ee_item y, const ee_vote& v, void* a),
void* arg) = 0;
/* Retrieve all <person, item, vote> records with person==x from
nonvolatile storage, calling (*setvote)(person, item, vote, arg)
for each. */
virtual void get_item_model(const ee_item y, ee_model& m) = 0;
/* Retrieve item y's model from nonvolatile storage, and return it
in m; if there is no model, return the null model. */
private:
// Disable the copy constructor and assignment operator:
ee_predict(const ee_predict& rhs);
ee_predict& operator=(const ee_predict& rhs);
ee_predict_impl *impl; // instance data
};
ee_solve uses an iterative refinement algorithm. The longer this algorithm runs, the more accurate the predictions that are generated from the models it produces. The progress of the algorithm is reflected in a quantity called the residue, which measures the remaining opportunity for further refinement. The residue is proportional to the count of votes, decreases with time, and approaches an asymptote greater than zero. Functions are provided to retrieve the current residue and the count of votes.
It's possible to do a "cold start" of ee_solve, giving it only a set of votes. However, it will more quickly reach low residue values if it is given models from a previous run as well as the set of votes. As shown in the Sample Application section, it can be run continously by supplying it with a log of changes to the Votes database.
#include "eetypes.h"
// Private implementation classes:
class ee_solve_impl;
class ee_solve_person_iterator_impl;
class ee_solve_item_iterator_impl;
class ee_solve { // the solver, normally with exactly one instance
public:
ee_solve(); // constructor
~ee_solve(); // destructor
void restart(void);
/* Reset the internal state to that following a constructor. */
void set_vote(const ee_person x, const ee_item y, const ee_vote v);
/* Set or update person x's vote for item y to be v, or delete if
v is null. */
void set_person_model(const ee_person x, const ee_model& m);
/* Set the model for person x to be m. Before initialization, a
model is treated as null. */
void set_item_model(const ee_item y, const ee_model& m);
/* Set the model for item y to be m. Before initialization, a
model is treated as null. */
void solve_step();
/* Update models (a potentially lengthy operation!). solve_step
is meant to be called repeatedly until approximate convergence
is reached; see get_residue. */
double get_residue();
/* Return the current value of the "residue", a non-negative
value. Calling solve_step decreases in the residue; setting
votes or models can increase it. The residue grows with the
number of votes, and the client can use the residue per vote to
determine approximate convergence. */
int get_vote_count();
/* Return the current number of votes; overridden votes are not
counted. */
void get_person_model(const ee_person x, ee_model& m);
/* Set m to person x's model. */
void get_item_model(const ee_item y, ee_model& m);
/* Set m to item y's model. */
class person_iterator {
public:
person_iterator(ee_solve& s); // constructor
~person_iterator(); // destructor
ee_person get_next(); // iterate over persons
private:
// Disable the copy constructor and assignment operator:
person_iterator(const person_iterator& rhs);
person_iterator& operator=(const person_iterator& rhs);
ee_solve_person_iterator_impl *impl; // instance data
};
/* A person_iterator is used to discover all the persons known to
the solver. A person is known to the solver if one or more
votes by that person have been registered via set_vote and not
subsequently deleted, or if a model for that person has been
registered via set_person_model and not subsequently deleted.
Each call to get_next returns another person not previously
returned since the iterator was constructed, or ee_person_null
if none remain. The result of interleaving calls to get_next
with calls to set_vote or set_person_model is undefined. */
class item_iterator {
public:
item_iterator(ee_solve& s); // constructor
~item_iterator(); // destructor
ee_item get_next(); // iterate over items
private:
// Disable the copy constructor and assignment operator:
item_iterator(const item_iterator& rhs);
item_iterator& operator=(const item_iterator& rhs);
ee_solve_item_iterator_impl *impl; // instance data
};
/* An item_iterator is used to discover all the items known to the
solver; see person_iterator for details. */
friend class person_iterator;
friend class item_iterator;
private:
// Disable the copy constructor and assignment operator:
ee_solve(const ee_solve& rhs);
ee_solve& operator=(const ee_solve& rhs);
ee_solve_impl *impl; // instance data
};