Merge lp:~vadim-tk/sysbench/zipf-distribution into lp:sysbench

Proposed by Alexey Kopytov
Status: Needs review
Proposed branch: lp:~vadim-tk/sysbench/zipf-distribution
Merge into: lp:sysbench
Diff against target: 158 lines (+94/-2)
2 files modified
sysbench/sysbench.c (+91/-2)
sysbench/sysbench.h (+3/-0)
To merge this branch: bzr merge lp:~vadim-tk/sysbench/zipf-distribution
Reviewer Review Type Date Requested Status
Alexey Kopytov Pending
Review via email: mp+104775@code.launchpad.net
To post a comment you must log in.
110. By Vadim Tkachenko

Added self-similar distribution

111. By Vadim Tkachenko

Renamed self-similar into Pareto

Unmerged revisions

111. By Vadim Tkachenko

Renamed self-similar into Pareto

110. By Vadim Tkachenko

Added self-similar distribution

109. By root <email address hidden>

change default theta for zipf

108. By root <email address hidden>

Initial naive implementation of Zipf distribution

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'sysbench/sysbench.c'
2--- sysbench/sysbench.c 2012-03-21 08:20:02 +0000
3+++ sysbench/sysbench.c 2012-05-11 19:27:18 +0000
4@@ -81,7 +81,9 @@
5 {
6 DIST_TYPE_UNIFORM,
7 DIST_TYPE_GAUSSIAN,
8- DIST_TYPE_SPECIAL
9+ DIST_TYPE_SPECIAL,
10+ DIST_TYPE_ZIPF,
11+ DIST_TYPE_PARETO
12 } rand_dist_t;
13
14 /* Event queue data type for the tx-rate mode */
15@@ -99,6 +101,16 @@
16 static unsigned int rand_res;
17 static int rand_seed; /* optional seed set on the command line */
18
19+/* parameters for zipf distribution */
20+static double zipf_theta; /* parameter theta */
21+static unsigned int zipf_nitems = 0; /* number of items to choose from */
22+static double zipf_zetan; /* precalculated ZetaN, based on nitems */
23+static double zipf_zeta2; /* precalculated Zeta2, based on theta */
24+
25+/* parameters for Pareto distribution */
26+static double pareto_h; /* parameter h */
27+static double pareto_power; /* parameter pre-calculated by h */
28+
29 /* Random seed used to generate unique random numbers */
30 static unsigned long long rnd_seed;
31 /* Mutex to protect random seed */
32@@ -131,14 +143,16 @@
33 {"help", "print help and exit", SB_ARG_TYPE_FLAG, NULL},
34 {"version", "print version and exit", SB_ARG_TYPE_FLAG, "off"},
35 {"rand-init", "initialize random number generator", SB_ARG_TYPE_FLAG, "off"},
36- {"rand-type", "random numbers distribution {uniform,gaussian,special}", SB_ARG_TYPE_STRING,
37+ {"rand-type", "random numbers distribution {uniform,gaussian,special,zipf,pareto}", SB_ARG_TYPE_STRING,
38 "special"},
39 {"rand-spec-iter", "number of iterations used for numbers generation", SB_ARG_TYPE_INT, "12"},
40 {"rand-spec-pct", "percentage of values to be treated as 'special' (for special distribution)",
41 SB_ARG_TYPE_INT, "1"},
42 {"rand-spec-res", "percentage of 'special' values to use (for special distribution)",
43 SB_ARG_TYPE_INT, "75"},
44+ {"rand-zipf-t", "parameter theta for zipf distibution", SB_ARG_TYPE_FLOAT, "1.16"},
45 {"rand-seed", "seed for random number generator, ignored when 0", SB_ARG_TYPE_INT, "0"},
46+ {"rand-pareto-h", "parameter h for pareto distibution", SB_ARG_TYPE_FLOAT, "0.2"},
47 {NULL, NULL, SB_ARG_TYPE_NULL, NULL}
48 };
49
50@@ -1020,6 +1034,16 @@
51 rand_type = DIST_TYPE_SPECIAL;
52 rand_func = &sb_rand_special;
53 }
54+ else if (!strcmp(s, "zipf"))
55+ {
56+ rand_type = DIST_TYPE_ZIPF;
57+ rand_func = &sb_rand_zipf;
58+ }
59+ else if (!strcmp(s, "pareto"))
60+ {
61+ rand_type = DIST_TYPE_PARETO;
62+ rand_func = &sb_rand_pareto;
63+ }
64 else
65 {
66 log_text(LOG_FATAL, "Invalid random numbers distribution: %s.", s);
67@@ -1030,6 +1054,12 @@
68 rand_pct = sb_get_value_int("rand-spec-pct");
69 rand_res = sb_get_value_int("rand-spec-res");
70
71+ zipf_theta = sb_get_value_float("rand-zipf-t");
72+ zipf_zeta2 = sb_rand_zeta(2., zipf_theta);
73+
74+ pareto_h = sb_get_value_float("rand-pareto-h");
75+ pareto_power = log(pareto_h)/log(1.0-pareto_h);
76+
77 sb_globals.tx_rate = sb_get_value_int("tx-rate");
78 sb_globals.report_interval = sb_get_value_int("report-interval");
79
80@@ -1226,6 +1256,65 @@
81 return a + sum / rand_iter;
82 }
83
84+/* Pareto distribution */
85+
86+int sb_rand_pareto(int a, int b)
87+{
88+ double randf;
89+ randf = (double) sb_rnd() / (double) SB_MAX_RND;
90+
91+ return a + (int)(b - a + 1) * pow(randf, pareto_power);
92+}
93+
94+
95+/* zipf distribution */
96+
97+/* aux function to calculate zeta */
98+double sb_rand_zeta(int n, double theta)
99+{
100+ int i;
101+ double ans=0.0;
102+
103+ for (i=1; i <= n; i++)
104+ ans += pow(1./(double)i, theta);
105+ return(ans);
106+}
107+
108+
109+int sb_rand_zipf(int a, int b)
110+{
111+
112+ double alpha, eta, rand_uni, rand_z;
113+ unsigned int n;
114+ unsigned int val;
115+
116+ n = b - a + 1;
117+
118+ /* we pre-cache zipf_zetan, as calculation is slow */
119+ if (n != zipf_nitems)
120+ {
121+ zipf_zetan = sb_rand_zeta(n, zipf_theta);
122+ zipf_nitems = n;
123+ }
124+
125+ alpha = 1. / (1. - zipf_theta);
126+ eta = (1. - pow(2./n, 1. - zipf_theta)) / (1. - zipf_zeta2/zipf_zetan);
127+
128+ rand_uni = (double) sb_rnd() / (double) SB_MAX_RND;
129+ rand_z = rand_uni * zipf_zetan;
130+
131+ if (rand_z < 1.)
132+ {
133+ val = 1;
134+ } else if (rand_z < (1. + pow(0.5, zipf_theta))) {
135+ val = 2;
136+ } else {
137+ val = 1 + (unsigned int)(n * pow(eta*rand_uni - eta + 1., alpha));
138+ }
139+
140+ return a + val - 1;
141+}
142+
143 /* 'special' distribution */
144
145 int sb_rand_special(int a, int b)
146
147=== modified file 'sysbench/sysbench.h'
148--- sysbench/sysbench.h 2012-03-18 22:35:16 +0000
149+++ sysbench/sysbench.h 2012-05-11 19:27:18 +0000
150@@ -227,6 +227,9 @@
151 int sb_rand_uniform(int, int);
152 int sb_rand_gaussian(int, int);
153 int sb_rand_special(int, int);
154+int sb_rand_zipf(int, int);
155+int sb_rand_pareto(int, int);
156+double sb_rand_zeta(int n, double theta);
157 int sb_rand_uniq(int a, int b);
158 void sb_rand_str(const char *, char *);
159

Subscribers

People subscribed via source and target branches

to status/vote changes: