Skip to content

Commit e8fbdbe

Browse files
GiggleLiu and isPANN authored
Fix #408: [Model] ExpectedRetrievalCost (#743)
* Add plan for #408: [Model] ExpectedRetrievalCost
* Implement #408: [Model] ExpectedRetrievalCost
* chore: remove plan file after implementation
* fix formatting after merge with main

---------

Co-authored-by: Xiwei Pan <xiwei.pan@connect.hkust-gz.edu.cn>
1 parent 84e189b commit e8fbdbe

9 files changed

Lines changed: 469 additions & 18 deletions

File tree

docs/paper/reductions.typ

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@
148148
"MinimumTardinessSequencing": [Minimum Tardiness Sequencing],
149149
"MultipleChoiceBranching": [Multiple Choice Branching],
150150
"MultipleCopyFileAllocation": [Multiple Copy File Allocation],
151+
"ExpectedRetrievalCost": [Expected Retrieval Cost],
151152
"MultiprocessorScheduling": [Multiprocessor Scheduling],
152153
"PartitionIntoPathsOfLength2": [Partition into Paths of Length 2],
153154
"PartitionIntoTriangles": [Partition Into Triangles],
@@ -2460,6 +2461,45 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76],
24602461
]
24612462
}
24622463

2464+
#{
2465+
let x = load-model-example("ExpectedRetrievalCost")
2466+
let K = x.instance.bound
2467+
[
2468+
#problem-def("ExpectedRetrievalCost")[
2469+
Given a set $R = {r_1, dots, r_n}$ of records, access probabilities $p(r) in [0, 1]$ with $sum_(r in R) p(r) = 1$, a positive integer $m$ of circular storage sectors, and a bound $K$, determine whether there exists a partition $R_1, dots, R_m$ of $R$ such that
2470+
$sum_(i=1)^m sum_(j=1)^m p(R_i) p(R_j) d(i, j) <= K,$
2471+
where $p(R_i) = sum_(r in R_i) p(r)$ and
2472+
$d(i, j) = j - i - 1$ for $1 <= i < j <= m$, while $d(i, j) = m - i + j - 1$ for $1 <= j <= i <= m$.
2473+
][
2474+
Expected Retrieval Cost is storage-and-retrieval problem SR4 in Garey and Johnson @garey1979. The model abstracts a drum-like storage device with fixed read heads: placing probability mass evenly around the cycle reduces the expected waiting time until the next requested sector rotates under the head. Cody and Coffman introduced the formulation and analyzed exact and heuristic record-allocation algorithms for fixed numbers of sectors @codycoffman1976. Garey and Johnson record that the general decision problem is NP-complete in the strong sense via transformations from Partition and 3-Partition @garey1979. The implementation in this repository uses one $m$-ary variable per record, so the registered exact baseline enumerates $m^n$ assignments. For practicality, the code stores the probabilities and bound as floating-point values even though the book states $K$ as an integer.
2475+
2476+
*Example.* Take six records with probabilities $(0.2, 0.15, 0.15, 0.2, 0.1, 0.2)$, three sectors, and $K = #K$. Assign
2477+
$R_1 = {r_1, r_5}$, $R_2 = {r_2, r_4}$, and $R_3 = {r_3, r_6}$.
2478+
Then the sector masses are $(p(R_1), p(R_2), p(R_3)) = (0.3, 0.35, 0.35)$.
2479+
For $m = 3$, the non-zero latencies are $d(1, 1) = d(2, 2) = d(3, 3) = 2$, $d(1, 3) = d(2, 1) = d(3, 2) = 1$, and the remaining pairs contribute 0. Hence the expected retrieval cost is $1.0025 <= #K$, so the allocation is satisfying.
2480+
2481+
#pred-commands(
2482+
"pred create --example ExpectedRetrievalCost -o expected-retrieval-cost.json",
2483+
"pred solve expected-retrieval-cost.json --solver brute-force",
2484+
"pred evaluate expected-retrieval-cost.json --config " + x.optimal_config.map(str).join(","),
2485+
)
2486+
2487+
#figure(
2488+
table(
2489+
columns: 3,
2490+
inset: 6pt,
2491+
stroke: 0.5pt + luma(180),
2492+
[Sector], [Records], [Mass],
2493+
[$S_1$], [$r_1, r_5$], [$0.3$],
2494+
[$S_2$], [$r_2, r_4$], [$0.35$],
2495+
[$S_3$], [$r_3, r_6$], [$0.35$],
2496+
),
2497+
caption: [Expected Retrieval Cost example with cyclic sector order $S_1 -> S_2 -> S_3 -> S_1$. The satisfying allocation yields masses $(0.3, 0.35, 0.35)$ and total cost $1.0025$.],
2498+
) <fig:expected-retrieval-cost>
2499+
]
2500+
]
2501+
}
2502+
24632503
== Set Problems
24642504

24652505
#{

docs/paper/references.bib

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1116,6 +1116,17 @@ @article{coffman1972
11161116
doi = {10.1007/BF00288685}
11171117
}
11181118

1119+
@article{codycoffman1976,
1120+
author = {R. A. Cody and E. G. Coffman, Jr.},
1121+
title = {Record Allocation for Minimizing Expected Retrieval Costs on Drum-Like Storage Devices},
1122+
journal = {Journal of the ACM},
1123+
volume = {23},
1124+
number = {1},
1125+
pages = {103--115},
1126+
year = {1976},
1127+
doi = {10.1145/321921.321933}
1128+
}
1129+
11191130
@inproceedings{cordella2004,
11201131
author = {Luigi P. Cordella and Pasquale Foggia and Carlo Sansone and Mario Vento},
11211132
title = {A (Sub)Graph Isomorphism Algorithm for Matching Large Graphs},

problemreductions-cli/src/cli.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,7 @@ Flags by problem type:
249249
CapacityAssignment --capacities, --cost-matrix, --delay-matrix, --cost-budget, --delay-budget
250250
SubsetSum --sizes, --target
251251
SumOfSquaresPartition --sizes, --num-groups, --bound
252+
ExpectedRetrievalCost --probabilities, --num-sectors, --latency-bound
252253
PaintShop --sequence
253254
MaximumSetPacking --sets [--weights]
254255
MinimumHittingSet --universe, --sets
@@ -474,6 +475,9 @@ pub struct CreateArgs {
474475
/// Item sizes for BinPacking (comma-separated, e.g., "3,3,2,2")
475476
#[arg(long)]
476477
pub sizes: Option<String>,
478+
/// Record access probabilities for ExpectedRetrievalCost (comma-separated, e.g., "0.2,0.15,0.15,0.2,0.1,0.2")
479+
#[arg(long)]
480+
pub probabilities: Option<String>,
477481
/// Bin capacity for BinPacking
478482
#[arg(long)]
479483
pub capacity: Option<String>,
@@ -546,6 +550,9 @@ pub struct CreateArgs {
546550
/// Bound parameter (lower bound for LongestCircuit; upper or length bound for BoundedComponentSpanningForest, LengthBoundedDisjointPaths, LongestCommonSubsequence, MultipleCopyFileAllocation, MultipleChoiceBranching, OptimalLinearArrangement, RootedTreeArrangement, RuralPostman, ShortestCommonSupersequence, or StringToStringCorrection)
547551
#[arg(long, allow_hyphen_values = true)]
548552
pub bound: Option<i64>,
553+
/// Upper bound on expected retrieval latency for ExpectedRetrievalCost
554+
#[arg(long)]
555+
pub latency_bound: Option<f64>,
549556
/// Upper bound on total path length
550557
#[arg(long)]
551558
pub length_bound: Option<i32>,
@@ -703,6 +710,9 @@ pub struct CreateArgs {
703710
/// Number of groups for SumOfSquaresPartition
704711
#[arg(long)]
705712
pub num_groups: Option<usize>,
713+
/// Number of sectors for ExpectedRetrievalCost
714+
#[arg(long)]
715+
pub num_sectors: Option<usize>,
706716
/// Source string for StringToStringCorrection (comma-separated symbol indices, e.g., "0,1,2,3")
707717
#[arg(long)]
708718
pub source_string: Option<String>,

problemreductions-cli/src/commands/create.rs

Lines changed: 127 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,14 @@ use problemreductions::models::graph::{
2121
use problemreductions::models::misc::{
2222
AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CapacityAssignment, CbqRelation,
2323
ConjunctiveBooleanQuery, ConsistencyOfDatabaseFrequencyTables, EnsembleComputation,
24-
FlowShopScheduling, FrequencyTable, KnownValue, LongestCommonSubsequence,
25-
MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, PartiallyOrderedKnapsack,
26-
QueryArg, RectilinearPictureCompression, ResourceConstrainedScheduling,
27-
SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost,
28-
SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness,
29-
SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, ShortestCommonSupersequence,
30-
StringToStringCorrection, SubsetSum, SumOfSquaresPartition, TimetableDesign,
24+
ExpectedRetrievalCost, FlowShopScheduling, FrequencyTable, KnownValue,
25+
LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop,
26+
PartiallyOrderedKnapsack, QueryArg, RectilinearPictureCompression,
27+
ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines,
28+
SequencingToMinimizeMaximumCumulativeCost, SequencingToMinimizeWeightedCompletionTime,
29+
SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines,
30+
SequencingWithinIntervals, ShortestCommonSupersequence, StringToStringCorrection, SubsetSum,
31+
SumOfSquaresPartition, TimetableDesign,
3132
};
3233
use problemreductions::models::BiconnectivityAugmentation;
3334
use problemreductions::prelude::*;
@@ -43,6 +44,10 @@ const MULTIPLE_COPY_FILE_ALLOCATION_EXAMPLE_ARGS: &str =
4344
"--graph 0-1,1-2,2-3 --usage 5,4,3,2 --storage 1,1,1,1 --bound 8";
4445
const MULTIPLE_COPY_FILE_ALLOCATION_USAGE: &str =
4546
"Usage: pred create MultipleCopyFileAllocation --graph 0-1,1-2,2-3 --usage 5,4,3,2 --storage 1,1,1,1 --bound 8";
47+
const EXPECTED_RETRIEVAL_COST_EXAMPLE_ARGS: &str =
48+
"--probabilities 0.2,0.15,0.15,0.2,0.1,0.2 --num-sectors 3 --latency-bound 1.01";
49+
const EXPECTED_RETRIEVAL_COST_USAGE: &str =
50+
"Usage: pred create ExpectedRetrievalCost --probabilities 0.2,0.15,0.15,0.2,0.1,0.2 --num-sectors 3 --latency-bound 1.01";
4651

4752
/// Check if all data flags are None (no problem-specific input provided).
4853
fn all_data_flags_empty(args: &CreateArgs) -> bool {
@@ -85,6 +90,7 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool {
8590
&& args.requirement_2.is_none()
8691
&& args.requirement.is_none()
8792
&& args.sizes.is_none()
93+
&& args.probabilities.is_none()
8894
&& args.capacity.is_none()
8995
&& args.sequence.is_none()
9096
&& args.sets.is_none()
@@ -110,6 +116,7 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool {
110116
&& args.tree.is_none()
111117
&& args.required_edges.is_none()
112118
&& args.bound.is_none()
119+
&& args.latency_bound.is_none()
113120
&& args.length_bound.is_none()
114121
&& args.weight_bound.is_none()
115122
&& args.cost_bound.is_none()
@@ -152,6 +159,7 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool {
152159
&& args.task_avail.is_none()
153160
&& args.alphabet_size.is_none()
154161
&& args.num_groups.is_none()
162+
&& args.num_sectors.is_none()
155163
&& args.dependencies.is_none()
156164
&& args.num_attributes.is_none()
157165
&& args.source_string.is_none()
@@ -606,6 +614,7 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str {
606614
}
607615
"MultiprocessorScheduling" => "--lengths 4,5,3,2,6 --num-processors 2 --deadline 10",
608616
"MinimumMultiwayCut" => "--graph 0-1,1-2,2-3 --terminals 0,2 --edge-weights 1,1,1",
617+
"ExpectedRetrievalCost" => EXPECTED_RETRIEVAL_COST_EXAMPLE_ARGS,
609618
"SequencingWithinIntervals" => "--release-times 0,0,5 --deadlines 11,11,6 --lengths 3,1,1",
610619
"StaffScheduling" => {
611620
"--schedules \"1,1,1,1,1,0,0;0,1,1,1,1,1,0;0,0,1,1,1,1,1;1,0,0,1,1,1,1;1,1,0,0,1,1,1\" --requirements 2,2,2,3,3,2,1 --num-workers 4 --k 5"
@@ -1510,6 +1519,59 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> {
15101519
)
15111520
}
15121521

1522+
// ExpectedRetrievalCost (probabilities + sectors + latency bound)
1523+
"ExpectedRetrievalCost" => {
1524+
let probabilities_str = args.probabilities.as_deref().ok_or_else(|| {
1525+
anyhow::anyhow!(
1526+
"ExpectedRetrievalCost requires --probabilities\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
1527+
)
1528+
})?;
1529+
let probabilities: Vec<f64> = util::parse_comma_list(probabilities_str)
1530+
.map_err(|e| anyhow::anyhow!("{e}\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"))?;
1531+
anyhow::ensure!(
1532+
!probabilities.is_empty(),
1533+
"ExpectedRetrievalCost requires at least one probability\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
1534+
);
1535+
anyhow::ensure!(
1536+
probabilities.iter().all(|p| p.is_finite() && (0.0..=1.0).contains(p)),
1537+
"ExpectedRetrievalCost probabilities must be finite values in [0, 1]\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
1538+
);
1539+
let total_probability: f64 = probabilities.iter().sum();
1540+
anyhow::ensure!(
1541+
(total_probability - 1.0).abs() <= 1e-9,
1542+
"ExpectedRetrievalCost probabilities must sum to 1.0\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
1543+
);
1544+
1545+
let num_sectors = args.num_sectors.ok_or_else(|| {
1546+
anyhow::anyhow!(
1547+
"ExpectedRetrievalCost requires --num-sectors\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
1548+
)
1549+
})?;
1550+
anyhow::ensure!(
1551+
num_sectors >= 2,
1552+
"ExpectedRetrievalCost requires at least two sectors\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
1553+
);
1554+
1555+
let latency_bound = args.latency_bound.ok_or_else(|| {
1556+
anyhow::anyhow!(
1557+
"ExpectedRetrievalCost requires --latency-bound\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
1558+
)
1559+
})?;
1560+
anyhow::ensure!(
1561+
latency_bound.is_finite() && latency_bound >= 0.0,
1562+
"ExpectedRetrievalCost requires a finite non-negative --latency-bound\n\n{EXPECTED_RETRIEVAL_COST_USAGE}"
1563+
);
1564+
1565+
(
1566+
ser(ExpectedRetrievalCost::new(
1567+
probabilities,
1568+
num_sectors,
1569+
latency_bound,
1570+
))?,
1571+
resolved_variant.clone(),
1572+
)
1573+
}
1574+
15131575
// UndirectedFlowLowerBounds (graph + capacities + lower bounds + terminals + requirement)
15141576
"UndirectedFlowLowerBounds" => {
15151577
let usage = "Usage: pred create UndirectedFlowLowerBounds --graph 0-1,0-2,1-3,2-3,1-4,3-5,4-5 --capacities 2,2,2,2,1,3,2 --lower-bounds 1,1,0,0,1,0,1 --source 0 --sink 5 --requirement 3";
@@ -7049,6 +7111,7 @@ mod tests {
70497111
requirement_1: None,
70507112
requirement_2: None,
70517113
sizes: None,
7114+
probabilities: None,
70527115
capacity: None,
70537116
sequence: None,
70547117
sets: None,
@@ -7073,6 +7136,7 @@ mod tests {
70737136
tree: None,
70747137
required_edges: None,
70757138
bound: None,
7139+
latency_bound: None,
70767140
length_bound: None,
70777141
weight_bound: None,
70787142
cost_bound: None,
@@ -7111,6 +7175,7 @@ mod tests {
71117175
craftsman_avail: None,
71127176
task_avail: None,
71137177
num_groups: None,
7178+
num_sectors: None,
71147179
domain_size: None,
71157180
relations: None,
71167181
conjuncts_spec: None,
@@ -7375,6 +7440,61 @@ mod tests {
73757440
std::fs::remove_file(output_path).ok();
73767441
}
73777442

7443+
#[test]
7444+
fn test_create_expected_retrieval_cost_json() {
7445+
use crate::dispatch::ProblemJsonOutput;
7446+
use problemreductions::models::misc::ExpectedRetrievalCost;
7447+
7448+
let mut args = empty_args();
7449+
args.problem = Some("ExpectedRetrievalCost".to_string());
7450+
args.probabilities = Some("0.2,0.15,0.15,0.2,0.1,0.2".to_string());
7451+
args.num_sectors = Some(3);
7452+
args.latency_bound = Some(1.01);
7453+
7454+
let output_path = std::env::temp_dir().join(format!(
7455+
"expected-retrieval-cost-{}.json",
7456+
std::process::id()
7457+
));
7458+
let out = OutputConfig {
7459+
output: Some(output_path.clone()),
7460+
quiet: true,
7461+
json: false,
7462+
auto_json: false,
7463+
};
7464+
7465+
create(&args, &out).unwrap();
7466+
7467+
let json = std::fs::read_to_string(&output_path).unwrap();
7468+
let created: ProblemJsonOutput = serde_json::from_str(&json).unwrap();
7469+
assert_eq!(created.problem_type, "ExpectedRetrievalCost");
7470+
7471+
let problem: ExpectedRetrievalCost = serde_json::from_value(created.data).unwrap();
7472+
assert_eq!(problem.num_records(), 6);
7473+
assert_eq!(problem.num_sectors(), 3);
7474+
assert!(problem.evaluate(&[0, 1, 2, 1, 0, 2]));
7475+
7476+
let _ = std::fs::remove_file(output_path);
7477+
}
7478+
7479+
#[test]
7480+
fn test_create_expected_retrieval_cost_requires_latency_bound() {
7481+
let mut args = empty_args();
7482+
args.problem = Some("ExpectedRetrievalCost".to_string());
7483+
args.probabilities = Some("0.2,0.15,0.15,0.2,0.1,0.2".to_string());
7484+
args.num_sectors = Some(3);
7485+
args.latency_bound = None;
7486+
7487+
let out = OutputConfig {
7488+
output: None,
7489+
quiet: true,
7490+
json: false,
7491+
auto_json: false,
7492+
};
7493+
7494+
let err = create(&args, &out).unwrap_err().to_string();
7495+
assert!(err.contains("ExpectedRetrievalCost requires --latency-bound"));
7496+
}
7497+
73787498
#[test]
73797499
fn test_create_stacker_crane_json() {
73807500
let mut args = empty_args();

src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,9 @@ pub mod prelude {
6969
pub use crate::models::misc::{
7070
AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CapacityAssignment, CbqRelation,
7171
ConjunctiveBooleanQuery, ConjunctiveQueryFoldability, ConsistencyOfDatabaseFrequencyTables,
72-
EnsembleComputation, Factoring, FlowShopScheduling, Knapsack, LongestCommonSubsequence,
73-
MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, Partition, QueryArg,
74-
RectilinearPictureCompression, ResourceConstrainedScheduling,
72+
EnsembleComputation, ExpectedRetrievalCost, Factoring, FlowShopScheduling, Knapsack,
73+
LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop,
74+
Partition, QueryArg, RectilinearPictureCompression, ResourceConstrainedScheduling,
7575
SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost,
7676
SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness,
7777
SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals,

0 commit comments

Comments
 (0)