First, some simple structs for storing the graph data:
// Node is a vertex in the graph, augmented with the bookkeeping
// fields used by the KL/FM partitioning loop.
type Node struct {
	// normal stuff
	id    int
	edges []*Edge
	// KL/FM stuff
	gain int  // current gain (external minus internal edge cost); see calcGain
	part int  // current partition this node is assigned to (0 or 1)
	lock bool // true once the node has been moved in the current pass
}
// Edge is a weighted, undirected edge between nodes n1 and n2.
type Edge struct {
	id     int
	w      int   // edge weight, summed into cut/gain computations
	n1, n2 *Node // endpoints; the order carries no meaning
}
// Graph holds the node and edge arrays plus maxN, a bound on node
// degree used to size the gain-bucket arrays to cover [-maxN:maxN].
type Graph struct {
	nodes []Node
	edges []Edge
	maxN  int // maximum neighbor count + 1; bounds the possible gain range
}
The graphs and edges are pretty standard; I added a maximum neighbor count to the graph so that I can bound the number of gain buckets I need to [-maxN:maxN]. The node is augmented to track the current partition, whether the node has already been considered, and the current gain value, calculated as follows:
// calcGain returns the KL/FM gain of n: the total weight of n's edges
// that cross the partition (external cost) minus the total weight of
// n's edges that stay inside n's partition (internal cost). A positive
// gain means moving n would reduce the cut. n is not modified.
func (n *Node) calcGain() int {
	gain := 0
	for _, e := range n.edges {
		// the neighbor is whichever endpoint of e is not n
		other := e.n1
		if other == n {
			other = e.n2
		}
		if other.part != n.part {
			gain += e.w // external edge: crossing the cut
		} else {
			gain -= e.w // internal edge: inside the partition
		}
	}
	return gain
}
// calcSwap returns the cut reduction obtained by exchanging n and n2
// across the partition, per the Kernighan-Lin formula
//
//	gain = D(n) + D(n2) - 2*c(n, n2)
//
// where D is each node's gain (external minus internal cost) and
// c(n, n2) is the weight of the edge directly connecting them
// (0 when they are not adjacent).
func (n *Node) calcSwap(n2 *Node) int {
	// find the weight of the connecting edge, if any
	var w int
	for _, e := range n.edges {
		if e.n1 == n2 || e.n2 == n2 {
			w = e.w
			break
		}
	}
	// BUG FIX: the original dereferenced the connecting edge
	// unconditionally (nil-pointer panic when n and n2 are not
	// adjacent) and subtracted its weight only once; KL requires
	// subtracting 2*w because that edge remains in the cut after
	// the swap, so both nodes' gains over-count it.
	return n.calcGain() + n2.calcGain() - 2*w
}
// calcCut returns the cut size of the current partitioning: the total
// weight of all edges whose endpoints lie in different partitions.
func (g *Graph) calcCut() int {
	cut := 0
	for i := range g.edges {
		e := &g.edges[i]
		if e.n1.part != e.n2.part {
			cut += e.w
		}
	}
	return cut
}
The following is the implementation of the gain buckets:
// BList is a single gain bucket: the set of nodes whose current gain
// equals gain, held in a list.List used as a stack.
type BList struct {
	gain  int        // the gain value this bucket represents
	nodes *list.List // nodes at this gain; nil until first push
}
// Buckets holds every gain bucket for one partition. Non-negative
// gains index into pos directly; negative gains index into neg by
// their absolute value. This split gives O(1) bucket lookup.
type Buckets struct {
	side int      // which partition (0 or 1) these buckets serve
	size int      // total node count across all buckets
	pos  []*BList // buckets for gains >= 0, indexed by gain
	neg  []*BList // buckets for gains < 0, indexed by -gain
	maxB int      // gain value of the highest-gain bucket seen (a gain, not a size)
}
// insertNode files n into the gain bucket matching n.gain, creating
// the bucket on first use, and keeps maxB tracking the highest gain
// value inserted so far (so bestNode can start its scan there).
func (b *Buckets) insertNode(n *Node) {
	// map the signed gain onto an array and a non-negative index
	arr, idx := b.pos, n.gain
	if n.gain < 0 {
		arr, idx = b.neg, -n.gain
	}
	if arr[idx] == nil {
		arr[idx] = &BList{gain: n.gain}
	}
	bl := arr[idx]
	if bl.gain > b.maxB {
		b.maxB = bl.gain
	}
	bl.pushNode(n)
	b.size++
}
// bestNode pops and returns a node from the highest-gain non-empty
// bucket, walking maxB downward past drained or never-created buckets.
// It returns nil when every bucket is empty.
func (b *Buckets) bestNode() *Node {
	for ; b.maxB > -len(b.neg); b.maxB-- {
		// select the bucket for the gain value maxB
		var bl *BList
		if b.maxB < 0 {
			bl = b.neg[-b.maxB]
		} else {
			bl = b.pos[b.maxB]
		}
		if bl == nil || bl.nodes == nil || bl.nodes.Len() == 0 {
			continue // nothing at this gain; keep scanning downward
		}
		b.size--
		return bl.popNode()
	}
	return nil
}
// updateNode moves n to the bucket matching its recomputed gain:
// remove it from the bucket keyed by the stale n.gain, refresh
// n.gain, and reinsert. The net bucket size is unchanged.
func (b *Buckets) updateNode(n *Node) {
	var side []*BList
	var pos int
	if n.gain < 0 {
		side = b.neg
		pos = -n.gain
	} else {
		side = b.pos
		pos = n.gain
	}
	// guard: the bucket for the old gain may never have been created
	if bl := side[pos]; bl != nil {
		bl.rmvNode(n)
	}
	// BUG FIX: the original called n.CalcGain() — a name mismatch
	// with the calcGain defined in this file — and discarded its
	// return value, so the node was reinserted using its stale gain.
	// Store the fresh gain so insertNode files it correctly.
	n.gain = n.calcGain()
	b.insertNode(n)
	b.size-- // compensate for insertNode's size++; this is a move, not an add
}
BList is a single gain bucket with its gain value and a list that is used as a stack. The Buckets struct tracks which partition (side) it is and the total number of nodes remaining across its buckets. I have stored the individual buckets in two arrays, one for the negative buckets and one for the non-negative buckets. I did this to allow for constant-time lookup when updating and inserting nodes. The maxB field stores the gain value of the highest-gain bucket (not its size), giving constant-time lookup of the associated bucket when finding the best node for swapping.
The Partitions, I think, is pretty straight forward:
// Partitions pairs the two partitions' gain buckets with a running
// count of how many nodes each partition currently holds.
type Partitions struct {
	parts []*Buckets // an array[2] of lists, each is a list of buckets
	sizes []int      // to keep track of the sum of bucket sizes (node count per side)
}
// partitionGraph randomly assigns each node to partition 0 or 1,
// sizes the gain-bucket range from the maximum node degree, computes
// every node's initial gain, and builds the two bucket structures.
func partitionGraph(g *Graph) *Partitions {
	// randomly assign nodes and find the maximum degree
	max := 0
	for i := range g.nodes {
		n := &g.nodes[i]
		if rand.Float64() < 0.5 {
			n.part = 0
		} else {
			n.part = 1
		}
		if l := len(n.edges); l > max {
			max = l
		}
	}
	g.maxN = max + 1 // make sure we have enough gain bucket spots
	// BUG FIX: the original called g.nodes[i].CalcGain() — a name
	// mismatch with calcGain — and discarded the result, leaving
	// n.gain at its zero value. Store the computed gain explicitly
	// so the bucket insertion below uses real values.
	for i := range g.nodes {
		g.nodes[i].gain = g.nodes[i].calcGain()
	}
	// make partitions
	p := new(Partitions)
	p.fillPartitions(g)
	return p
}
// fillPartitions (re)builds both partitions' gain buckets from
// scratch, unlocking every node and inserting it into the bucket
// for its freshly computed gain.
func (p *Partitions) fillPartitions(g *Graph) {
	p.parts = make([]*Buckets, 2)
	p.sizes = make([]int, 2)
	for i := range p.parts {
		p.parts[i] = &Buckets{
			side: i,
			pos:  make([]*BList, g.maxN),
			neg:  make([]*BList, g.maxN),
		}
	}
	for i := range g.nodes {
		n := &g.nodes[i]
		n.lock = false
		// BUG FIX: recompute the gain before inserting. When KLFM
		// rolls the partition assignments back to the best seen,
		// the stored gains still describe the pre-rollback state,
		// so nodes were filed into the wrong buckets on refill.
		n.gain = n.calcGain()
		p.insertNode(n)
	}
}
// insertNode files n into the gain buckets belonging to n's current
// partition and bumps that partition's node count.
func (p *Partitions) insertNode(n *Node) {
	p.sizes[n.part]++
	p.parts[n.part].insertNode(n)
}
I added the fillPartitions() function as a helper when initializing the partitions, but then used it in the KL/FM loop to avoid inserting locked nodes into a gain bucket. I believe that once a node has been considered for a swap, it can't be swapped again in the same pass, so I don't put it in any gain bucket. After all of the gain buckets are drained, the partitions are reverted to the best found so far and the nodes are all unlocked and returned to the gain buckets. This repeats until no new global minimum is found.
And finally the main algorithm:
// KLFM runs the Kernighan-Lin / Fiduccia-Mattheyses loop: each pass
// repeatedly moves the best unlocked node across the partition,
// recording the best cut seen, then rolls back to that best state and
// refills the buckets. Passes repeat until no pass improves the cut.
// It returns the minimum cut found and the partition assignment that
// achieved it (PARTS[i] is the side of g.nodes[i]).
func KLFM(g *Graph) (MIN int, PARTS []int) {
	p := partitionGraph(g)
	Cut := g.calcCut()
	CUT := Cut + 1 // force at least one pass
	min := 1000000 // global best; sentinel larger than any real cut
	PARTS = make([]int, len(g.nodes))
	for Cut < CUT { // while finding improvements
		CUT = Cut
		for cnt := 0; cnt < len(g.nodes); cnt++ { // while unlocked nodes remain
			// partition selection: prefer moving a node out of the
			// larger side; fall back to whichever side has nodes left.
			var B *Buckets
			if diff := p.sizes[0] - p.sizes[1]; (diff > 0 && p.parts[0].size > 0) || p.parts[1].size == 0 {
				B = p.parts[0]
				p.sizes[0]--
				p.sizes[1]++
			} else {
				B = p.parts[1]
				p.sizes[0]++
				p.sizes[1]--
			}
			// get best node & swap it to the other side
			N := B.bestNode()
			if N == nil {
				break // BUG FIX: buckets drained; the original dereferenced nil here
			}
			N.part = (N.part + 1) % 2
			N.lock = true
			// BUG FIX: the original called N.CalcGain() — a name
			// mismatch with calcGain — and discarded the result;
			// store the refreshed gain explicitly.
			N.gain = N.calcGain()
			// update neighbors: re-bucket unlocked ones, just
			// refresh the stored gain of locked ones
			for _, e := range N.edges {
				N2 := e.n1
				if N2 == N {
					N2 = e.n2
				}
				if !N2.lock {
					p.parts[N2.part].updateNode(N2)
				} else {
					N2.gain = N2.calcGain()
				}
			}
			// calculate cut value (TODO: O(E) per move; could be
			// maintained incrementally from the moved node's gain)
			cut := g.calcCut()
			// save pass-local best
			if cut < Cut {
				Cut = cut
			}
			// save global best and the partition that produced it
			if cut < min {
				min = cut
				for i, n := range g.nodes {
					PARTS[i] = n.part
				}
			}
		}
		// roll back to the best partition seen so far
		for i := range g.nodes {
			g.nodes[i].part = PARTS[i]
		}
		// refill gain buckets for the next pass
		p.fillPartitions(g)
	}
	return min, PARTS
}
For the experiments, I am using the assignment data file and add20 from the benchmarks I linked to in my last post. I am running 1000 iterations of the KL/FM algorithm with initial seeds from [0:100). So far my best scores are:
- datafile3.txt: 1340 (an improvement of 108)
- add20.graph: 617 (only 23 from the best!)
Tomorrow I will let you know how the experiments finished up. I also want to run the algorithm with a modified partition selection that allows for uneven partition sizing. If I have time I will cover another algorithm too, but it is the end of the semester...
No comments:
Post a Comment