Sunday, May 6, 2012

Graph Partitioning 2 - KL/FM Implementation

In my last post, I gave you an overview of the KL/FM algorithm for graph partitioning. Today we will go over the code and reach a new low score, now that I have (hopefully) implemented the algorithm correctly.

First, some simple structs for storing the graph data:

 type Node struct {
  // normal stuff
  id int          // node identifier
  edges []*Edge   // incident edges
  // KL/FM stuff
  gain int   // cached gain value; indexes into the gain buckets
  part int   // current partition (0 or 1)
  lock bool  // true once this node has been swapped in the current pass
}
type Edge struct {
  id int        // edge identifier
  w  int        // edge weight
  n1,n2 *Node   // the two endpoints
}
type Graph struct {
  nodes []Node
  edges []Edge
  maxN int // max node degree + 1; bounds the gain-bucket index range [-maxN:maxN]
}

The Graph and Edge structs are pretty standard; I added a maximum neighbor count to the graph so that I can bound the number of gain buckets I need to [-maxN:maxN]. The node is augmented to track the current partition, whether the node has been considered, and the current gain value, calculated as follows:

 // calcGain recomputes the KL/FM gain of n -- the cut-weight reduction
 // obtained by moving n to the other partition: total weight of edges
 // crossing the cut (ecost) minus total weight of edges inside n's own
 // partition (icost).
 //
 // FIX: the result is now also cached in n.gain.  insertNode and
 // updateNode read n.gain to pick a bucket, but the original computed
 // the value and never stored it.
 func (n *Node) calcGain() int {
  var ecost, icost int
  for _, e := range n.edges {
    // the neighbor is whichever endpoint is not n
    n2 := e.n1
    if n2 == n {
      n2 = e.n2
    }
    if n.part == n2.part {
      icost += e.w
    } else {
      ecost += e.w
    }
  }
  n.gain = ecost - icost
  return n.gain
}
// calcSwap returns the Kernighan-Lin gain of exchanging n and n2:
// D(n) + D(n2) - 2*w(n,n2).  Both D values count a shared edge as
// external, but after swapping BOTH endpoints that edge still crosses
// the cut, so its weight must be subtracted twice.
//
// FIX: the original subtracted the shared weight only once, and
// dereferenced a nil *Edge whenever the two nodes are not adjacent;
// a missing edge now simply contributes weight 0.
func (n *Node) calcSwap(n2 *Node) int {
  var w int
  for _, e := range n.edges {
    if e.n1 == n2 || e.n2 == n2 {
      w = e.w
      break
    }
  }
  return n.calcGain() + n2.calcGain() - 2*w
}
// calcCut returns the current cut size: the total weight of every edge
// whose endpoints lie in different partitions.
func (g *Graph) calcCut() int {
  total := 0
  for i := range g.edges {
    e := &g.edges[i]
    if e.n1.part != e.n2.part {
      total += e.w
    }
  }
  return total
}

The following is the implementation of the gain buckets:

 type BList struct {
  gain int          // the gain value this bucket holds
  nodes *list.List  // nodes with that gain, used as a stack
}
type Buckets struct {
  side int      // which partition (0 or 1) these buckets belong to
  size int      // number of nodes currently stored across all buckets
  pos []*BList  // buckets for gains >= 0, indexed by gain
  neg []*BList  // buckets for gains < 0, indexed by -gain
  maxB  int // highest gain value inserted so far; bestNode scans downward from here
}

// insertNode files n into the gain bucket matching n.gain, creating
// the bucket lazily, and raises maxB when this gain is a new maximum.
func (b *Buckets) insertNode(n *Node) {
  // negative gains live in b.neg at index -gain, everything else in
  // b.pos at index +gain
  side, pos := b.pos, n.gain
  if n.gain < 0 {
    side, pos = b.neg, -n.gain
  }

  if side[pos] == nil {
    side[pos] = &BList{gain: n.gain}
  }
  bl := side[pos]

  if bl.gain > b.maxB {
    b.maxB = bl.gain
  }
  bl.pushNode(n)
  b.size++
}

// bestNode pops a node from the highest-gain nonempty bucket, walking
// maxB downward past empty slots.  Returns nil once every bucket has
// been drained.
func (b *Buckets) bestNode() (n *Node) {
  for ; b.maxB > -len(b.neg); b.maxB-- {
    var bl *BList
    if b.maxB >= 0 {
      bl = b.pos[b.maxB]
    } else {
      bl = b.neg[-b.maxB]
    }
    if bl == nil || bl.nodes == nil || bl.nodes.Len() == 0 {
      continue // empty slot, keep scanning downward
    }
    n = bl.popNode()
    b.size--
    break
  }
  return
}

// updateNode moves n to the bucket matching its freshly recomputed
// gain: remove it from the bucket holding the stale gain, recompute,
// and re-insert.  The net bucket population is unchanged, so the
// size++ performed by insertNode is backed out at the end.
func (b *Buckets) updateNode(n *Node) {
  // locate the bucket via the stale cached gain
  var side []*BList
  var pos int
  if n.gain < 0 {
    side = b.neg
    pos = -n.gain
  } else {
    side = b.pos
    pos = n.gain
  }
  side[pos].rmvNode(n)

  // FIX: was n.CalcGain(), which does not compile (the method is
  // calcGain).  Store the result so insertNode files n by its new gain.
  n.gain = n.calcGain()

  b.insertNode(n)
  b.size-- // readjust for insertNode(n) size++ op
}

BList is a single gain bucket with its gain value and a list that is used as a stack. The Buckets struct tracks which partition (side) it is and the total number of nodes remaining in the buckets. I have stored the individual buckets in two arrays, one for the negative buckets and one for the non-negative buckets. I did this to allow for constant time look up when updating and inserting nodes. The maxB field stores the gain value of the highest-gain bucket, for constant time lookup of the associated bucket when finding the best node for swapping.

The Partitions, I think, is pretty straight forward:

 type Partitions struct {
  parts []*Buckets  // array[2] of gain-bucket sets, one per partition
  sizes []int  // node count per partition, kept in sync as nodes move
}

func partitionGraph( g *Graph ) (p *Partitions) {
  // randomly assign nodes
  max := 0
  for i,_ := range g.nodes {
    r := rand.Float64()
    n := &(g.nodes[i])
    if r < 0.5 {
      n.part = 0;
    } else {
      n.part = 1;
    }
    if l:=len(n.edges); l > max {
      max = l
    }
  }
  g.maxN = max+1 // make sure we have enough gain buckets spots
  for i,_ := range g.nodes {
    g.nodes[i].CalcGain()
  }
  
  // make partitions
  p = new( Partitions )
  p.fillPartitions(g)
  
  return p
}

// fillPartitions (re)builds both gain-bucket sets and inserts every
// node, clearing its lock first.  Called once at setup and again after
// each roll-back to refill the drained buckets.
func (p *Partitions) fillPartitions(g *Graph) {
  p.parts = make([]*Buckets, 2)
  p.sizes = make([]int, 2)
  for i := range p.parts {
    p.parts[i] = &Buckets{
      side: i,
      pos:  make([]*BList, g.maxN),
      neg:  make([]*BList, g.maxN),
    }
  }
  for i := range g.nodes {
    g.nodes[i].lock = false
    p.insertNode(&g.nodes[i])
  }
}

// insertNode adds n to the gain buckets of its current partition and
// bumps that partition's node count.
func (p *Partitions) insertNode(n *Node) {
  p.sizes[n.part]++
  p.parts[n.part].insertNode(n)
}

I added the fillPartitions() function as a helper when initializing the partitions, but then used it in the KL/FM loop to avoid inserting locked nodes into a gain bucket. I believe that once a node has been considered for swap, it can't be swapped again, so I don't put it in any gain bucket. After all of the gain buckets are drained, the partitions are reverted to the best found so far and the nodes are all unlocked and returned to the gain buckets. This repeats until no new global minimum is found.

And finally the main algorithm:

 // KLFM runs the Kernighan-Lin / Fiduccia-Mattheyses loop on g:
 // repeatedly move the best unlocked node across the cut, remember the
 // best cut ever seen, roll back to it after each full pass, and stop
 // when a pass fails to improve the cut.  Returns the best cut weight
 // and the partition (0/1) of every node.
 func KLFM(g *Graph) (MIN int, PARTS []int) {
  p := partitionGraph(g)

  Cut := g.calcCut()
  CUT := Cut + 1

  // FIX: seed the global best from the initial state instead of a
  // magic 1000000 sentinel and an all-zero PARTS; otherwise a pass
  // whose cuts never dip below the sentinel would "roll back" every
  // node to partition 0.
  min := Cut
  PARTS = make([]int, len(g.nodes))
  for i, n := range g.nodes {
    PARTS[i] = n.part
  }

  for Cut < CUT { // while the previous pass found an improvement
    CUT = Cut
    for cnt := 0; cnt < len(g.nodes); cnt++ { // while unlocked nodes remain
      var B *Buckets
      // partition selection: take from the larger side, or from the
      // only side that still has unlocked nodes
      diff := p.sizes[0] - p.sizes[1]
      if (diff > 0 && p.parts[0].size > 0) || p.parts[1].size == 0 {
        B = p.parts[0]
        p.sizes[0]--
        p.sizes[1]++
      } else {
        B = p.parts[1]
        p.sizes[0]++
        p.sizes[1]--
      }

      // get best node & swap it across the cut
      N := B.bestNode()
      N.part = (N.part + 1) % 2
      N.lock = true
      // FIX: was N.CalcGain() (method is calcGain); keep the cached
      // gain in sync with the new partition.
      N.gain = N.calcGain()

      // update neighbors: unlocked ones must move buckets, locked
      // ones just refresh their cached gain
      for _, e := range N.edges {
        N2 := e.n1
        if N2 == N {
          N2 = e.n2
        }
        if !N2.lock {
          p.parts[N2.part].updateNode(N2)
        } else {
          // FIX: was N2.CalcGain(); see above.
          N2.gain = N2.calcGain()
        }
      }

      // calculate cut value (TODO: track the cut incrementally; a
      // full scan per move makes the loop O(N*E))
      cut := g.calcCut()

      // save pass best
      if cut < Cut {
        Cut = cut
      }
      // save global best
      if cut < min {
        min = cut
        // save partition
        for i, n := range g.nodes {
          PARTS[i] = n.part
        }
      }
    }

    // roll back to the best partition seen so far
    for i := range g.nodes {
      g.nodes[i].part = PARTS[i]
    }

    // refill gain buckets (also unlocks every node)
    p.fillPartitions(g)
  }
  return min, PARTS
}

For the experiments, I am using the assignment data file and add20 from the benchmarks I linked to in my last post. I am running 1000 iterations of the KL/FM algorithm with initial seeds from [0:100). So far my best scores are:

  • datafile3.txt:  1340 (an improvement of 108)
  • add20.graph:  617 (only 23 from the best!)


Tomorrow I will let you know how the experiments finished up. I also want to run the algorithm with a modified partition selection that allows for uneven partition sizing. If I have time I will cover another algorithm too, but it is the end of the semester...

No comments:

Post a Comment