12/18/08

Playing with Scala 5 ½: More variations


As James pointed out, the implicit conversion creates a new object every time we convert the Node to Ordered[Node], and although the JVM and the GC can deal pretty well with that kind of scenario, it will be better if we can avoid the creation of the object (BTW, the fact that Scala can leverage years of work on JIT optimizations is something that shouldn't be taken lightly... I still want tail call optimization, though)

We need to add the Ordered trait for Node somewhere in the hierarchy. The reasonable place is when we add the Weight trait (once we have weight we can compare the nodes):

class DfsNode extends NodeImpl with Weight with Ordered[DfsNode]{

...

def compare(y: Node): Int = this.weight.compare(y.weight)

}

Now we can compare the nodes by their weight: “lighter” nodes are before “heavier” nodes, but the comparation is not what we need for the priority queue, we need to reverse the the ordering.

The most immediate way is just reverse DfsNode's comparation function:

def compare(y: Node): Int = -this.weight.compare(y.weight)

but this has the undesirable effect of reversing all the node comparations, and most of the time I'll bet we want to have the lighter nodes before the heavier ones, the reverse is counter-intuitive (and the potential source of nightmares). The solution? Let's create our particular flavor of DfsNode with the comparation reversed to use it in the PriorityQeue:

class PQNode extends DfsNode {

override def compare(y: Node): Int = -super.compare(y)

}

And we just need to change our newNode method to return PQNodes instead of DfsNodes:

protected def newNode: Node = new PQNode

For extra points on modularization, we can extract those changes and the code related to the algorithm in a subclass of DfsGraph.

Here is the complete code:




package dfs2;

abstract class Graph {
type Edge
type Node <: NodeIntf
abstract class NodeIntf {
def connectWith(node: Node): Edge
}
def nodes: Set[Node]
def edges: Set[Edge]
def addNode: Node
}

abstract class DirectedGraph extends Graph {
type Edge <: EdgeImpl
class EdgeImpl(origin: Node, dest: Node) {
def from = origin
def to = dest
}

class NodeImpl extends NodeIntf {
self: Node =>
def connectWith(node: Node): Edge = {
val edge = newEdge(this, node)
edges = edges + edge
edge
}
}

protected def newNode: Node
protected def newEdge(from: Node, to: Node): Edge

var nodes: Set[Node] =Set()
var edges: Set[Edge] =Set()
def addNode: Node = {
val node = newNode
nodes = nodes + node
node
}
}

trait Weight {
var weight= Float.PositiveInfinity
}

import scala.collection.mutable.PriorityQueue;

class DfsGraph extends DirectedGraph {
class DfsNode extends NodeImpl with Weight with Ordered[DfsNode]{
var previous: DfsNode = _
var label:String = _
var nodeEdges: Set[Edge] = Set()

override def connectWith(node: Node): Edge = {
val newEdge=super.connectWith(node)
nodeEdges = nodeEdges + newEdge
newEdge
}
def -->(n2:Node):DfsEdge =connectWith(n2)

override def toString()= label+ " w:"+weight
def compare(y: Node): Int = this.weight.compare(y.weight)
}


class DfsEdge(origin:Node, dest:Node) extends EdgeImpl(origin, dest) with Weight {
override def toString()= from.label+"-->"+to.label+" w:"+weight
}

def addNewNode(l:String):Node={
val newNode=super.addNode
newNode.label=l
newNode
}

type Node= DfsNode
type Edge= DfsEdge

protected def newEdge(from: Node, to: Node): Edge = new DfsEdge(from,to)
protected def newNode: Node = new DfsNode

}

class DijkstraSPGraph extends DfsGraph {

class PQNode extends DfsNode {
override def compare(y: Node): Int = -super.compare(y)
}

override protected def newNode: Node = new PQNode

def shortestPath(start: DfsNode, end: DfsNode) = {
val unvisited=new PriorityQueue[Node]()
unvisited++=(nodes)
start.weight=0
while (!unvisited.isEmpty) {
val vertx=unvisited.dequeue
for (v <- vertx.nodeEdges if canImprove(v)){
unvisited+improveDistance(v)
}
}
}

def canImprove(a:Edge)=(a.from.weight+a.weight< a.to.weight)

def improveDistance(a:Edge) ={
val v=a.to
v.weight=a.from.weight+a.weight
v.previous=a.from
v
}

def pathToStart(end:DfsNode):List[DfsNode] = {
if (end == null)
Nil
else
end :: pathToStart(end.previous)
}
}


object Example extends Application {

val graph = new DijkstraSPGraph

val n1=graph addNewNode "start"
val n2=graph addNewNode "n2"
val n3=graph addNewNode "n3"
val n4=graph addNewNode "n4"
val n5=graph addNewNode "n5"
val n6=graph addNewNode "end"
graph addNewNode "alone"

n1-->n2 weight=2
n1-->n3 weight=1
n2-->n4 weight=1
n3-->n4 weight=3
n2-->n5 weight=1
n4-->n6 weight=1
n5-->n6 weight=3

n1-->n6 weight=7
n1-->n4 weight=10

graph.shortestPath(n1,n6)
println("Path")
graph.pathToStart(n6).reverse.map(println(_))

}

12/14/08

Playing with Scala 5: improving code, use of views and comprehensions

After reading about different optimization to pathfinding algorithms, I decided to improve my Dijkstra implementation by using a priority queue instead of a set for the unvisited nodes. Using the “weight” of the vertex as the priority, we can avoid searching the whole set for the minimum and we reduce the cost of the operation from O(n) to O(log n).

The first change is replace the set with Scala's priority queue:


val unvisited=new PriorityQueue[Node]()




But as it is, it will not compile: the priority queue's signature is


class PriorityQueue[A](implicit view$1 : (A) => Ordered[A]) extends ResizableArray[A] with CloneableCollection


You need to provide an implicit conversion from the parameter type A to the type Ordered[A]. But nothing to worry about: is as easy as


implicit def node2ordered(n: Node): Ordered[Node] = new Ordered[Node] {

//reverse the comparation so highest priority = lower cost

def compare(y: Node): Int = -(n.weight.compare(y.weight))

}


Look at the improveDistance function, it does two things: filters the nodes and updates the distance if we can improve it. I think is better if we split into two different functions, separating the filtering from the updating, so instead of:


def improveDistance(a:Edge) ={
if (a.from.weight+a.weight< a.to.weight) {
a.to.weight=a.from.weight+a.weight
a.to.previous=a.from
}
}


we get:

def canImprove(a:Edge)=(a.from.weight+a.weight< a.to.weight)

and

def improveDistance(a:Edge) ={

a.to.weight=a.from.weight+a.weight

a.to.previous=a.from

a.to

}


Then, instead of a function map over the nodeEdges, we have to filter and then map:


vertx.nodeEdges.map(improveDistance(_))


is replaced by


vertx.nodeEdges.filter(canImprove(_))).map(improveDistance(_)


that gives a better picture of the steps involved.

As we need to add the updated nodes to the priority queue so they can “float” to the top, the line would look like:


unvisited++ (vertx.nodeEdges.filter(canImprove(_))).map(improveDistance(_))


But I don't really think the explicit use of map and filter helps readability.

Enter Scala's for comprehensions: we can replace the map/filter with a comprehension:


for (v <- vertx.nodeEdges if canImprove(v)){

unvisited+improveDistance(v)

}

Nothing fantastic, but an improvement, isn't it?

For future posts, I'm planning on generalizing the algorithm into a A* pathfinding with different heuristic functions... stay tuned!

10/22/08

Simple Java tricks to protect your web application against SQL injection

Your application is vulnerable to SQL Injection when you send unfiltered strings to the database. Most modern ORM frameworks should take care of it (but don't take my word!... go ahead and check how secure your framework is).
Sometimes, you have to work with plain JDBC (or ODBC). Here is a couple of tricks that help:
  1. First and foremost, avoid concatenating strings for SQL queries. Use prepared statements unless is not possible (i.e. cases when you have undefined number of parameters)
  2. Leverage the language type system: If you're passing a number, use Integer instead of String... any invalid character will fail the conversion and will not reach the DB.
  3. If there's no option but concatenate strings, make sure the database comment quotes are escaped (for example, in DB2 you have to replace the single quote character with 2 single quote characters: instead of "SELECT * FROM users WHERE name='"+param+"'" use "SELECT * FROM users WHERE name='"+param.replaceAll("'","''")+"'"
For something a little more advanced, you can wrap the strings in some kind of "EscapedString" class, and use that class in the signature of the DAOs (related to 2. )

Note: by no means this is a comprehensive list. Application security is very hard, check your database documentation...

9/24/08

Java: The programming tool of choice for discriminating hackers

In spite of all the Java bashing that's so in vogue this days, Team Smartass won the ICFP Programming Contest using Java, giving bragging rights to say that Java is "The programming tool of choice for discriminating hackers".
while(tongue in cheek){
take(this);
(lisp (snobs all))
fuctional_programing = Maybe[zealots]
ruby('fanboys')
}
Another proof of what matters is not the tool, is how you use it...

8/25/08

State machines using "Fluent interfaces/ DSL" in Java

I always found state machines very useful for describing particular aspects of object behavior. A classic example can be a shopping order: it can be "pending","in progress", "shipped", etc.
In my previous project, I started with a simple design using a transition matrix. I must confess it didn't work well. It was hard to explain and it grew more complex as new requirements were added. I though it would be simpler if I didn't use any "fancy OO stuff" and advanced idioms. I've should know better!
This time I wanted to try something different, and based in my previous experiments with internal DSLs with Scala and my (very limited) knowledge of the patters to achieve that in java, I just went ahead and tried to come up with something useful and understandable. This time seems to be working: although it has some "magic" behind curtains is only in the creation and is very easy to add or modify the configuration class for new or updated states and events. Other members of my project were able to add and remove states and transitions with only a brief explanation.

The basic classes are pretty simple: State and Event.

package statemachine;


public class Event {

final private String label;

public Event(String newLabel){

label=newLabel;

}

/**

* @return Returns the label.

*/

public String getLabel() {

return label;

}

public String toString(){

return label;

}

}


package statemachine;

import java.util.HashMap;

import java.util.Map;


public class State {

private final String label;

private final Map<Event,Transition> transitions= new HashMap<Event,Transition>();

//no public access to the constructor, must be in the same package

State(String newLabel){

this.label=newLabel;

}

void addTransition(Transition t){

transitions.put(t.getEvent(),t);

}

public State doEvent(Event e){

return (transitions.get(e)).getDestination();

}

/**

* @return Returns the label.

*/

public String getLabel() {

return label;

}

/**

* @return Returns the transitions.

*/

public Map<Event,Transition> getTransitions() {

return transitions;

}

}


You send a event to a sate and it returns the next state:
State newState=state.doEvent(event);
The State class holds a map of [event->state] (with the Transition class in the middle, for added effect :) )
As an extra safety measure, the State constructor and the addTransition method is accessible only to the package (well, unless you subclass it)
The "fluency" is provided by the Transition class (the useful methods return this to allow method chaining), so you can write:
new Transition(event).from(origin).to(destination)

package statemachine;

class Transition {

private State origin,destination;

private Event event;

public Transition(Event e){

event=e;

}

public Transition from(State orig){

origin=orig;

origin.addTransition(this);

return this;

}

public Transition to(State dest){

destination=dest;

return this;

}

/**

* @return Returns the destination.

*/

public State getDestination() {

return destination;

}

/**

* @return Returns the event.

*/

public Event getEvent() {

return event;

}

/**

* @return Returns the origin.

*/

public State getOrigin() {

return origin;

}

}



How this will look? Suppose you have a pretty simple FSM:















Using the previous pseudoDSL/Fluent interface will look like:

package statemachine;


public class FSMDef {

public final static State SUBMITTED=new State("Submitted");

public final static State OPEN= new State("Open");

public final static State CANCELLED= new State("Cancelled");

public final static State CLOSED= new State("Closed");

//Events

static class Events {

public final static Event OPEN = new Event("Open");

public final static Event CLOSE = new Event("Close");

public final static Event REOPEN = new Event("Re-Open");

public final static Event CANCEL = new Event("Cancel");

}

static {

new Transition(Events.OPEN).from(SUBMITTED).to(OPEN);

new Transition(Events.CLOSE).from(OPEN).to(CLOSED);

new Transition(Events.REOPEN).from(CLOSED).to(OPEN);

new Transition(Events.CANCEL).from(OPEN).to(CANCELLED);

}

}



Doesn't seems too complex, right?
I'm just starting with this, and probably barely skim the surface (sure it has some drawbacks), but so far it worked :)

[EDIT: Fixed the generics declarations that were eaten by the HTML format. BTW, if anybody knows a good code formatter for blog posts, let me know ]

7/21/08

Scala: fold/reduce cheatsheet

My fold/reduce 'cheat sheet' is something like:

For the list (a,b,c,d) fold/reduce is:

reduceLeft(f) => f(f(f(a,b),c),d)

foldLeft(Z)(f) => f(f(f(f(Z,a),b),c),d)

("applies the function from the left")


reduceRight(f) => f(a,f(b,f(c,d)))

foldRight(Z)(f) => f(a,f(b,f(c,f(d,Z))))

("
applies the function from the right")

[EDIT: as you see, fold and reduce are essentially the same, but fold starts with an outside element ]

5/30/08

Questions, only questions


Today I'm full of questions. Software development and programming is hard. Is a close encounter with layers and layers of complexity. Once we master one level, we open a whole new level. To tackle it one must employ multiple techniques. But they always involve trade-offs. How to balance programming using standard and maybe cumbersome idioms vs more elegant but more obscure idioms? What if the programming style is too personal and strange to others? Shall we settle for the lowest common denominator ? How do we balance the conflict between something harder to understand at first but easier to extend and maintain against something more familiar but with exponential complexity growth after each change? How do we maintain the "conceptual integrity" (F. Brooks) through the life of the system? As the project complexity grows, the need for sophisticated techniques increases. (I'm coming to the conclusion that if a developer can't comprehend some level of complexity, shouldn't be allowed to participate in the project without close mentoring). Refactoring is necessary because entropy. Every code base starts to degrade and loose integrity as evolves. I wonder if there's a code entropy metric... At what point a refactoring is good enough? When do you stop? At least your architecture should be stable, or you didn't validate it. I guess complexity is in the eye of the beholder. I found that a system composed of smaller simple pieces with complex interactions is easier to evolve than bigger pieces with simple interactions because each piece is internally complex and the limited interactions means its harder to combinate them in new ways. Of course, gratuitous complexity should be avoided at all costs. Sometimes I look for a "zen" approach to coding. At the end, I always take a pragmatic approach and find a trade-off that balances the forces. Isn't coding about that? Each line you write is a choice you need to make... At some point we need to use "introspection" and "metaprogramming" but not the programming techniques: introspection on how we approach problem solving and programming and metaprogramming on what we can do to program better.

2/3/08

Playing with Scala 4: Abstract types and Self-types

Thanks to the scala mailing list, and to Eric, I have a satisfying enough model of a graph to be used by the Dijkstra's algorithm.
(well, there already was a generic graph and directed graph written in Scala... talking about reinventing the wheel!. The solution in Eric's comment works great too... is almost the same, but he was faster than me on coming up with it :D )
http://www.scala-lang.org/intro/abstracttypes.html
http://www.scala-lang.org/intro/selfrefs.html
I just used the same code, and changed the nodes and edges to use sets instead of lists (really, we don't care about the order)
The Scala example of Graph uses abstract types: it declares type Node and type Edge as type "parameters" to be defined by concrete implementation. Even more, by declaring type Node <: NodeIntf, we're restricting Node to be subtypes of NodeIntf. The declaration "self: Node =>" tells "I'll use self as Node type"
Then I declared the trait Weighted and extended the class DirectedGraph adding my extensions to NodeImpl and EdgeImpl to use weighted trait and add the node label and the previous property. I had to add a collection of the adjacent arcs, because they weren't in the original Graph and is easier for the algorithm. I overrode the connectWith method to keep track of the adjacent arcs as they're added, also I defined our --> operator to call the connectWith method.

class DfsNode extends NodeImpl with Weighted {
var previous: DfsNode = _
var label:String = _
var nodeEdges: Set[Edge] = Set()

override def connectWith(node: Node): Edge = {
val newEdge=super.connectWith(node)
nodeEdges = nodeEdges + newEdge
newEdge
}

def -->(n2:Node):DfsEdge =connectWith(n2)

override def toString()= label+ " w:"+weight
}

Edges are simpler, just add the weight and override toString:
class DfsEdge(origin:Node, dest:Node) extends EdgeImpl(origin, dest) with Weighted {
override def toString()= from.label+"-->"+to.label+" w:"+weight
}

All are enclosed in the DfsGraph class extending the DirectedGraph class, now I can assign the concrete types for Node and Edge:
type Node= DfsNode
type Edge= DfsEdge

The DirectedGraph class has a addNode method. To keep the DSL-ish quality of the construction of the graph, I added the method addNewNode to take the node label as parameter
def addNewNode(l:String):Node={
val newNode=super.addNode
newNode.label=l
newNode
}

So we can write: graph addNewNode "nodeLabel".

The example looks like this:

object Example extends Application {

val graph = new DfsGraph

val n1=graph addNewNode "start"
val n2=graph addNewNode "n2"
val n3=graph addNewNode "n3"
val n4=graph addNewNode "n4"
val n5=graph addNewNode "n5"
val n6=graph addNewNode "end"

n1-->n2 weight=2
n1-->n3 weight=1
n2-->n4 weight=1
n3-->n4 weight=3
n2-->n5 weight=1
n4-->n6 weight=1
n5-->n6 weight=3

graph.shortestPath(n1,n6)
println("Path")
graph.pathToStart(n6).reverse.map(println(_))

}

Pretty easy to read, don't you think?
In the next post, I'll show you how we can use this algorithm to solve the "Abbott's revenge" type of maze. Ideally, I want to create an interpreter for the input, exploring Scala's features for creating language interpreters.
Here is the complete code:

package dfs2;

abstract class Graph {
type Edge
type Node <: NodeIntf
abstract class NodeIntf {
def connectWith(node: Node): Edge
}
def nodes: Set[Node]
def edges: Set[Edge]
def addNode: Node
}

abstract class DirectedGraph extends Graph {
type Edge <: EdgeImpl
class EdgeImpl(origin: Node, dest: Node) {
def from = origin
def to = dest
}

class NodeImpl extends NodeIntf {
self: Node =>
def connectWith(node: Node): Edge = {
val edge = newEdge(this, node)
edges = edges + edge
edge
}
}

protected def newNode: Node
protected def newEdge(from: Node, to: Node): Edge

var nodes: Set[Node] =Set()
var edges: Set[Edge] =Set()
def addNode: Node = {
val node = newNode
nodes = nodes + node
node
}
}

trait Weighted {
var weight= Float.PositiveInfinity
}
class DfsGraph extends DirectedGraph {
class DfsNode extends NodeImpl with Weighted {
var previous: DfsNode = _
var label:String = _
var nodeEdges: Set[Edge] = Set()

override def connectWith(node: Node): Edge = {
val newEdge=super.connectWith(node)
nodeEdges = nodeEdges + newEdge
newEdge
}

def -->(n2:Node):DfsEdge =connectWith(n2)

override def toString()= label+ " w:"+weight
}

class DfsEdge(origin:Node, dest:Node) extends EdgeImpl(origin, dest) with Weighted {
override def toString()= from.label+"-->"+to.label+" w:"+weight
}

def addNewNode(l:String):Node={
val newNode=super.addNode
newNode.label=l
newNode
}

type Node= DfsNode
type Edge= DfsEdge

protected def newEdge(from: Node, to: Node): Edge = new DfsEdge(from,to)
protected def newNode: Node = new DfsNode

def shortestPath(start: DfsNode, end: DfsNode) = {
var unvisited=nodes
start.weight=0
while (!unvisited.isEmpty) {
val vertx=min(unvisited)
vertx.nodeEdges.map(improveDistance(_))
unvisited=unvisited-vertx
}
}

def improveDistance(a:Edge) ={
if (a.from.weight+a.weight< a.to.weight) {
a.to.weight=a.from.weight+a.weight
a.to.previous=a.from
}
}

def min(nodes: Set[DfsNode]): DfsNode = {
nodes.reduceLeft((a:DfsNode,b:DfsNode)=>if (a.weight<b.weight) a else b )
}

def pathToStart(end:DfsNode):List[DfsNode] = {
if (end == null)
Nil
else
end :: pathToStart(end.previous)
}
}