1. Visualization, Part 3: Getting data

This notebook will examine how to get data into your program so that you can visualize them.

1.1 Direct

The most straightforward approach, if a bit time-consuming, is to enter the data directly into arrays in your program. This program contains two arrays, states and pop:

In [4]:
// Parallel arrays: states[i] is a two-letter state code (used by draw() as the
// id of that state's shape inside the SVG) and pop[i] is the population entered
// for the same state. Array initializers keep each list in one place and avoid
// loose top-level statements next to setup()/draw().
String[] states = {
    "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",   //  0 -  9
    "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",   // 10 - 19
    "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",   // 20 - 29
    "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",   // 30 - 39
    "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"    // 40 - 49
};

int[] pop = {
     4708708,   698473,  6595778,  2889450, 36961664,   //  0 -  4  (AL AK AZ AR CA)
     5024748,  3518288,   885122, 18537969,  9829211,   //  5 -  9  (CO CT DE FL GA)
     1295178,  1545801, 12910409,  6423113,  3007856,   // 10 - 14  (HI ID IL IN IA)
     2818747,  4314113,  4492076,  1318301,  5699478,   // 15 - 19  (KS KY LA ME MD)
     6593587,  9969727,  5266214,  2951996,  5987580,   // 20 - 24  (MA MI MN MS MO)
      974989,  1796619,  2643085,  1324575,  8707739,   // 25 - 29  (MT NE NV NH NJ)
     2009671, 19541453,  9380884,   646844, 11542645,   // 30 - 34  (NM NY NC ND OH)
     3687050,  3825657, 12604767,  1053209,  4561242,   // 35 - 39  (OK OR PA RI SC)
      812383,  6296254, 24782302,  2784572,   621760,   // 40 - 44  (SD TN TX UT VT)
     7882590,  6664195,  1819777,  5654774,   544270    // 45 - 49  (VA WA WV WI WY)
};

// The SVG map of the USA; loaded in setup() and drawn in draw().
PShape usa;

// Runs once when the sketch starts: creates a 959x593 pixel canvas and loads
// the SVG map into the global `usa` so that draw() can render it.
// NOTE(review): the canvas size presumably matches the SVG's own dimensions -- confirm.
void setup() {
    size(959, 593);  
    usa = loadShape("usa-wikipedia.svg");
}

// Draws the map once, then re-draws every state on top of it with a fill
// colour that depends on its population: the least populous state is pure
// red, the most populous pure blue, and everything else falls in between.
void draw() {
    background(255);
    shape(usa, 0, 0);
    // The extremes are the same for every state, so look them up once
    // instead of rescanning the array on each iteration.
    int lowest = min(pop);
    int highest = max(pop);
    for (int i = 0; i < states.length; i++) {
        PShape st = usa.getChild(states[i]);
        if (st == null) {
            continue;   // the SVG has no shape with this id; nothing to tint
        }
        // Ignore the colours stored in the SVG so that fill() below applies.
        st.disableStyle();
        // Scale the population to 0..255. map() works in floats, which avoids
        // the 32-bit overflow of 255 * (pop[i] - lowest) for large states.
        float c = map(pop[i], lowest, highest, 0, 255);
        // Make the colors go between red (c = 0) and blue (c = 255):
        fill(255 - c, 0, c);
        shape(st, 0, 0);
    }
    // The picture is static, so one frame is enough.
    noLoop();
}
Sketch #3:

Sketch #3 state: Loading...

1.2 Construct a CSV file

Another method is to construct a CSV file in the notebook using the %%file magic. %%file takes the name of the file, followed by the contents of the file. The first line of the file is the "header" that provides the names of the columns:

In [4]:
%%file test.csv
"State","Population"
AL,4708708
AK,698473
AZ,6595778
AR,2889450
CA,36961664
CO,5024748
CT,3518288
DE,885122
FL,18537969
GA,9829211
HI,1295178
ID,1545801
IL,12910409
IN,6423113
IA,3007856
KS,2818747
KY,4314113
LA,4492076
ME,1318301
MD,5699478
MA,6593587
MI,9969727
MN,5266214
MS,2951996
MO,5987580
MT,974989
NE,1796619
NV,2643085
NH,1324575
NJ,8707739
NM,2009671
NY,19541453
NC,9380884
ND,646844
OH,11542645
OK,3687050
OR,3825657
PA,12604767
RI,1053209
SC,4561242
SD,812383
TN,6296254
TX,24782302
UT,2784572
VT,621760
VA,7882590
WA,6664195
WV,1819777
WI,5654774
WY,544270
Created file '/home/dblank/Public/CS110 Intro to Computing/2015/Lectures/test.csv'.

You can then load the CSV file in using the following import and loadTable() function. There are a few things of interest:

  • Need to use import processing.table.*;
  • Use loadTable(FILENAME, "header"); to load the data
  • Use table.getRowCount() to get the total number of lines of data; doesn't count header row
  • Use new form of for to loop through data, like: for (TableRow row : table.rows()) { ...}
  • Use row.getString(COLUMN_NAME), row.getInt(COLUMN_NAME), or row.getFloat(COLUMN_NAME) to get the value of that row/column
In [5]:
import processing.table.*;

// Rows loaded from test.csv; filled in by setup().
Table table;
// The SVG map of the USA. This sketch uses it in setup() and draw(), so it
// has to be declared here as well.
PShape usa;

// Runs once when the sketch starts: creates the canvas, loads the CSV data
// (treating the first line as column names) and loads the SVG map.
void setup() {
    // The Processing reference requires size() to be the first statement of setup().
    size(959, 593);
    table = loadTable("test.csv", "header");
    println(table.getRowCount() + " total rows in table");
    usa = loadShape("usa-wikipedia.svg");
}

// Returns the largest value found in the "Population" column of `table`.
// The search starts from 0, so 0 is returned when the table has no rows or
// no positive values.
long findMax() {
    long largest = 0;
    for (TableRow row : table.rows()) {
        // Declared locally so the value does not leak into (or clash with)
        // a sketch-level variable called `pop`.
        int population = row.getInt("Population");
        if (population > largest) {
            largest = population;
        }
    }
    return largest;
}

// Draws the map once, then re-draws every state listed in the CSV with a
// shade of red that depends on its population: the most populous state is
// black and the shade gets brighter red as the population shrinks.
void draw() {
    background(255);
    shape(usa, 0, 0);
    // Stored under its own name: assigning to `max` would replace the
    // built-in max() function. A float makes the division below fractional.
    float largest = findMax();
    for (TableRow row : table.rows()) {
        String state = row.getString("State");
        int population = row.getInt("Population");
        PShape st = usa.getChild(state);
        if (st == null) {
            continue;   // the SVG has no shape with this id; nothing to tint
        }
        // Ignore the colours stored in the SVG so that fill() below applies.
        st.disableStyle();
        // Portion of 255. Divide first: 255 * population would overflow a
        // 32-bit int for the largest states.
        float c = 255 * (population / largest);
        // From red to black:
        fill(255 - c, 0, 0);
        shape(st, 0, 0);
    }
    // The picture is static, so one frame is enough.
    noLoop();
}
Sketch #4:

Sketch #4 state: Loading...
In [8]:
import processing.table.*;

// Rows loaded from test.csv; filled in by setup().
Table table;

// Runs once when the sketch starts: creates a 500x500 pixel canvas and loads
// the CSV file, treating its first line as column names ("header").
void setup() {
    size(500, 500);
    table = loadTable("test.csv", "header");
}

// Returns the sum of the "Population" column over every row of `table`
// (0 for an empty table).
long findSum() {
    // Accumulate in a long, matching the return type, so the total cannot
    // wrap around the 32-bit int range on larger data sets.
    long total = 0;
    for (TableRow row : table.rows()) {
        int population = row.getInt("Population");
        total += population;
    }
    return total;
}

// Returns the largest value found in the "Population" column of `table`.
// The search starts from 0, so 0 is returned when the table has no rows or
// no positive values.
int findMax() {
    int largest = 0;
    for (TableRow row : table.rows()) {
        // Declared locally so the value does not leak into (or clash with)
        // a sketch-level variable called `pop`.
        int population = row.getInt("Population");
        if (population > largest) {
            largest = population;
        }
    }
    return largest;
}

// Returns the smallest value found in the "Population" column of `table`.
// An empty table yields MAX_INT, the value the search starts from.
int findMin() {
    // MAX_INT is Processing's largest int. The previous starting value,
    // 999999999999, does not fit in an int at all.
    int smallest = MAX_INT;
    for (TableRow row : table.rows()) {
        int population = row.getInt("Population");
        if (population < smallest) {
            smallest = population;
        }
    }
    return smallest;
}


// Returns the point that lies `length` pixels away from (x, y) in the
// direction `angle`, as a two-element array {x, y} rounded to whole pixels.
//
//   x, y    centre to measure from
//   length  distance from the centre (may be fractional)
//   angle   direction in radians; because screen y grows downwards and the
//           sine term is subtracted, positive angles turn counter-clockwise
//           on screen
//
// `length` and `angle` are floats: an int angle would truncate radians to
// whole numbers and put every label in one of a handful of directions.
int[] rotateAround(int x, int y, float length, float angle) {
    return new int[] { round(x + length * cos(angle)),
                       round(y - length * sin(angle)) };
}

// Draws a pie chart with one slice per row of `table`. Each slice's angle is
// proportional to the row's population, and its colour runs from red
// (smallest population) to black (largest). A second pass writes the state
// code over the middle of each slice.
void draw() {
    background(255);
    // Held as a float so that population / total is a fraction rather than
    // an integer division that always yields 0.
    float total = findSum();
    // Each of these scans the whole table, so call them once, not per row.
    int lowest = findMin();
    int highest = findMax();

    // First, draw the pies:
    float start = 0;
    for (TableRow row : table.rows()) {
        int population = row.getInt("Population");
        float sweep = population / total * 2 * PI;
        // map() scales in floats, avoiding the int overflow of
        // 255 * (population - lowest).
        fill(255 - map(population, lowest, highest, 0, 255), 0, 0);
        arc(width/2, height/2, width, height, start, start + sweep);
        start += sweep;
    }

    // Next, draw the labels:
    start = 0;
    for (TableRow row : table.rows()) {
        String state = row.getString("State");
        int population = row.getInt("Population");
        float sweep = population / total * 2 * PI;
        // Aim at the middle of the slice. The angle is negated because
        // rotateAround() subtracts the sine term, whereas arc() angles above
        // increase in the opposite direction on screen.
        int[] xy = rotateAround(width/2, height/2, height/2.2, -(start + sweep / 2));
        // Draw shadow, and the text:
        fill(0);
        text(state, xy[0] + 1, xy[1] + 1);
        fill(255);
        text(state, xy[0], xy[1]);
        // Increment by the size of the pie:
        start += sweep;
    }
    // The picture is static, so one frame is enough.
    noLoop();
}
Sketch #6:

Sketch #6 state: Loading...

1.3 Download CSV

Finally, you can find a CSV file (or make one in Excel or another spreadsheet program) and either upload it to your notebook server, or download it right here from the Internet:

In [65]:
%download http://www.departments.bucknell.edu/biology/resources/msw3/export.asp -f mammals.csv
Downloaded 'mammals.csv'.

This takes the first 101 lines (the header line plus 100 lines of data) and stores them in a file called mammals_100.csv.

In [9]:
!head -101 mammals.csv > mammals_100.csv

This checks to see how many lines are in a file:

In [10]:
! cat -n mammals.csv | tail -1

This file has 13,583 lines.

This command lists the first line of the file to see the headers:

In [11]:
!head -1 mammals_100.csv

Finally, we load the file and print out one column:

In [71]:
import processing.table.*;

// Rows loaded from mammals_100.csv; filled in by setup().
Table table;

// Runs once when the sketch starts: creates the canvas and loads the CSV
// file, treating its first line as column names ("header").
// This sketch only prints text, so it does not load the USA map.
void setup() {
    // The Processing reference requires size() to be the first statement of setup().
    size(959, 593);
    table = loadTable("mammals_100.csv", "header");
    println(table.getRowCount() + " total rows in table");
}

// Clears the canvas and prints the "Species" column of every row in `table`,
// one value per line, then stops the draw loop so the list is printed once.
void draw() {
    background(255);
    for (TableRow row : table.rows()) {
        String species = row.getString("Species");
        println(species);
    }
    noLoop();
}
Sketch #52:

Sketch #52 state: Loading...

1.4 Latitude, Longitude data in CSV: Plotting points on map

For this example, suppose we have latitude/longitude data that we would like to plot at the corresponding locations on the map.

First, we make a CSV file:

In [16]:
%%file states.csv
"Latitude","Longitude","City","State"
33.57,86.75,Birmingham,AL
34.65,86.77,Huntsville,AL
30.68,88.25,Mobile,AL
32.30,86.40,Montgomery,AL
32.34,86.99,Selma,AL
31.87,86.02,Troy,AL
33.23,87.62,Tuscaloosa,AL
Created file '/home/dblank/Public/CS110 Intro to Computing/2015/Lectures/states.csv'.

And then we read it in, getting the data from the file and displaying it in a visually pleasing way.

In [21]:
import processing.table.*;

// The SVG map of the USA; loaded in setup() and drawn in draw().
PShape usa;
// Rows loaded from states.csv; filled in by setup().
Table table;

// Runs once when the sketch starts: creates the canvas, loads the CSV data
// (treating the first line as column names) and loads the SVG map.
void setup() {
    // The Processing reference requires size() to be the first statement of setup().
    size(959, 593);
    table = loadTable("states.csv", "header");
    usa = loadShape("usa-wikipedia.svg");
}

// Projects a latitude/longitude pair onto a plane with the Albers equal-area
// conic projection, using the origin and standard parallels set below.
//
//   lat  latitude in degrees
//   lng  longitude in degrees (the caller passes western longitudes as
//        negative numbers)
//
// Returns a two-element array {x, y}. The formulas contain no radius term,
// so the result is in unit-sphere units and the caller must scale and
// offset it to get screen coordinates.
float[] albers(float lat, float lng) {
    float lat0 =  23.0 * (PI/180);   // Latitude_Of_Origin
    float lng0 = -96.0 * (PI/180);   // Central_Meridian
    float phi1 =  30.0 * (PI/180);   // Standard_Parallel_1
    float phi2 =  50.0 * (PI/180);   // Standard_Parallel_2

    // Cone constant and the projection constant C (named distinctly here;
    // the two used to differ only by letter case).
    float n = 0.5 * (sin(phi1) + sin(phi2));
    float cosPhi1 = cos(phi1);
    float bigC = cosPhi1 * cosPhi1 + 2 * n * sin(phi1);

    // Radius of the origin's parallel, then angle and radius for this point.
    float p0 = sqrt(bigC - 2 * n * sin(lat0)) / n;
    float theta = n * (lng * PI/180 - lng0);
    float p = sqrt(bigC - 2 * n * sin(lat * PI/180)) / n;

    float x = p * sin(theta);
    float y = p0 - p * cos(theta);
    return new float[] { x, y };
}

// Marks one location on the map: a filled circle at the projected position
// plus the city name, drawn in white over a black copy offset by one pixel.
//
//   lat, lon  position in degrees, passed straight to albers()
//   city      label to draw next to the marker
//   c         fill colour of the circle
//   radius    passed to ellipse() as both width and height
void plot(float lat, float lon, String city, color c, float radius) {
    // Values to scale the lat, lon to fit on the USA map:
    float xoffset = 485;
    float xscale = 1245;
    float yoffset = 630;
    float yscale = 1250;

    float[] xy = albers(lat, lon);
    // Work out the screen position once. y is subtracted because the
    // projection's y grows upwards while screen y grows downwards.
    float sx = xoffset + xy[0] * xscale;
    float sy = yoffset - xy[1] * yscale;

    fill(c);
    ellipse(sx, sy, radius, radius);
    // Shadow first, then the text itself:
    fill(0);
    text(city, sx + 1, sy + 1);
    fill(255);
    text(city, sx, sy);
}

// Draws the map once and plots every row of `table` on it as a circle with
// a random semi-transparent colour and a random size between 5 and 20.
void draw() {
    background(255);
    shape(usa, 0, 0);
    for (TableRow row : table.rows()) {
        float lat = row.getFloat("Latitude");
        float lon = row.getFloat("Longitude");
        String city = row.getString("City");
        // The CSV stores the longitudes as positive numbers; negate them so
        // that plot() receives western longitudes as negative values.
        plot(lat, -lon, city, color(random(255), random(255), random(255), 128), random(5, 20));
    }
    // The picture is static, so one frame is enough.
    noLoop();
}
Sketch #18:

Sketch #18 state: Loading...