{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# PCA - Principal Component Analysis\n", "\n", "**Problem**: you have a multidimensional set of data (such as a set of hidden unit activations) and you want to see which points lie close to one another.\n", "\n", "PCA re-expresses the data along new axes, called principal components, ordered from the direction of greatest variance to the direction of least variance. The first principal component (PC1) has the greatest variance, so plotting only the first two components gives a 2-D view that keeps as much of the spread in the data as possible.\n", "\n", "## Example\n", "\n", "Let's look at a dataset that has nothing to do with networks: measurements of flowers, specifically Irises." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from sklearn import datasets\n", "iris = datasets.load_iris()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['sepal length (cm)',\n", " 'sepal width (cm)',\n", " 'petal length (cm)',\n", " 'petal width (cm)']" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iris.get(\"feature_names\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "https://rpubs.com/sarthakdasadia11/iris" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([[ 5.1, 3.5, 1.4, 0.2],\n", " [ 4.9, 3. , 1.4, 0.2],\n", " [ 4.7, 3.2, 1.3, 0.2],\n", " [ 4.6, 3.1, 1.5, 0.2],\n", " [ 5. , 3.6, 1.4, 0.2],\n", " [ 5.4, 3.9, 1.7, 0.4],\n", " [ 4.6, 3.4, 1.4, 0.3],\n", " [ 5. , 3.4, 1.5, 0.2],\n", " [ 4.4, 2.9, 1.4, 0.2],\n", " [ 4.9, 3.1, 1.5, 0.1],\n", " [ 5.4, 3.7, 1.5, 0.2],\n", " [ 4.8, 3.4, 1.6, 0.2],\n", " [ 4.8, 3. , 1.4, 0.1],\n", " [ 4.3, 3. , 1.1, 0.1],\n", " [ 5.8, 4. , 1.2, 0.2],\n", " [ 5.7, 4.4, 1.5, 0.4],\n", " [ 5.4, 3.9, 1.3, 0.4],\n", " [ 5.1, 3.5, 1.4, 0.3],\n", " [ 5.7, 3.8, 1.7, 0.3],\n", " [ 5.1, 3.8, 1.5, 0.3],\n", " [ 5.4, 3.4, 1.7, 0.2],\n", " [ 5.1, 3.7, 1.5, 0.4],\n", " [ 4.6, 3.6, 1. 
, 0.2],\n", " [ 5.1, 3.3, 1.7, 0.5],\n", " [ 4.8, 3.4, 1.9, 0.2],\n", " [ 5. , 3. , 1.6, 0.2],\n", " [ 5. , 3.4, 1.6, 0.4],\n", " [ 5.2, 3.5, 1.5, 0.2],\n", " [ 5.2, 3.4, 1.4, 0.2],\n", " [ 4.7, 3.2, 1.6, 0.2],\n", " [ 4.8, 3.1, 1.6, 0.2],\n", " [ 5.4, 3.4, 1.5, 0.4],\n", " [ 5.2, 4.1, 1.5, 0.1],\n", " [ 5.5, 4.2, 1.4, 0.2],\n", " [ 4.9, 3.1, 1.5, 0.1],\n", " [ 5. , 3.2, 1.2, 0.2],\n", " [ 5.5, 3.5, 1.3, 0.2],\n", " [ 4.9, 3.1, 1.5, 0.1],\n", " [ 4.4, 3. , 1.3, 0.2],\n", " [ 5.1, 3.4, 1.5, 0.2],\n", " [ 5. , 3.5, 1.3, 0.3],\n", " [ 4.5, 2.3, 1.3, 0.3],\n", " [ 4.4, 3.2, 1.3, 0.2],\n", " [ 5. , 3.5, 1.6, 0.6],\n", " [ 5.1, 3.8, 1.9, 0.4],\n", " [ 4.8, 3. , 1.4, 0.3],\n", " [ 5.1, 3.8, 1.6, 0.2],\n", " [ 4.6, 3.2, 1.4, 0.2],\n", " [ 5.3, 3.7, 1.5, 0.2],\n", " [ 5. , 3.3, 1.4, 0.2],\n", " [ 7. , 3.2, 4.7, 1.4],\n", " [ 6.4, 3.2, 4.5, 1.5],\n", " [ 6.9, 3.1, 4.9, 1.5],\n", " [ 5.5, 2.3, 4. , 1.3],\n", " [ 6.5, 2.8, 4.6, 1.5],\n", " [ 5.7, 2.8, 4.5, 1.3],\n", " [ 6.3, 3.3, 4.7, 1.6],\n", " [ 4.9, 2.4, 3.3, 1. ],\n", " [ 6.6, 2.9, 4.6, 1.3],\n", " [ 5.2, 2.7, 3.9, 1.4],\n", " [ 5. , 2. , 3.5, 1. ],\n", " [ 5.9, 3. , 4.2, 1.5],\n", " [ 6. , 2.2, 4. , 1. ],\n", " [ 6.1, 2.9, 4.7, 1.4],\n", " [ 5.6, 2.9, 3.6, 1.3],\n", " [ 6.7, 3.1, 4.4, 1.4],\n", " [ 5.6, 3. , 4.5, 1.5],\n", " [ 5.8, 2.7, 4.1, 1. ],\n", " [ 6.2, 2.2, 4.5, 1.5],\n", " [ 5.6, 2.5, 3.9, 1.1],\n", " [ 5.9, 3.2, 4.8, 1.8],\n", " [ 6.1, 2.8, 4. , 1.3],\n", " [ 6.3, 2.5, 4.9, 1.5],\n", " [ 6.1, 2.8, 4.7, 1.2],\n", " [ 6.4, 2.9, 4.3, 1.3],\n", " [ 6.6, 3. , 4.4, 1.4],\n", " [ 6.8, 2.8, 4.8, 1.4],\n", " [ 6.7, 3. , 5. , 1.7],\n", " [ 6. , 2.9, 4.5, 1.5],\n", " [ 5.7, 2.6, 3.5, 1. ],\n", " [ 5.5, 2.4, 3.8, 1.1],\n", " [ 5.5, 2.4, 3.7, 1. ],\n", " [ 5.8, 2.7, 3.9, 1.2],\n", " [ 6. , 2.7, 5.1, 1.6],\n", " [ 5.4, 3. , 4.5, 1.5],\n", " [ 6. , 3.4, 4.5, 1.6],\n", " [ 6.7, 3.1, 4.7, 1.5],\n", " [ 6.3, 2.3, 4.4, 1.3],\n", " [ 5.6, 3. , 4.1, 1.3],\n", " [ 5.5, 2.5, 4. , 1.3],\n", " [ 5.5, 2.6, 4.4, 1.2],\n", " [ 6.1, 3. 
, 4.6, 1.4],\n", " [ 5.8, 2.6, 4. , 1.2],\n", " [ 5. , 2.3, 3.3, 1. ],\n", " [ 5.6, 2.7, 4.2, 1.3],\n", " [ 5.7, 3. , 4.2, 1.2],\n", " [ 5.7, 2.9, 4.2, 1.3],\n", " [ 6.2, 2.9, 4.3, 1.3],\n", " [ 5.1, 2.5, 3. , 1.1],\n", " [ 5.7, 2.8, 4.1, 1.3],\n", " [ 6.3, 3.3, 6. , 2.5],\n", " [ 5.8, 2.7, 5.1, 1.9],\n", " [ 7.1, 3. , 5.9, 2.1],\n", " [ 6.3, 2.9, 5.6, 1.8],\n", " [ 6.5, 3. , 5.8, 2.2],\n", " [ 7.6, 3. , 6.6, 2.1],\n", " [ 4.9, 2.5, 4.5, 1.7],\n", " [ 7.3, 2.9, 6.3, 1.8],\n", " [ 6.7, 2.5, 5.8, 1.8],\n", " [ 7.2, 3.6, 6.1, 2.5],\n", " [ 6.5, 3.2, 5.1, 2. ],\n", " [ 6.4, 2.7, 5.3, 1.9],\n", " [ 6.8, 3. , 5.5, 2.1],\n", " [ 5.7, 2.5, 5. , 2. ],\n", " [ 5.8, 2.8, 5.1, 2.4],\n", " [ 6.4, 3.2, 5.3, 2.3],\n", " [ 6.5, 3. , 5.5, 1.8],\n", " [ 7.7, 3.8, 6.7, 2.2],\n", " [ 7.7, 2.6, 6.9, 2.3],\n", " [ 6. , 2.2, 5. , 1.5],\n", " [ 6.9, 3.2, 5.7, 2.3],\n", " [ 5.6, 2.8, 4.9, 2. ],\n", " [ 7.7, 2.8, 6.7, 2. ],\n", " [ 6.3, 2.7, 4.9, 1.8],\n", " [ 6.7, 3.3, 5.7, 2.1],\n", " [ 7.2, 3.2, 6. , 1.8],\n", " [ 6.2, 2.8, 4.8, 1.8],\n", " [ 6.1, 3. , 4.9, 1.8],\n", " [ 6.4, 2.8, 5.6, 2.1],\n", " [ 7.2, 3. , 5.8, 1.6],\n", " [ 7.4, 2.8, 6.1, 1.9],\n", " [ 7.9, 3.8, 6.4, 2. ],\n", " [ 6.4, 2.8, 5.6, 2.2],\n", " [ 6.3, 2.8, 5.1, 1.5],\n", " [ 6.1, 2.6, 5.6, 1.4],\n", " [ 7.7, 3. , 6.1, 2.3],\n", " [ 6.3, 3.4, 5.6, 2.4],\n", " [ 6.4, 3.1, 5.5, 1.8],\n", " [ 6. , 3. , 4.8, 1.8],\n", " [ 6.9, 3.1, 5.4, 2.1],\n", " [ 6.7, 3.1, 5.6, 2.4],\n", " [ 6.9, 3.1, 5.1, 2.3],\n", " [ 5.8, 2.7, 5.1, 1.9],\n", " [ 6.8, 3.2, 5.9, 2.3],\n", " [ 6.7, 3.3, 5.7, 2.5],\n", " [ 6.7, 3. , 5.2, 2.3],\n", " [ 6.3, 2.5, 5. , 1.9],\n", " [ 6.5, 3. , 5.2, 2. ],\n", " [ 6.2, 3.4, 5.4, 2.3],\n", " [ 5.9, 3. 
, 5.1, 1.8]])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iris.data" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", " 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", " 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n", " 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iris.target" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%matplotlib notebook" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from sklearn.decomposition import PCA" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "pca = PCA(n_components=2)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "PCA(copy=True, n_components=2, whiten=False)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pca.fit(iris.data)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "X = pca.transform(iris.data)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "application/javascript": [ "/* 
Put everything inside the global mpl namespace */\n", "window.mpl = {};\n", "\n", "mpl.get_websocket_type = function() {\n", " if (typeof(WebSocket) !== 'undefined') {\n", " return WebSocket;\n", " } else if (typeof(MozWebSocket) !== 'undefined') {\n", " return MozWebSocket;\n", " } else {\n", " alert('Your browser does not have WebSocket support.' +\n", " 'Please try Chrome, Safari or Firefox ≥ 6. ' +\n", " 'Firefox 4 and 5 are also supported but you ' +\n", " 'have to enable WebSockets in about:config.');\n", " };\n", "}\n", "\n", "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n", " this.id = figure_id;\n", "\n", " this.ws = websocket;\n", "\n", " this.supports_binary = (this.ws.binaryType != undefined);\n", "\n", " if (!this.supports_binary) {\n", " var warnings = document.getElementById(\"mpl-warnings\");\n", " if (warnings) {\n", " warnings.style.display = 'block';\n", " warnings.textContent = (\n", " \"This browser does not support binary websocket messages. \" +\n", " \"Performance may be slow.\");\n", " }\n", " }\n", "\n", " this.imageObj = new Image();\n", "\n", " this.context = undefined;\n", " this.message = undefined;\n", " this.canvas = undefined;\n", " this.rubberband_canvas = undefined;\n", " this.rubberband_context = undefined;\n", " this.format_dropdown = undefined;\n", "\n", " this.image_mode = 'full';\n", "\n", " this.root = \$('
');\n", " this._root_extra_style(this.root)\n", " this.root.attr('style', 'display: inline-block');\n", "\n", " \$(parent_element).append(this.root);\n", "\n", " this._init_header(this);\n", " this._init_canvas(this);\n", " this._init_toolbar(this);\n", "\n", " var fig = this;\n", "\n", " this.waiting = false;\n", "\n", " this.ws.onopen = function () {\n", " fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n", " fig.send_message(\"send_image_mode\", {});\n", " fig.send_message(\"refresh\", {});\n", " }\n", "\n", " this.imageObj.onload = function() {\n", " if (fig.image_mode == 'full') {\n", " // Full images could contain transparency (where diff images\n", " // almost always do), so we need to clear the canvas so that\n", " // there is no ghosting.\n", " fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n", " }\n", " fig.context.drawImage(fig.imageObj, 0, 0);\n", " };\n", "\n", " this.imageObj.onunload = function() {\n", " this.ws.close();\n", " }\n", "\n", " this.ws.onmessage = this._make_on_message_function(this);\n", "\n", " this.ondownload = ondownload;\n", "}\n", "\n", "mpl.figure.prototype._init_header = function() {\n", " var titlebar = \$(\n", " '
');\n", " var titletext = \$(\n", " '
');\n", " titlebar.append(titletext)\n", " this.root.append(titlebar);\n", " this.header = titletext[0];\n", "}\n", "\n", "\n", "\n", "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n", "\n", "}\n", "\n", "\n", "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n", "\n", "}\n", "\n", "mpl.figure.prototype._init_canvas = function() {\n", " var fig = this;\n", "\n", " var canvas_div = \$('
');\n", "\n", " canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n", "\n", " function canvas_keyboard_event(event) {\n", " return fig.key_event(event, event['data']);\n", " }\n", "\n", " canvas_div.keydown('key_press', canvas_keyboard_event);\n", " canvas_div.keyup('key_release', canvas_keyboard_event);\n", " this.canvas_div = canvas_div\n", " this._canvas_extra_style(canvas_div)\n", " this.root.append(canvas_div);\n", "\n", " var canvas = \$('');\n", " canvas.addClass('mpl-canvas');\n", " canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n", "\n", " this.canvas = canvas[0];\n", " this.context = canvas[0].getContext(\"2d\");\n", "\n", " var rubberband = \$('');\n", " rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n", "\n", " var pass_mouse_events = true;\n", "\n", " canvas_div.resizable({\n", " start: function(event, ui) {\n", " pass_mouse_events = false;\n", " },\n", " resize: function(event, ui) {\n", " fig.request_resize(ui.size.width, ui.size.height);\n", " },\n", " stop: function(event, ui) {\n", " pass_mouse_events = true;\n", " fig.request_resize(ui.size.width, ui.size.height);\n", " },\n", " });\n", "\n", " function mouse_event_fn(event) {\n", " if (pass_mouse_events)\n", " return fig.mouse_event(event, event['data']);\n", " }\n", "\n", " rubberband.mousedown('button_press', mouse_event_fn);\n", " rubberband.mouseup('button_release', mouse_event_fn);\n", " // Throttle sequential mouse events to 1 every 20ms.\n", " rubberband.mousemove('motion_notify', mouse_event_fn);\n", "\n", " rubberband.mouseenter('figure_enter', mouse_event_fn);\n", " rubberband.mouseleave('figure_leave', mouse_event_fn);\n", "\n", " canvas_div.on(\"wheel\", function (event) {\n", " event = event.originalEvent;\n", " event['data'] = 'scroll'\n", " if (event.deltaY < 0) {\n", " event.step = 1;\n", " } else {\n", " event.step = -1;\n", " }\n", " mouse_event_fn(event);\n", " });\n", "\n", " 
canvas_div.append(canvas);\n", " canvas_div.append(rubberband);\n", "\n", " this.rubberband = rubberband;\n", " this.rubberband_canvas = rubberband[0];\n", " this.rubberband_context = rubberband[0].getContext(\"2d\");\n", " this.rubberband_context.strokeStyle = \"#000000\";\n", "\n", " this._resize_canvas = function(width, height) {\n", " // Keep the size of the canvas, canvas container, and rubber band\n", " // canvas in synch.\n", " canvas_div.css('width', width)\n", " canvas_div.css('height', height)\n", "\n", " canvas.attr('width', width);\n", " canvas.attr('height', height);\n", "\n", " rubberband.attr('width', width);\n", " rubberband.attr('height', height);\n", " }\n", "\n", " // Set the figure to an initial 600x600px, this will subsequently be updated\n", " // upon first draw.\n", " this._resize_canvas(600, 600);\n", "\n", " // Disable right mouse context menu.\n", " \$(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n", " return false;\n", " });\n", "\n", " function set_focus () {\n", " canvas.focus();\n", " canvas_div.focus();\n", " }\n", "\n", " window.setTimeout(set_focus, 100);\n", "}\n", "\n", "mpl.figure.prototype._init_toolbar = function() {\n", " var fig = this;\n", "\n", " var nav_element = \$('
')\n", " nav_element.attr('style', 'width: 100%');\n", " this.root.append(nav_element);\n", "\n", " // Define a callback function for later on.\n", " function toolbar_event(event) {\n", " return fig.toolbar_button_onclick(event['data']);\n", " }\n", " function toolbar_mouse_event(event) {\n", " return fig.toolbar_button_onmouseover(event['data']);\n", " }\n", "\n", " for(var toolbar_ind in mpl.toolbar_items) {\n", " var name = mpl.toolbar_items[toolbar_ind][0];\n", " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", " var image = mpl.toolbar_items[toolbar_ind][2];\n", " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", "\n", " if (!name) {\n", " // put a spacer in here.\n", " continue;\n", " }\n", " var button = \$('