package lastfmvis {
    // import lastfmvis.LastfmXMLLoader;
    import com.adobe.serialization.json.JSON;

    import flare.animate.Transitioner;
    import flare.util.Dates;
    import flare.vis.Visualization;
    import flare.vis.data.Data;
    import flare.vis.data.DataList;
    import flare.vis.data.DataSprite;
    import flare.vis.data.NodeSprite;

    import flash.events.Event;
    import flash.events.IOErrorEvent;
    import flash.events.SecurityErrorEvent;
    import flash.net.URLLoader;
    import flash.net.URLRequest;
    import flash.net.URLRequestMethod;
    import flash.net.URLVariables;

    /**
     * Downloads a user's weekly listening charts through LastfmXMLLoader,
     * looks up one tag per artist from the tag-cache web application, and
     * assembles the result into Flare Data / DataList structures.
     *
     * Usage: construct with a username, assign onProgress / onComplete /
     * onError, then call getData(). The getters are populated by the time
     * onComplete fires.
     */
    public class DataProcessor {

        /* **************** PRIVATE VARIABLES *******************/

        // Alex's API key
        private var API_KEY:String = "c669ec5933241d05076deab833cf48b8";

        /** URL for web-application acting as tag-cache */
        private var CACHE_URL:String = "http://razzlepuss.com/lastcache/";

        /* ***** CONSTANTS ***** */
        public static const ARTIST_LIST:String = "artists";
        public static const TRACK_LIST:String = "tracks";
        public static const TAG_LIST:String = "tags";

        /** Separator placed between category values when reshape() builds
         *  its lookup key, so that ("ab","c") and ("a","bc") stay distinct. */
        private static const HASH_SEPARATOR:String = "\u001F";

        // Data
        // EXAMPLE DATA! THIS GETS OVERWRITTEN BY REAL DATA
        private var rawData:Array = [
            { date:1221393600, artist:"Squarepusher", playcount:33 },
            { date:1221393600, artist:"Air", playcount:16 },
            { date:1221393600, artist:"Portishead", playcount:14 },
            { date:1221393600, artist:"Ratatat", playcount:14 },
            { date:1220788800, artist:"Feist", playcount:22 },
            { date:1220788800, artist:"Tricky", playcount:12},
            { date:1220788800, artist:"Ratatat", playcount:11},
            { date:1220788800, artist:"Modest Mouse", playcount:11}
        ];

        /** Hash based version of rawData, so we can find which weeks we have data for in the first place
         *  Data stored as _artistListeningData = { Radiohead: { 1221393600: 24, ... }, ... }
         *  (at runtime the inner keys are the string form of the week's Date) */
        private var _artistListeningData:Object = {};

        /** Same:
         *  == { Radiohead: { "Karma Police": { 1221393600:10, 1221393601: 2}, "Idioteque": { 1221393600: 2, ... }, ... }, ... } */
        private var _trackListeningData:Object = {};

        // EXAMPLE DATA
        // same as rawData, but for tags
        private var rawTagData:Array = [
            { date:1221393600, tag:"Rock", playcount:45 }
            // ...
        ];

        // Same data as rawTagData array, but in hash form.
        // tagCounts = { Rock: { 1221393600: 45}}
        // (used as intermediary format while assembling tag counts)
        private var _tagCounts:Object = {};

        private var _dates:Array = [];
        private var allRawData:Array = [];
        private var _reshapedData:Data;
        private var _originalData:Data;

        /** List of nodes for artist data */
        private var _artistData:DataList = new DataList(ARTIST_LIST);
        /** List of nodes for tag data */
        private var _tagData:DataList = new DataList(TAG_LIST);
        /** List of nodes for track data */
        private var _trackData:DataList = new DataList(TRACK_LIST);

        /** [from, to] pairs (epoch seconds) of the weeks that will be downloaded */
        private var _weeks:Array = [];
        private var _username:String;

        /** Keeps track of which weeks we have, so we can figure out which ones we don't */
        private var foundWeeks:Object = {};

        /** Mapping from artist to tag */
        private var _artistTags:Object = {};

        /** Called to update a progress bar, with a number and status string */
        private var _onProgress:Function = function(progress:Number, status:String):void {};
        /** Called to signify data loading and processing is complete */
        private var _onComplete:Function = function():void {};
        /** Called to signify an error in loading data, called with a String containing the error message. */
        private var _onError:Function = function(error:String):void {};

        /* **************** METHODS *******************/

        /**
         * @param username the last.fm user whose charts will be loaded
         */
        public function DataProcessor(username:String) {
            _username = username;
        }

        public function get dates():Array { return _dates; }
        public function get artistTags():Object { return _artistTags; }
        public function get reshapedData():Data { return _reshapedData; }
        public function get originalData():Data { return _originalData; }
        public function get trackData():DataList { return _trackData; }
        public function get artistData():DataList { return _artistData; }
        public function get tagData():DataList { return _tagData; }
        public function get username():String { return _username; }

        /** onProgress is a function that takes two args: a progress number,
         *  and a status string.
         *  NOTE(review): week loading reports progress/total (a 0..1 fraction)
         *  while tag loading reports 100 on completion - confirm which scale
         *  the consumer expects. */
        public function set onProgress(prog:Function):void { _onProgress = prog; }
        /** onComplete is a function that takes no arguments. */
        public function set onComplete(prog:Function):void { _onComplete = prog; }
        /** onError is a function that takes one String argument, the error message. */
        public function set onError(prog:Function):void { _onError = prog; }

        /**
         * Gets the weeks of available data for a given user, so we know
         * which weeks have listening data in the first place. Weeks that
         * started within the last 21 weeks are kept; if any remain, the
         * weekly listening data is downloaded next, otherwise onError is
         * called.
         */
        public function getData():void {
            var xmlLoader:LastfmXMLLoader = new LastfmXMLLoader(API_KEY);

            xmlLoader.onComplete = function(xmlData:Array):void {
                var weeksXML:XML = (xmlData && xmlData.length > 0) ? xmlData[0] : null;
                if (weeksXML == null) {
                    _onError("Invalid username or network error.");
                    return;
                }

                // Arbitrary cut-off (21 weeks in the past, in epoch seconds)
                // to avoid endless wait times. Computed once, not per chart.
                var cutoff:uint = Dates.addDays(new Date(), -(7 * 21)).time / 1000;

                for each (var week:XML in weeksXML..chart) {
                    var start:uint = uint(week.@from.toString());
                    if (start > cutoff) {
                        _weeks.push([start, uint(week.@to.toString())]);
                    }
                }

                for each (var range:Array in _weeks) {
                    var newDate:Date = new Date(uint(range[0]) * 1000);
                    _dates.push(newDate);
                    // Object keys are strings, so this is keyed by newDate.toString()
                    foundWeeks[newDate] = true;
                }

                if (_dates.length == 0) {
                    _onError("No data recent enough for given user.");
                } else {
                    getListeningData();
                }
            };

            xmlLoader.onError = function(e:Event):void {
                _onError("Invalid username or network error.");
            };

            xmlLoader.retrieveAvailableWeeks(_username);
        }

        /**
         * Gets the weekly track listening data for the weeks gathered by
         * getData(), fills _trackListeningData / _artistListeningData and
         * rawData, then continues with weeklyDataLoaded().
         */
        private function getListeningData():void {
            rawData = [];
            _onProgress(0, "Loading week 0 of " + _weeks.length);

            var xmlLoader:LastfmXMLLoader = new LastfmXMLLoader(API_KEY);

            // Attach the callbacks before starting the request (as getData()
            // does) so that no notification can be missed.
            xmlLoader.onProgress = function(progress:Number, total:Number):void {
                _onProgress(progress / total, "Loading week " + progress + " of " + total);
            };

            xmlLoader.onComplete = function(xmlData:Array):void {
                for each (var listenData:XML in xmlData) {
                    var fromAttr:XMLList = listenData..weeklytrackchart[0].@from;
                    var fromDate:uint = uint(fromAttr.toString());
                    var date:Date = new Date(fromDate * 1000);

                    // Artists that appear in this week's chart
                    var weekArtists:Object = {};

                    for each (var track:XML in listenData..track) {
                        var artistName:String = trim(track.artist.toString());
                        var trackName:String = trim(track.name.toString());
                        var playcount:uint = uint(track.playcount.toString());

                        if (!_trackListeningData[artistName]) {
                            _trackListeningData[artistName] = {};
                        }
                        if (!_trackListeningData[artistName][trackName]) {
                            _trackListeningData[artistName][trackName] = {};
                        }
                        _trackListeningData[artistName][trackName][date] = playcount;
                        weekArtists[artistName] = true;
                    }

                    // Populate rawData with one row per artist heard this week.
                    // Artists without plays this week get their zero rows later,
                    // in countTagListens().
                    for (var artist:String in weekArtists) {
                        var count:int = 0;
                        for each (var aTrack:Object in _trackListeningData[artist]) {
                            if (aTrack[date]) count += aTrack[date];
                        }
                        rawData.push({ date: date, artist: artist, playcount: count, sort: 0 });

                        if (!_artistListeningData[artist]) {
                            _artistListeningData[artist] = {};
                        }
                        // Store the artist's weekly total, not the play count
                        // of whichever track happened to be parsed last.
                        _artistListeningData[artist][date] = count;
                    }
                }
                weeklyDataLoaded();
            };

            // NOTE(review): no onError handler is attached to this loader, so
            // failures while downloading weekly charts are not forwarded to
            // _onError - confirm how LastfmXMLLoader reports them.
            xmlLoader.retrieveWeeklyTrackListenData(_username, _weeks);
        }

        /**
         * Called when all the weekly listening data has been downloaded.
         * Posts the list of unique artists (JSON encoded, in the "artists"
         * variable) to the tag cache and continues with countTagListens()
         * once the artist-to-tag mapping has arrived.
         */
        private function weeklyDataLoaded():void {
            _onProgress(0, "Retrieving artist tags...");

            // Tally up unique artists and get their tags
            var artists:Array = [];
            var foundArtists:Object = {};
            for each (var row:Object in rawData) {
                if (!foundArtists[row.artist]) {
                    artists.push(row.artist);
                    foundArtists[row.artist] = true;
                }
            }

            var encodedArtists:String = JSON.encode(artists);
            trace("JSON: " + encodedArtists);

            var request:URLRequest = new URLRequest(CACHE_URL);
            request.method = URLRequestMethod.POST;
            var variables:URLVariables = new URLVariables();
            variables.artists = encodedArtists;
            request.data = variables;

            var loader:URLLoader = new URLLoader();

            loader.addEventListener(Event.COMPLETE, function(e:Event):void {
                var tags:Object;
                try {
                    tags = JSON.decode(loader.data);
                } catch (parseError:Error) {
                    _onError("Could not read artist tags: " + parseError.message);
                    return;
                }
                _artistTags = (tags != null) ? tags : {};
                _onProgress(100, "Done downloading tags");
                countTagListens();
            });

            // Without these listeners a failed request raises an unhandled
            // error event and neither onComplete nor onError is ever called.
            var onLoadFailed:Function = function(e:Event):void {
                _onError("Could not retrieve artist tags.");
            };
            loader.addEventListener(IOErrorEvent.IO_ERROR, onLoadFailed);
            loader.addEventListener(SecurityErrorEvent.SECURITY_ERROR, onLoadFailed);

            loader.load(request);
        }

        /**
         * Tallies up the listens for each tag for each week once all the tags
         * and artists have been retrieved, creates a node for every
         * artist / tag / track and week (zero play count where nothing was
         * heard), builds the reshaped and original Data sets and finally
         * calls onComplete.
         */
        private function countTagListens():void {
            var artist:String;
            var track:String;
            var tag:String;
            var dateStr:String;
            var date:Date;
            var row:Object;

            for each (row in rawData) {
                artist = row["artist"];
                date = row["date"];
                // String() mirrors the coercion a property lookup would apply,
                // so artists without a tag still land under "undefined".
                var tagKey:String = String(_artistTags[artist]);
                if (!_tagCounts[tagKey]) {
                    _tagCounts[tagKey] = {};
                }
                if (_tagCounts[tagKey][date]) {
                    _tagCounts[tagKey][date] += row["playcount"];
                } else {
                    _tagCounts[tagKey][date] = row["playcount"];
                }
            }

            rawTagData = [];
            for (var tagStr:String in _tagCounts) {
                for (dateStr in _tagCounts[tagStr]) {
                    rawTagData.push({
                        date: new Date(dateStr),
                        tag: tagStr,
                        playcount: _tagCounts[tagStr][dateStr],
                        sort: 0
                    });
                    trace("added " + tagStr + " for date: " + new Date(dateStr) + " count: " + _tagCounts[tagStr][dateStr]);
                }
            }

            allRawData = [];

            for each (row in rawData) {
                addRow({ date:row.date, name:row.artist, playcount:row.playcount, type:Last_fm_vis.ARTIST_TYPE, sort:0 });
            }

            for each (row in rawTagData) {
                addRow({ date:row.date, name:row.tag, playcount:row.playcount, type:Last_fm_vis.TAG_TYPE, sort:0 });
            }

            for (artist in _trackListeningData) {
                for (track in _trackListeningData[artist]) {
                    for (dateStr in _trackListeningData[artist][track]) {
                        addRow({
                            date: new Date(dateStr),
                            name: track,
                            playcount: _trackListeningData[artist][track][dateStr],
                            type: Last_fm_vis.TRACK_TYPE,
                            sort: 0,
                            artistName: artist
                        });
                    }
                }
            }

            // Find missing weeks and add them, so we know there's no data for that week
            var newWeeks:Array = [];
            for each (var aWeek:Date in _dates) {
                var nextWeek:Date = Dates.addDays(aWeek, 7);
                if (!foundWeeks[nextWeek]) {
                    newWeeks.push(nextWeek);
                }
            }
            _dates = _dates.concat(newWeeks);
            // sortOn compares as strings unless told otherwise
            _dates.sortOn("time", Array.NUMERIC);

            // Zero out no-data weeks
            for each (date in _dates) {
                for (artist in _artistListeningData) {
                    if (!_artistListeningData[artist][date]) {
                        addRow({ date:date, name:artist, playcount:0, type:Last_fm_vis.ARTIST_TYPE, sort:0 });
                    }
                }

                for (tag in _tagCounts) {
                    if (!_tagCounts[tag][date]) {
                        addRow({ date:date, name:tag, playcount:0, type:Last_fm_vis.TAG_TYPE, sort:0 });
                    }
                }

                for (artist in _trackListeningData) {
                    for (track in _trackListeningData[artist]) {
                        if (!_trackListeningData[artist][track][date]) {
                            addRow({ date:date, name:track, playcount:0, type:Last_fm_vis.TRACK_TYPE, sort:0, artistName:artist });
                        }
                    }
                }
            }

            var combined_data_rotated:Array = reshape(allRawData, ["name", "type", "sort", "artistName"], "date", "playcount", _dates, false);

            _reshapedData = Data.fromArray(combined_data_rotated);

            _originalData = Data.fromArray(allRawData);
            _originalData.addGroup(TAG_LIST, tagData);
            _originalData.addGroup(ARTIST_LIST, artistData);
            _originalData.addGroup(TRACK_LIST, trackData);

            _onComplete();
        }

        /** Records one data tuple in allRawData and creates its node. */
        private function addRow(tuple:Object):void {
            allRawData.push(tuple);
            addNode(tuple);
        }

        /**
         * Wraps a data tuple in a NodeSprite and adds it to the tag, artist
         * or track list according to data.type. Tuples of an unknown type
         * are reported with trace() and skipped.
         */
        private function addNode(data:Object):void {
            var targetList:DataList;
            if (data.type == Last_fm_vis.TAG_TYPE) {
                targetList = tagData;
            } else if (data.type == Last_fm_vis.ARTIST_TYPE) {
                targetList = artistData;
            } else if (data.type == Last_fm_vis.TRACK_TYPE) {
                targetList = trackData;
            } else {
                trace("HEEEELP");
                // No list to add to; continuing would dereference null.
                return;
            }

            var ns:NodeSprite = new NodeSprite();
            ns.data = data;
            targetList.applyDefaults(ns);
            targetList.add(ns);
        }

        /**
         * Negates the date / "max" / "min" values of every node for which the
         * visualization's node comparator returns a negative result when
         * called as comparator(node, thatNode), restores previously negated
         * values of all other nodes, then plays the update transition.
         */
        private function invertDataBelow(node:NodeSprite, vis:Visualization):void {
            var comparator:Function = vis.data.nodes.sort.comparator;
            var key:String;

            for each (var this_node:DataSprite in vis.data.nodes) {
                if (comparator(node, this_node) < 0) {
                    for (key in this_node.data) {
                        if (isInvertibleKey(key) && this_node.data[key] > 0) {
                            this_node.data[key] = -this_node.data[key];
                        }
                    }
                } else {
                    for (key in this_node.data) {
                        // Only undo what the branch above can have negated.
                        if (isInvertibleKey(key) && this_node.data[key] < 0) {
                            this_node.data[key] = -this_node.data[key];
                        }
                    }
                }
            }

            var _t:Transitioner = vis.update(Last_fm_vis.timeRange);
            _t.play();
        }

        /** True for keys that hold a play count: parseable dates, "max" and "min". */
        private static function isInvertibleKey(key:String):Boolean {
            return key == "max" || key == "min" || !isNaN(Date.parse(key));
        }

        /**
         * Trims whitespace (every character with a code below 33) from the
         * start and end of a string.
         * @param str the string to trim
         * @return the trimmed string; "" when str is null, empty or consists
         *  of whitespace only
         */
        public static function trim(str:String):String {
            if (str == null) return "";

            var start:int = 0;
            var end:int = str.length;
            while (start < end && str.charCodeAt(start) < 33) start++;
            while (end > start && str.charCodeAt(end - 1) < 33) end--;
            return str.substring(start, end);
        }

        /**
         * Reshapes a data set, pivoting from rows to columns. For example, if
         * yearly data is stored in individual rows, this method can be used to
         * map each year into a column and the full time series into a single
         * row. This is often needed to use the stacked area layout.
         *
         * Example: reshape(rawData, ["artist"], "date", "playcount", _dates, false);
         *
         * @param tuples an array of data tuples
         * @param cats the category values to maintain
         * @param dim the dimension upon which to pivot. The values of this
         *  property should correspond to the names of newly created columns.
         * @param measure the numerical value of interest. The values of this
         *  property will be used as the values of the new columns.
         * @param cols an ordered array of the new column names. These should
         *  match the values of the dim property.
         * @param normalize a flag indicating if the data should be normalized.
         *  Currently ignored: the pass that zeroed missing entries,
         *  normalized by the column totals and stored a per-row "max" is
         *  disabled, so callers must supply zero rows themselves.
         * @return one object per distinct combination of category values,
         *  holding those values plus one property per dim value seen
         */
        public static function reshape(tuples:Array, cats:Array, dim:String,
            measure:String, cols:Array, normalize:Boolean=true):Array
        {
            var t:Object, d:Object, val:Object, name:String;
            var data:Array = [], names:Object = {};
            var totals:Object = {};
            for each (val in cols) totals[val] = 0;

            // create data set
            for each (t in tuples) {
                // create lookup hash for tuple; the separator keeps category
                // combinations such as ("ab","c") and ("a","bc") apart
                var hash:String = "";
                for each (name in cats) hash += t[name] + HASH_SEPARATOR;

                if (names[hash] == null) {
                    // create a new data tuple
                    data.push(d = {});
                    for each (name in cats) d[name] = t[name];
                    d[t[dim]] = t[measure];
                    names[hash] = d;
                } else {
                    // update an existing data tuple
                    names[hash][t[dim]] = t[measure];
                }
                totals[t[dim]] += t[measure];
            }

            // Disabled post-processing pass, kept for reference:
            //   for each (t in data) {
            //       var max:Number = 0;
            //       for each (name in cols) {
            //           if (!t[name]) t[name] = 0;                  // zero out null entries
            //           if (normalize) t[name] /= totals[name];     // normalize
            //           if (t[name] > max) max = t[name];
            //       }
            //       t.max = max;
            //   }

            return data;
        }
    }
}