Property Data with Node.js and JSON

For this guide, we're going to assume you're interested in using Datafiniti's property data to do some marketing analysis on residential home inventory. Let's say you're a data scientist that's been tasked with the following:

  1. Collect data on homes.
  2. Sort the data by state.
  3. Find which states have the most properties for sale.

Your environment and data needs:

  1. You're working with Node.js.
  2. You want to work with JSON data.

Here are the steps we'll take:

1. Install the request module for Node

In your terminal, run the following to install the request module for Node:

npm install request

2. Get your API token

The next thing you'll need is your API token. The API token lets you authenticate with Datafiniti API and tells it who you are, what you have access to, and so on. Without it, you can't use the API.

To get your API token, go to the Datafiniti Web Portal (https://portal.datafiniti.co), log in, and click on your account name at the top-right. From there, you'll see a link to the "My Account" page, which will take you to a page showing your token. Your API token will be a long string of letters and numbers. Copy the API token or store it somewhere you can easily reference.

📘

For the rest of this document, we'll use AAAXXXXXXXXXXXX as a substitute example for your actual API token when showing example API calls.

.

3. Run your first search

The first thing we'll do is write some code that will run a test search. This test search will give us a sense for what sort of data might be available. Eventually we'll refine our search so that we get back the most relevant data.

Since we want homes in the US, let's try a simple search that will just give us online listings for properties in the US.

Write the following code in your code editor (replace the dummy API token with your real API token):

var request = require('request');

// Set your API parameters here.
var APIToken = 'AAAXXXXXXXXXXXX';
var view = 'properties_all';
var format = 'JSON';
// The query is URL-encoded because it is placed directly into the query string.
var query = encodeURIComponent('country:US');
var records = '1';
var download = 'false';

// Construct the API call. The token goes in the user-info part of the URL
// (before the '@'), followed by the endpoint and the query-string parameters.
var APICall = 'https://' + APIToken + ':@api.datafiniti.co/v3/data/properties?'
				+ 'view=' + view
				+ '&q=' + query
				+ '&format=' + format
				+ '&records=' + records
				+ '&download=' + download;

// Make the API call.
request(
  {
    url : APICall
  },
  // Do something with the response.
  function (error, response, body) {
    // A transport-level failure (DNS, TLS, connection reset, ...) yields no
    // body, so report the error instead of printing "undefined".
    if (error) {
      console.error('API call failed: ' + error.message);
      return;
    }

    // Surface HTTP-level failures (e.g. a rejected token); the body is still
    // printed below because it usually carries the API's error message.
    if (response && response.statusCode >= 400) {
      console.error('API responded with HTTP status ' + response.statusCode);
    }

    console.log(body);
  }
);

You should get a response similar to this:

{
  "estimated total": 7983205,
  "records": [
    {
      "address": "711 Kent Ave",
      "brokers": [
        {
          "agent": "Raj Singh",
          "company": "YOUR REALTY INC.",
          "dateSeen": [
            "2016-06-06T18:09:28Z"
          ],
          "sourceURLs": [
            "https://www.homepath.com/listing/711-kent-ave-catonsville-md-21228-46273970"
          ]
        }
      ],
      "city": "Catonsville",
      "country": "US",
      "dateAdded": "2016-06-06T18:09:28Z",
      "features": [
        {
          "key": "Air Conditioning",
          "value": [
            "Heat Pumps"
          ]
        },
        {
          "key": "Sewer Type",
          "value": [
            "Public"
          ]
        }
      ],
      "keys": [
        "us/md/catonsville/711kentave",
        "mlsnumber/us/md/bc9677283",
        "https://www.homepath.com/listing/711-kent-ave-catonsville-md-21228-46273970/homepath.com-46273970"
      ],
      "latitude": "39.284462",
      "listingName": "711 Kent Ave, Catonsville, Md 21228",
      "longitude": "-76.734069",
      "lotSizeValue": 0.16,
      "lotSizeUnit": "Acres",
      "mlsNumber": "BC9677283",
      "numBathroom": 2,
      "numBedroom": 4,
      "postalCode": "21228",
      "prices": [
        {
          "amountMax": 199900,
          "amountMin": 199900,
          "currency": "USD",
          "dateSeen": [
            "2016-08-08T00:00:00Z",
            "2016-08-03T00:00:00Z"
          ],
          "isSale": "false",
          "sourceURLs": [
            "https://www.homepath.com/listing/711-kent-ave-catonsville-md-21228-46273970"
          ]
        },
        {
          "amountMax": 212000,
          "amountMin": 212000,
          "currency": "USD",
          "dateSeen": [
            "2016-06-06T00:00:00Z"
          ],
          "isSale": "false",
          "sourceURLs": [
            "https://www.homepath.com/listing/711-kent-ave-catonsville-md-21228-46273970"
          ]
        }
      ],
      "propertyTaxes": [
        {
          "amount": 3195,
          "currency": "USD",
          "dateSeen": [
            "2016-06-06T18:09:28Z"
          ],
          "sourceURLs": [
            "https://www.homepath.com/listing/711-kent-ave-catonsville-md-21228-46273970"
          ]
        }
      ],
      "propertyType": "Single Family Dwelling",
      "province": "MD",
      "sourceURLs": [
        "https://www.homepath.com/listing/711-kent-ave-catonsville-md-21228-46273970"
      ],
      "statuses": [
        {
          "dateSeen": [
            "2016-08-09T09:16:10Z"
          ],
          "isUnderContract": "false",
          "sourceURLs": [
            "https://www.homepath.com/listing/711-kent-ave-catonsville-md-21228-46273970"
          ],
          "type": "For Sale"
        }
      ],
      "websiteIDs": [
        "homepath.com-46273970"
      ],
      "id": "AV9WzHyO_RWkykBuv11F"
    }
  ]
}

Let's break down what the API call is all about:

API Call Component | Description
https:// | This is the communication protocol used by the API. It's the same one used when you visit a secure website.
AAAXXXXXXXXXXXX:@ | Your API token. You're telling the API who you are so it can respond to your request.
api.datafiniti.co | The location of the API.
/v3 | You're telling the API which version to use. v3 is our most recent and current API version.
/data | You're telling the API you're interested in querying data.
/properties | Specifically, you're interested in property data.
view=properties_all | The view tells the API in which fields you want your response. properties_all will show all available fields in a record.
format=JSON | The format tells the API which data format you want to see. You can set it to JSON or CSV.
q=country:US | The q tells the API what query you want to use. In this case, you're telling the API you want to search by country. Any property that is in the US will be returned.
records=1 | The records tells the API how many records to return in its response. In this case, you just want to see 1 matching record.
download=false | The download tells the API if you want to initiate a download request or not. Setting it to false means you don't, so it will show the matching records immediately in the response.

Now let's dive through the response the API returned:

Response Field | Description
"estimated_total" | The total number of available records in the database that match your query. If you end up downloading the entire data set, this is how many records you'll use.
"records" | The first available matches to your query. For most queries, you'll see 1 to 10 example records. If there are no matches, this field will be empty.

Within each record returned, you'll see multiple fields shown. This is the data for each record.

Within the records field, you'll see a single property returned with multiple fields and their values associated with that property. The JSON response will show all fields that have a value. It won't show any fields that don't have a value.

Each property record will have multiple fields associated with it. You can see a full list of available fields in our Property Data Schema.

4. Refine your search

If you think about the original query we made, you'll realize we didn't really specify we only wanted homes for sale. There are several other types of properties (e.g., commercial, rentals) that may also be in the data. Since we only want homes for sale, we should narrow our search appropriately.

We'll need to refine our search to make sure we're only getting US homes for sale. To do that, we can add additional filters to the q parameter to narrow down the results. For example:

var request = require('request');

// Set your API parameters here.
var APIToken = 'AAAXXXXXXXXXXXX';
var view = 'properties_all';
var format = 'JSON';
// The query is URL-encoded because it contains spaces, a colon and quotes and
// is placed directly into the query string.
var query = encodeURIComponent('country:US AND propertyType:"Single Family Dwelling"');
var records = '10';
var download = 'false';

// Construct the API call. The token goes in the user-info part of the URL
// (before the '@'), followed by the endpoint and the query-string parameters.
var APICall = 'https://' + APIToken + ':@api.datafiniti.co/v3/data/properties?'
				+ 'view=' + view
				+ '&q=' + query
				+ '&format=' + format
				+ '&records=' + records
				+ '&download=' + download;

// Make the API call.
request(
  {
    url : APICall
  },
  // Do something with the response.
  function (error, response, body) {
    // A transport-level failure (DNS, TLS, connection reset, ...) yields no
    // body, so report the error instead of printing "undefined".
    if (error) {
      console.error('API call failed: ' + error.message);
      return;
    }

    // Surface HTTP-level failures (e.g. a rejected token); the body is still
    // printed below because it usually carries the API's error message.
    if (response && response.statusCode >= 400) {
      console.error('API responded with HTTP status ' + response.statusCode);
    }

    console.log(body);
  }
);

This query is different in a couple ways:

  1. It adds AND propertyType:"Single Family Dwelling" to narrow down results to just US single-family homes.
  2. It changes records=1 to records=10 so we can look at more sample matches.

Datafiniti lets you construct very refined boolean queries. If you wanted to do more complicated searches, you could use OR operations, negation, and more.

You can run the Node.js code above to see the difference in the results.

5. Initiate a full download of the data

Once we like what we see from the sample matches, it's time to download the entire data set! To do this, we're going to update our code a fair bit (an explanation follows):

var request = require('request');
var fs = require('fs');

// Set your API parameters here.
var APIToken = 'AAAXXXXXXXXXXXX';
var view = 'properties_all';
// Lower case here because this value is also used below as the file extension
// of the saved result files.
var format = 'json';
var query = encodeURIComponent('country:US AND propertyType:"Single Family Dwelling"');
var download = 'true';

// Construct the API call. No records parameter is sent for the download
// request, so the result is not capped at a handful of sample records.
var APICall = 'https://' + APIToken + ':@api.datafiniti.co/v3/data/properties?'
				+ 'view=' + view
				+ '&q=' + query
				+ '&format=' + format
				+ '&download=' + download;

// A function to check if a download request has completed.
//
// Polls https://.../v3/requests/<requestID> until the first entry of the
// response reports status 'COMPLETED', then hands the parsed response to the
// callback.
//
//   options.requestID      - ID of the download request to watch.
//   options.pollIntervalMs - optional wait between two polls, in milliseconds
//                            (defaults to 5000).
//   callback(error, downloadRequestResponse)
//                          - called once: with an Error if the status request
//                            fails or its body is not the expected JSON,
//                            otherwise with null and the parsed response.
function checkDownloadUntilComplete(options, callback) {
	var downloadRequestAPICall = 'https://' + APIToken + ':@api.datafiniti.co/v3/requests/' + options.requestID;
	var pollIntervalMs = options.pollIntervalMs === undefined ? 5000 : options.pollIntervalMs;

	request({url : downloadRequestAPICall}, function(error, response, body) {
		if (error) {
			callback(error);
			return;
		}

		// The body may be an error page or error payload rather than the
		// expected JSON array; report that instead of crashing the script.
		var downloadRequestResponse;
		try {
			downloadRequestResponse = JSON.parse(body);
		} catch (parseError) {
			callback(parseError);
			return;
		}
		if (!downloadRequestResponse || !downloadRequestResponse[0]) {
			callback(new Error('Unexpected response from ' + downloadRequestAPICall + ': ' + body));
			return;
		}

		if (downloadRequestResponse[0].status !== 'COMPLETED') {
			console.log('Checking on status: ' + downloadRequestAPICall);
			// Wait before asking again so the API is not hit in a tight loop
			// while the download is being prepared.
			setTimeout(function() {
				checkDownloadUntilComplete(options, callback);
			}, pollIntervalMs);
		} else {
			callback(null, downloadRequestResponse);
		}
	});
}

// Initiate the download request, wait for it to complete, then save every
// result file next to this script as <requestID>_<index>.<format>.
request({url : APICall}, function (error, response, body) {
	if (error) {
		console.error('Could not initiate the download: ' + error.message);
		return;
	}

	// The request ID is read from the first entry of the response. Anything
	// else (e.g. an error payload) is reported instead of crashing the script.
	var requestID;
	try {
		requestID = JSON.parse(body)[0].id;
	} catch (parseError) {
		console.error('Unexpected response to the download request: ' + body);
		return;
	}

	// Check on status of the download request.
	checkDownloadUntilComplete ({requestID : requestID}, function (statusError) {
		if (statusError) {
			console.error('Could not check the download status: ' + statusError.message);
			return;
		}

		var resultsAPICall = 'https://' + APIToken + ':@api.datafiniti.co/v3/results/' + requestID;

		// Once the download is complete, get all the links to result files and write those to local files.
		request({url : resultsAPICall}, function(resultsError, resultsHttpResponse, resultsBody) {
			if (resultsError) {
				console.error('Could not fetch the list of result files: ' + resultsError.message);
				return;
			}

			var resultsResponse;
			try {
				resultsResponse = JSON.parse(resultsBody);
			} catch (parseError) {
				console.error('Unexpected response from ' + resultsAPICall + ': ' + resultsBody);
				return;
			}
			if (!Array.isArray(resultsResponse)) {
				console.error('Unexpected response from ' + resultsAPICall + ': ' + resultsBody);
				return;
			}

			// forEach gives each download its own fileName for the error message.
			resultsResponse.forEach(function(result, i) {
				var fileName = requestID + '_' + i + '.' + format;
				var file = fs.createWriteStream(fileName);
				request(result.url)
					.on('error', function(downloadError) {
						console.error('Could not download ' + fileName + ': ' + downloadError.message);
					})
					.pipe(file);
			});
		});
	});
});

A couple things to pay attention to in the above code:

  1. We removed &records=10.
  2. We changed download=false to download=true.

Since we've handled multiple steps of the download process in this code, we won't go into the details here, but we do recommend you familiarize yourself with those steps. Check them out in our Property Data with Web Browser and CSV guide.

6. Parse the JSON data

The download code will save one or more result files to your project folder.

📘

The JSON data will actually be a text file, instead of a single JSON object. Each line in the text file is a JSON object. We format the data this way because most programming languages won't handle parsing the entire data set as a JSON object with their standard system calls very well.

We'll need to parse the file into an array of JSON objects. We can use code similar to this to handle the parsing:

var fs = require('fs');

// Set the location of your file here. The download code in step 5 saves
// result files as <requestID>_<index>.json, so the placeholder uses that form.
var file = 'xxxx_x.json';

// A function to read in each line of the file and pass that line to func.
//
//   input - a readable stream (anything that emits 'data' and 'end' events).
//   func  - called as func(line, records) for every non-blank line; it is
//           expected to add the parsed line to the records array.
//
// When the stream ends, processData(records) is called with everything that
// func collected.
function readLines(input, func) {
	var records = [];
	var remaining = '';

	// Let the stream decode UTF-8 itself, so a multi-byte character that is
	// split across two chunks is not corrupted by converting each chunk to a
	// string separately.
	if (typeof input.setEncoding === 'function') {
		input.setEncoding('utf8');
	}

	// Blank lines (e.g. a doubled newline) carry no record; passing them on
	// would make JSON.parse throw.
	function handleLine(line) {
		if (line.trim().length > 0) {
			func(line, records);
		}
	}

	input.on('data', function(data) {
		remaining += data;
		var lines = remaining.split('\n');
		// The last piece has no terminating newline yet; keep it until the
		// next chunk (or the end of the stream) completes it.
		remaining = lines.pop();
		lines.forEach(handleLine);
	});

	input.on('end', function() {
		handleLine(remaining);
		processData(records);
	});
}

// Parses one line of the file as JSON and appends the resulting object to the
// records array. Throws whatever JSON.parse throws if the line is not valid JSON.
function func(data, records) {
	records.push(JSON.parse(data));
}

// Receives the complete array of parsed records after the whole file has been
// read. This is the place to plug in your own handling.
function processData(records) {
	// Default behaviour: print every record to the console.
	console.log(records);
}

// Open the file as a stream and feed it to readLines, which parses each line
// with func and passes the collected records to processData at the end.
var input = fs.createReadStream(file);
// Without a listener, a missing or unreadable file surfaces as an uncaught
// 'error' event; report it with the file name instead.
input.on('error', function(error) {
	console.error('Could not read ' + file + ': ' + error.message);
});
readLines(input, func);

You can edit the code in processData above to do whatever you'd like with the data, such as store the data in a database, write it out to your console, etc.