From f2eda57c379ae16083cd1d6bd1aa0b2b6100e716 Mon Sep 17 00:00:00 2001 From: "Jana E. Beck" Date: Sat, 12 Jul 2014 00:39:10 -0700 Subject: [PATCH 01/12] add Python script for validating JSON file against schema, including pip requirements file --- requirements.txt | 1 + schema/validate.py | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 requirements.txt create mode 100755 schema/validate.py diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..b2ddf72 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +jsonschema==2.3.0 diff --git a/schema/validate.py b/schema/validate.py new file mode 100755 index 0000000..5490081 --- /dev/null +++ b/schema/validate.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# usage: validate.py [-h] [-s SCHEMA] [-d DATA] +# +# Validate a JSON data file against a specified JSON Schema. +# +# optional arguments: +# -h, --help show this help message and exit +# -s SCHEMA, --schema SCHEMA +# name of JSON Schema file +# -d DATA, --data DATA name of JSON data file +# +# NB: when validation fails, output is much easier to read if you're using Python 3.* +# but in general this works in Python 2 as well + +from __future__ import print_function + +import json +import jsonschema +import argparse + +def main(): + parser = argparse.ArgumentParser(description='Validate a JSON data file against a specified JSON Schema.') + parser.add_argument('-s', '--schema', action='store', dest='schema', help='name of JSON Schema file') + parser.add_argument('-d', '--data', action='store', dest='data', help='name of JSON data file') + args = parser.parse_args() + + if not args.data: + print() + print("You need to pass me a data file to validate, teapot. >:(") + print() + exit() + if not args.schema: + print() + print("You need to pass me a schema to use for validation, teapot. 
>:(") + print() + exit() + + data = json.load(open(args.data, 'rU')) + + schema = json.load(open(args.schema, 'rU')) + + jsonschema.validate(data, schema) + +if __name__ == '__main__': + main() \ No newline at end of file From dc0dd9cd9be5b9a617c687035148b1664ba1efbd Mon Sep 17 00:00:00 2001 From: "Jana E. Beck" Date: Sat, 12 Jul 2014 00:39:34 -0700 Subject: [PATCH 02/12] draft schema through Section C of EEO report form --- schema/eeo-schema.json | 67 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 schema/eeo-schema.json diff --git a/schema/eeo-schema.json b/schema/eeo-schema.json new file mode 100644 index 0000000..13df0e8 --- /dev/null +++ b/schema/eeo-schema.json @@ -0,0 +1,67 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "opendiversitydata", + "description": "JSON schema for U.S. EEO-1 survey data", + "type": "object", + "properties": { + "meta": { + "type": "object", + "properties": { + "company": { + "type": "string" + } + }, + "required": ["company"] + }, + "section_a": { + "type": "object", + "properties": { + "reporting_type": { + "type": "string", + "enum": [ + "Single-establishement Employer Report", + "Consolidated Report", + "Headquarters Unit Report", + "Individual Establishment Report", + "Special Report" + ] + } + }, + "required": ["reporting_type"] + }, + "section_b": { + "type": "object", + "properties": { + "parent_company": { + "type": "string" + }, + "address": { + "type": "object", + "properties": { + "street_address": { "type": "string" }, + "city": { "type": "string" }, + "state": { + "type": "string", + "enum": ["AL", "AK", "AS", "AZ", "AR", "CA", "CO", "CT", "DE", "DC", "FM", "FL", "GA", "GU", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MH", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "MP", "OH", "OK", "OR", "PW", "PA", "PR", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VI", "VA", "WA", "WV", "WI", "WY"] + }, + 
"zip_code": { "type": "string" } + }, + "required": ["street_address", "city", "state", "zip_code"] + }, + "previous_year_report": { + "type": "boolean" + } + }, + "required": ["parent_company", "address", "previous_year_report"] + }, + "section_c": { + "type": "object", + "properties": { + "at_least_100": { "type": "boolean" }, + "affiliated": { "type": "boolean" }, + "DUNS": { "type": "boolean" } + }, + "required": ["at_least_100", "affiliated", "DUNS"] + } + } +} \ No newline at end of file From e4361636ad18360f1fb459ccd5e9de273517b9b3 Mon Sep 17 00:00:00 2001 From: "Jana E. Beck" Date: Sat, 12 Jul 2014 00:40:01 -0700 Subject: [PATCH 03/12] example transcribed JSON file for Dell, Inc. through Section C of consolidated report --- json/dell.json | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 json/dell.json diff --git a/json/dell.json b/json/dell.json new file mode 100644 index 0000000..2082d43 --- /dev/null +++ b/json/dell.json @@ -0,0 +1,19 @@ +{ + "meta": { "company": "Dell" }, + "section_a": { "reporting_type": "Consolidated Report" }, + "section_b": { + "parent_company": "DELL INC - (DELL USA) - ROUND ROCK", + "address": { + "street_address": "401 DELL WAY", + "city": "ROUND ROCK", + "state": "TX", + "zip_code": "78682" + }, + "previous_year_report": true + }, + "section_c": { + "at_least_100": true, + "affiliated": false, + "DUNS": true + } +} \ No newline at end of file From 6b347045919433e29b79179059140d8b8ec896a0 Mon Sep 17 00:00:00 2001 From: "Jana E. 
Beck" Date: Thu, 17 Jul 2014 20:48:00 -0700 Subject: [PATCH 04/12] draft schema through Section D of EEO report form --- schema/eeo-schema.json | 57 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/schema/eeo-schema.json b/schema/eeo-schema.json index 13df0e8..985ce1e 100644 --- a/schema/eeo-schema.json +++ b/schema/eeo-schema.json @@ -62,6 +62,61 @@ "DUNS": { "type": "boolean" } }, "required": ["at_least_100", "affiliated", "DUNS"] + }, + "section_d": { + "type": "array", + "items": { + "type": "object", + "properties": { + "job_category": { + "type": "string", + "enum": [ + "Executive/Senior Level Officials and Managers", + "First/Mid Level Officials and Managers", + "Professionals", + "Technicians", + "Sales Workers", + "Administrative Support Workers", + "Craft Workers", + "Operatives", + "Laborers and Helpers", + "Service Workers" + ] + }, + "data": { + "type": "array", + "items": { + "properties": { + "race": { + "type": "string", + "enum": [ + "Hispanic or Latino", + "White", + "Black or African American", + "Native Hawaiian or Other Pacific Islander", + "Asian", + "American Indian or Alaska Native", + "Two or More Races" + ] + }, + "men": { + "type": "integer", + "minimum": 0 + }, + "women": { + "type": "integer", + "minimum": 0 + } + }, + "required": ["race", "men", "women"] + }, + "minItems": 7 + } + }, + "required": ["job_category", "data"] + }, + "minItems": 10 } - } + }, + "required": ["meta", "section_d"] } \ No newline at end of file From a86a82e93722fc06aed96c78eca83de6b50d30f1 Mon Sep 17 00:00:00 2001 From: "Jana E. Beck" Date: Thu, 17 Jul 2014 20:48:45 -0700 Subject: [PATCH 05/12] example transcribed JSON file for Dell, Inc. 
through Section D of consolidated report --- json/dell.json | 408 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 406 insertions(+), 2 deletions(-) diff --git a/json/dell.json b/json/dell.json index 2082d43..9390837 100644 --- a/json/dell.json +++ b/json/dell.json @@ -1,5 +1,7 @@ { - "meta": { "company": "Dell" }, + "meta": { + "company": "Dell" + }, "section_a": { "reporting_type": "Consolidated Report" }, "section_b": { "parent_company": "DELL INC - (DELL USA) - ROUND ROCK", @@ -15,5 +17,407 @@ "at_least_100": true, "affiliated": false, "DUNS": true - } + }, + "section_d": [ + { + "job_category": "Executive/Senior Level Officials and Managers", + "data": [ + { + "race": "Hispanic or Latino", + "men": 0, + "women": 0 + }, + { + "race": "White", + "men": 98, + "women": 28 + }, + { + "race": "Black or African American", + "men": 1, + "women": 0 + }, + { + "race": "Native Hawaiian or Other Pacific Islander", + "men": 0, + "women": 0 + }, + { + "race": "Asian", + "men": 8, + "women": 2 + }, + { + "race": "American Indian or Alaska Native", + "men": 0, + "women": 0 + }, + { + "race": "Two or More Races", + "men": 0, + "women": 0 + } + ] + }, + { + "job_category": "First/Mid Level Officials and Managers", + "data": [ + { + "race": "Hispanic or Latino", + "men": 170, + "women": 90 + }, + { + "race": "White", + "men": 2604, + "women": 975 + }, + { + "race": "Black or African American", + "men": 132, + "women": 78 + }, + { + "race": "Native Hawaiian or Other Pacific Islander", + "men": 1, + "women": 1 + }, + { + "race": "Asian", + "men": 244, + "women": 78 + }, + { + "race": "American Indian or Alaska Native", + "men": 8, + "women": 6 + }, + { + "race": "Two or More Races", + "men": 16, + "women": 3 + } + ] + }, + { + "job_category": "Professionals", + "data": [ + { + "race": "Hispanic or Latino", + "men": 904, + "women": 482 + }, + { + "race": "White", + "men": 8164, + "women": 3562 + }, + { + "race": "Black or African American", + "men": 683, + "women": 
434 + }, + { + "race": "Native Hawaiian or Other Pacific Islander", + "men": 17, + "women": 7 + }, + { + "race": "Asian", + "men": 1884, + "women": 780 + }, + { + "race": "American Indian or Alaska Native", + "men": 51, + "women": 19 + }, + { + "race": "Two or More Races", + "men": 98, + "women": 41 + } + ] + }, + { + "job_category": "Technicians", + "data": [ + { + "race": "Hispanic or Latino", + "men": 311, + "women": 57 + }, + { + "race": "White", + "men": 2186, + "women": 303 + }, + { + "race": "Black or African American", + "men": 341, + "women": 104 + }, + { + "race": "Native Hawaiian or Other Pacific Islander", + "men": 7, + "women": 0 + }, + { + "race": "Asian", + "men": 201, + "women": 29 + }, + { + "race": "American Indian or Alaska Native", + "men": 33, + "women": 10 + }, + { + "race": "Two or More Races", + "men": 49, + "women": 7 + } + ] + }, + { + "job_category": "Sales Workers", + "data": [ + { + "race": "Hispanic or Latino", + "men": 287, + "women": 80 + }, + { + "race": "White", + "men": 3318, + "women": 964 + }, + { + "race": "Black or African American", + "men": 226, + "women": 71 + }, + { + "race": "Native Hawaiian or Other Pacific Islander", + "men": 6, + "women": 1 + }, + { + "race": "Asian", + "men": 145, + "women": 39 + }, + { + "race": "American Indian or Alaska Native", + "men": 24, + "women": 9 + }, + { + "race": "Two or More Races", + "men": 47, + "women": 17 + } + ] + }, + { + "job_category": "Administrative Support Workers", + "data": [ + { + "race": "Hispanic or Latino", + "men": 232, + "women": 470 + }, + { + "race": "White", + "men": 720, + "women": 2203 + }, + { + "race": "Black or African American", + "men": 192, + "women": 609 + }, + { + "race": "Native Hawaiian or Other Pacific Islander", + "men": 2, + "women": 17 + }, + { + "race": "Asian", + "men": 81, + "women": 180 + }, + { + "race": "American Indian or Alaska Native", + "men": 11, + "women": 36 + }, + { + "race": "Two or More Races", + "men": 9, + "women": 31 + } + ] + }, + 
{ + "job_category": "Craft Workers", + "data": [ + { + "race": "Hispanic or Latino", + "men": 1, + "women": 0 + }, + { + "race": "White", + "men": 0, + "women": 0 + }, + { + "race": "Black or African American", + "men": 0, + "women": 1 + }, + { + "race": "Native Hawaiian or Other Pacific Islander", + "men": 0, + "women": 0 + }, + { + "race": "Asian", + "men": 0, + "women": 0 + }, + { + "race": "American Indian or Alaska Native", + "men": 0, + "women": 0 + }, + { + "race": "Two or More Races", + "men": 0, + "women": 0 + } + ] + }, + { + "job_category": "Operatives", + "data": [ + { + "race": "Hispanic or Latino", + "men": 75, + "women": 40 + }, + { + "race": "White", + "men": 104, + "women": 67 + }, + { + "race": "Black or African American", + "men": 79, + "women": 88 + }, + { + "race": "Native Hawaiian or Other Pacific Islander", + "men": 0, + "women": 0 + }, + { + "race": "Asian", + "men": 28, + "women": 15 + }, + { + "race": "American Indian or Alaska Native", + "men": 1, + "women": 0 + }, + { + "race": "Two or More Races", + "men": 0, + "women": 0 + } + ] + }, + { + "job_category": "Laborers and Helpers", + "data": [ + { + "race": "Hispanic or Latino", + "men": 0, + "women": 0 + }, + { + "race": "White", + "men": 0, + "women": 0 + }, + { + "race": "Black or African American", + "men": 0, + "women": 0 + }, + { + "race": "Native Hawaiian or Other Pacific Islander", + "men": 0, + "women": 0 + }, + { + "race": "Asian", + "men": 0, + "women": 0 + }, + { + "race": "American Indian or Alaska Native", + "men": 0, + "women": 0 + }, + { + "race": "Two or More Races", + "men": 0, + "women": 0 + } + ] + }, + { + "job_category": "Service Workers", + "data": [ + { + "race": "Hispanic or Latino", + "men": 6, + "women": 9 + }, + { + "race": "White", + "men": 45, + "women": 65 + }, + { + "race": "Black or African American", + "men": 7, + "women": 15 + }, + { + "race": "Native Hawaiian or Other Pacific Islander", + "men": 0, + "women": 0 + }, + { + "race": "Asian", + "men": 2, + 
"women": 1 + }, + { + "race": "American Indian or Alaska Native", + "men": 0, + "women": 0 + }, + { + "race": "Two or More Races", + "men": 1, + "women": 0 + } + ] + } + ] } \ No newline at end of file From 601c6a9030fc107be12a79db91e1151382da0398 Mon Sep 17 00:00:00 2001 From: "Jana E. Beck" Date: Thu, 17 Jul 2014 21:48:24 -0700 Subject: [PATCH 06/12] add year field to 'meta' object --- json/dell.json | 3 ++- schema/eeo-schema.json | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/json/dell.json b/json/dell.json index 9390837..43036fb 100644 --- a/json/dell.json +++ b/json/dell.json @@ -1,6 +1,7 @@ { "meta": { - "company": "Dell" + "company": "Dell", + "year": 2010 }, "section_a": { "reporting_type": "Consolidated Report" }, "section_b": { diff --git a/schema/eeo-schema.json b/schema/eeo-schema.json index 985ce1e..74c4877 100644 --- a/schema/eeo-schema.json +++ b/schema/eeo-schema.json @@ -9,9 +9,14 @@ "properties": { "company": { "type": "string" + }, + "year": { + "type": "integer", + "minimum": 1965, + "maximum": 2014 } }, - "required": ["company"] + "required": ["company", "year"] }, "section_a": { "type": "object", From 9d536c898a73ed31d20b3396801085426d05c5bf Mon Sep 17 00:00:00 2001 From: "Jana E. Beck" Date: Thu, 17 Jul 2014 21:48:49 -0700 Subject: [PATCH 07/12] add simple Python script to aggregate totals (helpful for checking manual transcription) --- toolkit/check_totals.py | 101 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100755 toolkit/check_totals.py diff --git a/toolkit/check_totals.py b/toolkit/check_totals.py new file mode 100755 index 0000000..0b5f997 --- /dev/null +++ b/toolkit/check_totals.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# usage: check_totals.py [-h] json +# +# Print the aggregates from an Open Diversity Data JSON file to check the totals +# against the PDF. 
+# +# positional arguments: +# json Open Diversity Data JSON file to check totals on +# +# optional arguments: +# -h, --help show this help message and exit + +from __future__ import print_function + +import json +import argparse + +def main(): + parser = argparse.ArgumentParser(description='Print the aggregates from an Open Diversity Data JSON file to check the totals against the PDF.') + parser.add_argument('json', action='store', help='Open Diversity Data JSON file to check totals on') + args = parser.parse_args() + + if not args.json: + print() + print("Sorry, you need to provide some data for me to aggregate :(") + print() + exit() + + jsondata = json.load(open(args.json, 'rU'))['section_d'] + + job_categories = [ + "Executive/Senior Level Officials and Managers", + "First/Mid Level Officials and Managers", + "Professionals", + "Technicians", + "Sales Workers", + "Administrative Support Workers", + "Craft Workers", + "Operatives", + "Laborers and Helpers", + "Service Workers" + ] + + races = [ + "Hispanic or Latino", + "White", + "Black or African American", + "Native Hawaiian or Other Pacific Islander", + "Asian", + "American Indian or Alaska Native", + "Two or More Races" + ] + + overall_totals = {} + race_totals = {} + + for cat in job_categories: + data = None + for obj in jsondata: + if obj['job_category'] == cat: + data = obj['data'] + if data: + total = 0 + for datum in data: + total += datum['men'] + datum['women'] + overall_totals[cat] = total + + for race in races: + race_data = None + for inner_obj in data: + if inner_obj['race'] == race: + race_data = inner_obj + try: + current = race_totals[race] + current['men'] += race_data['men'] + current['women'] += race_data['women'] + except KeyError: + race_totals[race] = {'men': race_data['men'], 'women': race_data['women']} + + + print() + print("### Overall Totals ###") + for cat in job_categories: + print("%s: %d" %(cat, overall_totals[cat])) + print("Absolute total: %d" %sum([overall_totals[cat] for 
cat in job_categories])) + print() + print("### Totals by Race and Gender ###") + hispanic = races.pop(0) + print(hispanic, ': Men %d, Women %d' %(race_totals[hispanic]['men'], race_totals[hispanic]['women'])) + print("Non-Hispanic or Latino, Men:") + for race in races: + print("\t%s: %d" %(race, race_totals[race]['men'])) + print("Non-Hispanic or Latino, Women:") + for race in races: + print("\t%s: %d" %(race, race_totals[race]['women'])) + print() + +if __name__ == '__main__': + main() \ No newline at end of file From 5b66430bcf1f51b4708ddc53255425b738589893 Mon Sep 17 00:00:00 2001 From: "Jana E. Beck" Date: Sat, 2 Aug 2014 15:58:48 -0700 Subject: [PATCH 08/12] add newline at EOF --- json/dell.json | 2 +- schema/validate.py | 2 +- toolkit/check_totals.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/json/dell.json b/json/dell.json index 43036fb..4e5eb6e 100644 --- a/json/dell.json +++ b/json/dell.json @@ -421,4 +421,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/schema/validate.py b/schema/validate.py index 5490081..30c5d27 100755 --- a/schema/validate.py +++ b/schema/validate.py @@ -44,4 +44,4 @@ def main(): jsonschema.validate(data, schema) if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/toolkit/check_totals.py b/toolkit/check_totals.py index 0b5f997..de2eba5 100755 --- a/toolkit/check_totals.py +++ b/toolkit/check_totals.py @@ -98,4 +98,4 @@ def main(): print() if __name__ == '__main__': - main() \ No newline at end of file + main() From aac8eb384d7e670f91e02baa9ec82ce3aaae83ec Mon Sep 17 00:00:00 2001 From: "Jana E. 
Beck" Date: Sat, 2 Aug 2014 15:59:12 -0700 Subject: [PATCH 09/12] add newline at EOF; add version --- schema/eeo-schema.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/schema/eeo-schema.json b/schema/eeo-schema.json index 74c4877..8794a26 100644 --- a/schema/eeo-schema.json +++ b/schema/eeo-schema.json @@ -1,6 +1,7 @@ { "$schema": "http://json-schema.org/draft-04/schema#", "title": "opendiversitydata", + "version": "0.0.1", "description": "JSON schema for U.S. EEO-1 survey data", "type": "object", "properties": { @@ -124,4 +125,4 @@ } }, "required": ["meta", "section_d"] -} \ No newline at end of file +} From 7fa72471f6082a121033f171d9958d0bf42a93f0 Mon Sep 17 00:00:00 2001 From: "Jana E. Beck" Date: Sat, 2 Aug 2014 16:01:38 -0700 Subject: [PATCH 10/12] start on schema README --- schema/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 schema/README.md diff --git a/schema/README.md b/schema/README.md new file mode 100644 index 0000000..ff50010 --- /dev/null +++ b/schema/README.md @@ -0,0 +1,10 @@ +OpenDiversityData.org JSON Schema +================= + +This directory contains a [JSON Schema](http://json-schema.org/ 'JSON Schema Homepage') for the U.S. government's [EEO-1 Survey](http://www.eeoc.gov/employers/eeo1survey/ 'EEO-1 Survey Homepage'). + +Unfortunately, JSON Schema syntax does not allow comments. Some explanation of choices made in the schema are contained instead within this document. + +At the highest level, there are two required properties of the JSON object composing the data for a single EEO-1 report: the `meta` property and the `section_d` property. 
Other than the `meta` property, every other property - i.e., those beginning with `section_` describes a section of an EEO-1 report, as represented in the [sample EEO-1 form](http://www.eeoc.gov/employers/eeo1survey/upload/eeo1-2.pdf 'Sample EEO-1 Reporting Form') linked in [the instructions for EEO-1 reporting](http://www.eeoc.gov/employers/eeo1survey/2007instructions.cfm 'Instructions for EEO-1 Reporting') and as represented in the PDFs that many companies release as the public version of their EEO-1 report, as for example [Dell's](http://money.cnn.com/technology/storysupplement/diversity-data/dell-2010-EEO-1.html 'Dell's 2010 EEO-1 Report') (here embedded in the webpage as an image, which is unfortunately quite common). + +Sections A, B, and C of the standard EEO-1 report are not required in the schema because they don't contain the data of interest, and it's possible that some presentations of publicly released EEO-1 data won't include them. From fa47798eae5d65c66b6ce00e66ea8e9ebff9c819 Mon Sep 17 00:00:00 2001 From: "Jana E. Beck" Date: Sat, 2 Aug 2014 16:36:32 -0700 Subject: [PATCH 11/12] add maxItems and uniqueItems to arrays --- schema/eeo-schema.json | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/schema/eeo-schema.json b/schema/eeo-schema.json index 8794a26..38fd805 100644 --- a/schema/eeo-schema.json +++ b/schema/eeo-schema.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-04/schema#", "title": "opendiversitydata", - "version": "0.0.1", + "version": "0.0.2", "description": "JSON schema for U.S. 
EEO-1 survey data", "type": "object", "properties": { @@ -116,12 +116,16 @@ }, "required": ["race", "men", "women"] }, - "minItems": 7 + "minItems": 7, + "maxItems": 7, + "uniqueItems": true } }, "required": ["job_category", "data"] }, - "minItems": 10 + "minItems": 10, + "maxItems": 10, + "uniqueItems": true } }, "required": ["meta", "section_d"] From 67009dce6029c7b40bd5075a946f9e94d25e5333 Mon Sep 17 00:00:00 2001 From: "Jana E. Beck" Date: Sat, 2 Aug 2014 16:36:59 -0700 Subject: [PATCH 12/12] finish v1 of README --- schema/README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/schema/README.md b/schema/README.md index ff50010..6f41ecd 100644 --- a/schema/README.md +++ b/schema/README.md @@ -8,3 +8,9 @@ Unfortunately, JSON Schema syntax does not allow comments. Some explanation of c At the highest level, there are two required properties of the JSON object composing the data for a single EEO-1 report: the `meta` property and the `section_d` property. Other than the `meta` property, every other property - i.e., those beginning with `section_` describes a section of an EEO-1 report, as represented in the [sample EEO-1 form](http://www.eeoc.gov/employers/eeo1survey/upload/eeo1-2.pdf 'Sample EEO-1 Reporting Form') linked in [the instructions for EEO-1 reporting](http://www.eeoc.gov/employers/eeo1survey/2007instructions.cfm 'Instructions for EEO-1 Reporting') and as represented in the PDFs that many companies release as the public version of their EEO-1 report, as for example [Dell's](http://money.cnn.com/technology/storysupplement/diversity-data/dell-2010-EEO-1.html 'Dell's 2010 EEO-1 Report') (here embedded in the webpage as an image, which is unfortunately quite common). Sections A, B, and C of the standard EEO-1 report are not required in the schema because they don't contain the data of interest, and it's possible that some presentations of publicly released EEO-1 data won't include them. 
+ +The `section_d` property of the report object contains the data of interest. It describes an array of exactly ten objects because there are ten job categories across which companies must report diversity statistics. The categories themselves are enumerated as possible values of the `job_category` property of each array object. The `data` for each object in the array is itself an array, where each array item is an object containing three fields: `race`, which is a string value (limited to the seven categories defined for the EEO-1 Survey); and `men` and `women`, which are integer values with a minimum of zero. The array value of `data` must contain exactly seven items, since there are seven possible categories of `race`. + +### Caveats + +In its present form, the schema requires ten objects because there are ten job categories in an EEO-1 report and seven objects within each job category's data array because there are seven races for reporting under EEO-1. In both of these data arrays, each object must be unique within the array, but it would still be possible to submit ill-formed data if a job category label is repeated (so that one of the other nine is missing) while the data for the duplicate category is different. A much more verbose schema would be required to guard against this case, so we are opting to take on the risk of such ill-formed data (at least for now).