Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to correctly validate array of objects using JustinRainbow/JsonSchema

I have code that correctly validates an article returned from an endpoint that returns single articles. I'm pretty sure it's working correctly as it gives a validation error when I deliberately don't include a required field in the article.

I also have this code that tries to validate an array of articles returned from an endpoint that returns an array of articles. However, I'm pretty sure that isn't working correctly, as it always says the data is valid, even when I deliberately don't include a required field in the articles.

How do I correctly validate an array of data against the schema?

The full test code is below as a standalone runnable test. Both of the tests should fail, however only one of them does.

<?php

declare(strict_types=1);

error_reporting(E_ALL);

require_once __DIR__ . '/vendor/autoload.php';


// Return the definition of the schema, either as an array
// or a PHP object
function getSchema($asArray = false)
{
    $schemaJson = <<< 'JSON'
{
  "swagger": "2.0",
  "info": {
    "termsOfService": "http://swagger.io/terms/",
    "version": "1.0.0",
    "title": "Example api"
  },
  "paths": {
    "/articles": {
      "get": {
        "tags": [
          "article"
        ],
        "summary": "Find all articles",
        "description": "Returns a list of articles",
        "operationId": "getArticleById",
        "produces": [
          "application/json"
        ],
        "responses": {
          "200": {
            "description": "successful operation",
            "schema": {
              "type": "array",
              "items": {
                "$ref": "#/definitions/Article"
              }
            }
          }
        },
        "parameters": [
        ]
      }
    },
    "/articles/{articleId}": {
      "get": {
        "tags": [
          "article"
        ],
        "summary": "Find article by ID",
        "description": "Returns a single article",
        "operationId": "getArticleById",
        "produces": [
          "application/json"
        ],
        "parameters": [
          {
            "name": "articleId",
            "in": "path",
            "description": "ID of article to return",
            "required": true,
            "type": "integer",
            "format": "int64"
          }
        ],
        "responses": {
          "200": {
            "description": "successful operation",
            "schema": {
              "$ref": "#/definitions/Article"
            }
          }
        }
      }
    }
  },
  "definitions": {
    "Article": {
      "type": "object",
      "required": [
        "id",
        "title"
      ],
      "properties": {
        "id": {
          "type": "integer",
          "format": "int64"
        },
        "title": {
          "type": "string",
          "description": "The title for the link of the article"
        }
      }
    }
  },
  "schemes": [
    "http"
  ],
  "host": "example.com",
  "basePath": "/",
  "tags": [],
  "securityDefinitions": {
  },
  "security": [
    {
      "ApiKeyAuth": []
    }
  ]
}
JSON;

    return json_decode($schemaJson, $asArray);
}

// Extract the schema of the 200 response of an api endpoint.
function getSchemaForPath($path)
{
    $swaggerData = getSchema(true);
    if (isset($swaggerData["paths"][$path]['get']["responses"][200]['schema']) !== true) {
        echo "response not defined";
        exit(-1);
    }

    return $swaggerData["paths"][$path]['get']["responses"][200]['schema'];
}

// JsonSchema needs to know about the ID used for the top-level
// schema apparently.
function aliasSchema($prefix, $schemaForPath)
{
    $aliasedSchema = [];

    foreach ($schemaForPath as $key => $value) {
        if ($key === '$ref') {
            $aliasedSchema[$key] = $prefix . $value;
        }
        else if (is_array($value) === true) {
            $aliasedSchema[$key] = aliasSchema($prefix, $value);
        }
        else {
            $aliasedSchema[$key] = $value;
        }
    }
    return $aliasedSchema;
}


// Test the data matches the schema.
function testDataMatches($endpointData, $schemaForPath)
{
    // Setup the top level schema and get a validator from it.
    $schemaStorage = new \JsonSchema\SchemaStorage();
    $id = 'file://example';
    $swaggerClass = getSchema(false);
    $schemaStorage->addSchema($id, $swaggerClass);
    $factory = new \JsonSchema\Constraints\Factory($schemaStorage);
    $jsonValidator = new \JsonSchema\Validator($factory);

    // Alias the schema for the endpoint, so JsonSchema can work with it.
    $schemaForPath = aliasSchema($id, $schemaForPath);

    // Validate the things
    $jsonValidator->check($endpointData, (object)$schemaForPath);

    // Process the result
    if ($jsonValidator->isValid()) {
        echo "The supplied JSON validates against the schema definition: " . \json_encode($schemaForPath) . " \n";
        return;
    }

    $messages = [];
    $messages[] = "End points does not validate. Violations:\n";
    foreach ($jsonValidator->getErrors() as $error) {
        $messages[] = sprintf("[%s] %s\n", $error['property'], $error['message']);
    }

    $messages[] = "Data: " . \json_encode($endpointData, JSON_PRETTY_PRINT);

    echo implode("\n", $messages);
    echo "\n";
}



// We have two data sets to test. A list of articles.

$articleListJson = <<< JSON
[
  {
      "id": 19874
  },
  {
      "id": 19873
  }
]
JSON;
$articleListData = json_decode($articleListJson);


// A single article
$articleJson = <<< JSON
{
  "id": 19874
}
JSON;
$articleData = json_decode($articleJson);


// This passes, when it shouldn't as none of the articles have a title
testDataMatches($articleListData, getSchemaForPath("/articles"));


// This fails correctly, as it is correct for it to fail to validate, as the article doesn't have a title
testDataMatches($articleData, getSchemaForPath("/articles/{articleId}"));

The minimal composer.json is:

{
    "require": {
        "justinrainbow/json-schema": "^5.2"
    }
}
like image 463
Danack Avatar asked May 17 '18 11:05

Danack


2 Answers

Edit-2: 22nd May

I have been digging further turns out that the issue is because of your top level conversion to object

$jsonValidator->check($endpointData, (object)$schemaForPath);

You shouldn't have just done that and it would have all worked

$jsonValidator->check($endpointData, $schemaForPath);

So it doesn't seem to be a bug it was just a wrong usage. If you just remove (object) and run the code

$ php test.php
End points does not validate. Violations:

[[0].title] The property title is required

[[1].title] The property title is required

Data: [
    {
        "id": 19874
    },
    {
        "id": 19873
    }
]
End points does not validate. Violations:

[title] The property title is required

Data: {
    "id": 19874
}

Edit-1

To fix the original code you would need to update the CollectionConstraints.php

/**
 * Validates the items
 *
 * @param array            $value
 * @param \stdClass        $schema
 * @param JsonPointer|null $path
 * @param string           $i
 */
protected function validateItems(&$value, $schema = null, JsonPointer $path = null, $i = null)
{
    if (is_array($schema->items) && array_key_exists('$ref', $schema->items)) {
        $schema->items = $this->factory->getSchemaStorage()->resolveRefSchema((object)$schema->items);
        var_dump($schema->items);
    };

    if (is_object($schema->items)) {

This will handle your use case for sure but if you don't prefer changing code from the dependency then use my original answer

Original Answer

The library has a bug/limitation that in src/JsonSchema/Constraints/CollectionConstraint.php they don't resolve a $ref variable as such. If I updated your code like below

// Alias the schema for the endpoint, so JsonSchema can work with it.
$schemaForPath = aliasSchema($id, $schemaForPath);

if (array_key_exists('items', $schemaForPath))
{
  $schemaForPath['items'] = $factory->getSchemaStorage()->resolveRefSchema((object)$schemaForPath['items']);
}
// Validate the things
$jsonValidator->check($endpointData, (object)$schemaForPath);

and run it again, I get the exceptions needed

$ php test2.php
End points does not validate. Violations:

[[0].title] The property title is required

[[1].title] The property title is required

Data: [
    {
        "id": 19874
    },
    {
        "id": 19873
    }
]
End points does not validate. Violations:

[title] The property title is required

Data: {
    "id": 19874
}

You either need to fix the CollectionConstraint.php or open an issue with developer of the repo. Or else manually replace your $ref in the whole schema, like had shown above. My code will resolve the issue specific to your schema, but fixing any other schema should not be a big issue

Issue fixed

like image 142
Tarun Lalwani Avatar answered Nov 18 '22 23:11

Tarun Lalwani


EDIT: Important thing here is that provided schema document is instance of Swagger Schema, which employs extended subset of JSON Schema to define some cases of request and response. Swagger 2.0 Schema itself can be validated by its JSON Schema, but it can not act as a JSON Schema for API Response structure directly.

In case entity schema is compatible with standard JSON Schema you can perform validation with general purpose validator, but you have to provide all relevant definitions, it can be easy when you have absolute references, but more complicated for local (relative) references that start with #/. IIRC they must be defined in the local schema.


The problem here is that you are trying to use schema references detached from resolution scope. I've added id to make references absolute, therefore not requiring being in scope.

"$ref": "http://example.com/my-schema#/definitions/Article"

The code below works well.

<?php

require_once __DIR__ . '/vendor/autoload.php';

$swaggerSchemaData = json_decode(<<<'JSON'
{
  "id": "http://example.com/my-schema",
  "swagger": "2.0",
  "info": {
    "termsOfService": "http://swagger.io/terms/",
    "version": "1.0.0",
    "title": "Example api"
  },
  "paths": {
    "/articles": {
      "get": {
        "tags": [
          "article"
        ],
        "summary": "Find all articles",
        "description": "Returns a list of articles",
        "operationId": "getArticleById",
        "produces": [
          "application/json"
        ],
        "responses": {
          "200": {
            "description": "successful operation",
            "schema": {
              "type": "array",
              "items": {
                "$ref": "http://example.com/my-schema#/definitions/Article"
              }
            }
          }
        },
        "parameters": [
        ]
      }
    },
    "/articles/{articleId}": {
      "get": {
        "tags": [
          "article"
        ],
        "summary": "Find article by ID",
        "description": "Returns a single article",
        "operationId": "getArticleById",
        "produces": [
          "application/json"
        ],
        "parameters": [
          {
            "name": "articleId",
            "in": "path",
            "description": "ID of article to return",
            "required": true,
            "type": "integer",
            "format": "int64"
          }
        ],
        "responses": {
          "200": {
            "description": "successful operation",
            "schema": {
              "$ref": "http://example.com/my-schema#/definitions/Article"
            }
          }
        }
      }
    }
  },
  "definitions": {
    "Article": {
      "type": "object",
      "required": [
        "id",
        "title"
      ],
      "properties": {
        "id": {
          "type": "integer",
          "format": "int64"
        },
        "title": {
          "type": "string",
          "description": "The title for the link of the article"
        }
      }
    }
  },
  "schemes": [
    "http"
  ],
  "host": "example.com",
  "basePath": "/",
  "tags": [],
  "securityDefinitions": {
  },
  "security": [
    {
      "ApiKeyAuth": []
    }
  ]
}
JSON
);



$schemaStorage = new \JsonSchema\SchemaStorage();
$schemaStorage->addSchema('http://example.com/my-schema', $swaggerSchemaData);
$factory = new \JsonSchema\Constraints\Factory($schemaStorage);
$validator = new \JsonSchema\Validator($factory);

$schemaData = $swaggerSchemaData->paths->{"/articles"}->get->responses->{"200"}->schema;

$data = json_decode('[{"id":1},{"id":2,"title":"Title2"}]');
$validator->validate($data, $schemaData);
var_dump($validator->isValid()); // bool(false)
$data = json_decode('[{"id":1,"title":"Title1"},{"id":2,"title":"Title2"}]');
$validator->validate($data, $schemaData);
var_dump($validator->isValid()); // bool(true)
like image 3
vearutop Avatar answered Nov 18 '22 21:11

vearutop