あるプロジェクトで、サードパーティシステムからデータをインポートする処理を、マイクロサービスアーキテクチャへ移行する必要がありました。ツールにはApache NiFiを選びました。最初の実験対象として、連邦税務局の法人統一国家登録簿(USRLE)のインポートを取り上げました。
前の記事では、AVROスキーマを使用してXMLをJSONに変換する方法を説明しました。
この記事では、JOLT仕様を使用してJSONを変換する方法について説明します。
使用したプロセッサとコントローラ
JSONを断片に分割する
前の段階で得られたFlowFileには、複数の組織のUSRLE抄本を配列として持つJSONが格納されています。まず、1つのFlowFileに抄本が1件だけ含まれるように、これを分割します。
このために、SplitJsonプロセッサを使用します。設定では、JSONを分割するためのJsonPath式を指定する必要があります。今回は $.* を指定します。
JsonPathのドキュメントはこちら
JSON変換
得られたJSONは、後続の保存や処理に使うには不必要に複雑な構造をしています。そこで、住所と氏名をそれぞれ1つの文字列にまとめ、いくつかの要素を上位の階層へ移動します。
変換前のJSON
{
"reportDate" : "2020-05-20",
"ogrn" : "1234567890123",
"ogrnDate" : "2002-12-30",
"inn" : "1234567890",
"kpp" : "123456789",
"opfCode" : "12300",
"opfName" : " ",
"name" : {
"fullName" : " ",
"shortName" : ""
},
"address" : {
"addressRF" : {
"region" : {
"type" : "",
"name" : ""
},
"district" : null,
"town" : {
"type" : "",
"name" : ""
},
"settlement" : null,
"street" : {
"type" : "",
"name" : ""
},
"index" : "143500",
"regionCode" : "50",
"kladr" : "500000570000011",
"house" : null,
"building" : null,
"apartment" : null
}
},
"termination" : null,
"capital" : null,
"manageOrg" : null,
"director" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012"
},
"position" : {
"ogrnip" : null,
"typeCode" : "02",
"typeName" : " ",
"name" : " "
},
"disqualification" : null
} ],
"founders" : {
"founderULRF" : null,
"founderULForeign" : null,
"founderFL" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012"
},
"capitalPart" : {
"nominal" : 20000.0,
"size" : {
"percent" : 50.0,
"decimalPart" : null,
"simplePart" : null
}
}
}, {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789021"
},
"capitalPart" : {
"nominal" : 20000.0,
"size" : {
"percent" : 50.0,
"decimalPart" : null,
"simplePart" : null
}
}
} ],
"founderGov" : null,
"founderPIF" : null
},
"capitalPart" : null,
"holderReestrAO" : null,
"okved" : {
"mainOkved" : {
"code" : "47.11",
"name" : " , , "
},
"addOkved" : null
}
}
JSONの変換には、JoltTransformJSONプロセッサを使用します。
設定:
Jolt Transformation DSL - 変換の種類。Chain - 複数の変換を順に適用するチェーン。
Jolt Specification - 変換の仕様。
JOLT
変換には shift と modify-overwrite-beta の2種類の操作を使用します。modify-overwrite-beta の詳細はソースコード Modifier.java で確認できます。仕様の動作は jolt-demo.appspot.com で試すことができます。
JOLT
[
{
"operation": "modify-overwrite-beta",
"spec": {
"address": {
"addressRF": {
"region": "=concat(@(type), ' ', @(name))",
"district": "=concat(@(type), ' ', @(name))",
"town": "=concat(@(type), ' ', @(name))",
"settlement": "=concat(@(type), ' ', @(name))",
"street": "=concat(@(type), ' ', @(name))"
}
},
"director": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
},
"founders": {
"founderFL": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
},
"founderGov": {
"*": {
"founderImplFL": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
}
}
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"address": {
"addressRF": {
"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
"fias": null
}
}
}
},
{
"operation": "shift",
"spec": {
"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
"name": {
"*": "&"
},
"address": {
"addressRF": {
"kladr|regionCode|value|fias": "&2.&"
}
},
"termination": {
"method": {
"*": "&2.&"
},
"*": "&1.&"
},
"capital": "&",
"manageOrg": {
"egrulData": {
"*": "&2.&"
}
},
"director": {
"*": {
"fl": {
"fio|inn": "&3[&2].&"
},
"position": {
"name": "&3[&2].&1",
"*": "&3[&2].&"
},
"disqualification": "&2[&1].&"
}
},
"founders": {
"founderULRF|founderULForeign": {
"*": {
"egrulData|foreignReg": {
"*": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderFL": {
"*": {
"fl": {
"fio|inn": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderGov": {
"*": {
"govOrg": {
"*": "&4.&3[&2].&"
},
"capitalPart": "&3.&2[&1].&",
"founderImplUL": {
"egrulData": {
"*": "&5.&4[&3].&2.&"
}
},
"founderImplFL": {
"fl": {
"fio|inn": "&5.&4[&3].&2.&"
}
}
}
},
"founderPIF": {
"*": {
"PIFName": {
"name": "&4.&3[&2].&1"
},
"manageOrg": {
"egrulData": {
"*": "&5.&4[&3].&"
}
},
"capitalPart": "&3.&2[&1].&"
}
}
},
"capitalPart": "&",
"holderReestrAO": {
"egrulData": {
"*": "&2.&"
}
},
"okved": "&"
}
}
]
modify-overwrite-beta , .. .
仕様は3つの操作から成ります。最初の2つが modify-overwrite-beta、3つ目が shift です。各操作は、操作名を表す operation と、その内容を表す spec で記述します。
, .
modify-overwrite-beta
. , . , .
.
最初の操作(1つ目の modify-overwrite-beta)では、region、district、town、settlement、street の各要素について、type と name を1つの文字列に連結します。これには "=concat(@(type), ' ', @(name))" という式を使います。
"address": {
"addressRF": {
"region": "=concat(@(type), ' ', @(name))",
"district": "=concat(@(type), ' ', @(name))",
"town": "=concat(@(type), ' ', @(name))",
"settlement": "=concat(@(type), ' ', @(name))",
"street": "=concat(@(type), ' ', @(name))"
}
}
たとえば "region": "=concat(@(type), ' ', @(name))" の場合を考えます。
対象は region 要素で、その子要素である type と name を使います。現在位置が region なので、@(type) は region 直下の type の値を指します。
連結した結果の文字列で、region の値が上書きされます。
2つ目の操作(2番目の modify-overwrite-beta)では、住所全体を1つの文字列にまとめた value 要素を追加します。
"address": {
"addressRF": {
"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
"fias": null
}
}
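ここで @(1,index) は、1階層上に移動してから index 要素の値を取得することを意味します。
つまり、value は addressRF の中にあるので、addressRF まで上がってから、その中の index を参照します。
式の先頭の = は、関数の呼び出しであることを表します。
concat は引数を連結する関数で、
@(1,index) などがその引数です。
fias には null を設定しているだけです。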
残りの整形は、後続の shift 操作で行います。
配列の各要素には "*" でマッチさせます。
director は配列なので、以下のように "*" を挟むことで、各要素の fl に fio を追加できます。
"director": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
}
shift
shift を適用する前の時点で、JSONは次のようになっています。
JSON
{
"reportDate" : "2020-05-20",
"ogrn" : "1234567890123",
"ogrnDate" : "2002-12-30",
"inn" : "1234567890",
"kpp" : "123456789",
"opfCode" : "12300",
"opfName" : " ",
"name" : {
"fullName" : " ",
"shortName" : ""
},
"address" : {
"addressRF" : {
"region" : " ",
"district" : " ",
"town" : " ",
"settlement" : " ",
"street" : " ",
"index" : "143500",
"regionCode" : "50",
"kladr" : "500000570000011",
"house" : null,
"building" : null,
"apartment" : null,
"value" : "143500, , , , , , , , ",
"fias" : null
}
},
"termination" : null,
"capital" : null,
"manageOrg" : null,
"director" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012",
"fio" : " "
},
"position" : {
"ogrnip" : null,
"typeCode" : "02",
"typeName" : " ",
"name" : " "
},
"disqualification" : null
} ],
"founders" : {
"founderULRF" : null,
"founderULForeign" : null,
"founderFL" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012",
"fio" : " "
},
"capitalPart" : {
"nominal" : 20000,
"size" : {
"percent" : 50,
"decimalPart" : null,
"simplePart" : null
}
}
}, {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789021",
"fio" : " "
},
"capitalPart" : {
"nominal" : 20000,
"size" : {
"percent" : 50,
"decimalPart" : null,
"simplePart" : null
}
}
} ],
"founderGov" : null,
"founderPIF" : null
},
"capitalPart" : null,
"holderReestrAO" : null,
"okved" : {
"mainOkved" : {
"code" : "47.11",
"name" : " , , "
},
"addOkved" : null
}
}
, - , , , . , modify-overwrite-beta , . , shift - , .
shift
{
"operation": "shift",
"spec": {
"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
"name": {
"*": "&"
},
"address": {
"addressRF": {
"kladr|regionCode|value|fias": "&2.&"
}
},
"termination": {
"method": {
"*": "&2.&"
},
"*": "&1.&"
},
"capital": "&",
"manageOrg": {
"egrulData": {
"*": "&2.&"
}
},
"director": {
"*": {
"fl": {
"fio|inn": "&3[&2].&"
},
"position": {
"name": "&3[&2].&1",
"*": "&3[&2].&"
},
"disqualification": "&2[&1].&"
}
},
"founders": {
"founderULRF|founderULForeign": {
"*": {
"egrulData|foreignReg": {
"*": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderFL": {
"*": {
"fl": {
"fio|inn": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderGov": {
"*": {
"govOrg": {
"*": "&4.&3[&2].&"
},
"capitalPart": "&3.&2[&1].&",
"founderImplUL": {
"egrulData": {
"*": "&5.&4[&3].&2.&"
}
},
"founderImplFL": {
"fl": {
"fio|inn": "&5.&4[&3].&2.&"
}
}
}
},
"founderPIF": {
"*": {
"PIFName": {
"name": "&4.&3[&2].&1"
},
"manageOrg": {
"egrulData": {
"*": "&5.&4[&3].&"
}
},
"capitalPart": "&3.&2[&1].&"
}
}
},
"capitalPart": "&",
"holderReestrAO": {
"egrulData": {
"*": "&2.&"
}
},
"okved": "&"
}
}
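shift 操作では、入力の各要素を出力のどこへ配置するかを指定します。指定しなかった要素は出力に含まれません。配置先の指定には & を使います。
& は現在の要素名を表し、&0 と同じ意味です。
&1 は1階層上の要素名を表します。
また & は他の文字と組み合わせて pre-&-post のように書くこともできます。
たとえば & が name であれば、結果は pre-name-post になります。
最初の行 "reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&" は、列挙した要素をそのままの名前で出力へコピーします。
| は「または」を意味し、複数の要素名をまとめて指定できます。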
fullName と shortName は、"name": { "*": "&" } によって最上位の階層へ移動します。
"*" は name 内のすべての要素にマッチします。
"&" は、マッチした要素名をそのまま出力先の名前として使うことを意味します。
- .
"address": {
"addressRF": {
"kladr|regionCode|value|fias": "&2.&"
}
}
ここでのポイントは "&2.&" です。
&2 は2階層上の要素名である address を、& は現在の要素名を表します。
&1 に当たる addressRF は出力パスに含めません。その結果、出力先は address.kladr、address.regionCode、address.value、address.fias となります。
次に director の変換を見ていきます。変換前のJSONは次のとおりです。
"director" : [ {
"fl" : {
"lastName" : "",
"firstName" : "",
"patronymic" : "",
"inn" : "123456789012",
"fio" : " "
},
"position" : {
"ogrnip" : null,
"typeCode" : "02",
"typeName" : " ",
"name" : " "
},
"disqualification" : null
} ]
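lastName、firstName、patronymic は削除します。
inn と fio は1つ上の階層へ移動します。
ogrnip、typeCode、typeName も1つ上の階層へ移動します。
name は position という名前に変更します。
disqualification はそのまま残します。
考え方はこれまでと同じですが、director は配列なので、要素名を表す & に加えて、配列のインデックスを [&] の形で指定します。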
"director": {
"*": {
"fl": {
"fio|inn": "&3[&2].&"
},
"position": {
"name": "&3[&2].&1",
"*": "&3[&2].&"
},
"disqualification": "&2[&1].&"
}
}
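まず fio と inn です。出力先は &3[&2].& と指定します。
階層を順にたどると、&3 は director、[&2] は配列内のインデックス、& は fio または inn を表します。
次に position 内の name です。出力先は &3[&2].&1 と指定します。
&3 は director、[&2] は配列内のインデックス、&1 は position を表します。
末尾に & を付けないため、name の値は name という名前ではなく、position という名前で出力されます。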
position 内の残りの要素は、単に1つ上の階層へ引き上げられます。disqualification は変更されません。
さらに、同様の構造が使用されます。
例
最後に、元のJSON、JOLT仕様、変換結果のJSONをまとめて再掲します。
元のJSON
{
"reportDate": "2020-05-20",
"ogrn": "1234567890123",
"ogrnDate": "2002-12-30",
"inn": "1234567890",
"kpp": "123456789",
"opfCode": "12300",
"opfName": " ",
"name": {
"fullName": " ",
"shortName": ""
},
"address": {
"addressRF": {
"region": {
"type": "",
"name": ""
},
"district": null,
"town": {
"type": "",
"name": ""
},
"settlement": null,
"street": {
"type": "",
"name": ""
},
"index": "143500",
"regionCode": "50",
"kladr": "500000570000011",
"house": null,
"building": null,
"apartment": null
}
},
"termination": null,
"capital": null,
"manageOrg": null,
"director": [
{
"fl": {
"lastName": "",
"firstName": "",
"patronymic": "",
"inn": "123456789012"
},
"position": {
"ogrnip": null,
"typeCode": "02",
"typeName": " ",
"name": " "
},
"disqualification": null
}
],
"founders": {
"founderULRF": null,
"founderULForeign": null,
"founderFL": [
{
"fl": {
"lastName": "",
"firstName": "",
"patronymic": "",
"inn": "123456789012"
},
"capitalPart": {
"nominal": 20000,
"size": {
"percent": 50,
"decimalPart": null,
"simplePart": null
}
}
},
{
"fl": {
"lastName": "",
"firstName": "",
"patronymic": "",
"inn": "123456789021"
},
"capitalPart": {
"nominal": 20000,
"size": {
"percent": 50,
"decimalPart": null,
"simplePart": null
}
}
}
],
"founderGov": null,
"founderPIF": null
},
"capitalPart": null,
"holderReestrAO": null,
"okved": {
"mainOkved": {
"code": "47.11",
"name": " , , "
},
"addOkved": null
}
}
JOLT仕様
[
{
"operation": "modify-overwrite-beta",
"spec": {
"address": {
"addressRF": {
"region": "=concat(@(type), ' ', @(name))",
"district": "=concat(@(type), ' ', @(name))",
"town": "=concat(@(type), ' ', @(name))",
"settlement": "=concat(@(type), ' ', @(name))",
"street": "=concat(@(type), ' ', @(name))"
}
},
"director": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
},
"founders": {
"founderFL": {
"*": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
},
"founderGov": {
"*": {
"founderImplFL": {
"fl": {
"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
}
}
}
}
}
}
},
{
"operation": "modify-overwrite-beta",
"spec": {
"address": {
"addressRF": {
"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
"fias": null
}
}
}
},
{
"operation": "shift",
"spec": {
"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
"name": {
"*": "&"
},
"address": {
"addressRF": {
"kladr|regionCode|value|fias": "&2.&"
}
},
"termination": {
"method": {
"*": "&2.&"
},
"*": "&1.&"
},
"capital": "&",
"manageOrg": {
"egrulData": {
"*": "&2.&"
}
},
"director": {
"*": {
"fl": {
"fio|inn": "&3[&2].&"
},
"position": {
"name": "&3[&2].&1",
"*": "&3[&2].&"
},
"disqualification": "&2[&1].&"
}
},
"founders": {
"founderULRF|founderULForeign": {
"*": {
"egrulData|foreignReg": {
"*": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderFL": {
"*": {
"fl": {
"fio|inn": "&4.&3[&2].&"
},
"*": "&3.&2[&1].&"
}
},
"founderGov": {
"*": {
"govOrg": {
"*": "&4.&3[&2].&"
},
"capitalPart": "&3.&2[&1].&",
"founderImplUL": {
"egrulData": {
"*": "&5.&4[&3].&2.&"
}
},
"founderImplFL": {
"fl": {
"fio|inn": "&5.&4[&3].&2.&"
}
}
}
},
"founderPIF": {
"*": {
"PIFName": {
"name": "&4.&3[&2].&1"
},
"manageOrg": {
"egrulData": {
"*": "&5.&4[&3].&"
}
},
"capitalPart": "&3.&2[&1].&"
}
}
},
"capitalPart": "&",
"holderReestrAO": {
"egrulData": {
"*": "&2.&"
}
},
"okved": "&"
}
}
]
結果のJSON
{
"reportDate" : "2020-05-20",
"ogrn" : "1234567890123",
"ogrnDate" : "2002-12-30",
"inn" : "1234567890",
"kpp" : "123456789",
"opfCode" : "12300",
"opfName" : " ",
"fullName" : " ",
"shortName" : "",
"address" : {
"kladr" : "500000570000011",
"regionCode" : "50",
"value" : "143500, , , , , , , , ",
"fias" : null
},
"capital" : null,
"director" : [ {
"fio" : " ",
"inn" : "123456789012",
"ogrnip" : null,
"typeCode" : "02",
"typeName" : " ",
"position" : " ",
"disqualification" : null
} ],
"founders" : {
"founderFL" : [ {
"fio" : " ",
"inn" : "123456789012",
"capitalPart" : {
"nominal" : 20000,
"size" : {
"percent" : 50,
"decimalPart" : null,
"simplePart" : null
}
}
}, {
"fio" : " ",
"inn" : "123456789021",
"capitalPart" : {
"nominal" : 20000,
"size" : {
"percent" : 50,
"decimalPart" : null,
"simplePart" : null
}
}
} ]
},
"capitalPart" : null,
"okved" : {
"mainOkved" : {
"code" : "47.11",
"name" : " , , "
},
"addOkved" : null
}
}
さらに
この後、変換結果のJSONは、保存してさらに利用できるようにどこかへ格納する必要があります。しかし、それはこの記事の範囲を超えています。この記事がどなたかのお役に立てば幸いです。