ApacheNiFiを使用した連邦税務局の法人の統一国家登録のインポート。ステップ3-JOLTを使用してJSONを変換する

あるプロジェクトで、サードパーティのシステムからデータをインポートするプロセスをマイクロサービスアーキテクチャに移行する必要がありました。ツールとしてApache NiFiを選択しました。最初の実験対象として、連邦税務局の法人統一国家登録簿(USRLE)のインポートを選びました。





前の記事では、AVROスキーマを使用してXMLをJSONに変換する方法を説明しました。





この記事では、JOLT仕様を使用してJSONを変換する方法について説明します。





使用するプロセッサとコントローラ

JSONを断片に分割する

前の段階で取得したFlowFileには、さまざまな組織のUSRLE(法人統一国家登録簿)抄本の配列を含むJSONが含まれています。まず、各FlowFileに抄本が1つだけ含まれるように、これを分割しましょう。





このために、SplitJsonプロセッサを使用します。設定では、JSONをパーツに分割するためのJsonPath式を指定する必要があります。この場合は $.* です。





JsonPathのドキュメントはこちら





ここで練習できます





JSON変換

結果のJSONは、保存して後で処理するには不必要に複雑な構造になっています。住所と氏名をそれぞれ1行にまとめ、いくつかの要素を階層の上位に移動することにします。





変換前のJSON
{
  "reportDate" : "2020-05-20",
  "ogrn" : "1234567890123",
  "ogrnDate" : "2002-12-30",
  "inn" : "1234567890",
  "kpp" : "123456789",
  "opfCode" : "12300",
  "opfName" : "   ",
  "name" : {
    "fullName" : "   ",
    "shortName" : ""
  },
  "address" : {
    "addressRF" : {
      "region" : {
        "type" : "",
        "name" : ""
      },
      "district" : null,
      "town" : {
        "type" : "",
        "name" : ""
      },
      "settlement" : null,
      "street" : {
        "type" : "",
        "name" : ""
      },
      "index" : "143500",
      "regionCode" : "50",
      "kladr" : "500000570000011",
      "house" : null,
      "building" : null,
      "apartment" : null
    }
  },
  "termination" : null,
  "capital" : null,
  "manageOrg" : null,
  "director" : [ {
    "fl" : {
      "lastName" : "",
      "firstName" : "",
      "patronymic" : "",
      "inn" : "123456789012"
    },
    "position" : {
      "ogrnip" : null,
      "typeCode" : "02",
      "typeName" : "  ",
      "name" : " "
    },
    "disqualification" : null
  } ],
  "founders" : {
    "founderULRF" : null,
    "founderULForeign" : null,
    "founderFL" : [ {
      "fl" : {
        "lastName" : "",
        "firstName" : "",
        "patronymic" : "",
        "inn" : "123456789012"
      },
      "capitalPart" : {
        "nominal" : 20000.0,
        "size" : {
          "percent" : 50.0,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    }, {
      "fl" : {
        "lastName" : "",
        "firstName" : "",
        "patronymic" : "",
        "inn" : "123456789021"
      },
      "capitalPart" : {
        "nominal" : 20000.0,
        "size" : {
          "percent" : 50.0,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    } ],
    "founderGov" : null,
    "founderPIF" : null
  },
  "capitalPart" : null,
  "holderReestrAO" : null,
  "okved" : {
    "mainOkved" : {
      "code" : "47.11",
      "name" : "    ,  ,      "
    },
    "addOkved" : null
  }
}
      
      



JSONの変換には、JoltTransformJSONプロセッサを使用します。





設定では、次のプロパティを指定します:





  • Jolt Transformation DSL - 変換の種類。複数の操作を順番に適用するため、Chainを指定します。





  • Jolt Specification - 変換の内容を記述したJOLT仕様。





JOLT

, - .





.





変換には、shiftとmodify-overwrite-betaの2種類の操作を使用します。modify-overwrite-betaの動作は、JOLTのソースコード(Modifier.java)で確認できます。仕様は jolt-demo.appspot.com で試すことができます。





JOLT
[
	{
		"operation": "modify-overwrite-beta",
		"spec": {
			"address": {
				"addressRF": {
					"region": "=concat(@(type), ' ', @(name))",
					"district": "=concat(@(type), ' ', @(name))",
					"town": "=concat(@(type), ' ', @(name))",
					"settlement": "=concat(@(type), ' ', @(name))",
					"street": "=concat(@(type), ' ', @(name))"
				}
			},
			"director": {
				"*": {
					"fl": {
						"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
					}
				}
			},
			"founders": {
				"founderFL": {
					"*": {
						"fl": {
							"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
						}
					}
				},
				"founderGov": {
					"*": {
						"founderImplFL": {
							"fl": {
								"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
							}
						}
					}
				}
			}
		}
	},
	{
		"operation": "modify-overwrite-beta",
		"spec": {
			"address": {
				"addressRF": {
					"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
					"fias": null
				}
			}
		}
	},
	{
		"operation": "shift",
		"spec": {
			"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
			"name": {
				"*": "&"
			},
			"address": {
				"addressRF": {
					"kladr|regionCode|value|fias": "&2.&"
				}
			},
			"termination": {
				"method": {
					"*": "&2.&"
				},
				"*": "&1.&"
			},
			"capital": "&",
			"manageOrg": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"director": {
				"*": {
					"fl": {
						"fio|inn": "&3[&2].&"
					},
					"position": {
						"name": "&3[&2].&1",
						"*": "&3[&2].&"
					},
					"disqualification": "&2[&1].&"
				}
			},
			"founders": {
				"founderULRF|founderULForeign": {
					"*": {
						"egrulData|foreignReg": {
							"*": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderFL": {
					"*": {
						"fl": {
							"fio|inn": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderGov": {
					"*": {
						"govOrg": {
							"*": "&4.&3[&2].&"
						},
						"capitalPart": "&3.&2[&1].&",
						"founderImplUL": {
							"egrulData": {
								"*": "&5.&4[&3].&2.&"
							}
						},
						"founderImplFL": {
							"fl": {
								"fio|inn": "&5.&4[&3].&2.&"
							}
						}
					}
				},
				"founderPIF": {
					"*": {
						"PIFName": {
							"name": "&4.&3[&2].&1"
						},
						"manageOrg": {
							"egrulData": {
								"*": "&5.&4[&3].&"
							}
						},
						"capitalPart": "&3.&2[&1].&"
					}
				}
			},
			"capitalPart": "&",
			"holderReestrAO": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"okved": "&"
		}
	}
]
      
      



modify-overwrite-beta , .. .





仕様は3つの操作の配列で構成されています。2つのmodify-overwrite-betaと、1つのshiftです。各操作は、operation(操作の種類)とspec(操作の仕様)で構成されます。





, .





modify-overwrite-beta

. , . , .





.





最初の操作(1つ目のmodify-overwrite-beta)では、region、district、town、settlement、streetの各要素について、typeとnameを1つの文字列に連結します。そのために "=concat(@(type), ' ', @(name))" という式を使用します。





"address": {
				"addressRF": {
					"region": "=concat(@(type), ' ', @(name))",
					"district": "=concat(@(type), ' ', @(name))",
					"town": "=concat(@(type), ' ', @(name))",
					"settlement": "=concat(@(type), ' ', @(name))",
					"street": "=concat(@(type), ' ', @(name))"
				}
			}
      
      



たとえば "region": "=concat(@(type), ' ', @(name))" は、region要素の値を、その中のtypeとnameをスペースで連結した文字列に置き換えます。ここでは region の中に type と name があるため、@(type) は region の中の type の値を参照します。





2番目の操作(2つ目のmodify-overwrite-beta)では、住所全体を1行にまとめたvalue要素を追加します。





"address": {
				"addressRF": {
					"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
					"fias": null
				}
			}
      
      



ここでは @(1,index) という書き方を使用しています。これは、1レベル上の要素の中にある index を参照することを意味します。つまり、value は addressRF の中に作成されるので、1レベル上がった addressRF から index を取得します。





なお、式の先頭の = は関数呼び出しを表し、concat は関数名、@(1,index) などはその引数です。





fias要素は、値をnullとして追加します。





. shift .





配列を処理するには "*" を使用します。これは配列のすべての要素に一致します。director は配列なので、その各要素に対して変換が適用されます。





"director": {
				"*": {
					"fl": {
						"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
					}
				}
			}
      
      



shift

shift JSON.





JSON
{
  "reportDate" : "2020-05-20",
  "ogrn" : "1234567890123",
  "ogrnDate" : "2002-12-30",
  "inn" : "1234567890",
  "kpp" : "123456789",
  "opfCode" : "12300",
  "opfName" : "   ",
  "name" : {
    "fullName" : "   ",
    "shortName" : ""
  },
  "address" : {
    "addressRF" : {
      "region" : " ",
      "district" : " ",
      "town" : " ",
      "settlement" : " ",
      "street" : " ",
      "index" : "143500",
      "regionCode" : "50",
      "kladr" : "500000570000011",
      "house" : null,
      "building" : null,
      "apartment" : null,
      "value" : "143500,  ,  ,  ,  ,  , , , ",
      "fias" : null
    }
  },
  "termination" : null,
  "capital" : null,
  "manageOrg" : null,
  "director" : [ {
    "fl" : {
      "lastName" : "",
      "firstName" : "",
      "patronymic" : "",
      "inn" : "123456789012",
      "fio" : "  "
    },
    "position" : {
      "ogrnip" : null,
      "typeCode" : "02",
      "typeName" : "  ",
      "name" : " "
    },
    "disqualification" : null
  } ],
  "founders" : {
    "founderULRF" : null,
    "founderULForeign" : null,
    "founderFL" : [ {
      "fl" : {
        "lastName" : "",
        "firstName" : "",
        "patronymic" : "",
        "inn" : "123456789012",
        "fio" : "  "
      },
      "capitalPart" : {
        "nominal" : 20000,
        "size" : {
          "percent" : 50,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    }, {
      "fl" : {
        "lastName" : "",
        "firstName" : "",
        "patronymic" : "",
        "inn" : "123456789021",
        "fio" : "  "
      },
      "capitalPart" : {
        "nominal" : 20000,
        "size" : {
          "percent" : 50,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    } ],
    "founderGov" : null,
    "founderPIF" : null
  },
  "capitalPart" : null,
  "holderReestrAO" : null,
  "okved" : {
    "mainOkved" : {
      "code" : "47.11",
      "name" : "    ,  ,      "
    },
    "addOkved" : null
  }
}
      
      



, - , , , . , modify-overwrite-beta , . , shift - , .





shift
{
		"operation": "shift",
		"spec": {
			"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
			"name": {
				"*": "&"
			},
			"address": {
				"addressRF": {
					"kladr|regionCode|value|fias": "&2.&"
				}
			},
			"termination": {
				"method": {
					"*": "&2.&"
				},
				"*": "&1.&"
			},
			"capital": "&",
			"manageOrg": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"director": {
				"*": {
					"fl": {
						"fio|inn": "&3[&2].&"
					},
					"position": {
						"name": "&3[&2].&1",
						"*": "&3[&2].&"
					},
					"disqualification": "&2[&1].&"
				}
			},
			"founders": {
				"founderULRF|founderULForeign": {
					"*": {
						"egrulData|foreignReg": {
							"*": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderFL": {
					"*": {
						"fl": {
							"fio|inn": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderGov": {
					"*": {
						"govOrg": {
							"*": "&4.&3[&2].&"
						},
						"capitalPart": "&3.&2[&1].&",
						"founderImplUL": {
							"egrulData": {
								"*": "&5.&4[&3].&2.&"
							}
						},
						"founderImplFL": {
							"fl": {
								"fio|inn": "&5.&4[&3].&2.&"
							}
						}
					}
				},
				"founderPIF": {
					"*": {
						"PIFName": {
							"name": "&4.&3[&2].&1"
						},
						"manageOrg": {
							"egrulData": {
								"*": "&5.&4[&3].&"
							}
						},
						"capitalPart": "&3.&2[&1].&"
					}
				}
			},
			"capitalPart": "&",
			"holderReestrAO": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"okved": "&"
		}
	}
      
      



shift . , , , . , . &



. , , &0



. . &1



, .. &



- , pre-&-post



. .. &



name, pre-name-post. . .





最初の行は "reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&" です。これは、列挙した要素を同じ名前のまま出力にコピーすることを意味します。| は「または」を表し、複数のキーをまとめて指定できます。





fullName と shortName は "name": { "*": "&" } によって1レベル上に移動します。"*" は name の中のすべての要素に一致します。"&" は、要素を元の名前のまま出力のルートに配置することを意味します。





- .





"address": {
				"addressRF": {
					"kladr|regionCode|value|fias": "&2.&"
				}
			}
      
      



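ここでは、要素を階層の1つ上のレベルに移動します。そのために "&2.&" という式を使用します。ドットは出力パスの階層の区切りです。&2 は2レベル上のキー名、つまり address を表し、& は現在の要素のキー名を表します。ちなみに &1 は addressRF を表しますが、ここでは使用しないため、出力から addressRF のレベルがなくなります。つまり、結果は address.kladr、address.regionCode、address.value、address.fias になります。残りの要素は結果のJSONには含まれません。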









"director" : [ {
    "fl" : {
      "lastName" : "",
      "firstName" : "",
      "patronymic" : "",
      "inn" : "123456789012",
      "fio" : "  "
    },
    "position" : {
      "ogrnip" : null,
      "typeCode" : "02",
      "typeName" : "  ",
      "name" : " "
    },
    "disqualification" : null
  } ]
      
      



lastName、firstName、patronymic は不要なので出力しません。

inn と fio は1レベル上に移動します。

ogrnip、typeCode、typeName も1レベル上に移動します。

name は position という名前に変更します。

disqualification はそのまま残します。





- , , - . , &



- [&]



.





"director": {
				"*": {
					"fl": {
						"fio|inn": "&3[&2].&"
					},
					"position": {
						"name": "&3[&2].&1",
						"*": "&3[&2].&"
					},
					"disqualification": "&2[&1].&"
				}
			}
      
      



まず、fio と inn です。これらには "&3[&2].&" という式を使用します。分解すると、&3 は director、[&2] は配列内の要素のインデックス、& は現在の要素名(fio または inn)です。





次に、position の中の name です。&3 は director、[&2] は配列内の要素のインデックス、&1 は position です。末尾に & ではなく &1 を使用しているため、name の値は name ではなく position というキーに格納されます。





position の中の残りの要素は、単に1レベル上に移動されます。disqualification は変わりません。





さらに、同様の構造が使用されます。





そして最後に、元のJSON、JOLT仕様、および結果のJSONを複製します





元のJSON
{
  "reportDate": "2020-05-20",
  "ogrn": "1234567890123",
  "ogrnDate": "2002-12-30",
  "inn": "1234567890",
  "kpp": "123456789",
  "opfCode": "12300",
  "opfName": "   ",
  "name": {
    "fullName": "   ",
    "shortName": ""
  },
  "address": {
    "addressRF": {
      "region": {
        "type": "",
        "name": ""
      },
      "district": null,
      "town": {
        "type": "",
        "name": ""
      },
      "settlement": null,
      "street": {
        "type": "",
        "name": ""
      },
      "index": "143500",
      "regionCode": "50",
      "kladr": "500000570000011",
      "house": null,
      "building": null,
      "apartment": null
    }
  },
  "termination": null,
  "capital": null,
  "manageOrg": null,
  "director": [
    {
      "fl": {
        "lastName": "",
        "firstName": "",
        "patronymic": "",
        "inn": "123456789012"
      },
      "position": {
        "ogrnip": null,
        "typeCode": "02",
        "typeName": "  ",
        "name": " "
      },
      "disqualification": null
    }
  ],
  "founders": {
    "founderULRF": null,
    "founderULForeign": null,
    "founderFL": [
      {
        "fl": {
          "lastName": "",
          "firstName": "",
          "patronymic": "",
          "inn": "123456789012"
        },
        "capitalPart": {
          "nominal": 20000,
          "size": {
            "percent": 50,
            "decimalPart": null,
            "simplePart": null
          }
        }
      },
      {
        "fl": {
          "lastName": "",
          "firstName": "",
          "patronymic": "",
          "inn": "123456789021"
        },
        "capitalPart": {
          "nominal": 20000,
          "size": {
            "percent": 50,
            "decimalPart": null,
            "simplePart": null
          }
        }
      }
    ],
    "founderGov": null,
    "founderPIF": null
  },
  "capitalPart": null,
  "holderReestrAO": null,
  "okved": {
    "mainOkved": {
      "code": "47.11",
      "name": "    ,  ,      "
    },
    "addOkved": null
  }
}
      
      



JOLT仕様
[
	{
		"operation": "modify-overwrite-beta",
		"spec": {
			"address": {
				"addressRF": {
					"region": "=concat(@(type), ' ', @(name))",
					"district": "=concat(@(type), ' ', @(name))",
					"town": "=concat(@(type), ' ', @(name))",
					"settlement": "=concat(@(type), ' ', @(name))",
					"street": "=concat(@(type), ' ', @(name))"
				}
			},
			"director": {
				"*": {
					"fl": {
						"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
					}
				}
			},
			"founders": {
				"founderFL": {
					"*": {
						"fl": {
							"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
						}
					}
				},
				"founderGov": {
					"*": {
						"founderImplFL": {
							"fl": {
								"fio": "=concat(@(1,lastName), ' ', @(1,firstName), ' ', @(1,patronymic))"
							}
						}
					}
				}
			}
		}
	},
	{
		"operation": "modify-overwrite-beta",
		"spec": {
			"address": {
				"addressRF": {
					"value": "=concat(@(1,index), ', ', @(1,region), ', ', @(1,district), ', ', @(1,town), ', ', @(1,settlement), ', ', @(1,street), ', ', @(1,house), ', ', @(1,building), ', ', @(1,apartment))",
					"fias": null
				}
			}
		}
	},
	{
		"operation": "shift",
		"spec": {
			"reportDate|ogrn|ogrnDate|inn|kpp|opfCode|opfName": "&",
			"name": {
				"*": "&"
			},
			"address": {
				"addressRF": {
					"kladr|regionCode|value|fias": "&2.&"
				}
			},
			"termination": {
				"method": {
					"*": "&2.&"
				},
				"*": "&1.&"
			},
			"capital": "&",
			"manageOrg": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"director": {
				"*": {
					"fl": {
						"fio|inn": "&3[&2].&"
					},
					"position": {
						"name": "&3[&2].&1",
						"*": "&3[&2].&"
					},
					"disqualification": "&2[&1].&"
				}
			},
			"founders": {
				"founderULRF|founderULForeign": {
					"*": {
						"egrulData|foreignReg": {
							"*": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderFL": {
					"*": {
						"fl": {
							"fio|inn": "&4.&3[&2].&"
						},
						"*": "&3.&2[&1].&"
					}
				},
				"founderGov": {
					"*": {
						"govOrg": {
							"*": "&4.&3[&2].&"
						},
						"capitalPart": "&3.&2[&1].&",
						"founderImplUL": {
							"egrulData": {
								"*": "&5.&4[&3].&2.&"
							}
						},
						"founderImplFL": {
							"fl": {
								"fio|inn": "&5.&4[&3].&2.&"
							}
						}
					}
				},
				"founderPIF": {
					"*": {
						"PIFName": {
							"name": "&4.&3[&2].&1"
						},
						"manageOrg": {
							"egrulData": {
								"*": "&5.&4[&3].&"
							}
						},
						"capitalPart": "&3.&2[&1].&"
					}
				}
			},
			"capitalPart": "&",
			"holderReestrAO": {
				"egrulData": {
					"*": "&2.&"
				}
			},
			"okved": "&"
		}
	}
]

      
      



結果のJSON
{
  "reportDate" : "2020-05-20",
  "ogrn" : "1234567890123",
  "ogrnDate" : "2002-12-30",
  "inn" : "1234567890",
  "kpp" : "123456789",
  "opfCode" : "12300",
  "opfName" : "   ",
  "fullName" : "   ",
  "shortName" : "",
  "address" : {
    "kladr" : "500000570000011",
    "regionCode" : "50",
    "value" : "143500,  ,  ,  ,  ,  , , , ",
    "fias" : null
  },
  "capital" : null,
  "director" : [ {
    "fio" : "  ",
    "inn" : "123456789012",
    "ogrnip" : null,
    "typeCode" : "02",
    "typeName" : "  ",
    "position" : " ",
    "disqualification" : null
  } ],
  "founders" : {
    "founderFL" : [ {
      "fio" : "  ",
      "inn" : "123456789012",
      "capitalPart" : {
        "nominal" : 20000,
        "size" : {
          "percent" : 50,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    }, {
      "fio" : "  ",
      "inn" : "123456789021",
      "capitalPart" : {
        "nominal" : 20000,
        "size" : {
          "percent" : 50,
          "decimalPart" : null,
          "simplePart" : null
        }
      }
    } ]
  },
  "capitalPart" : null,
  "okved" : {
    "mainOkved" : {
      "code" : "47.11",
      "name" : "    ,  ,      "
    },
    "addOkved" : null
  }
}
      
      



さらに

さらに、結果のJSONは、保存して後で利用できるようにどこかへ格納する必要があります。ただし、それはこの記事の範囲外です。この記事がどなたかのお役に立てば幸いです。








All Articles