Skip to content

Commit 3b9fcb5

Browse files
authored
Enabling 'fixed-length2' schema in 'omni.2.1' schema handler; adding samples, a parity comparison against the original 'fixed-length' samples, and benchmarks for both (no degradation); adding 'fixed-length2' to the Heroku server sample collection. (#171)
1 parent 1b4997b commit 3b9fcb5

16 files changed

+888
-32
lines changed

cli/cmd/serverCmd.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ func httpPostTransform(w http.ResponseWriter, r *http.Request) {
154154

155155
var (
156156
sampleDir = "../../extensions/omniv21/samples/"
157-
sampleFormats = []string{"csv", "json", "xml", "fixedlength", "edi"}
157+
sampleFormats = []string{"csv", "json", "xml", "fixedlength", "fixedlength2", "edi"}
158158
sampleInputFilenamePattern = regexp.MustCompile("^([0-9]+[_a-zA-Z0-9]+)\\.input\\.[a-z]+$")
159159
)
160160

extensions/omniv21/samples/fixedlength/fixedlength_test.go

Lines changed: 74 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -8,52 +8,68 @@ import (
88

99
"github.com/bradleyjkemp/cupaloy"
1010
"github.com/jf-tech/go-corelib/jsons"
11-
1211
"github.com/jf-tech/omniparser"
12+
1313
"github.com/jf-tech/omniparser/extensions/omniv21/samples"
1414
"github.com/jf-tech/omniparser/transformctx"
1515
)
1616

17-
func Test1_Single_Row(t *testing.T) {
18-
cupaloy.SnapshotT(t, jsons.BPJ(samples.SampleTestCommon(
19-
t, "./1_single_row.schema.json", "./1_single_row.input.txt")))
17+
type testCase struct {
18+
schemaFile string
19+
inputFile string
20+
schema omniparser.Schema
21+
input []byte
2022
}
2123

22-
func Test2_Multi_Rows(t *testing.T) {
23-
cupaloy.SnapshotT(t, jsons.BPJ(samples.SampleTestCommon(
24-
t, "./2_multi_rows.schema.json", "./2_multi_rows.input.txt")))
25-
}
24+
const (
25+
test1_Single_Row = iota
26+
test2_Multi_Rows
27+
test3_Header_Footer
28+
)
2629

27-
func Test3_Header_Footer(t *testing.T) {
28-
cupaloy.SnapshotT(t, jsons.BPJ(samples.SampleTestCommon(
29-
t, "./3_header_footer.schema.json", "./3_header_footer.input.txt")))
30+
var tests = []testCase{
31+
{
32+
// test1_Single_Row
33+
schemaFile: "./1_single_row.schema.json",
34+
inputFile: "./1_single_row.input.txt",
35+
},
36+
{
37+
// test2_Multi_Rows
38+
schemaFile: "./2_multi_rows.schema.json",
39+
inputFile: "./2_multi_rows.input.txt",
40+
},
41+
{
42+
// test3_Header_Footer
43+
schemaFile: "./3_header_footer.schema.json",
44+
inputFile: "./3_header_footer.input.txt",
45+
},
3046
}
3147

32-
var benchSchemaFile = "./3_header_footer.schema.json"
33-
var benchInputFile = "./3_header_footer.input.txt"
34-
var benchSchema omniparser.Schema
35-
var benchInput []byte
36-
3748
func init() {
38-
schema, err := ioutil.ReadFile(benchSchemaFile)
39-
if err != nil {
40-
panic(err)
41-
}
42-
benchSchema, err = omniparser.NewSchema("bench", bytes.NewReader(schema))
43-
if err != nil {
44-
panic(err)
45-
}
46-
benchInput, err = ioutil.ReadFile(benchInputFile)
47-
if err != nil {
48-
panic(err)
49+
for i := range tests {
50+
schema, err := ioutil.ReadFile(tests[i].schemaFile)
51+
if err != nil {
52+
panic(err)
53+
}
54+
tests[i].schema, err = omniparser.NewSchema("bench", bytes.NewReader(schema))
55+
if err != nil {
56+
panic(err)
57+
}
58+
tests[i].input, err = ioutil.ReadFile(tests[i].inputFile)
59+
if err != nil {
60+
panic(err)
61+
}
4962
}
5063
}
5164

52-
// Benchmark3_Header_Footer-8 2798 385303 ns/op 77909 B/op 1891 allocs/op
53-
func Benchmark3_Header_Footer(b *testing.B) {
65+
func (tst testCase) doTest(t *testing.T) {
66+
cupaloy.SnapshotT(t, jsons.BPJ(samples.SampleTestCommon(t, tst.schemaFile, tst.inputFile)))
67+
}
68+
69+
func (tst testCase) doBenchmark(b *testing.B) {
5470
for i := 0; i < b.N; i++ {
55-
transform, err := benchSchema.NewTransform(
56-
"bench", bytes.NewReader(benchInput), &transformctx.Ctx{})
71+
transform, err := tst.schema.NewTransform(
72+
"bench", bytes.NewReader(tst.input), &transformctx.Ctx{})
5773
if err != nil {
5874
b.FailNow()
5975
}
@@ -68,3 +84,30 @@ func Benchmark3_Header_Footer(b *testing.B) {
6884
}
6985
}
7086
}
87+
88+
func Test1_Single_Row(t *testing.T) {
89+
tests[test1_Single_Row].doTest(t)
90+
}
91+
92+
func Test2_Multi_Rows(t *testing.T) {
93+
tests[test2_Multi_Rows].doTest(t)
94+
}
95+
96+
func Test3_Header_Footer(t *testing.T) {
97+
tests[test3_Header_Footer].doTest(t)
98+
}
99+
100+
// Benchmark1_Single_Row-8 25869 45576 ns/op 27721 B/op 644 allocs/op
101+
func Benchmark1_Single_Row(b *testing.B) {
102+
tests[test1_Single_Row].doBenchmark(b)
103+
}
104+
105+
// Benchmark2_Multi_Rows-8 18813 63901 ns/op 29167 B/op 635 allocs/op
106+
func Benchmark2_Multi_Rows(b *testing.B) {
107+
tests[test2_Multi_Rows].doBenchmark(b)
108+
}
109+
110+
// Benchmark3_Header_Footer-8 5857 197326 ns/op 82234 B/op 2009 allocs/op
111+
func Benchmark3_Header_Footer(b *testing.B) {
112+
tests[test3_Header_Footer].doBenchmark(b)
113+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
[
2+
{
3+
"RawRecord": "{\"DATE\":\"2019/01/31T12:34:56-0800\",\"HIGH_TEMP_C\":\"10.5\",\"LAT\":\" 37.7749\",\"LONG\":\"122.4194\",\"LOW_TEMP_F\":\"30.2\",\"WIND_DIR\":\" N\",\"WIND_SPEED_KMH\":\"31\"}",
4+
"RawRecordHash": "96a5ae79-8a86-366a-aa48-434653bcf4f7",
5+
"TransformedRecord": {
6+
"date": "2019-01-31T12:34:56-08:00",
7+
"high_temperature_fahrenheit": 50.9,
8+
"latitude": 37.7749,
9+
"longitude": 122.4194,
10+
"low_temperature_fahrenheit": 30.2,
11+
"wind": "North 19.26 mph"
12+
}
13+
},
14+
{
15+
"RawRecord": "{\"DATE\":\"2020/07/31T01:23:45-0500\",\"HIGH_TEMP_C\":\" 39\",\"LAT\":\" 32.7767\",\"LONG\":\" 96.7970\",\"LOW_TEMP_F\":\" 95\",\"WIND_DIR\":\"SE\",\"WIND_SPEED_KMH\":\"31\"}",
16+
"RawRecordHash": "a700355d-f877-3971-85d1-bc6ce9a4f094",
17+
"TransformedRecord": {
18+
"date": "2020-07-31T01:23:45-05:00",
19+
"high_temperature_fahrenheit": 102.2,
20+
"latitude": 32.7767,
21+
"longitude": 96.797,
22+
"low_temperature_fahrenheit": 95,
23+
"wind": "South East 19.26 mph"
24+
}
25+
}
26+
]
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
[
2+
{
3+
"RawRecord": "{\"destination_country\":\"US\",\"event_city\":\"HAPPYVALLEY \",\"event_date\":\"20190826\",\"event_state\":\"FL\",\"event_time\":\"124704 \",\"event_timezone\":\"EST \",\"guaranteed_delivery_date\":\"20190827\",\"scan_facility_zip\":\"54321 \",\"tracking_number_h001\":\"W841206858 \",\"tracking_number_h002_cn\":\"100000103732\"}",
4+
"RawRecordHash": "0c5ddcef-aa71-30dd-bf70-829ce9b92d2b",
5+
"TransformedRecord": {
6+
"destination_country": "US",
7+
"events": [
8+
{
9+
"event_date": "2019-08-26T12:47:04-05:00",
10+
"location": {
11+
"city": "HAPPYVALLEY",
12+
"state": "FL",
13+
"zip": "54321"
14+
}
15+
}
16+
],
17+
"guaranteed_delivery_date": "2019-08-27T00:00:00",
18+
"tracking_number": "100000103732"
19+
}
20+
},
21+
{
22+
"RawRecord": "{\"destination_country\":\"US\",\"event_city\":\"MAGIC BEACH \",\"event_date\":\"20190826\",\"event_state\":\"FL\",\"event_time\":\"124704 \",\"event_timezone\":\"EST \",\"guaranteed_delivery_date\":\"20190827\",\"scan_facility_zip\":\"12345 \",\"tracking_number_h001\":\"W938003272 \",\"tracking_number_h002_cn\":\"\"}",
23+
"RawRecordHash": "592afde9-d28e-3751-92bf-15e35c7af534",
24+
"TransformedRecord": {
25+
"destination_country": "US",
26+
"events": [
27+
{
28+
"event_date": "2019-08-26T12:47:04-05:00",
29+
"location": {
30+
"city": "MAGIC BEACH",
31+
"state": "FL",
32+
"zip": "12345"
33+
}
34+
}
35+
],
36+
"guaranteed_delivery_date": "2019-08-27T00:00:00",
37+
"tracking_number": "W938003272"
38+
}
39+
}
40+
]
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
[
2+
{
3+
"RawRecord": "{\"city_name_addressee\":\"NIEUWEGEIN\",\"country_code_addressee\":\"NE\",\"date_observation\":\"20191105\",\"observation_type\":\"J\",\"postal_code_addressee\":\"3436JA\",\"reason_for_observation\":\"10\",\"time_observation\":\"043523\",\"tracking_number\":\"3SDMNN0129791\",\"weight_in_grams\":\"19780\"}",
4+
"RawRecordHash": "a287788f-785a-3cd0-bbd1-5fe8c6b07186",
5+
"TransformedRecord": {
6+
"carrier": "postnl",
7+
"events": [
8+
{
9+
"event_date": "2019-11-05T04:35:23",
10+
"location": {
11+
"country": "NE",
12+
"zip": "0000FF"
13+
}
14+
}
15+
],
16+
"tracking_number": "3SDMNN0129791",
17+
"weight_in_kg": 19.78
18+
}
19+
},
20+
{
21+
"RawRecord": "{\"city_name_addressee\":\"'S-HERTOGENBOSCH\",\"country_code_addressee\":\"NL\",\"date_observation\":\"20191105\",\"delivery_date\":\"20160825\",\"observation_type\":\"M\",\"postal_code_addressee\":\"5211EK\",\"reason_for_observation\":\"02\",\"time_observation\":\"033000\",\"tracking_number\":\"3SDMNN0129552\",\"weight_in_grams\":\"19780\"}",
22+
"RawRecordHash": "abc1fab4-7284-3363-8d29-cae063c76310",
23+
"TransformedRecord": {
24+
"carrier": "postnl",
25+
"estimated_delivery_date": "2016-08-25T00:00:00",
26+
"events": [
27+
{
28+
"event_date": "2019-11-05T03:30:00",
29+
"location": {
30+
"city": "'S-HERTOGENBOSCH",
31+
"country": "NL",
32+
"zip": "5211EK"
33+
}
34+
}
35+
],
36+
"tracking_number": "3SDMNN0129552",
37+
"weight_in_kg": 19.78
38+
}
39+
},
40+
{
41+
"RawRecord": "{\"city_name_addressee\":\"RENKUM\",\"country_code_addressee\":\"NE\",\"date_observation\":\"20191105\",\"delivery_date\":\"20160825\",\"observation_type\":\"M\",\"postal_code_addressee\":\"6871ZS\",\"reason_for_observation\":\"02\",\"time_observation\":\"033000\",\"tracking_number\":\"3SDMNN0129857\",\"weight_in_grams\":\"1234\"}",
42+
"RawRecordHash": "87021dbf-fc09-3c4c-bdb7-d8e55dab6457",
43+
"TransformedRecord": {
44+
"carrier": "postnl",
45+
"estimated_delivery_date": "2016-08-25T00:00:00",
46+
"events": [
47+
{
48+
"event_date": "2019-11-05T03:30:00",
49+
"location": {
50+
"country": "NE",
51+
"zip": "0000FF"
52+
}
53+
}
54+
],
55+
"tracking_number": "3SDMNN0129857",
56+
"weight_in_kg": 1.23
57+
}
58+
},
59+
{
60+
"RawRecord": "{\"city_name_addressee\":\"ROTTERDAM\",\"country_code_addressee\":\"NL\",\"date_observation\":\"20191105\",\"observation_type\":\"M\",\"postal_code_addressee\":\"3043ME\",\"reason_for_observation\":\"02\",\"time_observation\":\"033000\",\"tracking_number\":\"3SDMNN0129501\"}",
61+
"RawRecordHash": "c03f86aa-65cf-3255-9ad2-6c57b22f55a8",
62+
"TransformedRecord": {
63+
"carrier": "postnl",
64+
"events": [
65+
{
66+
"event_date": "2019-11-05T03:30:00",
67+
"location": {
68+
"city": "ROTTERDAM",
69+
"country": "NL",
70+
"zip": "3043ME"
71+
}
72+
}
73+
],
74+
"tracking_number": "3SDMNN0129501",
75+
"weight_in_kg": 0
76+
}
77+
}
78+
]
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
[
2+
{
3+
"RawRecord": "{\"SPU\":{\"SPT\":{\"terrory_id\":\"2136\"},\"publisher_name\":\"Publisher 10 E 000000\"},\"title\":\"Song 1 - 1 Pub1/ 0 Wrt \"}",
4+
"RawRecordHash": "2fe6aabf-2ddf-3fd6-ae74-f595c529f9cb",
5+
"TransformedRecord": {
6+
"SPU": {
7+
"SPT": {
8+
"terrory_id": "2136"
9+
},
10+
"publisher_name": "Publisher 10 E 000000"
11+
},
12+
"title": "Song 1 - 1 Pub1/ 0 Wrt "
13+
}
14+
},
15+
{
16+
"RawRecord": "{\"SPU\":[{\"publisher_name\":\"Publisher 20 E 000000\"},{\"SPT\":{\"terrory_id\":\"2136\"},\"publisher_name\":\"SAMPLE MEDIA MUSIC AM000000\"}],\"title\":\"Song 2 - 1 Pub2/ 0 Wrt // 1 SPU-AM \"}",
17+
"RawRecordHash": "11af21e8-872e-3b9c-94ea-608bb1e4f541",
18+
"TransformedRecord": {
19+
"SPU": [
20+
{
21+
"publisher_name": "Publisher 20 E 000000"
22+
},
23+
{
24+
"SPT": {
25+
"terrory_id": "2136"
26+
},
27+
"publisher_name": "SAMPLE MEDIA MUSIC AM000000"
28+
}
29+
],
30+
"title": "Song 2 - 1 Pub2/ 0 Wrt // 1 SPU-AM "
31+
}
32+
},
33+
{
34+
"RawRecord": "{\"SPU\":{\"SPT\":{\"terrory_id\":\"2136\"},\"publisher_name\":\"Publisher 10 E 000000\"},\"SWR\":{\"SWT\":{\"terrory_id\":\"2136\"},\"last_name\":\"Writer 100 \",\"writer_id\":\"Wrt100 \"},\"title\":\"Song 4 - 1 Pub1/ 1 Wrt1 \"}",
35+
"RawRecordHash": "48b79f3a-6aca-3e49-b30d-ccf79af3766d",
36+
"TransformedRecord": {
37+
"SPU": {
38+
"SPT": {
39+
"terrory_id": "2136"
40+
},
41+
"publisher_name": "Publisher 10 E 000000"
42+
},
43+
"SWR": {
44+
"SWT": {
45+
"terrory_id": "2136"
46+
},
47+
"last_name": "Writer 100 ",
48+
"writer_id": "Wrt100 "
49+
},
50+
"title": "Song 4 - 1 Pub1/ 1 Wrt1 "
51+
}
52+
},
53+
{
54+
"RawRecord": "{\"SPU\":[{\"SPT\":{\"terrory_id\":\"2136\"},\"publisher_name\":\"Publisher 10 E 000000\"},{\"SPT\":{\"terrory_id\":\"2136\"},\"publisher_name\":\"Publisher 50 E 000000\"}],\"SWR\":[{\"SWT\":{\"terrory_id\":\"2136\"},\"last_name\":\"Writer 100 \",\"writer_id\":\"Wrt100 \"},{\"SWT\":{\"terrory_id\":\"2136\"},\"last_name\":\"Writer 500 \",\"writer_id\":\"Wrt500 \"}],\"title\":\"Song 5 - 2 Pub1,2 / 2 Wrt - 1 new 1 old \"}",
55+
"RawRecordHash": "984402a3-3025-3617-9f5c-f6e0b96916a1",
56+
"TransformedRecord": {
57+
"SPU": [
58+
{
59+
"SPT": {
60+
"terrory_id": "2136"
61+
},
62+
"publisher_name": "Publisher 10 E 000000"
63+
},
64+
{
65+
"SPT": {
66+
"terrory_id": "2136"
67+
},
68+
"publisher_name": "Publisher 50 E 000000"
69+
}
70+
],
71+
"SWR": [
72+
{
73+
"SWT": {
74+
"terrory_id": "2136"
75+
},
76+
"last_name": "Writer 100 ",
77+
"writer_id": "Wrt100 "
78+
},
79+
{
80+
"SWT": {
81+
"terrory_id": "2136"
82+
},
83+
"last_name": "Writer 500 ",
84+
"writer_id": "Wrt500 "
85+
}
86+
],
87+
"title": "Song 5 - 2 Pub1,2 / 2 Wrt - 1 new 1 old "
88+
}
89+
}
90+
]
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
2019/01/31T12:34:56-0800 10.5 30.2 N 33 37.7749 122.4194
2+
DO NOT PROCESS
3+
2020/07/31T01:23:45-0500 39 95 SE 8 32.7767 96.7970

0 commit comments

Comments
 (0)