Skip to content

Commit 2a466fc

Browse files
authored
Adding new flatfile.csv decls (#173)
* Adding new flatfile.csv decls. * Adding a few design/implementation notes; adding csv2 json schema validation
1 parent b680b87 commit 2a466fc

File tree

7 files changed

+427
-1
lines changed

7 files changed

+427
-1
lines changed
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
package csv
2+
3+
import (
4+
"fmt"
5+
"regexp"
6+
7+
"github.com/jf-tech/go-corelib/maths"
8+
9+
"github.com/jf-tech/omniparser/extensions/omniv21/fileformat/flatfile"
10+
)
11+
12+
// ColumnDecl describes a single column of a csv/delimited record.
13+
type ColumnDecl struct {
14+
// Name is the column name, read from the `name` JSON field.
Name string `json:"name,omitempty"`
15+
}
16+
17+
// Design note: given currently ColumnDecl contains only Name field, we could've simply
18+
// changed RecordDecl.Columns into a []string. But in the future, if we ever need to add
19+
// anything to a column decl, we'd have to introduce a breaking schema change.
20+
21+
// Values used for RecordDecl.Type. Only typeGroup is checked explicitly (by Group());
// a nil Type or any other value is treated as a non-group record.
const (
22+
typeRecord = "record"
23+
typeGroup = "record_group"
24+
)
25+
26+
// RecordDecl describes a record of a csv/delimited input.
27+
// If Rows/Header/Footer are all nil, then it defaults to Rows = 1.
28+
// If Rows is specified, then Header/Footer must be nil. (JSON schema validation will ensure this.)
29+
// If Header is specified, Rows must be nil. (JSON schema validation will ensure this.)
30+
// Footer is optional; if not specified, Header alone is used to match a single-line record.
31+
type RecordDecl struct {
32+
Name string `json:"name,omitempty"`
33+
Rows *int `json:"rows,omitempty"`
34+
Header *string `json:"header,omitempty"`
35+
Footer *string `json:"footer,omitempty"`
36+
Type *string `json:"type,omitempty"`
37+
IsTarget bool `json:"is_target,omitempty"`
38+
Min *int `json:"min,omitempty"`
39+
Max *int `json:"max,omitempty"`
40+
Columns []*ColumnDecl `json:"columns,omitempty"`
41+
Children []*RecordDecl `json:"child_records,omitempty"`
42+
43+
// The fields below are unexported and thus not populated by JSON unmarshaling.
fqdn string // fully hierarchical name to the record.
44+
childRecDecls []flatfile.RecDecl
45+
headerRegexp *regexp.Regexp
46+
footerRegexp *regexp.Regexp
47+
}
48+
49+
// DeclName returns the record's declared name (the Name field).
func (r *RecordDecl) DeclName() string {
50+
return r.Name
51+
}
52+
53+
// Target reports whether the record is flagged as a target (the IsTarget field).
func (r *RecordDecl) Target() bool {
54+
return r.IsTarget
55+
}
56+
57+
// Group reports whether the declaration is a record group, i.e. Type is non-nil
// and equal to typeGroup. A nil Type is treated as a non-group record.
func (r *RecordDecl) Group() bool {
58+
return r.Type != nil && *r.Type == typeGroup
59+
}
60+
61+
// MinOccurs returns Min, defaulting to 0 when Min is not specified. The most common
// csv/delimited input scenario is min=0/max=unbounded.
62+
func (r *RecordDecl) MinOccurs() int {
63+
switch r.Min {
64+
case nil:
65+
return 0
66+
default:
67+
return *r.Min
68+
}
69+
}
70+
71+
// MaxOccurs returns Max, or maths.MaxIntValue (i.e. unbounded) when Max is not specified
// or is negative. The most common csv/delimited input scenario is min=0/max=unbounded.
72+
func (r *RecordDecl) MaxOccurs() int {
73+
switch {
74+
case r.Max == nil:
75+
// fallthrough jumps straight into the next case body without evaluating its
// condition, so the nil r.Max is never dereferenced.
fallthrough
76+
case *r.Max < 0:
77+
return maths.MaxIntValue
78+
default:
79+
return *r.Max
80+
}
81+
}
82+
83+
// ChildDecls returns the child declarations held in childRecDecls as a
// []flatfile.RecDecl; it is nil if childRecDecls has not been set.
func (r *RecordDecl) ChildDecls() []flatfile.RecDecl {
84+
return r.childRecDecls
85+
}
86+
87+
// rowsBased reports whether the record is rows based (Header not specified) as opposed
// to header/footer based. It panics if called on a record_group, which is neither.
func (r *RecordDecl) rowsBased() bool {
88+
if r.Group() {
89+
panic("record_group is neither rows based nor header/footer based")
90+
}
91+
// for header/footer based record, header must be specified; otherwise, it's rows based.
92+
return r.Header == nil
93+
}
94+
95+
// rows returns the number of rows of a rows-based record, defaulting to 1 when Rows is
// not specified: the most common csv/delimited scenario is a rows-based single line record.
// It panics if the record is not rows based (and, via rowsBased, if it is a record_group).
96+
func (r *RecordDecl) rows() int {
97+
if !r.rowsBased() {
98+
panic(fmt.Sprintf("record '%s' is not rows based", r.fqdn))
99+
}
100+
if r.Rows == nil {
101+
return 1
102+
}
103+
return *r.Rows
104+
}
105+
106+
// matchHeader reports whether line matches headerRegexp. It panics if headerRegexp is
// nil, which is reported as the record not being header/footer based.
func (r *RecordDecl) matchHeader(line []byte) bool {
107+
if r.headerRegexp == nil {
108+
panic(fmt.Sprintf("record '%s' is not header/footer based", r.fqdn))
109+
}
110+
return r.headerRegexp.Match(line)
111+
}
112+
113+
// matchFooter reports whether line matches footerRegexp. The footer is optional: if it
// isn't specified (footerRegexp is nil), matchFooter always returns true. Thus for a
114+
// header/footer record without a footer, it effectively becomes a single-row record matched
115+
// by header, given that after the header matches a line, matchFooter is called on the same line.
116+
func (r *RecordDecl) matchFooter(line []byte) bool {
117+
if r.footerRegexp == nil {
118+
return true
119+
}
120+
return r.footerRegexp.Match(line)
121+
}
122+
123+
// toFlatFileRecDecls converts a slice of *RecordDecl into a slice of flatfile.RecDecl
// of the same length holding the very same pointers. It returns nil for a nil or
// empty input.
func toFlatFileRecDecls(rs []*RecordDecl) []flatfile.RecDecl {
124+
if len(rs) == 0 {
125+
return nil
126+
}
127+
ret := make([]flatfile.RecDecl, len(rs))
128+
for i, r := range rs {
129+
// each *RecordDecl is stored as-is in the interface value; no copy is made.
ret[i] = r
130+
}
131+
return ret
132+
}
133+
134+
// FileDecl describes the csv/delimited schema `file_declaration` setting.
135+
type FileDecl struct {
136+
Delimiter string `json:"delimiter,omitempty"`
137+
ReplaceDoubleQuotes bool `json:"replace_double_quotes,omitempty"`
138+
// Records holds the top-level record declarations (the `records` JSON field).
Records []*RecordDecl `json:"records,omitempty"`
139+
}
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
package csv
2+
3+
import (
4+
"regexp"
5+
"testing"
6+
7+
"github.com/jf-tech/go-corelib/maths"
8+
"github.com/jf-tech/go-corelib/strs"
9+
"github.com/jf-tech/go-corelib/testlib"
10+
"github.com/jf-tech/omniparser/extensions/omniv21/fileformat/flatfile"
11+
"github.com/stretchr/testify/assert"
12+
)
13+
14+
// TestRecordDecl exercises the RecordDecl methods one after another on a single
// instance. Each section mutates r and later sections rely on the state left behind
// by earlier ones (e.g. rowsBased() first panics because Type is still typeGroup),
// so the order of the sections matters.
func TestRecordDecl(t *testing.T) {
15+
// DeclName()
16+
r := &RecordDecl{Name: "r1"}
17+
assert.Equal(t, "r1", r.DeclName())
18+
r.fqdn = r.DeclName()
19+
20+
// Target()
21+
assert.False(t, r.Target())
22+
r.IsTarget = true
23+
assert.True(t, r.Target())
24+
25+
// Group()
26+
assert.False(t, r.Group())
27+
r.Type = strs.StrPtr(typeRecord)
28+
assert.False(t, r.Group())
29+
r.Type = strs.StrPtr(typeGroup)
30+
assert.True(t, r.Group())
31+
32+
// MinOccurs()
33+
assert.Equal(t, 0, r.MinOccurs())
34+
r.Min = testlib.IntPtr(42)
35+
assert.Equal(t, 42, r.MinOccurs())
36+
37+
// MaxOccurs()
38+
assert.Equal(t, maths.MaxIntValue, r.MaxOccurs())
39+
r.Max = testlib.IntPtr(-1)
40+
assert.Equal(t, maths.MaxIntValue, r.MaxOccurs())
41+
r.Max = testlib.IntPtr(42)
42+
assert.Equal(t, 42, r.MaxOccurs())
43+
44+
// ChildDecls()
45+
assert.Nil(t, r.ChildDecls())
46+
r.childRecDecls = []flatfile.RecDecl{}
47+
assert.Equal(t, r.childRecDecls, r.ChildDecls())
48+
49+
// rowsBased()
50+
assert.PanicsWithValue(t, "record_group is neither rows based nor header/footer based",
51+
func() { r.rowsBased() })
52+
r.Type = strs.StrPtr(typeRecord)
53+
assert.True(t, r.rowsBased())
54+
r.Header = strs.StrPtr("^ABC$")
55+
assert.False(t, r.rowsBased())
56+
57+
// rows()
58+
assert.PanicsWithValue(t, "record 'r1' is not rows based", func() { r.rows() })
59+
r.Header = nil
60+
assert.Equal(t, 1, r.rows())
61+
r.Rows = testlib.IntPtr(42)
62+
assert.Equal(t, 42, r.rows())
63+
64+
// matchHeader()
65+
assert.PanicsWithValue(
66+
t, "record 'r1' is not header/footer based", func() { r.matchHeader(nil) })
67+
r.headerRegexp = regexp.MustCompile("^ABC$")
68+
assert.False(t, r.matchHeader([]byte("ABCD")))
69+
assert.True(t, r.matchHeader([]byte("ABC")))
70+
71+
// matchFooter()
72+
assert.True(t, r.matchFooter([]byte("ABCD")))
73+
r.footerRegexp = regexp.MustCompile("^ABC$")
74+
assert.False(t, r.matchFooter([]byte("ABCD")))
75+
assert.True(t, r.matchFooter([]byte("ABC")))
76+
}
77+
78+
// TestToFlatFileRecDecls verifies that nil and empty inputs yield nil, and that each
// returned element is the very same pointer as the corresponding input element.
// NOTE(review): the loop ranges over ds, so a result shorter than rs would pass
// unnoticed; consider also asserting len(ds) == len(rs).
func TestToFlatFileRecDecls(t *testing.T) {
79+
assert.Nil(t, toFlatFileRecDecls(nil))
80+
assert.Nil(t, toFlatFileRecDecls([]*RecordDecl{}))
81+
rs := []*RecordDecl{
82+
{},
83+
{},
84+
}
85+
ds := toFlatFileRecDecls(rs)
86+
for i := range ds {
87+
assert.Same(t, rs[i], ds[i].(*RecordDecl))
88+
}
89+
}

extensions/omniv21/fileformat/flatfile/fixedlength/decl.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ type EnvelopeDecl struct {
7676
Columns []*ColumnDecl `json:"columns,omitempty"`
7777
Children []*EnvelopeDecl `json:"child_envelopes,omitempty"`
7878

79-
fqdn string // fullly hierarchical name to the envelope.
79+
fqdn string // fully hierarchical name to the envelope.
8080
childRecDecls []flatfile.RecDecl
8181
headerRegexp *regexp.Regexp
8282
footerRegexp *regexp.Regexp

extensions/omniv21/fileformat/flatfile/recdecl.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,19 @@ type RecDecl interface {
1212
ChildDecls() []RecDecl
1313
}
1414

15+
// Design note: flatfile.fixedlength, flatfile.csv, etc all have similar structs that contain name,
16+
// is_target, type, min, max, etc., and their implementations of the RecDecl interface are all very similar. So why
17+
// not just change RecDecl into a struct and embed that struct into format specific decl structs?
18+
// We chose not to do that and rather take a small hit of code duplication in preference to
19+
// flexibility. Note RecDecl's ChildDecls need to return child decls and each format specific decl
20+
// is different, so that's the first incompatibility here: ChildDecls cannot even be included
21+
// in the struct if we go down the RecDecl being struct route. Yes, generics can do that but we
22+
// don't want to move omniparser's Go 1.14 dependency all the way up to 1.18 simply because of this.
23+
// Second, depending on formats, the default values for min/max are different: csv/fixed-length
24+
// min/max default to 0/-1, but for EDI min/max default to 1/1. Given these incompatibilities and the
25+
// loss of flexibility, we chose to stick with the RecDecl interface route and have each format
26+
// somewhat duplicate a small amount of trivial code.
27+
1528
const (
1629
rootName = "#root"
1730
)

extensions/omniv21/validation/csv2FileDeclaration.go

Lines changed: 97 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)