Skip to content

Commit fa74a27

Browse files
authored
Adding line_index to columns of rows based fixed-length envelope; JSON schema adjustment and validation code update. (#169)
Also some comment updates to flatfile.RecReader methods
1 parent 881ab61 commit fa74a27

File tree

8 files changed

+144
-129
lines changed

8 files changed

+144
-129
lines changed

extensions/omniv21/fileformat/flatfile/fixedlength/decl.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,22 @@ import (
1313
// ColumnDecl describes a column of an envelope.
1414
type ColumnDecl struct {
1515
Name string `json:"name,omitempty"`
16-
StartPos int `json:"start_pos,omitempty"` // 1-based. and rune-based.
17-
Length int `json:"length,omitempty"` // rune-based length.
16+
StartPos int `json:"start_pos,omitempty"` // 1-based and rune-based.
17+
Length int `json:"length,omitempty"` // rune-based length.
18+
LineIndex *int `json:"line_index,omitempty"` // 1-based.
1819
LinePattern *string `json:"line_pattern,omitempty"`
1920

2021
linePatternRegexp *regexp.Regexp
2122
}
2223

23-
func (c *ColumnDecl) lineMatch(line []byte) bool {
24-
if c.linePatternRegexp == nil {
25-
return true
24+
func (c *ColumnDecl) lineMatch(lineIndex int, line []byte) bool {
25+
if c.LineIndex != nil {
26+
return *c.LineIndex == lineIndex+1 // c.LineIndex is 1-based.
27+
}
28+
if c.linePatternRegexp != nil {
29+
return c.linePatternRegexp.Match(line)
2630
}
27-
return c.linePatternRegexp.Match(line)
31+
return true
2832
}
2933

3034
func (c *ColumnDecl) lineToColumnValue(line []byte) string {
@@ -117,6 +121,9 @@ func (e *EnvelopeDecl) ChildDecls() []flatfile.RecDecl {
117121
}
118122

119123
func (e *EnvelopeDecl) rowsBased() bool {
124+
if e.Group() {
125+
panic("envelope_group is neither rows based nor header/footer based")
126+
}
120127
// for header/footer based envelope, header must be specified; otherwise, it's rows based.
121128
return e.Header == nil
122129
}

extensions/omniv21/fileformat/flatfile/fixedlength/decl_test.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@ import (
1212
)
1313

1414
func TestColumnDecl_LineMatch(t *testing.T) {
15-
assert.True(t, (&ColumnDecl{}).lineMatch([]byte("test")))
15+
assert.True(t, (&ColumnDecl{}).lineMatch(0, []byte("test")))
16+
assert.False(t, (&ColumnDecl{LineIndex: testlib.IntPtr(2)}).lineMatch(0, []byte("test")))
17+
assert.True(t, (&ColumnDecl{LineIndex: testlib.IntPtr(2)}).lineMatch(1, []byte("test")))
1618
assert.False(t, (&ColumnDecl{linePatternRegexp: regexp.MustCompile("^ABC.*$")}).
17-
lineMatch([]byte("test")))
19+
lineMatch(0, []byte("test")))
1820
assert.True(t, (&ColumnDecl{linePatternRegexp: regexp.MustCompile("^ABC.*$")}).
19-
lineMatch([]byte("ABCDEFG")))
21+
lineMatch(0, []byte("ABCDEFG")))
2022
}
2123

2224
func TestColumnDecl_LineToColumnValue(t *testing.T) {
@@ -64,6 +66,9 @@ func TestEnvelopeDecl(t *testing.T) {
6466
assert.Equal(t, e.childRecDecls, e.ChildDecls())
6567

6668
// rowsBased()
69+
assert.PanicsWithValue(t, "envelope_group is neither rows based nor header/footer based",
70+
func() { e.rowsBased() })
71+
e.Type = strs.StrPtr(typeEnvelope)
6772
assert.True(t, e.rowsBased())
6873
e.Header = strs.StrPtr("^ABC$")
6974
assert.False(t, e.rowsBased())

extensions/omniv21/fileformat/flatfile/fixedlength/validate.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,11 @@ func (ctx *validateCtx) validateEnvelopeDecl(fqdn string, envelopeDecl *Envelope
7777
}
7878

7979
func (ctx *validateCtx) validateColumnDecl(fqdn string, colDecl *ColumnDecl) (err error) {
80+
if colDecl.LineIndex != nil && colDecl.LinePattern != nil {
81+
return fmt.Errorf(
82+
"envelope '%s' column '%s' cannot have both `line_index` and `line_pattern` specified at the same time",
83+
fqdn, colDecl.Name)
84+
}
8085
if colDecl.LinePattern != nil {
8186
if colDecl.linePatternRegexp, err = caches.GetRegex(*colDecl.LinePattern); err != nil {
8287
return fmt.Errorf(

extensions/omniv21/fileformat/flatfile/fixedlength/validate_test.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,19 @@ func TestValidateFileDecl_MinGreaterThanMax(t *testing.T) {
9696
assert.Equal(t, `envelope/envelope_group 'A/B' has 'min' value 2 > 'max' value 1`, err.Error())
9797
}
9898

99+
func TestValidateFileDecl_ColumnLineIndexAndLinePatternSameTime(t *testing.T) {
100+
err := (&validateCtx{}).validateFileDecl(&FileDecl{
101+
Envelopes: []*EnvelopeDecl{
102+
{Name: "A", Columns: []*ColumnDecl{
103+
{Name: "c", LineIndex: testlib.IntPtr(2), LinePattern: strs.StrPtr(".")}}},
104+
},
105+
})
106+
assert.Error(t, err)
107+
assert.Equal(t,
108+
"envelope 'A' column 'c' cannot have both `line_index` and `line_pattern` specified at the same time",
109+
err.Error())
110+
}
111+
99112
func TestValidateFileDecl_InvalidColumnLinePattern(t *testing.T) {
100113
err := (&validateCtx{}).validateFileDecl(&FileDecl{
101114
Envelopes: []*EnvelopeDecl{
@@ -110,7 +123,7 @@ func TestValidateFileDecl_InvalidColumnLinePattern(t *testing.T) {
110123
}
111124

112125
func TestValidateFileDecl_Success(t *testing.T) {
113-
col1 := &ColumnDecl{Name: "c1"}
126+
col1 := &ColumnDecl{Name: "c1", LineIndex: testlib.IntPtr(1)}
114127
col2 := &ColumnDecl{Name: "c2"}
115128
col3 := &ColumnDecl{Name: "c3", LinePattern: strs.StrPtr("^C$")}
116129
fd := &FileDecl{
@@ -137,5 +150,6 @@ func TestValidateFileDecl_Success(t *testing.T) {
137150
assert.Same(t, fd.Envelopes[0].Children[0], fd.Envelopes[0].childRecDecls[0].(*EnvelopeDecl))
138151
assert.Equal(t, "A/B", fd.Envelopes[0].Children[0].fqdn)
139152
assert.Equal(t, []*ColumnDecl{col1, col2, col3}, fd.Envelopes[0].Children[0].Columns)
140-
assert.True(t, fd.Envelopes[0].Children[0].Columns[2].lineMatch([]byte("C")))
153+
assert.True(t, fd.Envelopes[0].Children[0].Columns[0].lineMatch(0, []byte("C")))
154+
assert.True(t, fd.Envelopes[0].Children[0].Columns[2].lineMatch(0, []byte("C")))
141155
}

extensions/omniv21/fileformat/flatfile/hierarchyReader.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,16 +132,18 @@ func (r *HierarchyReader) Release(n *idr.Node) {
132132

133133
// readRec tries to read/match unprocessed data against the passed-in record decl.
134134
func (r *HierarchyReader) readRec(recDecl RecDecl) (*idr.Node, error) {
135-
// If the decl is a solid non-group decl, then we will ask RecReader to match and create IDR.
136-
// If the decl is a group, then we'll only ask RecReader to match but not creating IDR - instead
137-
// we'll create an IDR node for the group decl here.
135+
// If the decl is a Group(), the matching should use the recursive algorithm
136+
// to match the first-heir-in-line non-group descendant decl. If matched, the returned
137+
// IDR node should be of this group node. This logic is similar to the EDI segment
138+
// matching logic, essentially a greedy algo:
139+
// https://github.com/jf-tech/omniparser/blob/6802ed98d0e5325a6908ebbc6d2da0e4655ed125/extensions/omniv21/fileformat/edi/seg.go#L87
138140
nonGroupDecl := recDecl
139141
for nonGroupDecl.Group() && len(nonGroupDecl.ChildDecls()) > 0 {
140142
nonGroupDecl = nonGroupDecl.ChildDecls()[0]
141143
}
142144
if nonGroupDecl.Group() {
143-
// We must have a non-group solid record to perform the match; if not, it's a no
144-
// match, thus returning nil for IDR and nil for error.
145+
// We must have a non-group solid record to perform the match; if not, return
146+
// nil for IDR, indicating no match, and nil for error.
145147
return nil, nil
146148
}
147149
// Now we have a solid record to perform match, let's call RecReader to do that.

extensions/omniv21/fileformat/flatfile/recreader.go

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,11 @@ type RecReader interface {
2020
// return (false, nil).
2121
MoreUnprocessedData() (more bool, err error)
2222

23-
// ReadAndMatch matches the passed-in RecDecl to unprocessed data and creates a
23+
// ReadAndMatch matches the passed-in *non-group* RecDecl to unprocessed data and creates a
2424
// corresponding IDR node if data matches and createIDR flag is turned on.
2525
// Implementation notes:
2626
// - If io.EOF is encountered while there is still unmatched thus unprocessed data,
2727
// io.EOF shouldn't be returned.
2828
// - If a non-io.EOF error is encountered during IO, return (false, nil, err).
29-
// - If the decl is a Group(), the matching should be using the recursive algorithm
30-
// to match the first-heir-in-line non-group descendent decl. If matched, the returned
31-
// IDR node should be of this group node, and the actual matched record data should be
32-
// internally cached for the next call(s). This logic is similar to the EDI segment
33-
// matching logic:
34-
// https://github.com/jf-tech/omniparser/blob/6802ed98d0e5325a6908ebbc6d2da0e4655ed125/extensions/omniv21/fileformat/edi/seg.go#L87
3529
ReadAndMatch(decl RecDecl, createIDR bool) (matched bool, node *idr.Node, err error)
3630
}

extensions/omniv21/validation/fixedlength2FileDeclaration.go

Lines changed: 47 additions & 53 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)