Skip to content

Commit

Permalink
adding ndjson format (#218)
Browse files Browse the repository at this point in the history
by @jose-sherpa 

The omniparser tool currently outputs a single JSON array, so you will often need another tool or package to stream-parse the output. While I am aware that this tool is only meant to produce JSON output, there is a JSON-based format called NDJSON, which stands for newline-delimited JSON. It makes it easy to stream-parse and process a sequence of JSON records with no added packages or complexity, since you just read the output line by line and parse each line on its own. Since a strength of omniparser is stream-parsing large files, we think it makes sense to make the output easily streamable while still emitting valid JSON for every record. It also results in a smaller file size.

http://ndjson.org/
  • Loading branch information
jose-sherpa authored Oct 9, 2023
1 parent 79a540b commit dd04a11
Showing 1 changed file with 26 additions and 5 deletions.
31 changes: 26 additions & 5 deletions cli/cmd/transformCmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ var (
}
schema string
input string
stream bool
)

func init() {
Expand All @@ -39,6 +40,8 @@ func init() {

transformCmd.Flags().StringVarP(
&input, "input", "i", "", "input file (optional; if not specified, stdin/pipe is used)")
transformCmd.Flags().BoolVarP(
&stream, "stream", "", false, "if specified, each record will be a standalone/full JSON blob and printed out immediately once transform is done")
}

func openFile(label string, filepath string) (io.ReadCloser, error) {
Expand Down Expand Up @@ -86,22 +89,40 @@ func doTransform() error {
if err != nil {
return "", err
}

s := string(b)
if stream {
return s, nil
}

return strings.Join(
strs.NoErrMapSlice(
strings.Split(jsons.BPJ(string(b)), "\n"),
strings.Split(jsons.BPJ(s), "\n"),
func(s string) string { return "\t" + s }),
"\n"), nil
}

record, err := doOne()
if err == io.EOF {
fmt.Println("[]")
if !stream {
fmt.Println("[]")
}
return nil
}
if err != nil {
return err
}
fmt.Printf("[\n%s", record)

lparen := "[\n%s"
delim := ",\n%s"
rparen := "\n]"
if stream {
lparen = "%s"
delim = "\n%s"
rparen = ""
}

fmt.Printf(lparen, record)
for {
record, err = doOne()
if err == io.EOF {
Expand All @@ -110,8 +131,8 @@ func doTransform() error {
if err != nil {
return err
}
fmt.Printf(",\n%s", record)
fmt.Printf(delim, record)
}
fmt.Println("\n]")
fmt.Println(rparen)
return nil
}

0 comments on commit dd04a11

Please sign in to comment.