Skip to content

Commit

Permalink
Merge branch 'main' into feature/github_workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
thatstoasty committed Mar 21, 2024
2 parents 16c28a5 + 18ea05b commit ea9ceab
Show file tree
Hide file tree
Showing 46 changed files with 2,193 additions and 903 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.DS_Store

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
62 changes: 37 additions & 25 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# gojo

Experiments in porting over Golang stdlib into Mojo. This is not intended to be a full port, but rather a learning exercise and a way to experiment with Mojo's capabilities. Please feel free to contribute or use this as a starting point for your own projects! The codebase will remain in flux and will evolve with Mojo as future releases are created.
Experiments in porting over Golang stdlib into Mojo and extra goodies that make use of it. This is not intended to be a full port, but rather a learning exercise and a way to experiment with Mojo's capabilities. Please feel free to contribute or use this as a starting point for your own projects! The codebase will remain in flux and will evolve with Mojo as future releases are created.

NOTE: Readers and writers have some sharp edges until Mojo can handle returning Error as part of a tuple. It can be returned by a function, but not unpacked by the receiver. This leads to some cases where EOF is not handled how it should be. It's fairly usable in its current state, but you might run into issues with large data!

## Projects that use Gojo

Expand All @@ -11,7 +13,9 @@ Experiments in porting over Golang stdlib into Mojo. This is not intended to be

## What this includes

All of these packages are partially implemented.
All of these packages are partially implemented and do not support unicode characters until Mojo supports them.

### Gojo

- `builtins`
- `Bytes` struct (backed by DynamicVector[Int8])
Expand All @@ -24,17 +28,26 @@ All of these packages are partially implemented.
- `io`
- Traits: `Reader`, `Writer`, `Seeker`, `Closer`, `ReadWriter`, `ReadCloser`, `WriteCloser`, `ReadWriteCloser`, `ReadSeeker`, `ReadSeekCloser`, `WriteSeeker`, `ReadWriteSeeker`, `ReaderFrom`, `WriterReadFrom`, `WriterTo`, `ReaderWriteTo`, `ReaderAt`, `WriterAt`, `ByteReader`, `ByteScanner`, `ByteWriter`, `StringWriter`
- `Reader` and `Writer` wrapper functions.
- `STDOUT/STDERR` Writer (leveraging `libc`).
- `FileWrapper`: `FileHandle` Wrapper Reader/Writer
- `strings`
- `StringBuilder`: String builder for fast string concatenation.
- `Reader`: String reader.
- `fmt`
- Basic `sprintf` function.

### Goodies

- `FileWrapper`: `FileHandle` Wrapper Reader/Writer
- `STDOUT/STDERR` Writer (leveraging `libc`).
- `CSV` Buffered Reader/Writer Wrapper around Maxim's `mojo-csv` library.

## Usage

Some basic usage examples. For now, check out the tests for usage of the various packages!
Some basic usage examples. These examples may fall out of sync, so please check out the tests for usage of the various packages!
Most of the `Reader` and `Writer` traits return a `Result[T]` struct which contains the result value and an `Optional[WrappedError]` struct. In the future, this will be switched to returning a Tuple with the result and an `Optional[Error]`.

You can copy over the modules you want to use from the `gojo` or `goodies` directories, or you can build the package by running:
For `gojo`: `mojo package gojo -I .`
For `goodies`: `mojo package goodies -I .`

`builtins.Bytes`

Expand All @@ -44,8 +57,8 @@ from gojo.builtins._bytes import Bytes


fn test_bytes() raises:
var test = MojoTest("Testing bytes")
var bytes = Bytes(s="hello")
var test = MojoTest("Testing builtins.Bytes extend, append, and iadd")
var bytes = Bytes("hello")
test.assert_equal(str(bytes), "hello")

bytes.append(102)
Expand Down Expand Up @@ -167,16 +180,16 @@ from gojo.io import FileWrapper


fn test_reader() raises:
var test = MojoTest("Testing reader")
var test = MojoTest("Testing bufio.Reader.read")

# Create a reader from a string buffer
var s: String = "Hello"
var buf = buffer.new_buffer(s)
var r = Reader(buf)
var reader = Reader(buf)

# Read the buffer into Bytes and then add more to Bytes
var dest = Bytes(256)
_ = r.read(dest)
_ = reader.read(dest)
dest.extend(" World!")

test.assert_equal(dest, "Hello World!")
Expand Down Expand Up @@ -236,15 +249,13 @@ fn test_reader() raises:
test.assert_equal(str(dest), s)
```

`io.FileWrapper`
`goodies.FileWrapper`

```py
from tests.wrapper import MojoTest
from gojo.io.file import File, FileWrapper
from gojo.io.reader import Reader
from gojo.io.std_writer import STDWriter
from gojo.external.libc import FD_STDOUT, FD_STDIN, FD_STDERR
from gojo.builtins._bytes import Bytes
from goodies import FileWrapper
from gojo.builtins import Bytes


fn test_file_wrapper() raises:
Expand All @@ -255,15 +266,14 @@ fn test_file_wrapper() raises:
test.assert_equal(String(dest), String(Bytes("12345")))
```

`io.STDWriter`
`goodies.STDWriter`

```py
from tests.wrapper import MojoTest
from gojo.io.file import File, FileWrapper
from gojo.io.reader import Reader
from gojo.io.std_writer import STDWriter
from goodies import STDWriter
from gojo.external.libc import FD_STDOUT, FD_STDIN, FD_STDERR
from gojo.builtins._bytes import Bytes
from gojo.builtins import Bytes


fn test_writer() raises:
var test = MojoTest("Testing io.STDWriter")
Expand Down Expand Up @@ -316,17 +326,17 @@ fn test_string_reader() raises:
var buffer = Bytes()
var bytes_read = reader.read(buffer)

test.assert_equal(bytes_read, len(example))
test.assert_equal(bytes_read.value, len(example))
test.assert_equal(str(buffer), "Hello, World!")

# Seek to the beginning of the reader.
var position = reader.seek(0, io.SEEK_START)
test.assert_equal(position, 0)
test.assert_equal(position.value, 0)

# Read the first byte from the reader.
buffer = Bytes()
var byte = reader.read_byte()
test.assert_equal(byte, 72)
test.assert_equal(byte.value, 72)

# Unread the first byte from the reader. Remaining bytes to be read should be the same as the length of the example string.
reader.unread_byte()
Expand Down Expand Up @@ -363,7 +373,7 @@ fn test_string_builder() raises:
),
)

# Create a string from the builder by writing bytes to it.
# Create a string from the builder by writing bytes to it. In this case, we throw away the Result response and don't check if it has an error.
builder = StringBuilder()
_ = builder.write(Bytes("Hello"))
_ = builder.write_byte(32)
Expand All @@ -372,4 +382,6 @@ fn test_string_builder() raises:

## Sharp Edges & Bugs

- TODO: `bufio.Reader.read_line` is broken until Mojo support unpacking Memory only types from return Tuples.
- `bufio.Reader.read_line` is broken until Mojo supports unpacking Memory only types from return Tuples.
- `Result[T, Error]` is being used in the meantime until Mojo supports unpacking tuples that contain Memory only types. There can be some memory issues with accessing errors from `Result`.
- Unicode characters are not supported until Mojo supports them. Sometimes it happens to work, but it's not guaranteed due to length discrepancies with ASCII and Unicode characters. If the character has a length of 2 or more, it probably will not work.
4 changes: 4 additions & 0 deletions external/csv/__init__.mojo
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""https://github.com/mzaks/mojo-csv/tree/main all sourced from Maxim's mojo-csv repository!"""

from .csv_builder import CsvBuilder
from .csv_table import CsvTable
137 changes: 137 additions & 0 deletions external/csv/csv_builder.mojo
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
"""https://github.com/mzaks/mojo-csv/tree/main all sourced from Maxim's mojo-csv repository!"""

from memory.memory import memcpy
from memory.buffer import Buffer, Dim
from .string_utils import find_indices, contains_any_of, string_from_pointer

# Buffer specialization over signed 8-bit integers.
alias BufferType = Buffer[DType.int8]
# CSV control characters. Each has a string form (`*_CHAR`), passed to
# `contains_any_of` / `find_indices`, and a numeric form obtained via `ord`,
# stored directly into the byte buffer.
alias CR_CHAR = "\r"
alias CR = ord(CR_CHAR)
alias LF_CHAR = "\n"
alias LF = ord(LF_CHAR)
alias COMMA_CHAR = ","
alias COMMA = ord(COMMA_CHAR)
alias QUOTE_CHAR = '"'
# Unlike CR/LF/COMMA, QUOTE is explicitly wrapped in Int8.
alias QUOTE = Int8(ord(QUOTE_CHAR))


struct CsvBuilder:
    """Incrementally builds CSV text in a manually managed, growable byte buffer.

    Cells are appended one at a time with `push`. A comma is written between
    cells of the same row and CR LF between rows, decided by the running
    element count modulo the configured column count. `finish` consumes the
    builder and returns the accumulated bytes as a `String`.
    """

    # Heap allocation holding the CSV bytes written so far.
    var _buffer: DTypePointer[DType.int8]
    # Size, in bytes, of the current `_buffer` allocation.
    var _capacity: Int
    # Number of bytes of `_buffer` that are in use.
    var num_bytes: Int
    # Number of cells per row.
    var _column_count: Int
    # Total number of cells pushed so far (across all rows).
    var _elements_count: Int
    # Set by `finish`; when True, `__del__` does not free `_buffer`.
    var _finished: Bool

    fn __init__(inout self, column_count: Int):
        """Creates an empty builder whose rows have `column_count` cells.

        Args:
            column_count: Number of cells per row.
        """
        self._capacity = 1024
        self._buffer = DTypePointer[DType.int8].alloc(self._capacity)
        self._column_count = column_count
        self._elements_count = 0
        self._finished = False
        self.num_bytes = 0

    fn __init__(inout self, *coulmn_names: StringLiteral):
        """Creates a builder and pushes the given names as the first row.

        The column count is the number of names supplied.

        Args:
            coulmn_names: Header cell values, one per column.
        """
        self._capacity = 1024
        self._buffer = DTypePointer[DType.int8].alloc(self._capacity)
        self._elements_count = 0
        self._finished = False
        self.num_bytes = 0

        var column_name_list: VariadicList[StringLiteral] = coulmn_names
        self._column_count = len(column_name_list)
        for i in range(len(column_name_list)):
            self.push(coulmn_names[i])

    fn __del__(owned self):
        """Frees the buffer unless `finish` has already been called."""
        if not self._finished:
            self._buffer.free()

    fn push[D: DType](inout self, value: SIMD[D, 1]):
        """Appends a scalar value as a cell, without escaping.

        Args:
            value: The scalar to stringify and append.
        """
        var s = String(value)
        self.push(s, False)

    fn push_stringabel[
        T: Stringable
    ](inout self, value: T, consider_escaping: Bool = False):
        """Appends the `str()` form of any `Stringable` value as a cell.

        Args:
            value: The value to stringify and append.
            consider_escaping: Forwarded to `push`; when True the text is
                quoted if it contains CSV control characters.
        """
        self.push(str(value), consider_escaping)

    fn push_empty(inout self):
        """Appends an empty cell."""
        self.push("", False)

    fn fill_up_row(inout self):
        """Pads the current row with empty cells until it is complete.

        Does nothing when the element count is already a multiple of the
        column count.
        """
        var num_empty = self._column_count - (self._elements_count % self._column_count)
        if num_empty < self._column_count:
            for _ in range(num_empty):
                self.push_empty()

    fn push(inout self, s: String, consider_escaping: Bool = True):
        """Appends `s` as the next cell, preceded by the proper separator.

        Args:
            s: The cell text.
            consider_escaping: When True and `s` contains CR, LF, a comma or a
                double quote, the text is passed through `escape_quotes_in`,
                wrapped in double quotes, and pushed without further checks.
        """
        if consider_escaping and contains_any_of(
            s, CR_CHAR, LF_CHAR, COMMA_CHAR, QUOTE_CHAR
        ):
            return self.push(QUOTE_CHAR + escape_quotes_in(s) + QUOTE_CHAR, False)

        var size = len(s)
        # Room for the cell bytes plus the widest separator (CR LF).
        self._extend_buffer_if_needed(size + 2)
        if self._elements_count > 0:
            if self._elements_count % self._column_count == 0:
                # Previous row is complete: start a new line.
                self._buffer.offset(self.num_bytes).store(CR)
                self._buffer.offset(self.num_bytes + 1).store(LF)
                self.num_bytes += 2
            else:
                self._buffer.offset(self.num_bytes).store(COMMA)
                self.num_bytes += 1

        memcpy(self._buffer.offset(self.num_bytes), s._as_ptr(), size)
        # Keep `s` alive until the raw-pointer copy above has completed.
        s._strref_keepalive()

        self.num_bytes += size
        self._elements_count += 1

    @always_inline
    fn _extend_buffer_if_needed(inout self, size: Int):
        """Grows the buffer so that `size` more bytes fit after `num_bytes`.

        The capacity is doubled until it is at least `num_bytes + size`; the
        existing bytes are copied to the new allocation and the old one freed.

        Args:
            size: Number of additional bytes about to be written.
        """
        if self.num_bytes + size < self._capacity:
            return
        var new_size = self._capacity
        while new_size < self.num_bytes + size:
            new_size *= 2
        var p = DTypePointer[DType.int8].alloc(new_size)
        memcpy(p, self._buffer, self.num_bytes)
        self._buffer.free()
        self._capacity = new_size
        self._buffer = p

    fn finish(owned self) -> String:
        """Completes the last row, terminates it with CR LF and returns the text.

        Consumes the builder. `_finished` is set so `__del__` does not free the
        buffer that is handed to `string_from_pointer`.

        Returns:
            The accumulated CSV content.
        """
        self._finished = True
        self.fill_up_row()
        # `push` only guarantees capacity >= num_bytes once it has written, so
        # the three trailing bytes below need their own reservation; without it
        # a buffer that is exactly full would be written out of bounds.
        self._extend_buffer_if_needed(3)
        self._buffer.offset(self.num_bytes).store(CR)
        self._buffer.offset(self.num_bytes + 1).store(LF)
        # The length passed on counts one byte past CR LF (presumably the NUL
        # terminator expected by `string_from_pointer` -- confirm against that
        # helper). Zero it explicitly so it is never left uninitialized.
        self._buffer.offset(self.num_bytes + 2).store(0)
        self.num_bytes += 3
        return string_from_pointer(self._buffer, self.num_bytes)


fn escape_quotes_in(s: String) -> String:
    """Returns `s` with every double quote doubled.

    An extra quote byte is inserted in front of each position reported by
    `find_indices(s, QUOTE_CHAR)`. When no quote is found, `s` is returned
    unchanged.

    Args:
        s: The text to escape.

    Returns:
        The escaped text, built in a freshly allocated buffer that is handed
        to `string_from_pointer`.
    """
    var quote_positions = find_indices(s, QUOTE_CHAR)
    var quote_count = len(quote_positions)
    if quote_count == 0:
        return s

    # Length of the underlying buffer (presumably including the trailing NUL
    # byte -- confirm against String's layout).
    var total_bytes = len(s._buffer)
    var src = s._as_ptr()
    var dst = DTypePointer[DType.int8].alloc(total_bytes + quote_count)

    # Everything before the first quote, followed by the first inserted quote.
    var first = quote_positions[0].to_int()
    memcpy(dst, src, first)
    dst.offset(first).store(QUOTE)
    var written = first + 1

    # Each span runs from the previous quote (inclusive) up to the next quote
    # (exclusive); the inserted quote then goes right in front of the next one.
    for i in range(1, quote_count):
        var start = quote_positions[i - 1].to_int()
        var span = quote_positions[i].to_int() - start
        memcpy(dst.offset(written), src.offset(start), span)
        written += span
        dst.offset(written).store(QUOTE)
        written += 1

    # Tail: from the last quote (inclusive) to the end of the source buffer.
    var last = quote_positions[quote_count - 1].to_int()
    memcpy(dst.offset(written), src.offset(last), total_bytes - last)
    return string_from_pointer(dst, total_bytes + quote_count)
Loading

0 comments on commit ea9ceab

Please sign in to comment.