AI RFC 1 (for RFC 9402)

Attempt 1

system> Hello Python 3.11 expert programming helper! Can you help me translate a grammar from ABNF notation to a python Lark parser?

assistant> Sure thing! What is the ABNF notation you’re working with?

user>

SEQUENCE  =  POSITION / POSITION "=>" SEQUENCE
POSITION  =  ADJACENT
ADJACENT  =  OVER / ADJACENT "+" OVER
OVER      =  MULTIPLE / MULTIPLE "/" POSITION
MULTIPLE  =  CONCAT / NUMBER [ "*" ] MULTIPLE / NUMBER "/" MULTIPLE
CONCAT    =  SUBJECT [ NUMBER ] / [ PARTIAL ] CONTAINER [ PARTIAL ]
CONTAINER =  "[" OPT-POS "]" / "(" OPT-POS ")"
CONTAINER =/ "{" OPT-POS "}" / "<" OPT-POS ">"
OPT-POS   =  [ POSITION ]
SUBJECT   =  CAT / 1*ALPHA / "@"
CAT       =  "cat" / PARTIAL
PARTIAL   =  "c" / "a" / "t" / "ca" / "at"
ALPHA     =   %x41-5A / %x61-7A
NUMBER    =  1*DIGIT
DIGIT     =  "0" / "1" / "2" / "3" / "4"
DIGIT     =/ "5" / "6" / "7" / "8" / "9"

assistant> Certainly! Here’s a Lark parser based on the ABNF notation you provided:

from lark import Lark, Transformer

grammar = r"""
    SEQUENCE  :  POSITION | POSITION "=>" SEQUENCE
    POSITION  :  ADJACENT
    ADJACENT  :  OVER | ADJACENT "+" OVER
    OVER      :  MULTIPLE | MULTIPLE "/" POSITION
    MULTIPLE  :  CONCAT | NUMBER [ "*" ] MULTIPLE | NUMBER "/" MULTIPLE
    CONCAT    :  SUBJECT [ NUMBER ] | [ PARTIAL ] CONTAINER [ PARTIAL ]
    CONTAINER :  "[" OPT_POS "]" | "(" OPT_POS ")" | "{" OPT_POS "}" | "<" OPT_POS ">"
    OPT_POS   :  [ POSITION ]
    SUBJECT   :  CAT | /[a-zA-Z]+/ | "@"
    CAT       :  "cat" | PARTIAL
    PARTIAL   :  "c" | "a" | "t" | "ca" | "at"
    NUMBER    :  /[0-9]+/
    %import common.CNAME -> NAME
    %import common.WS_INLINE
    %ignore WS_INLINE
"""

class ABNFTransformer(Transformer):
    def start(self, items):
        return items[0]

    def sequence(self, items):
        if len(items) == 2:
            return (items[0], items[1])
        else:
            return items[0]

    def position(self, items):
        return items[0]

    def adjacent(self, items):
        if len(items) == 1:
            return items[0]
        else:
            return (items[0], items[1])

    def over(self, items):
        if len(items) == 1:
            return items[0]
        else:
            return (items[0], items[1])

    def multiple(self, items):
        if len(items) == 1:
            return items[0]
        elif len(items) == 2:
            return (int(items[0]), items[1])
        else:
            return (int(items[0]), items[2])

    def concat(self, items):
        if len(items) == 1:
            return items[0]
        elif len(items) == 2:
            return (items[0], int(items[1]))
        elif len(items) == 3:
            return (items[1], None)
        else:
            return (items[1], int(items[2]))

    def container(self, items):
        return items[0]

    def opt_pos(self, items):
        if items:
            return items[0]
        else:
            return None

    def subject(self, items):
        return items[0]

    def cat(self, items):
        return "cat"

    def partial(self, items):
        return "".join(items)

    def number(self, items):
        return items[0]


parser = Lark(grammar, transformer=ABNFTransformer())

This parser defines the grammar rules, and also includes a Transformer class that processes the AST produced by the parser.

user> Can you generate some test cases for this grammar? Here are some examples and their meanings (include comments in your test examples):

[cat]

Figure 1: A Cat in a Box

[cat] + cat

Figure 2: A Cat in a Box Next to a Cat Not in a Box

cat / [cat]

Figure 3: A Cat over a Box Containing Another Cat

[c]at

Figure 4: A Cat with Its Head inside a Box

3 * cat

Figure 5: 3 Cats Side by Side

3 / cat

Figure 6: 3 Cats on Top of Each Other

cat + cat / [cat]

Figure 7: A Cat Standing Next to a Box That Has a Cat on Top and inside of It

<cat + cat> / [cat]

Figure 8: Two Cats Standing on a Box with Another Cat inside of It

cat1 + [cat2] => cat2 + [cat1]

Figure 9: A Cat inside a Box and a Cat outside Swap Places

assistant> Sure, here are some test cases you can use to test the parser:

def test_grammar():
    assert parser.parse("[cat]").children == ["cat"]
    assert parser.parse("[cat]+cat").children == [("cat", None), "cat"]
    assert parser.parse("cat/[cat]").children == ["cat", ("[cat]", None)]
    assert parser.parse("[c]at").children == [("c", None), "at"]
    assert parser.parse("3*cat").children == [(3, "cat")]
    assert parser.parse("3/cat").children == [(3, None), "cat"]
    assert parser.parse("cat+cat/[cat]").children == [("cat", None), ("cat", None), ("[cat]", None)]
    assert parser.parse("<cat+cat>/[cat]").children == [(("<", ("cat", None), "+", ("cat", None), ">"), None), ("[cat]", None)]
    assert parser.parse("cat1+[cat2]=>cat2+[cat1]").children == [("cat1", None), ("[cat2]", None), ("=>", None), ("cat2", None), ("[cat1]", None)]

These test cases cover all the example cases you provided, and should ensure that the parser is able to correctly parse the input and generate the expected AST.

user> Please comment your tests better.

assistant> Sure, here are the same test cases, with comments explaining what each test case is checking:

def test_parser():

    # Check that a single subject is correctly parsed
    assert parser.parse("[cat]").children == ["cat"]

    # Check that an adjacent subject is parsed as a tuple
    assert parser.parse("[cat]+cat").children == [("cat", None), "cat"]

    # Check that a subject over a container is parsed as a tuple
    assert parser.parse("cat/[cat]").children == ["cat", ("[cat]", None)]

    # Check that a partial subject is correctly parsed
    assert parser.parse("[c]at").children == [("c", None), "at"]

    # Check that a multiple subject is correctly parsed as a tuple
    assert parser.parse("3*cat").children == [(3, "cat")]

    # Check that a multiple subject over a position is correctly parsed as a tuple
    assert parser.parse("3/cat").children == [(3, None), "cat"]

    # Check that a concatenated subject with container is correctly parsed as a tuple
    assert parser.parse("cat+cat/[cat]").children == [("cat", None), ("cat", None), ("[cat]", None)]

    # Check that a concatenated subject with nested container is correctly parsed as a tuple
    assert parser.parse("<cat+cat>/[cat]").children == [
        (("<", ("cat", None), "+", ("cat", None), ">"), None), 
        ("[cat]", None)
    ]

    # Check that a sequence of positions and containers is correctly parsed as a tuple
    assert parser.parse("cat1+[cat2]=>cat2+[cat1]").children == [
        ("cat1", None), 
        ("[cat2]", None), 
        ("=>", None), 
        ("cat2", None), 
        ("[cat1]", None)
    ]

I hope that makes it clear what each test is checking!

user> Please use more useful comments. Saying “parsed as a tuple” doesn’t provide information.

assistant> I apologize for the unhelpful comments earlier. Here are the same test cases with more informative comments:

def test_parser():

    # Check that a single subject is correctly parsed
    assert parser.parse("[cat]").children == ["cat"]  # Expected output is the subject "cat" in a container

    # Check that an adjacent subject is correctly parsed as a tuple
    assert parser.parse("[cat]+cat").children == [("cat", None), "cat"]  # Expected output is a tuple of adjacent subjects: "cat" in a container and "cat" outside a container

    # Check that a subject over a container is correctly parsed as a tuple
    assert parser.parse("cat/[cat]").children == ["cat", ("[cat]", None)]  # Expected output is a tuple of a subject "cat" and a container containing another subject "cat"

    # Check that a partial subject is correctly parsed
    assert parser.parse("[c]at").children == [("c", None), "at"]  # Expected output is the subject "cat" in a container with the partial "c"

    # Check that a multiple subject is correctly parsed as a tuple
    assert parser.parse("3*cat").children == [(3, "cat")]  # Expected output is a tuple of 3 subjects "cat" in a row

    # Check that a multiple subject over a position is correctly parsed as a tuple
    assert parser.parse("3/cat").children == [(3, None), "cat"]  # Expected output is a tuple of 3 subjects "cat" stacked vertically

    # Check that a concatenated subject with container is correctly parsed as a tuple
    assert parser.parse("cat+cat/[cat]").children == [("cat", None), ("cat", None), ("[cat]", None)]  # Expected output is a tuple of concatenated subjects "cat" next to each other, followed by a container containing another subject "cat"

    # Check that a concatenated subject with nested container is correctly parsed as a tuple
    assert parser.parse("<cat+cat>/[cat]").children == [
        (("<", ("cat", None), "+", ("cat", None), ">"), None), 
        ("[cat]", None)
    ]  # Expected output is a tuple of concatenated subjects "cat" in a container, enclosed in angled brackets "<" ">"; followed by a container containing another subject "cat"

    # Check that a sequence of positions and containers is correctly parsed as a tuple
    assert parser.parse("cat1+[cat2]=>cat2+[cat1]").children == [
        ("cat1", None), 
        ("[cat2]", None), 
        ("=>", None), 
        ("cat2", None), 
        ("[cat1]", None)
    ]  # Expected output is a tuple representing a sequence of movements of subjects "cat" between two containers, where "cat1" and "cat2" are swapped between the two containers

(out of tokens)

Attempt 2

system> Hello Python 3.11 expert programming helper! Can you help me translate a grammar from ABNF notation to a python Lark parser?

assistant> Of course! Please provide me with the ABNF notation grammar and any additional specifications.

user> Here is the definition and some use cases: 2.1. Terminology

This document uses specific terms to refer to items being depicted by the notation described herein. To avoid ambiguity, such terms are defined as follows:

Subject:

    The term "subject" is used in this document to refer to the object that is the focus in the media to be annotated. This usually is an animate object, specifically a cat. An annotation can have multiple subjects interacting in various ways.

Cat:

    A cat is a special kind of subject of feline origin. This document will assume a house cat is present in the source media; however, other felines are also acceptable.
Container:

    The term "container" is used to refer to inanimate objects inside of which one or more subjects can be located. Most commonly, this will be a cardboard box; however, a variety of containers can be used.

2.2. Grammar

The grammar is defined using the ABNF notation [RFC5234].

SEQUENCE  =  POSITION / POSITION "=>" SEQUENCE
POSITION  =  ADJACENT
ADJACENT  =  OVER / ADJACENT "+" OVER
OVER      =  MULTIPLE / MULTIPLE "/" POSITION
MULTIPLE  =  CONCAT / NUMBER [ "*" ] MULTIPLE / NUMBER "/" MULTIPLE
CONCAT    =  SUBJECT [ NUMBER ] / [ PARTIAL ] CONTAINER [ PARTIAL ]
CONTAINER =  "[" OPT-POS "]" / "(" OPT-POS ")"
CONTAINER =/ "{" OPT-POS "}" / "<" OPT-POS ">"
OPT-POS   =  [ POSITION ]
SUBJECT   =  CAT / 1*ALPHA / "@"
CAT       =  "cat" / PARTIAL
PARTIAL   =  "c" / "a" / "t" / "ca" / "at"
ALPHA     =   %x41-5A / %x61-7A
NUMBER    =  1*DIGIT
DIGIT     =  "0" / "1" / "2" / "3" / "4"
DIGIT     =/ "5" / "6" / "7" / "8" / "9"

3. Elements
3.1. Subjects
3.1.1. Cats

The standard notation for a cat is the word cat.
3.1.2. Partial Cats

When referencing cats partly inside a container, the annotation MUST contain the full cat mark adequately split inside and outside the container.
If a cat is only partly visible in the frame of the picture or video, the annotation MAY only reference the visible portion of the cat.The partial cat notations are as follows:

c:
    marks the head of the cat.

a:
    marks the body of the cat.
t:
    marks the tail of the cat.
ca:
    marks the head and body of the cat.
at:
    marks the body and tail of the cat.

The annotation for a partial cat SHOULD use the terms mentioned above that best describe the portion of the cat that is being referenced.
3.1.3. Other Animals

Other animals or animate objects SHOULD be represented with a suitable word describing the species of such animal. The cat-specific words described in this document MUST NOT be used for non-feline subjects.
3.1.4. Balls of Yarn

Balls of yarn SHOULD be represented with @.
3.2. Containers

When a cat or other subject is inside a container, the container notation MUST be used. Such notation is denoted by its subject being between brackets. The type of bracket depends on the shape of the container as follows:
Square brackets represent boxes or other containers with a rectangular opening.
Parentheses represent containers with a round opening or shape.Curly braces SHALL be used to represent soft containers without a fixed shape.Additionally, angle brackets MAY be used to group subjects outside a container. Such annotations MUST NOT contain partial cats.

assistant> Here’s a Lark parser for the provided ABNF notation:

grammar = """
    SEQUENCE : POSITION | POSITION "=>" SEQUENCE

    POSITION : ADJACENT

    ADJACENT : OVER | ADJACENT "+" OVER

    OVER : MULTIPLE | MULTIPLE "/" POSITION

    MULTIPLE : CONCAT | NUMBER [ "*" ] MULTIPLE | NUMBER "/" MULTIPLE

    CONCAT : SUBJECT [ NUMBER ] | [ PARTIAL ] CONTAINER [ PARTIAL ]

    CONTAINER : "[" OPT_POS "]" | "(" OPT_POS ")" | "{" OPT_POS "}" | "<" OPT_POS ">"

    OPT_POS : [ POSITION ]

    SUBJECT : CAT | /[a-zA-Z]+/ | "@"

    CAT : "cat" | PARTIAL

    PARTIAL : "c" | "a" | "t" | "ca" | "at"

    NUMBER : /[0-9]+/

    %import common.DIGIT
    %import common.WS_INLINE
    %ignore WS_INLINE
"""

This Lark grammar includes all the existing productions of the ABNF notation along with the required rules. Please let me know if you need any further help.

user> Test cases please? Well commented with what the test is testing.

assistant> Here are a few test cases along with comments explaining what they are testing:

from lark import Lark, Transformer

# Create the parser
parser = Lark(grammar, start='SEQUENCE')

# Test parsing a simple position
def test_position_only():
    input_str = "OVER"
    expected_output = "(expression (position (adjacent (over MULTIPLE))))"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

# Test parsing a position followed by a sequence
def test_position_then_sequence():
    input_str = "OVER=>OVER"
    expected_output = "(expression (position (adjacent (over MULTIPLE ))) =>"\
                      " (sequence (position (adjacent (over MULTIPLE))))"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

# Test parsing a concatenated expression with a subject inside a container
def test_concatenation_with_container():
    input_str = "[OVER][ca]cat"
    expected_output = "(expression (concatenation (container [ (opt_pos (position "\
                      "(adjacent (over MULTIPLE )))) ]) (partial ca) cat))"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

# Test parsing a multiple expression with a partial cat
def test_multiple_with_partial_cat():
    input_str = "3*c"
    expected_output = "(expression (multiple (concatenation (partial c)) 3))"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

# Test parsing a sequence with multiple positions and concatenation
def test_sequence_with_positions_and_concatenation():
    input_str = "OVER+OVER=>[OVER][t]cat"
    expected_output = "(expression (position (adjacent (over MULTIPLE + (adjacent (over "\
                      "MULTIPLE ))))) => (sequence (concatenation (container [ (opt_pos "\
                      "(position (adjacent (over MULTIPLE )))) ]) (partial t) cat)))"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

These test cases cover various scenarios of the provided ABNF notation, including a simple position, a position followed by a sequence, a concatenated expression with a subject inside a container, a multiple expression with a partial cat, and a sequence with multiple positions and concatenation.

user> Please incoroprate these into new test cases too:

This appendix provides some examples of the Concat notation.

[cat]

Figure 1: A Cat in a Box

[cat] + cat

Figure 2: A Cat in a Box Next to a Cat Not in a Box

cat / [cat]

Figure 3: A Cat over a Box Containing Another Cat

[c]at

Figure 4: A Cat with Its Head inside a Box

3 * cat

Figure 5: 3 Cats Side by Side

3 / cat

Figure 6: 3 Cats on Top of Each Other

cat + cat / [cat]

Figure 7: A Cat Standing Next to a Box That Has a Cat on Top and inside of It

<cat + cat> / [cat]

Figure 8: Two Cats Standing on a Box with Another Cat inside of It

cat1 + [cat2] => cat2 + [cat1]

Figure 9: A Cat inside a Box and a Cat outside Swap Places

assistant> Here are the amended test cases:

from lark import Lark, Transformer

# Create the parser
parser = Lark(grammar, start='SEQUENCE')

# Test parsing a subject inside a container
def test_subject_inside_container():
    input_str = "[cat]"
    expected_output = "(expression (concatenation (container [ (opt_pos None) ]) "\
                      "cat))"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

# Test parsing multiple subjects
def test_multiple_subjects():
    input_str = "[cat] + cat"
    expected_output = "(expression (concatenation (container [ (opt_pos None) ])"\
                      " cat ) + SUBJECT )"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

# Test parsing a subject over a container
def test_subject_over_container():
    input_str = "cat / [cat]"
    expected_output = "(expression SUBJECT / (concatenation (container [ "\
                      "(opt_pos None) ]) cat ))"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

# Test parsing partial cat inside a container
def test_partial_cat_inside_container():
    input_str = "[c]at"
    expected_output = "(expression (concatenation (container [ (opt_pos None) ]) "\
                      "(partial c) cat))"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

# Test parsing multiplication of subjects
def test_multiplication_of_subjects():
    input_str = "3 * cat"
    expected_output = "(expression (multiple (concatenation SUBJECT) 3))"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

# Test parsing division of subjects
def test_division_of_subjects():
    input_str = "3 / cat"
    expected_output = "(expression 3 / SUBJECT)"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

# Test parsing multiple positions and concatenation
def test_multiple_positions_and_concatenation():
    input_str = "cat + cat / [cat]"
    expected_output = "(expression (concatenation SUBJECT + SUBJECT / (concatenation "\
                      "(container [ (opt_pos None) ]) cat )))"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

# Test parsing with angle brackets
def test_with_angle_brackets():
    input_str = "<cat + cat> / [cat]"
    expected_output = "(expression (concatenation (container < (opt_pos None) >) "\
                      "(concatenation SUBJECT + SUBJECT) / (concatenation (container "\
                      "[ (opt_pos None) ]) cat )))"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

# Test swapping subjects within a sequence
def test_swapping_within_sequence():
    input_str = "cat1 + [cat2] => cat2 + [cat1]"
    expected_output = "(expression (position (adjacent (concatenation (container "\
                      "[ (opt_pos None) ]) cat1 ) + (concatenation (container [ "\
                      "(opt_pos None) ]) cat2 ))) => (sequence (position (adjacent "\
                      "(concatenation (container [ (opt_pos None) ]) cat2 ) + (concatenation "\
                      "(container [ (opt_pos None) ]) cat1 )))))"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

# Test more complex sequence
def test_complex_sequence():
    input_str = "[cat] + cat => <[cat] + cat> / [cat]"
    expected_output = "(expression (position (adjacent (concatenation (container [ "\
                      "(opt_pos None) ]) cat ) + SUBJECT )) => (sequence (position "\
                      "(adjacent (concatenation (container < (opt_pos None) >) "\
                      "(concatenation (container [ (opt_pos None) ]) cat ) + SUBJECT / "\
                      "(concatenation (container [ (opt_pos None) ]) cat ))))))"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

# Test cats on top of each other
def test_cats_on_top_of_each_other():
    input_str = "3 / cat"
    expected_output = "(expression 3 / SUBJECT)"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

# Test swapping places
def test_swapping_places():
    input_str = "cat1 + [cat2] => cat2 + [cat1]"
    expected_output = "(expression (position (adjacent (concatenation (container [ "\
                      "(opt_pos None) ]) cat1 ) + (concatenation (container [ "\
                      "(opt_pos None) ]) cat2 ))) => (sequence (position (adjacent "\
                      "(concatenation (container [ (opt_pos None) ]) cat2 ) + (concatenation "\
                      "(container [ (opt_pos None) ]) cat1 )))))"
    tree = parser.parse(input_str)
    assert str(tree) == expected_output

These test cases exercise a variety of string inputs and expected outputs, such as cats inside and outside of containers, concatenation, multiplication, division, and swapping of subjects.