4b: Text Processing

    ++at

        ::  door over a sample atom a; the arms documented below as
        ::  ++r:at, ++rf:at, etc. all read this a
        ++  at
          |_  a=@
    

    XX document

    ++r:at

          ::  pick a rendering for a: ++rtam if a is at least two bytes
          ::  long and every byte is '-' or a lowercase letter; otherwise
          ::  ++rud when a is at most two bytes wide, else ++rux
          ++  r
            ?:  ?&  (gte (met 3 a) 2)
                    |-
                    ?:  =(0 a)
                      &
                    =+  vis=(end 3 1 a)
                    ?&  ?|(=('-' vis) ?&((gte vis 'a') (lte vis 'z')))
                        $(a (rsh 3 1 a))
                    ==
                ==
              rtam
            ?:  (lte (met 3 a) 2)
              rud
            rux
    

    XX document

    ++rf:at

          ::  render a as a one-character tape: "&" for &, "|" for |;
          ::  crashes on any other value
          ++  rf    `tape`[?-(a $& '&', $| '|', * !!) ~]
    

    XX document

    ++rn:at

            ::  asserts that a is 0 and produces the tape "~"
            ++  rn    `tape`[?>(=(0 a) '~') ~]
    

    XX document

    ++rt:at

            ::  a as text: (trip a) passed through ++mesc, wrapped in
            ::  single quotes
            ++  rt    `tape`['\'' (weld (mesc (trip a)) `tape`['\'' ~])]
    

    XX document

    ++rta:at

            ::  alias of ++rt
            ++  rta   rt
    

    XX document

    ++rtam:at

          ::  '%' followed by the characters of a
          ++  rtam  `tape`['%' (trip a)]
    

    XX document

    ++rub:at

          ::  "0b" followed by the base-2 digits of a, via ++rum
          ++  rub   `tape`['0' 'b' (rum 2 ~ |=(b/@ (add '0' b)))]
    

    XX document

    ++rud:at

          ::  base-10 digits of a, via ++rum
          ++  rud   (rum 10 ~ |=(b/@ (add '0' b)))
    

    XX document

    ++rum:at

          ::  generic digit renderer for a.
          ::    b: base
          ::    c: tape the digits are prepended to
          ::    d: gate turning a digit value into a character
          ::  a separator is inserted between digit groups: ',' every 3
          ::  digits when b is 10, '-' every 4 digits otherwise.
          ::  when a is 0 the result is the single digit (d 0) before c
          ++  rum
            |=  {b/@ c/tape d/$-(@ @)}
            ^-  tape
            ?:  =(0 a)
              [(d 0) c]
            =+  e=0
            |-  ^-  tape
            ?:  =(0 a)
              c
            =+  f=&(!=(0 e) =(0 (mod e ?:(=(10 b) 3 4))))
            %=  $
              a  (div a b)
              c  [(d (mod a b)) ?:(f [?:(=(10 b) ',' '-') c] c)]
              e  +(e)
            ==
          ::
    

    XX document

    ++rup:at

          ::  produces a tape starting with '-', followed by a rendered
          ::  through the lookups tos:po and tod:po (defined outside this
          ::  section).  an atom wider than one 32-bit block is split into
          ::  blocks joined by "--"; within a block, two-byte groups are
          ::  joined by '-'.  in each group the low part is xored (mix)
          ::  with the high part before lookup.  when a has no bytes the
          ::  part after the leading '-' is "~"
          ++  rup
            =+  b=(met 3 a)
            ^-  tape
            :-  '-'
            |-  ^-  tape
            ?:  (gth (met 5 a) 1)
              %+  weld
                $(a (rsh 5 1 a), b (sub b 4))
              `tape`['-' '-' $(a (end 5 1 a), b 4)]
            ?:  =(0 b)
              ['~' ~]
            ?:  (lte b 1)
              (trip (tos:po a))
            |-  ^-  tape
            ?:  =(2 b)
              =+  c=(rsh 3 1 a)
              =+  d=(end 3 1 a)
              (weld (trip (tod:po c)) (trip (tos:po (mix c d))))
            =+  c=(rsh 3 2 a)
            =+  d=(end 3 2 a)
            (weld ^$(a c, b (met 3 c)) `tape`['-' $(a (mix c d), b 2)])
          ::
    

    XX document

    ++ruv:at

          ::  "0v" followed by the base-64 digits of a, via ++rum.
          ::  digit values map to 'A'-'Z' (0-25), 'a'-'z' (26-51),
          ::  '0'-'9' (52-61), '-' (62) and '+' (63)
          ++  ruv
            ^-  tape
            :+  '0'
              'v'
            %^    rum
                64
              ~
            |=  b/@
            ?:  =(63 b)
              '+'
            ?:  =(62 b)
              '-'
            ?:((lth b 26) (add 65 b) ?:((lth b 52) (add 71 b) (sub b 4)))
          ::
    

    XX document

    ++rux:at

          ::  "0x" followed by the base-16 digits of a, via ++rum, using
          ::  '0'-'9' and lowercase 'a'-'f'.  the -- on the line after
          ::  the arm closes the enclosing core
          ++  rux  `tape`['0' 'x' (rum 16 ~ |=(b/@ (add b ?:((lth b 10) 48 87))))]
          --
          ::::::::::::::::::::::::::::::::::::::::::::::::::::::  ::
    

    XX document



    ++cass

    To lowercase

    Turn all occurrences of uppercase letters in a ++tape into lowercase letters. Returns a ++tape.

    Accepts

    vib is a ++tape.

    Produces

    A ++tape.

    Source

        ::  map over vib, adding 32 to every character in 'A'-'Z' and
        ::  leaving all other characters unchanged
        ++  cass                                                ::  lowercase
          |=  vib/tape
          ^-  tape
          (turn vib |=(a/@ ?.(&((gte a 'A') (lte a 'Z')) a (add 32 a))))
    

    Examples

        > (cass "JOHN DOE")
        "john doe"
    
        > (cass "abc ABC 123 !@#")
        "abc abc 123 !@#"
    
        > (cass "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsQqRrVvWwXxYyZz")
        "aabbccddeeffgghhiijjkkllmmnnooppqqrrssqqrrvvwwxxyyzz"
    

    ++crip

    Tape to cord

    Produce a ++cord from a ++tape.

    Accepts

    a is a ++tape.

    Produces

    A ++cord.

    Source

        ::  assemble the characters of tape a into one atom with
        ::  (rap 3 a) and cast it to @t
        ++  crip  |=(a=tape `@t`(rap 3 a))                      ::  tape to cord
    

    Examples

        > (crip "john doe")
        'john doe'
    
        > (crip "abc 123 !@#")
        'abc 123 !@#'
    
        > `@ud`(crip "abc")
        6.513.249
    

    ++cuss

    To uppercase

    Turn all occurrences of lowercase letters in a ++tape into uppercase letters. Returns a ++tape.

    Accepts

    vib is a ++tape.

    Produces

    A ++tape.

    Source

        ::  map over vib, subtracting 32 from every character in
        ::  'a'-'z' and leaving all other characters unchanged
        ++  cuss                                                ::  uppercase
          |=  vib/tape
          ^-  tape
          (turn vib |=(a/@ ?.(&((gte a 'a') (lte a 'z')) a (sub a 32))))
    

    Examples

        > (cuss "john doe")
        "JOHN DOE"
    
        > (cuss "abc ABC 123 !@#")
        "ABC ABC 123 !@#"
    
        > (cuss "AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsQqRrVvWwXxYyZz")
        "AABBCCDDEEFFGGHHIIJJKKLLMMNNOOPPQQRRSSQQRRVVWWXXYYZZ"
    

    ++mesc

    Escape special chars

    Escape special characters, used in ++show.

    Accepts

    vib is a ++tape.

    Produces

    A ++tape.

    Source

        ::  escape a tape character by character:
        ::    - a backslash becomes two backslashes
        ::    - a character above 126, below 32, or equal to 39 (the
        ::      single quote) becomes a backslash, its ++rux:at hex
        ::      rendering, and a closing '/'
        ::    - every other character is kept as is
        ++  mesc                                                ::  ctrl code escape
          |=  vib/tape
          ^-  tape
          ?~  vib
            ~
          ?:  =('\\' i.vib)
            ['\\' '\\' $(vib t.vib)]
          ?:  ?|((gth i.vib 126) (lth i.vib 32) =(`@`39 i.vib))
            ['\\' (welp ~(rux at i.vib) '/' $(vib t.vib))]
          [i.vib $(vib t.vib)]
    

    Examples

        > (mesc "ham lus")
        "ham lus"
        > (mesc "bas\\hur")
        "bas\\\\hur"
        > (mesc "as'saß")
        "as\0x27/sa\0xc3/\0x9f/"
    

    ++runt

    Prepend n times

    Add a repetitions of character b to the head of ++tape c.

    Accepts

    a and b are atoms.

    c is a ++tape.

    Produces

    A ++tape.

    Source

        ::  prepend a copies of the character b to tape c; with a
        ::  equal to 0, c is produced unchanged
        ++  runt                                                ::  prepend repeatedly
          |=  [[a/@ b/@] c/tape]
          ^-  tape
          ?:  =(0 a)
            c
          [b $(a (dec a))]
        ::
    

    Examples

        > (runt [2 '/'] "ham")
        "//ham"
        > (runt [10 'a'] "")
        "aaaaaaaaaa"
    

    ++sand

    Soft-cast by odor

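    Soft-cast by odor: produces a gate that wraps b in a unit if b is valid for odor a (as judged by ++sane), and produces ~ otherwise.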

    Accepts

    a is a ++span (@ta).

    b is an atom.

    Produces

    A (unit @).

    Source

        ::  takes an odor a and produces a gate on an atom b; that gate
        ::  produces [~ b] when ((sane a) b) holds and ~ otherwise
        ++  sand                                                ::  atom sanity
          |=  a/@ta
          |=  b/@  ^-  (unit @)
          ?.(((sane a) b) ~ [~ b])
        ::
    

    Examples

        > `(unit @ta)`((sand %ta) 'sym-som')
        [~ ~.sym-som]
        > `(unit @ta)`((sand %ta) 'err!')
        ~
    

    ++sane

    Check odor validity

    Check validity by odor. Produces a gate.

    Accepts

    a is a ++span (@ta).

    b is an atom.

    Produces

    A boolean.

    Source

        ::  takes an odor a and produces a gate testing an atom b.
        ::  only odors whose first byte is 't' are handled; any other
        ::  odor crashes with %sane-stub.
        ::    %tas: every byte is a lowercase letter, or a '-' or digit
        ::          that is not at index 0
        ::    %ta:  every byte is a lowercase letter, a digit, or one of
        ::          '-' '~' '_' '.'
        ::    else: no byte below 32 other than 10 (newline); each
        ::          character's length comes from ++teff, and the
        ::          trailing bytes of a multi-byte character must be
        ::          at least 128
        ::  NOTE(review): in the %tas branch !=(len inx) looks always
        ::  true, since the loop already returns when inx equals len;
        ::  a trailing '-' is therefore accepted - confirm intent
        ++  sane                                                ::  atom sanity
          |=  a/@ta
          |=  b/@  ^-  ?
          ?.  =(%t (end 3 1 a))
            ~|(%sane-stub !!)
          =+  [inx=0 len=(met 3 b)]
          ?:  =(%tas a)
            |-  ^-  ?
            ?:  =(inx len)  &
            =+  cur=(cut 3 [inx 1] b)
            ?&  ?|  &((gte cur 'a') (lte cur 'z'))
                    &(=('-' cur) !=(0 inx) !=(len inx))
                    &(&((gte cur '0') (lte cur '9')) !=(0 inx))
                ==
                $(inx +(inx))
            ==
          ?:  =(%ta a)
            |-  ^-  ?
            ?:  =(inx len)  &
            =+  cur=(cut 3 [inx 1] b)
            ?&  ?|  &((gte cur 'a') (lte cur 'z'))
                    &((gte cur '0') (lte cur '9'))
                    |(=('-' cur) =('~' cur) =('_' cur) =('.' cur))
                ==
                $(inx +(inx))
            ==
          |-  ^-  ?
          ?:  =(0 b)  &
          =+  cur=(end 3 1 b)
          ?:  &((lth cur 32) !=(10 cur))  |
          =+  len=(teff cur)
          ?&  |(=(1 len) =+(i=1 |-(|(=(i len) &((gte (cut 3 [i 1] b) 128) $(i +(i)))))))
              $(b (rsh 3 len b))
          ==
        ::
    

    Examples

        > ((sane %tas) %mol)
        %.y
        > ((sane %tas) 'lam')
        %.y
        > ((sane %tas) 'more ace')
        %.n
    

    ++teff

    UTF8 Length

    Produces the number of bytes in the first UTF-8 character of a, or 0 if a is empty.

    Accepts

    a is a @t.

    Produces

    An atom.

    Source

          ::  length in bytes of the first utf8 character of a, judged
          ::  from its lead byte alone:
          ::    lead byte up to 127 -> 1, up to 223 -> 2,
          ::    up to 239 -> 3, anything higher -> 4
          ::  produces 0 when a is the empty cord.  crashes with
          ::  %bad-utf8 if the lead byte is below 32 and is not 10
          ::  (newline), or if the low byte is 0 while a is nonzero
          ++  teff                                              ::  length utf8
            |=  a/@t  ^-  @
            =+  b=(end 3 1 a)
            ~|  %bad-utf8
            ?:  =(0 b)
              ?>(=(`@`0 a) 0)
            ?>  |((gte b 32) =(10 b))
            ?:((lte b 127) 1 ?:((lte b 223) 2 ?:((lte b 239) 3 4)))
    

    Examples

        > (teff 'a')
        1
        > (teff 'ß')
        2
    

    ++trim

    Tape split

    Split first a characters off ++tape b.

    Accepts

    a is an atom.

    b is a ++tape.

    Produces

    A cell of ++tapes, p and q.

    Source

          ::  split tape b after its first a characters, producing the
          ::  cell [p q] where p is the prefix and q is the remainder.
          ::  if b has fewer than a characters, p is all of b and q is
          ::  empty
          ++  trim                                              ::  tape split
            |=  {a/@ b/tape}
            ^-  {p/tape q/tape}
            ?~  b
              [~ ~]
            ?:  =(0 a)
              [~ b]
            ::  split the tail first, then put the head back on the prefix
            =+  c=$(a (dec a), b t.b)
            [[i.b p.c] q.c]
        ::
    

    Examples

        > (trim 5 "lasok termun")
        [p="lasok" q=" termun"]
        > (trim 5 "zam")
        [p="zam" q=""]
    

    ++trip

    Cord to tape

    Produce a ++tape from ++cord.

    Accepts

    a is an atom.

    Produces

    A ++tape.

    Source

        ::  unpack the atom a into a tape one byte at a time, lowest
        ::  byte first; an atom with no bytes gives the empty tape.
        ::  the ~/ line attaches the hint name %trip to this arm
        ++  trip                                                ::  cord to tape
          ~/  %trip
          |=  a/@  ^-  tape
          ?:  =(0 (met 3 a))
            ~
          [^-(@ta (end 3 1 a)) $(a (rsh 3 1 a))]
        ::
    

    Examples

        > (trip 'john doe')
        "john doe"
        > (trip 'abc 123 !@#')
        "abc 123 !@#"
        > (trip 'abc')
        "abc"
    

    ++tuba

    UTF8 to UTF32 tape

    Convert ++tape to a ++list of codepoints (@c).

    Accepts

    a is a ++tape.

    Produces

    A ++list of codepoints @c.

    Source

        ::  join tape a into a single atom with (rap 3 a), convert it
        ::  with ++turf, then split the result into a list with (rip 5)
        ++  tuba                                                ::  utf8 to utf32 tape
          |=  a/tape
          ^-  (list @c)
          (rip 5 (turf (rap 3 a)))                              ::  XX horrible
        ::
    

    Examples

        > (tuba "я тут")
        ~[~-~44f. ~-. ~-~442. ~-~443. ~-~442.]
        > (tuba "chars")
        ~[~-c ~-h ~-a ~-r ~-s]
    

    ++tufa

    UTF32 to UTF8 tape

    Wrap a ++list of utf32 codepoints into a utf8 ++tape.

    Accepts

    a is a ++list of @c.

    Produces

    A ++tape.

    Source

        ::  for each codepoint in a, encode it with ++tuft, split the
        ::  result into bytes with (rip 3), and weld the pieces into
        ::  one tape; an empty list gives ""
        ++  tufa                                                ::  utf32 to utf8 tape
          |=  a=(list @c)
          ^-  tape
          ?~  a  ""
          (weld (rip 3 (tuft i.a)) $(a t.a))
        ::
    

    Examples

        > (tufa ~[~-~44f. ~-. ~-~442. ~-~443. ~-~442.])
        "я тут"
        > (tufa ((list @c) ~[%a %b 0xb1 %c]))
        "ab±c"
    

    ++tuft

    UTF32 to UTF8 text

    Convert utf32 glyph to LSB utf8 ++cord.

    Accepts

    a is a codepoint (@c).

    Produces

    A ++cord.

    Source

        ::  encode the codepoints packed in a as utf8.  a is consumed
        ::  one 32-bit block at a time from the low end; each block b
        ::  becomes 1 byte (b up to 0x7f), 2 bytes (up to 0x7ff),
        ::  3 bytes (up to 0xffff) or 4 bytes (anything larger).  the
        ::  resulting byte list is assembled into a cord with (rap 3)
        ++  tuft                                                ::  utf32 to utf8 text
          |=  a/@c
          ^-  @t
          %+  rap  3
          |-  ^-  (list @)
          ?:  =(`@`0 a)
            ~
          =+  b=(end 5 1 a)
          =+  c=$(a (rsh 5 1 a))
          ?:  (lte b 0x7f)
            [b c]
          ?:  (lte b 0x7ff)
            :*  (mix 0b1100.0000 (cut 0 [6 5] b))
                (mix 0b1000.0000 (end 0 6 b))
                c
            ==
          ?:  (lte b 0xffff)
            :*  (mix 0b1110.0000 (cut 0 [12 4] b))
                (mix 0b1000.0000 (cut 0 [6 6] b))
                (mix 0b1000.0000 (end 0 6 b))
                c
            ==
          :*  (mix 0b1111.0000 (cut 0 [18 3] b))
              (mix 0b1000.0000 (cut 0 [12 6] b))
              (mix 0b1000.0000 (cut 0 [6 6] b))
              (mix 0b1000.0000 (end 0 6 b))
              c
          ==
    

    Examples

        > (tuft `@c`%a)
        'a'
        > (tuft `@c`0xb6)
        '¶'
    

    ++turf

    UTF8 to UTF32 cord

    Convert utf8 (++cord) to utf32 codepoints.

    Accepts

    a is a @t.

    Produces

    A @c of packed UTF-32 codepoints, one per 32-bit block.

    Source

        ::  decode the utf8 cord a into codepoints packed with (rap 5).
        ::  for each character, ++teff gives its byte length b (0 ends
        ::  the loop); the payload bit fields for that length are cut
        ::  out of a and combined with (can 0) into the codepoint c.
        ::  c is then re-encoded with ++tuft and compared against the
        ::  original b bytes; a mismatch crashes with %bad-utf8
        ++  turf                                                ::  utf8 to utf32
          |=  a/@t
          ^-  @c
          %+  rap  5
          |-  ^-  (list @c)
          =+  b=(teff a)
          ?:  =(0 b)  ~
          =+  ^=  c
              %+  can  0
              %+  turn
                ^-  (list {p/@ q/@})
                ?+  b  !!
                  $1  [[0 7] ~]
                  $2  [[8 6] [0 5] ~]
                  $3  [[16 6] [8 6] [0 4] ~]
                  $4  [[24 6] [16 6] [8 6] [0 3] ~]
                ==
              |=({p/@ q/@} [q (cut 0 [p q] a)])
          ?.  =((tuft c) (end 3 b a))  ~|(%bad-utf8 !!)
          [c $(a (rsh 3 b a))]
        ::
    

    Examples

        > (turf 'my ßam')
        ~-my.~df.am
        > 'я тут'
        'я тут'
        > (turf 'я тут')
        ~-~44f..~442.~443.~442.
        > `@ux`'я тут'
        0x82.d183.d182.d120.8fd1
        > `@ux`(turf 'я тут')
        0x442.0000.0443.0000.0442.0000.0020.0000.044f
    

    ++wack

    Coin format encode

    Escape ++span ~ as ~~ and _ as ~-. Used for printing.

    Accepts

    a is a ++span (@ta).

    Produces

    A ++span (@ta).

    Source

        ::  walk the bytes of a, writing '~' as "~~" and '_' as "~-";
        ::  all other bytes are copied unchanged.  the result is
        ::  reassembled with (rap 3)
        ++  wack                                                ::  coin format
          |=  a/@ta
          ^-  @ta
          =+  b=(rip 3 a)
          %+  rap  3
          |-  ^-  tape
          ?~  b
            ~
          ?:  =('~' i.b)  ['~' '~' $(b t.b)]
          ?:  =('_' i.b)  ['~' '-' $(b t.b)]
          [i.b $(b t.b)]
        ::
    

    Examples

        > (wack '~20_sam~')
        ~.~~20~-sam~~
    
        > `@t`(wack '~20_sam~')
        '~~20~-sam~~'
    
        > ~(rend co %many ~[`ud/5 `ta/'~20_sam'])
        "._5_~~.~~20~-sam__"
    
        > ._5_~~.~~20~-sam__
        [5 ~.~20_sam]
    

    ++wick

    Coin format decode

    Unescape ++span ~~ as ~ and ~- as _.

    Accepts

    a is an atom.

    Produces

    A (unit @ta): the decoded ++span, or ~ if a contains a ~ that is not followed by ~ or -.

    Source

        ::  decode the escapes "~~" -> '~' and "~-" -> '_' in a.
        ::  the loop walks the bytes b, building the output reversed
        ::  in c.  on a '~' that ends the input or is followed by any
        ::  other byte, it stops with b still non-empty, which the =-
        ::  line turns into ~.  otherwise the result is the flopped c,
        ::  reassembled with (rap 3) and wrapped in a unit
        ++  wick                                                ::  knot format
          |=  a/@
          ^-  (unit @ta)
          =+  b=(rip 3 a)
          =-  ?^(b ~ (some (rap 3 (flop c))))
          =|  c/tape
          |-  ^-  {b/tape c/tape}
          ?~  b  [~ c]
          ?.  =('~' i.b)
            $(b t.b, c [i.b c])
          ?~  t.b  [b ~]
          ?-  i.t.b
            $'~'  $(b t.t.b, c ['~' c])
            $'-'  $(b t.t.b, c ['_' c])
            @     [b ~]
          ==
        ::
    

    Examples

        > `@t`(wick '~-ams~~lop')
        '_ams~lop'
    
        > `@t`(wick (wack '~20_sam~'))
        '~20_sam~'
    

    ++woad

    Unescape cord

    Unescape ++cord codepoints.

    Accepts

    a is a @ta.

    Produces

    A ++cord.

    Source

        ::  decode an escaped span back into a cord, byte by byte:
        ::    '.'       -> ' '
        ::    "~."      -> '.'
        ::    "~~"      -> '~'
        ::    "~" hex "." -> the codepoint with that hex value, encoded
        ::                 with ++tuft
        ::    any other byte is copied unchanged
        ::  in the hex case digits are accumulated in d until a '.' is
        ::  reached; running out of input first crashes, and a digit
        ::  must be '0'-'9' or in 'a'-'z' (the latter mapped by
        ::  subtracting 87)
        ++  woad                                                ::  cord format
          |=  a/@ta
          ^-  @t
          %+  rap  3
          |-  ^-  (list @)
          ?:  =(`@`0 a)
            ~
          =+  b=(end 3 1 a)
          =+  c=(rsh 3 1 a)
          ?:  =('.' b)
            [' ' $(a c)]
          ?.  =('~' b)
            [b $(a c)]
          =>  .(b (end 3 1 c), c (rsh 3 1 c))
          ?+  b  =-  (weld (rip 3 (tuft p.d)) $(a q.d))
                 ^=  d
                 =+  d=0
                 |-  ^-  {p/@ q/@}
                 ?:  =('.' b)
                   [d c]
                 ?<  =(0 c)
                 %=    $
                    b  (end 3 1 c)
                    c  (rsh 3 1 c)
                    d  %+  add  (mul 16 d)
                       %+  sub  b
                       ?:  &((gte b '0') (lte b '9'))  48
                       ?>(&((gte b 'a') (lte b 'z')) 87)
                 ==
            $'.'  ['.' $(a c)]
            $'~'  ['~' $(a c)]
          ==
        ::
    

    Examples

        > (woad ~.~b6.20.as)
        '¶20 as'
    

    ++wood

    Escape cord

    Escape ++cord codepoints.

    Accepts

    a is a ++cord (@t).

    Produces

    A ++span (@ta).

    Source

        ::  escape a cord into a span, one utf8 character at a time.
        ::  ++teff gives the character's byte length and ++turf its
        ::  codepoint c; d is the already-escaped rest of the cord.
        ::    lowercase letters, digits and '-' are copied unchanged
        ::    ' '  -> "."
        ::    '.'  -> "~."
        ::    '~'  -> "~~"
        ::    anything else -> '~', the codepoint in lowercase hex,
        ::                     then '.'
        ::  these are the escapes that ++woad decodes
        ++  wood                                                ::  cord format
          |=  a/@t
          ^-  @ta
          %+  rap  3
          |-  ^-  (list @)
          ?:  =(`@`0 a)
            ~
          =+  b=(teff a)
          =+  c=(turf (end 3 b a))
          =+  d=$(a (rsh 3 b a))
          ?:  ?|  &((gte c 'a') (lte c 'z'))
                  &((gte c '0') (lte c '9'))
                  =(`@`'-' c)
              ==
            [c d]
          ?+  c
            ::  default: '~', then hex digits from the most significant
            ::  nibble down, then '.'
            :-  '~'
            =+  e=(met 2 c)
            |-  ^-  tape
            ?:  =(0 e)
              ['.' d]
            =.  e  (dec e)
            =+  f=(rsh 2 e c)
            [(add ?:((lte f 9) 48 87) f) $(c (end 2 e c))]
          ::
            $' '  ['.' d]
            $'.'  ['~' '.' d]
            $'~'  ['~' '~' d]
          ==
    

    Examples

        > (wood 'my ßam')
        ~.my.~df.am