
*&---------------------------------------------------------------------*
*& Report ZGP_EMOJI_CONV
*&---------------------------------------------------------------------*
*& Convert Emoji Characters in a Unicode String to Unicode Codepoints
*& Author: Gopu Packirisamy
*&---------------------------------------------------------------------*
REPORT zgp_emoji_conv NO STANDARD PAGE HEADING.

* Building blocks of the numeric character reference that is emitted for
* every converted character: c_uc_codepoint + hex digits + c_semicolon
CONSTANTS: c_semicolon    TYPE c      VALUE ';',
           c_uc_codepoint TYPE string VALUE '&#x'.

* Selection screen: one case-preserving input field for the text to convert
SELECTION-SCREEN BEGIN OF BLOCK b WITH FRAME.
PARAMETERS p_string TYPE char255 LOWER CASE.
SELECTION-SCREEN END OF BLOCK b.

* Processing logic: convert the entered text and write the result to the list
START-OF-SELECTION.
  PERFORM conv_emoji2codepoint.
*&---------------------------------------------------------------------*
* Convert Emojis to Unicode Codepoint
*&---------------------------------------------------------------------*
FORM conv_emoji2codepoint.
* Converts the selection-screen text P_STRING and writes the result to the
* list. Every character whose UTF-8 encoding is 2, 3 or 4 bytes long is
* replaced by a hexadecimal reference (C_UC_CODEPOINT + hex + C_SEMICOLON);
* every other character is copied unchanged.
*
* The UTF-8 bytes are read directly from the converter's XSTRING buffer.
* Copying the buffer into a CHAR255 field (two hex characters per byte)
* would keep only the first 127 bytes, so longer input would be addressed
* beyond the end of that field.
  DATA lv_utf8        TYPE xstring.      " UTF-8 encoding of p_string
  DATA lv_lead        TYPE x LENGTH 1.   " first byte of the current sequence
  DATA lv_lead_i      TYPE i.            " ... as a number (0..255)
  DATA lv_seq_len     TYPE i.            " bytes in the current sequence
  DATA lv_byte_idx    TYPE i.            " byte offset into lv_utf8
  DATA lv_cur_pos     TYPE i.            " character offset into p_string
  DATA lv_hex         TYPE xstring.      " bytes of the current sequence
  DATA lv_unicode_cp  TYPE string.       " output for the current character
  DATA lv_string_conv TYPE string.       " accumulated result

* Encode the input text as UTF-8
  DATA(lo_converter) = cl_abap_conv_out_ce=>create( encoding = 'UTF-8' ).
  lo_converter->write( EXPORTING data = p_string ).
  lv_utf8 = lo_converter->get_buffer( ).

  DATA(lv_utf8_len) = xstrlen( lv_utf8 ).
  DATA(lv_len)      = strlen( p_string ).

* Walk the text and the byte buffer in parallel: the lead byte of each
* UTF-8 sequence tells how many bytes belong to the current character
  WHILE lv_cur_pos < lv_len AND lv_byte_idx < lv_utf8_len.
    lv_lead   = lv_utf8+lv_byte_idx(1).
    lv_lead_i = lv_lead.

    IF lv_lead_i >= 240.        " >= F0: 4-byte sequence
      lv_seq_len = 4.
    ELSEIF lv_lead_i >= 224.    " >= E0: 3-byte sequence
      lv_seq_len = 3.
    ELSEIF lv_lead_i >= 192.    " >= C0: 2-byte sequence
      lv_seq_len = 2.
    ELSE.                       " everything else: single byte
      lv_seq_len = 1.
    ENDIF.

*   Never read past the end of the buffer (incomplete trailing sequence)
    IF lv_byte_idx + lv_seq_len > lv_utf8_len.
      EXIT.
    ENDIF.

    lv_hex = lv_utf8+lv_byte_idx(lv_seq_len).

    CASE lv_seq_len.
      WHEN 4.
        PERFORM conv_utf8_4b USING lv_hex lv_unicode_cp.
*       A 4-byte sequence advances the text position by two
        lv_cur_pos = lv_cur_pos + 2.
      WHEN 3.
        PERFORM conv_utf8_3b USING lv_hex lv_unicode_cp.
        lv_cur_pos = lv_cur_pos + 1.
      WHEN 2.
        PERFORM conv_utf8_2b USING lv_hex lv_unicode_cp.
        lv_cur_pos = lv_cur_pos + 1.
      WHEN OTHERS.
*       Copy the character as is; a blank is emitted explicitly through
*       the string template | |
        lv_unicode_cp = COND #( WHEN p_string+lv_cur_pos(1) IS NOT INITIAL
                                THEN p_string+lv_cur_pos(1)
                                ELSE | | ).
        lv_cur_pos = lv_cur_pos + 1.
    ENDCASE.

    lv_byte_idx    = lv_byte_idx + lv_seq_len.
    lv_string_conv = |{ lv_string_conv }{ lv_unicode_cp }|.
  ENDWHILE.

  WRITE: lv_string_conv.
ENDFORM.
*&---------------------------------------------------------------------*
* Convert 4 bytes UTF-8 character to Unicode Codepoint
*&---------------------------------------------------------------------*
FORM conv_utf8_4b USING iv_hex TYPE xstring ev_emoji_cp TYPE string.
* Builds the code point reference for a 4-byte UTF-8 sequence.
*   iv_hex      - the 4 bytes of the sequence (32 bits, bit 1 = leftmost)
*   ev_emoji_cp - receives C_UC_CODEPOINT + 6 hex digits + C_SEMICOLON
*                 (declared under USING, but written by this routine)
* Source bits 6-8, 11-16, 19-24 and 27-32 (21 bits) are copied, in order,
* to bits 4-24 of a 3-byte result; bits 1-5 and the first two bits of
* bytes 2-4 are skipped.
  DATA lv_cp_hex  TYPE xstring VALUE '000000'.
  DATA lv_src_bit TYPE i VALUE 6.
  DATA lv_dst_bit TYPE i VALUE 4.

  WHILE lv_src_bit <= 32.
    PERFORM copy_hex_bits USING lv_src_bit iv_hex lv_dst_bit
                          CHANGING lv_cp_hex.
    lv_dst_bit = lv_dst_bit + 1.
    IF lv_src_bit MOD 8 = 0.
*     Last bit of a byte: jump over the first two bits of the next byte
      lv_src_bit = lv_src_bit + 3.
    ELSE.
      lv_src_bit = lv_src_bit + 1.
    ENDIF.
  ENDWHILE.

  ev_emoji_cp = |{ c_uc_codepoint }{ lv_cp_hex }{ c_semicolon }|.
ENDFORM.
*&---------------------------------------------------------------------*
* Convert 3 bytes UTF-8 character to Unicode Codepoint
*&---------------------------------------------------------------------*
FORM conv_utf8_3b USING iv_hex TYPE xstring ev_emoji_cp TYPE string.
* Builds the code point reference for a 3-byte UTF-8 sequence.
*   iv_hex      - the 3 bytes of the sequence (24 bits, bit 1 = leftmost)
*   ev_emoji_cp - receives C_UC_CODEPOINT + 4 hex digits + C_SEMICOLON
*                 (declared under USING, but written by this routine)
* Source bits 5-8, 11-16 and 19-24 (16 bits) are copied, in order, to
* bits 1-16 of a 2-byte result; bits 1-4 and the first two bits of
* bytes 2-3 are skipped.
  DATA lv_cp_hex  TYPE xstring VALUE '0000'.
  DATA lv_src_bit TYPE i VALUE 5.
  DATA lv_dst_bit TYPE i VALUE 1.

  WHILE lv_src_bit <= 24.
    PERFORM copy_hex_bits USING lv_src_bit iv_hex lv_dst_bit
                          CHANGING lv_cp_hex.
    lv_dst_bit = lv_dst_bit + 1.
    IF lv_src_bit MOD 8 = 0.
*     Last bit of a byte: jump over the first two bits of the next byte
      lv_src_bit = lv_src_bit + 3.
    ELSE.
      lv_src_bit = lv_src_bit + 1.
    ENDIF.
  ENDWHILE.

  ev_emoji_cp = |{ c_uc_codepoint }{ lv_cp_hex }{ c_semicolon }|.
ENDFORM.
*&---------------------------------------------------------------------*
* Convert 2 bytes UTF-8 character to Unicode Codepoint
*&---------------------------------------------------------------------*
FORM conv_utf8_2b USING iv_hex TYPE xstring ev_emoji_cp TYPE string.
* Builds the code point reference for a 2-byte UTF-8 sequence.
*   iv_hex      - the 2 bytes of the sequence (16 bits, bit 1 = leftmost)
*   ev_emoji_cp - receives C_UC_CODEPOINT + 4 hex digits + C_SEMICOLON
*                 (declared under USING, but written by this routine)
* Source bits 4-8 and 11-16 (11 bits) are copied, in order, to bits 6-16
* of a 2-byte result; bits 1-3 and the first two bits of byte 2 are
* skipped.
  DATA lv_cp_hex  TYPE xstring VALUE '0000'.
  DATA lv_src_bit TYPE i VALUE 4.
  DATA lv_dst_bit TYPE i VALUE 6.

  WHILE lv_src_bit <= 16.
    PERFORM copy_hex_bits USING lv_src_bit iv_hex lv_dst_bit
                          CHANGING lv_cp_hex.
    lv_dst_bit = lv_dst_bit + 1.
    IF lv_src_bit MOD 8 = 0.
*     Last bit of a byte: jump over the first two bits of the next byte
      lv_src_bit = lv_src_bit + 3.
    ELSE.
      lv_src_bit = lv_src_bit + 1.
    ENDIF.
  ENDWHILE.

  ev_emoji_cp = |{ c_uc_codepoint }{ lv_cp_hex }{ c_semicolon }|.
ENDFORM.
*&---------------------------------------------------------------------*
* Copy HEX bits from source byte to target byte
*&---------------------------------------------------------------------*
FORM copy_hex_bits USING iv_src_bit TYPE i
iv_src_str TYPE xstring
iv_trgt_bit TYPE i
CHANGING cv_trgt_str TYPE xstring.
* Copies a single bit from one byte string to another.
*   iv_src_bit  - position of the bit to read in iv_src_str
*   iv_src_str  - byte string that is read
*   iv_trgt_bit - position of the bit to write in cv_trgt_str
*   cv_trgt_str - byte string that is modified in place
  DATA lv_bit_value TYPE i.

* Read the source bit ...
  GET BIT iv_src_bit OF iv_src_str INTO lv_bit_value.
* ... and store the same value at the target position
  SET BIT iv_trgt_bit OF cv_trgt_str TO lv_bit_value.
ENDFORM.
Emoji test => ??
?? sap ????
No Emoji text 🙂
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.
User | Count |
---|---|
6 | |
5 | |
3 | |
3 | |
3 | |
3 | |
2 | |
2 | |
2 | |
2 |