Boost.Xpressiveの落書き

  • boost::regexとboost::spiritをまとめて置きかえるライブラリであるらしい
  • ヘッダーのみのライブラリでビルドは必要ない
  • 正規表現を文字列からではなく、オブジェクトから組み立てる(文字列の正規表現も扱える)


#include "stdafx.h"

#include <cassert>
#include <fstream>
#include <iostream>
#include <iterator>
#include <locale>
#include <sstream>
#include <stdexcept>
#include <string>

#include <boost/xpressive/xpressive_static.hpp>


[]namespace[] {
[]using[] []namespace[] []boost[]::[]xpressive[];

[]// regex[]
[]wsregex[] []re_cpp_comment[] = []L[][]"//"[] >> *~[]_n[];
[]wsregex[] []re_c_comment[] = []L[][]"/*"[] >> !*[]_[] >> []L[][]"*/"[];

[]wchar_t[] []sq[] = []L[][]'\''[];
[]wchar_t[] []dq[] = []L[][]'"'[];
[]wchar_t[] []em[] = []L[][]'\\'[];
[]wsregex[] []re_ec[] = []em[] >> []_[];
[]wsregex[] []re_char_literal[] = []sq[] >> *(~([]set[]=[]sq[],[]em[])|[]re_ec[]) >> []sq[];
[]wsregex[] []re_string_literal[] = []dq[] >> *(~([]set[]=[]dq[],[]em[])|[]re_ec[]) >> []dq[];

[]using[] []namespace[] []std[];

[]// format[]
[]wstring[] []fmt_whole[]([]L[][]"<pre class=cpp_source>$1</pre>"[]);

[]mark_tag[] []comment[](1), []string_literal[](2), []other[](3);
[]wstring[] []fmt_comment[]([]L[][]"<span class=cpp_comment>$1</span>"[]);
[]wstring[] []fmt_string_literal[]([]L[][]"<span class=cpp_string_literal>$1</span>"[]);


[]// grammer[]
[]wsregex[] []re_cpp_code[] =
( []comment[] = ([]re_cpp_comment[]) ) |
( []string_literal[] = ([]re_char_literal[]|[]re_string_literal[]) )
;

// Return a copy of `src` that is safe to embed in the generated HTML.
//
// The markup characters & < > " are replaced by their named entities, and the
// square brackets [ ] by the numeric references &#91; / &#93;
// (NOTE(review): presumably the page is pasted into a wiki/blog markup where
// brackets are significant -- confirm).
//
// The text is scanned once, so an '&' that belongs to an entity produced here
// can never be escaped a second time.
std::wstring xml_escape_replace(std::wstring const& src) {
    std::wstring out;
    out.reserve(src.size());
    for (std::wstring::const_iterator it = src.begin(); it != src.end(); ++it) {
        switch (*it) {
        case L'&':  out += L"&amp;";  break;
        case L'<':  out += L"&lt;";   break;
        case L'>':  out += L"&gt;";   break;
        case L'"':  out += L"&quot;"; break;
        case L']':  out += L"&#93;";  break;
        case L'[':  out += L"&#91;";  break;
        default:    out += *it;       break;
        }
    }
    return out;
}

[]// the parse[]
[]void[] []cpp_parse[]([]char[] []const[]* []filename[]) {
[]// open input file[]
[]wifstream[] []fin[]([]filename[]);
[]assert[]([]fin[].[]good[]());

[]wstring[] []in[];
[]getline[]([]fin[], []in[], []L[][]'\0'[]);

[]// wstring output = regex_replace(input, re_cpp_code, fmt_cpp_code);[]

[]wstringstream[] []out[];

[]wsregex_iterator[] []cur[]([]in[].[]begin[](), []in[].[]end[](), []re_cpp_code[]); []// trigger[]
[]wstring[]::[]const_iterator[] []first_pos[] = []in[].[]begin[]();
[]wsregex_iterator[] []end[];
[]for[]( ; []cur[] != []end[]; ++[]cur[])
{
[]wsmatch[] []const[]& []what[] = *[]cur[];
[]// pass unmatched string to output[]
[]wstring[]::[]const_iterator[] []last_pos[] = []in[].[]begin[]();
[]advance[]([]last_pos[], []what[].[]position[](0));
[]out[] << []xml_escape_replace[]([]wstring[]([]first_pos[], []last_pos[]));
[]// update first_pos[]
[]first_pos[] = []last_pos[];
[]advance[]([]first_pos[], []what[].[]length[](0));

[]wstring[] []str[];
[]if[] ([]what[][[]comment[]]) {
[]// Note. I can't understand why format_first_only required...if you miss it,[]
[]// ABC ---> <span>ABC</span><span></span>[]
[]str[] = []what[][[]comment[]].[]str[]();
[]str[] = []xml_escape_replace[]([]str[]);
[]str[] = []regex_replace[]([]str[], ([]s1[]=*[]_[]), []fmt_comment[], []regex_constants[]::[]format_first_only[]);

} []else[] []if[] ([]what[][[]string_literal[]]) {
[]str[] = []what[][[]string_literal[]].[]str[]();
[]str[] = []xml_escape_replace[]([]str[]);
[]str[] = []regex_replace[]([]str[], ([]s1[]=*[]_[]), []fmt_string_literal[], []regex_constants[]::[]format_first_only[]);
} []else[] {
[]assert[]([]false[]);
}

[]out[] << []str[];
}

[]out[] << []wstring[]( []first_pos[], []in[].[]end[]() );

[]wstring[] []result[] = []out[].[]str[]();
[]result[] = []regex_replace[]([]result[], ([]s1[]=*[]_[]), []fmt_whole[], []regex_constants[]::[]format_first_only[]);

[]// open output file[]
[]string[] []out_name[]([]filename[]);
[]out_name[] += []".html"[];
[]wofstream[] []fout[]([]out_name[].[]c_str[]());
[]assert[]([]fout[].[]good[]());
[]fout[] << []result[];
}
}

[]int[] []main[]([]int[] []argc[], []char[]* []argv[][])
{
[]std[]::[]locale[]::[]global[]([]std[]::[]locale[]([]"ja"[]));

[]if[] ([]argc[] > 1) {
[]for[] ([]int[] []i[] = 1; []i[] < []argc[]; ++[]i[]) {
[]cout[] << []argv[][[]i[]] << []endl[];
[]cpp_parse[]([]argv[][[]i[]]);
}
} []else[] {
[]cerr[] << []"---NO FILENAME GIVEN---"[] << []endl[];
}
[]wcin[].[]ignore[]();
[]return[] 0;
}