// cpp2hatena.cpp


// code2xml.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"

#include <algorithm>
#include <cassert>
#include <fstream>
#include <iostream>
#include <iterator>
#include <locale>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/regex.hpp>

#include "cpp2hatena.hpp"

[]namespace[] {
[]using[] []namespace[] []std[];
[]namespace[] []bst[] = []boost[];
[]namespace[] []spl[] = []boost[]::[]spirit[];

[]// replacements[]
[]// wstring rp_source(L"<pre class=cpp_source>$1</pre>");[]
[]wstring[] []rp_whole_source[]([]L[][]"<pre class=cpp_source>\n$1</pre>\n====\n<pre class=cpp_source>\n$2</pre>"[]);
[]bst[]::[]wregex[] []re_whole_source[]([]L[][]"\\A((?:[^\\n]*\\n){1,9})(.*)"[]);

[]wstring[] []rp_comment[]([]L[][]"<span class=cpp_comment>[]$1[]</span>"[]);
[]wstring[] []rp_string_literal[]([]L[][]"<span class=cpp_string_literal>[]$1[]</span>"[]);
[]wstring[] []rp_keyword[]([]L[][]"<span class=cpp_keyword>[]$1[]</span>"[]);
[]wstring[] []rp_identifier[]([]L[][]"[]$1[]"[]);
[]// preprocessor replacements[]
[]wstring[] []rp_pp_directive[]([]L[][]"<span class=cpp_pp_directive>[]$1[]</span>"[]);
[]wstring[] []rp_pp_include_path[]([]L[][]"<span class=cpp_pp_include_path>[]$1[]</span>"[]);
[]wstring[] []rp_pp_pragma_token[]([]L[][]"<span class=cpp_pp_pragma_token>[]$1[]</span>"[]);

[]// regex utils[]
[]bst[]::[]wregex[] []re_whole[]([]L[][]"(.*)"[]);
[]bst[]::[]wregex[] []re_line[]([]L[][]"^([^\\n]+)"[]);

// helper
// Builds a wide string from the characters in the range [first_, last_).
// Works with any input iterator whose value type converts to wchar_t.
template <typename Iterator>
std::wstring make_wstring(Iterator first_, Iterator last_) {
  return std::wstring(first_, last_);
}

[]// C++ keyword regex[]
[]bst[]::[]wregex[] []re_cpp_keyword[](
[]L[][]"\\b(and_eq|and|asm|auto|bitand|bitor"[]
[]L[][]"|bool|break|case|catch|char|class"[]
[]L[][]"|compl|const_cast|const|continue|default"[]
[]L[][]"|delete|do|double|dynamic_cast|else"[]
[]L[][]"|enum|explicit|export|extern|false"[]
[]L[][]"|float|for|friend|goto|if|inline"[]
[]L[][]"|int|long|mutable|namespace|new|not_eq"[]
[]L[][]"|not|operator|or_eq|or|private"[]
[]L[][]"|protected|public|register|reinterpret_cast"[]
[]L[][]"|return|short|signed|sizeof|static"[]
[]L[][]"|static_cast|struct|switch|template|this"[]
[]L[][]"|throw|true|try|typedef|typeid"[]
[]L[][]"|typename|union|unsigned|using|virtual"[]
[]L[][]"|void|volatile|wchar_t|while|xor_eq|xor)\\b"[]);

[]bst[]::[]wregex[] []re_pragma_token[](
[]L[][]"\\b(alloc_text|code_seq|const_seq|hdrstop|inline_recursion|once|pop_macro"[]
[]L[][]"|auto_inline|comment|data_seq|include_alias|intrinsic|optimize|push_macro|unmanaged"[]
[]L[][]"|bss_seq|component|deprecated|init_seq|managed|pack|runtime_checks|vtordisp"[]
[]L[][]"|check_stack|conform|function|inline_depth|message|pointers_to_members|section|warning)\\b"[]);

// Escapes s_ in place so it can be embedded in the generated markup:
//   &  ->  &amp;     <  ->  &lt;     >  ->  &gt;     "  ->  &quot;
//   [  ->  &#91;     ]  ->  &#93;
//
// The text is rewritten in a single pass, so the '&' introduced by a
// replacement is never escaped a second time.  This function runs once per
// token, so it deliberately avoids building regex objects.
void xml_hatena_escape(std::wstring& s_) {
  std::wstring escaped;
  escaped.reserve(s_.size());
  for (std::wstring::size_type i = 0; i < s_.size(); ++i) {
    switch (s_[i]) {
      case L'&':  escaped += L"&amp;";  break;
      case L'<':  escaped += L"&lt;";   break;
      case L'>':  escaped += L"&gt;";   break;
      case L'"':  escaped += L"&quot;"; break;
      case L']':  escaped += L"&#93;";  break;
      case L'[':  escaped += L"&#91;";  break;
      default:    escaped += s_[i];     break;
    }
  }
  s_.swap(escaped);
}

// thanks to http://cham.ne.jp/piro/p_cp.html
// Expands every tab in str_ to spaces, in place, and returns str_.
//
// Each tab is replaced by as many spaces as are needed to reach the next
// multiple of tab_ columns, where the column is counted from the start of the
// current line (the character after the previous '\n', or the start of the
// string).  A non-positive tab_ leaves the string unchanged, which avoids a
// modulo by zero.
std::wstring& tab2space(std::wstring& str_, int tab_) {
  if (tab_ <= 0) {
    return str_;
  }
  const std::wstring::size_type width = static_cast<std::wstring::size_type>(tab_);

  std::wstring expanded;
  expanded.reserve(str_.size());
  std::wstring::size_type column = 0;  // column within the current output line
  for (std::wstring::size_type i = 0; i < str_.size(); ++i) {
    const wchar_t ch = str_[i];
    if (ch == L'\t') {
      const std::wstring::size_type pad = width - column % width;
      expanded.append(pad, L' ');
      column += pad;
    } else {
      expanded.push_back(ch);
      column = (ch == L'\n') ? 0 : column + 1;
    }
  }
  str_.swap(expanded);
  return str_;
}

// Returns a vector holding a copy of the characters of str_, in order.
std::vector<wchar_t> wstring2vector(std::wstring const& str_) {
  return std::vector<wchar_t>(str_.begin(), str_.end());
}

// Replaces every non-overlapping occurrence of from_ in s_ with to_, scanning
// from left to right.  Text inserted by a replacement is not rescanned.
void replace_all_literal(std::wstring& s_, std::wstring const& from_, std::wstring const& to_) {
  if (from_.empty()) {
    return;
  }
  std::wstring::size_type pos = 0;
  while ((pos = s_.find(from_, pos)) != std::wstring::npos) {
    s_.replace(pos, from_.size(), to_);
    pos += to_.size();
  }
}

// Post-processes the complete generated document in place: each "((" and "))"
// pair is rewritten as numeric character references (presumably so Hatena
// does not interpret the double parentheses as markup -- confirm against the
// Hatena syntax reference).  Single parentheses are left untouched.
// thanks to http://homepage3.nifty.com/aokura/jscript/ncr_js.html
void hatena_escape_whole(std::wstring& s_) {
  replace_all_literal(s_, L"((", L"&#40;&#40;");
  replace_all_literal(s_, L"))", L"&#41;&#41;");
}

[]struct[] []hatena_semantic_actions[] {
[]private[]:
[]wostream[]& []out[];
[]public[]:
[]hatena_semantic_actions[]([]wostream[]& []out_[]) : []out[]([]out_[]) { }

[]// Semantic-Action[]
[]struct[] []hatena_semantic_action_base[] {
[]hatena_semantic_action_base[]([]wostream[]* []out_[]) : []out[]([]out_[]) { }
[]wostream[]* []out[];
};
[]// normal[]
[]struct[] []sa_normal[] : []hatena_semantic_action_base[] {
[]sa_normal[]([]wostream[]* []out_[]) : []hatena_semantic_action_base[]([]out_[]) { }
[]// string[]
[]template[] <[]typename[] []Iterator[]>
[]void[] []operator[]()([]Iterator[] []first[], []Iterator[] []last[]) []const[] {
[]wstring[] []s_[] = []make_wstring[]([]first[], []last[]);
[]xml_hatena_escape[]([]s_[]);
*[]out[] << []s_[];
}
[]// character[]
[]template[] <[]typename[] []Char[]>
[]void[] []operator[]()([]Char[] []ch_[]) []const[] {
[]wstring[] []s_[]([]ch_[]);
[]xml_hatena_escape[]([]s_[]);
*[]out[] << []s_[];
}
};
[]// cpp_comment[]
[]struct[] []sa_cpp_comment[] : []hatena_semantic_action_base[] {
[]sa_cpp_comment[]([]wostream[]* []out_[]) : []hatena_semantic_action_base[]([]out_[]) { }

[]template[] <[]typename[] []Iterator[]>
[]void[] []operator[]()([]Iterator[] []first[], []Iterator[] []last[]) []const[] {
[]wstring[] []s_[] = []make_wstring[]([]first[], []last[]);
[]xml_hatena_escape[]([]s_[]);
[]s_[] = []bst[]::[]regex_replace[]([]s_[], []re_line[], []rp_comment[]);
*[]out[] << []s_[];
}
};
[]// special[]
[]struct[] []sa_special[] : []hatena_semantic_action_base[] {
[]sa_special[]([]wostream[]* []out_[]) : []hatena_semantic_action_base[]([]out_[]) { }

[]template[] <[]typename[] []Iterator[]>
[]void[] []operator[]()([]Iterator[] []first[], []Iterator[] []last[]) []const[] {
[]wstring[] []s_[] = []make_wstring[]([]first[], []last[]);
[]xml_hatena_escape[]([]s_[]);
*[]out[] << []s_[];
}
};
[]// string-literal[]
[]struct[] []sa_string_literal[] : []hatena_semantic_action_base[] {
[]sa_string_literal[]([]wostream[]* []out_[]) : []hatena_semantic_action_base[]([]out_[]) { }

[]template[] <[]typename[] []Iterator[]>
[]void[] []operator[]()([]Iterator[] []first[], []Iterator[] []last[]) []const[] {
[]wstring[] []s_[] = []make_wstring[]([]first[], []last[]);
[]xml_hatena_escape[]([]s_[]);
[]s_[] = []bst[]::[]regex_replace[]([]s_[], []re_whole[], []rp_string_literal[]);
*[]out[] << []s_[];
}
};
[]// identifier[]
[]struct[] []sa_identifier[] : []hatena_semantic_action_base[] {
[]sa_identifier[]([]wostream[]* []out_[]) : []hatena_semantic_action_base[]([]out_[]) { }

[]template[] <[]typename[] []Iterator[]>
[]void[] []operator[]()([]Iterator[] []first[], []Iterator[] []last[]) []const[] {
[]wstring[] []s_[] = []make_wstring[]([]first[], []last[]);
[]xml_hatena_escape[]([]s_[]);
[]bst[]::[]wsmatch[] []w_[];
[]if[] ([]bst[]::[]regex_match[]([]s_[], []w_[], []re_cpp_keyword[])) {
[]s_[] = []bst[]::[]regex_replace[]([]s_[], []re_whole[], []rp_keyword[]);
} []else[] {
[]s_[] = []bst[]::[]regex_replace[]([]s_[], []re_whole[], []rp_identifier[]);
}
*[]out[] << []s_[];
}
};
[]// preprocessor[]
[]struct[] []sa_preprocesser[] : []hatena_semantic_action_base[] {
[]sa_preprocesser[]([]wostream[]* []out_[]) : []hatena_semantic_action_base[]([]out_[]) { }

[]template[] <[]typename[] []Iterator[]>
[]void[] []operator[]()([]Iterator[] []first[], []Iterator[] []last[]) []const[] {

[]bst[]::[]wregex[] []re_preprocessor_[]([]L[][]"\\A(#[\\t ]*\\w*)(.*)"[]);

[]bst[]::[]wregex[] []re_include_[]([]L[][]"\\A#\\s*include"[]);
[]bst[]::[]wregex[] []re_pragma_[]([]L[][]"\\A#\\s*pragma"[]);
[]bst[]::[]wregex[] []re_argument_is_cpp_source[]([]L[][]"\\A#\\s*(define|error)"[]);

[]wstring[] []s_[] = []make_wstring[]([]first[], []last[]);
[]bst[]::[]wsmatch[] []w_[];
[]if[] (![]bst[]::[]regex_match[]([]s_[], []w_[], []re_preprocessor_[])) {
[]assert[]([]false[]);
}
[]wstring[] []directive_[] = []w_[][1];
[]wstring[] []arguments_[] = []w_[][2];

{ []// directive[]
[]wstring[] []out_directive_[]([]directive_[]);
[]xml_hatena_escape[]([]out_directive_[]);
[]out_directive_[] = []bst[]::[]regex_replace[]([]out_directive_[], []re_whole[], []rp_pp_directive[]);
*[]out[] << []out_directive_[];
}

[]if[] ([]bst[]::[]regex_search[]([]directive_[], []re_include_[])) {
[]xml_hatena_escape[]([]arguments_[]);
[]arguments_[] = []bst[]::[]regex_replace[]([]arguments_[], []re_line[], []rp_pp_include_path[]);
*[]out[] << []arguments_[];
} []else[] []if[] ([]bst[]::[]regex_search[]([]s_[], []re_pragma_[])) {
[]xml_hatena_escape[]([]arguments_[]);
*[]out[] << []bst[]::[]regex_replace[]([]arguments_[], []re_pragma_token[], []rp_pp_pragma_token[]);
} []else[] []if[] ([]bst[]::[]regex_search[]([]directive_[], []re_argument_is_cpp_source[])) {
[]// parse recursively[]
[]vector[]<[]wchar_t[]> []vec_in[] = []wstring2vector[]([]arguments_[]);
[]typedef[] []vector[]<[]wchar_t[]>::[]const_iterator[] []iter_t[];
[]hatena_semantic_actions[] []actions[](*[]out[]);
[]pst[]::[]cpp2xml[]< []hatena_semantic_actions[] > []p[]([]actions[]);
[]spl[]::[]parse_info[]<[]iter_t[]> []info[] = []spl[]::[]parse[]([]vec_in[].[]begin[](), []vec_in[].[]end[](), []p[]);
[]if[] (![]info[].[]full[]) {
[]cerr[] << []"---PREPROCESSOR ARGUMENTS PARSING FAILURE---\n"[];
}
} []else[] {
[]xml_hatena_escape[]([]arguments_[]);
[]arguments_[] = []bst[]::[]regex_replace[]([]arguments_[], []re_line[], []rp_identifier[]);
*[]out[] << []arguments_[];
}

}
};
[]// unexpected_char[]
[]struct[] []sa_unexpected_char[] : []hatena_semantic_action_base[] {
[]sa_unexpected_char[]([]wostream[]* []out_[]) : []hatena_semantic_action_base[]([]out_[]) { }
[]template[] <[]typename[] []Char[]>
[]void[] []operator[]()([]Char[]) []const[] {
*[]out[] << []L[][]'#'[];
}
};

[]// SemanticActions policy[]
[]sa_normal[] []get_action[]([]pst[]::[]cpp2xml_normal_sa_tag[]) { []return[] []sa_normal[](&[]out[]); }
[]sa_cpp_comment[] []get_action[]([]pst[]::[]cpp2xml_cpp_comment_sa_tag[]) { []return[] []sa_cpp_comment[](&[]out[]); }
[]sa_special[] []get_action[]([]pst[]::[]cpp2xml_special_sa_tag[]) { []return[] []sa_special[](&[]out[]); }
[]sa_string_literal[] []get_action[]([]pst[]::[]cpp2xml_string_literal_sa_tag[]) { []return[] []sa_string_literal[](&[]out[]); }
[]sa_identifier[] []get_action[]([]pst[]::[]cpp2xml_identifier_sa_tag[]) { []return[] []sa_identifier[](&[]out[]); }
[]sa_preprocesser[] []get_action[]([]pst[]::[]cpp2xml_preprocesser_sa_tag[]) { []return[] []sa_preprocesser[](&[]out[]); }
[]sa_unexpected_char[] []get_action[]([]pst[]::[]cpp2xml_unexpected_char_sa_tag[]) { []return[] []sa_unexpected_char[](&[]out[]); }
};


[]// the parse[]
[]void[] []the_parse[]([]char[] []const[]* []filename[]) {

[]std[]::[]locale[]::[]global[]([]std[]::[]locale[]([]"ja"[]));

[]// open input file[]
[]wifstream[] []in[]([]filename[]); {
[]if[] (![]in[]) {
[]cerr[] << []"Could not open input file: "[] << []filename[] << []endl[];
[]return[];
}
[]in[].[]unsetf[]([]ios[]::[]skipws[]); []// Turn off white space skipping on the stream[]
[]// in.imbue(std::locale("japanese")); never call[]
}

[]wstring[] []wstr_in[]; {
[]typedef[] []istream_iterator[]<[]wchar_t[], []wchar_t[]> []s_iter_t[];
[]s_iter_t[] []first[]([]in[]);
[]s_iter_t[] []last[];
[]wstr_in[] = []make_wstring[]([]first[], []last[]);
[]tab2space[]([]wstr_in[], 2);
}

[]// boost::spirit needs RandomAccessIterator[]
[]vector[]<[]wchar_t[]> []vec_in[] = []wstring2vector[]([]wstr_in[]);

[]wstringstream[] []wstr_out[];
[]typedef[] []vector[]<[]wchar_t[]>::[]const_iterator[] []iter_t[];

[]hatena_semantic_actions[] []actions[]([]wstr_out[]);
[]pst[]::[]cpp2xml[]< []hatena_semantic_actions[] > []p[]([]actions[]);
[]spl[]::[]parse_info[]<[]iter_t[]> []info[] = []spl[]::[]parse[]([]vec_in[].[]begin[](), []vec_in[].[]end[](), []p[]);

[]if[] (![]info[].[]full[]) {
[]cerr[] << []"---PARSING FAILURE---\n"[];
}

[]wstring[] []result_[] = []wstr_out[].[]str[]();
[]hatena_escape_whole[]([]result_[]);
[]result_[] = []bst[]::[]regex_replace[]([]result_[], []re_whole_source[], []rp_whole_source[]);

[]// open output file[]
[]string[] []out_name[]([]filename[]);
[]out_name[] += []".html"[];
[]wofstream[] []out[]([]out_name[].[]c_str[]());
[]if[] (![]out[]) {
[]cerr[] << []"Could not open output file: "[] << []out_name[] << []endl[];
[]return[];
}

[]out[] << []result_[];
}
}

[]int[] []main[]([]int[] []argc[], []char[]* []argv[][]) {
[]if[] ([]argc[] > 1) {
[]for[] ([]int[] []i[] = 1; []i[] < []argc[]; ++[]i[]) {
[]cout[] << []argv[][[]i[]] << []endl[];
[]the_parse[]([]argv[][[]i[]]);
}
} []else[] {
[]cerr[] << []"---NO FILENAME GIVEN---"[] << []endl[];
}
[]return[] 0;
}