Boost.UnicodeIterator

  • Boost.Regexにこっそりあるのを発見
  • utf8_codecvt_facetというのもあるらしい
  • 以下、UTF-8xmlファイルからコメントを抽出するコード
    • ところで、<boost/range_ex/algorithm.hpp>をインクルードすると、std::equalがstd::mismatchを'std::'を付けずに呼び出している標準ライブラリ実装があるため、ADLによってBoost.RangeExのboost::mismatchまで候補に入ってしまい、コンパイルエラーになります
    • ADLは本当に危険で、エラーの場所を見つけるのも難しいです


#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>

#include <boost/range/iterator_range.hpp>
// #include <boost/range_ex/algorithm.hpp> killed by std::equal
#include <boost/regex/pending/unicode_iterator.hpp>
#include <boost/spirit/iterator/file_iterator.hpp>

#include <biscuit/biscuit.hpp>

[]namespace[] []xml_grammar[][] {[]

[]using[] []namespace[] []biscuit[][];[]

[]struct[] []comment[][] :[]
[]seq[][]<[]
[]chseq[][]<[][]'<'[][],[][]'!'[][],[][]'-'[][],[][]'-'[][]>,[]
[]star[][]<[]
[]or_[][]<[]
[]minus[][]< [][]any[][], [][]chseq[][]<[][]'-'[][]> >,[]
[]seq[][]<[]
[]chseq[][]<[][]'-'[][]>,[]
[]minus[][]< [][]any[][], [][]chseq[][]<[][]'-'[][]> >[]
[] >[]
[] >[]
[] >,[]
[]chseq[][]<[][]'-'[][],[][]'-'[][],[][]'>'[][]>[]
[] >[]
[] { };[]

[]} [][]// namespace xml_grammar[]

[]// from Boost.Spirit, random-access iterator[]
[]typedef[] []boost[][]::[][]spirit[][]::[][]file_iterator[][]<> [][]ifiter_t[][];[]

[]// from Boost.Regex, bidirectional iterator[]
[]typedef[] []boost[][]::[][]u8_to_u32_iterator[][]<[][]ifiter_t[][]> [][]uiiter_t[][];[]

[]// from Boost.Regex, output iterator[]
[]typedef[] []boost[][]::[][]utf8_output_iterator[][]< [][]std[][]::[][]ostream_iterator[][]<[][]char[][]> > [][]uoiter_t[][];[]

[]int[] []main[][]([][]int[] []argc[][], [][]char[][] *[][]argv[][][])[]
[]{[]
[]for[][] ([][]int[] []i[][] = [][]1[][]; [][]i[][] < [][]argc[][]; ++[][]i[][]) {[]

[]std[][]::[][]string[] []ifname[][]([][]argv[][][[][]i[][]]);[]

[]ifiter_t[] []iffirst[][]([][]ifname[][].[][]c_str[][]()); []
[]boost[][]::[][]iterator_range[][]<[][]uiiter_t[][]> [][]uirng[][]([][]iffirst[][], [][]iffirst[][].[][]make_end[][]());[]

[]std[][]::[][]ofstream[] []fout[][](([][]ifname[][] + [][]".comments.xml"[][]).[][]c_str[][](), [][]std[][]::[][]ios[][]::[][]binary[][]);[]

[]fout[][] << [][]"<?xml version=\"1.0\" encoding=\"UTF-8\"?><comments>"[][];[]
[] {[]
[]biscuit[][]::[][]filter_range[][]<[]
[]xml_grammar[][]::[][]comment[][], [][]boost[][]::[][]iterator_range[][]<[][]uiiter_t[][]>[]
[] > [][]cmtrng[][]([][]uirng[][]);[]

[]std[][]::[][]copy[][]([]
[]boost[][]::[][]begin[][]([][]cmtrng[][]), [][]boost[][]::[][]end[][]([][]cmtrng[][]),[]
[]uoiter_t[][]([][]std[][]::[][]ostream_iterator[][]<[][]char[][]>([][]fout[][]))[]
[] );[]
[] }[]
[]fout[][] << [][]"</comments>"[][];[]

[] } [][]// for[]

[]return[] []0[][];[]
[]}[]

// Another candidate: Boost.Serialization's utf8_codecvt_facet
// http://www.boost.org/libs/serialization/doc/codecvt.html