Separator.h
1//-*-C++-*-
2/***************************************************************************
3 *
4 * Copyright (C) 2016 by Willem van Straten
5 * Licensed under the Academic Free License version 2.1
6 *
7 ***************************************************************************/
8
9// psrchive/Util/units/Separator.h
10
11#ifndef __Separator_H
12#define __Separator_H
13
14#include <string>
15#include <vector>
16#include <map>
17
18// #define _DEBUG 1
19
20#if _DEBUG
21#include <iostream>
22#endif
23
// Splits a string into tokens at delimiter characters, leaving alone
// any delimiter that lies between a registered pair of brackets and,
// optionally, any delimiter that appears to be part of a numeric range.
class Separator
{
public:

  // construct with the default set of brackets
  Separator ();

  // register a pair of brackets; delimiters found between 'open' and
  // 'close' are protected from separation
  void add_bracket (char open, char close);

  // replace the set of characters at which text is separated
  void set_delimiters (const std::string& d) { delimiters = d; }

  // when true, delimiters that appear to be part of a numeric range
  // do not separate tokens
  void set_preserve_numeric_ranges (bool f) { preserve_numeric_ranges = f; }

  // split 'text' into tokens and append each one to 'container',
  // which must provide push_back (std::string)
  template<typename Container>
  void separate (std::string text, Container& container);

private:

  // every character at which text may be separated
  std::string delimiters;

  // every character that opens a protected region
  std::string opening_brackets;

  // closing bracket, looked up by its opening bracket
  std::map<char,char> brackets;

  // set by set_preserve_numeric_ranges and consulted by separate
  bool preserve_numeric_ranges;

  // defined elsewhere; presumably true when c can appear in a number
  bool isnumeric (char c);

  // defined elsewhere; separate calls this with the position of a
  // delimiter to ask whether it belongs to a numeric range
  bool part_of_numeric_range (const std::string& s, std::string::size_type pos);
};
52
53template<typename Container>
54void Separator::separate (std::string text, Container& container)
55{
56#if _DEBUG
57 std::cerr << "Separator::separate"
58 " delimiters=\"" << delimiters << "\""
59 " opening brackets=\"" << opening_brackets << "\"" << std::endl;
60#endif
61 std::string opener = delimiters + opening_brackets;
62
63 while (text.length())
64 {
65 /*
66 search for the first instance of a delimiter that is
67
68 1) not enclosed in brackets
69 2) not part of a range of numbers
70
71 nested brackets are not supported
72 */
73
74 std::string::size_type end = 0;
75
76 while ( (end = text.find_first_of (opener, end) ) != std::string::npos )
77 {
78 if ( opening_brackets.find (text[end]) != std::string::npos )
79 {
80#if _DEBUG
81 std::cerr << "Separator::separate '" << text[end] << "'"
82 " is an opening bracket" << std::endl;
83#endif
84 end = text.find (brackets[text[end]], end+1);
85 }
86
87 else if (preserve_numeric_ranges && part_of_numeric_range (text, end))
88 {
89#if _DEBUG
90 std::cerr << "Separator::separate '" << text[end] << "'"
91 " is part of a numeric range" << std::endl;
92#endif
93 end ++;
94 }
95 else
96 {
97#if _DEBUG
98 std::cerr << "Separator::separate '" << text[end] << "'"
99 " is naked" << std::endl;
100#endif
101 break;
102 }
103 }
104
105 // the first naked delimiter
106 std::string token = text.substr (0, end);
107
108 std::string::size_type length = token.length();
109
110#if _DEBUG
111 std::cerr << "Separator::separate token='" << token << "'" << std::endl;
112#endif
113
114 if ( (token[0] == '"' && token[length-1] == '"') ||
115 (token[0] == '\'' && token[length-1] == '\'') )
116 {
117 token = token.substr (1, length-2);
118#if _DEBUG
119 std::cerr << "Separator::separate quotation stripped token ='" << token << "'" << std::endl;
120#endif
121 }
122
123 container.push_back (token);
124
125 end = text.find_first_not_of (delimiters, end);
126 text.erase (0, end);
127
128#if _DEBUG
129 std::cerr << "Separator::separate text='" << text << "'" << std::endl;
130#endif
131
132 }
133}
134
135
136#endif // !__Separator_H

Generated using doxygen 1.14.0