libcrn  3.9.5
A document image processing library
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CRNPath.cpp
Go to the documentation of this file.
1 /* Copyright 2010-2016 CoReNum, ENS-Lyon
2  *
3  * This file is part of libcrn.
4  *
5  * libcrn is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * libcrn is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with libcrn. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * file: CRNPath.cpp
19  * \author Yann LEYDIER
20  */
21 
22 #include <CRNIO/CRNPath.h>
23 #include <CRNString.h>
24 #include <CRNData/CRNForeach.h>
25 #include <CRNData/CRNDataFactory.h>
26 #include <CRNException.h>
27 #include <CRNi18n.h>
28 
29 using namespace crn;
30 
35 {
36  return '?';
37 }
38 
42 char Path::Separator() noexcept
43 {
44 #ifdef _MSC_VER
45  return '\\';
46 #else
47  return '/';
48 #endif
49 }
50 
56 const Path& Path::operator+=(const Path &s)
57 {
58  Path tmp(s);
59  tmp.ConvertTo(GetFormat());
60  if (tmp.IsURI())
61  { // remove the file:// before concatenating!
62  size_t sep(tmp.Find("://"));
63  if (sep == NPos())
64  StringUTF8::operator+=(tmp);
65  else if (sep + 3 < tmp.Size())
66  StringUTF8::operator+=(tmp.CStr() + sep + 3);
67  }
68  else
69  {
71  }
72  return *this;
73 }
74 
80 const Path& Path::operator/=(const Path &s)
81 {
82  // append separator
83  char sep('/');
84  if (IsWindows() && !IsURI() && !IsUnix())
85  sep = '\\';
86  bool needsep = true;
87  if (IsNotEmpty() && ((*this)[Size() - 1] == sep))
88  needsep = false;
89  else if (s.IsNotEmpty() && (s[0] == sep))
90  needsep = false;
91  if (needsep)
93 
94  // append string
95  Path tmp(s);
96  tmp.ConvertTo(GetFormat());
97  operator+=(tmp);
98  return *this;
99 }
100 
106 {
107  if (format != Format::AUTO)
108  return format;
109  if (IsURI())
110  return Format::URI;
111  if (IsWindows() && !IsUnix())
112  return Format::WINDOWS;
113  return Format::UNIX;
114 }
115 
119 bool Path::IsURI() const
120 { // a URI begins with "scheme://"
121  return Find("://") != NPos();
122 }
123 
127 bool Path::IsUnix() const
128 { // unix paths do not begin with "scheme://" nor "X:\"
129  return !IsURI() && (Find(":\\") == NPos());
130 }
131 
135 bool Path::IsWindows() const
136 { // Windows paths begin with "X:\" or do not contain any "/"
137  if (Find(":\\") == 1)
138  return true;
139  return Find("/") == NPos();
140 }
141 
147 {
148  if (fmt == format)
149  return *this;
150  switch (fmt)
151  {
152  case Format::AUTO:
153  if (IsWindows())
154  ToWindows(); // XXX is this necessary?
155  format = fmt;
156  break;
157  case Format::URI:
158  ToURI();
159  break;
160  case Format::UNIX:
161  ToUnix();
162  break;
163  case Format::WINDOWS:
164  ToWindows();
165  break;
166  }
167  return *this;
168 }
169 
173 bool Path::IsAbsolute() const
174 {
175  if (IsEmpty())
176  return true; // XXX well this is technically not absolute…
177  if ((*this)[0] == '.')
178  return true; // XXX well this is technically not absolute…
179  if (IsURI())
180  {
181  size_t sep(Find("://"));
182  if ((sep != NPos()) && (sep + 3 < Size()))
183  {
184  if ((*this)[sep + 3] == '/')
185  return true;
186  }
187  }
188  else if (IsWindows())
189  {
190  if (Size() > 1)
191  if ((*this)[1] == ':')
192  return true;
193  }
194  else if (IsUnix())
195  {
196  if ((*this)[0] == '/')
197  return true;
198  }
199  return false;
200 }
201 
206 {
207  size_t pos;
208  if (IsWindows())
209  {
210  Path tmp(*this);
211  tmp.ToWindows(); // ensure that there are no trailing '/'
212  pos = tmp.BackwardFind("\\");
213  }
214  else
215  {
216  pos = BackwardFind("/");
217  }
218  if (pos == NPos())
219  { // not found
220  return *this;
221  }
222  else if (pos == Size() - 1)
223  { // the separator is the last character
224  return Path("", format);
225  }
226  else
227  return Path(SubString(pos + 1), format);
228 }
229 
236 {
237  Path fname(GetFilename());
238  if (!fname)
239  throw ExceptionLogic(StringUTF8("const Path Path::GetBase() const: ") + _("the path does not contain a filename."));
240  size_t ppos = fname.BackwardFind(".");
241  if ((ppos != NPos()) && (ppos != 0))
242  return Path(fname.SubString(0, ppos), format);
243  else
244  return Path("", format);
245 }
246 
253 {
254  Path fname(GetFilename());
255  if (!fname)
256  throw ExceptionLogic(StringUTF8("const Path Path::GetBase() const: ") + _("the path does not contain a filename."));
257  size_t ppos = fname.BackwardFind(".");
258  if ((ppos != NPos()) && (ppos != fname.Size() - 1))
259  return Path(fname.SubString(ppos + 1), format);
260  else
261  return Path("", format);
262 }
263 
268 {
269  size_t pos;
270  if (IsWindows())
271  {
272  pos = BackwardFind("\\");
273  }
274  else
275  {
276  pos = BackwardFind("/");
277  }
278  if ((pos == NPos()) || (pos == Size() - 1))
279  { // not found or the / is the last character
280  if (IsURI())
281  {
282  size_t beg = Find("://") + 3;
283  return Path(SubString(beg), format);
284  }
285  else
286  return *this;
287  }
288  else
289  {
290  if (IsURI())
291  {
292  size_t beg = Find("://") + 3;
293  return Path(SubString(beg, pos + 1 - beg), format);
294  }
295  else
296  return Path(SubString(0, pos + 1), format);
297  }
298 }
299 
304 {
305  if (IsURI())
306  {
307  return SubString(0, Find("://"));
308  }
309  return "";
310 }
311 
316 {
317  if (IsURI())
318  return *this;
319  format = Format::URI;
320  Path uri("file://", Format::URI);
321  if (IsEmpty())
322  {
323  Swap(uri);
324  return *this;
325  }
326 
327  if (IsWindows())
328  {
329  size_t beg = 0;
330  if (Size() >= 2)
331  {
332  if ((*this)[1] == ':')
333  {
334  uri.Std() += "/";
335  uri.Std() += (*this)[0];
336  uri.Std() += (*this)[1];
337  uri.Std() += "/";
338  beg = 2; // C:toto
339  if (Size() >= 3)
340  {
341  if ((*this)[2] == '\\') // C:\toto
342  beg = 3;
343  }
344  }
345  }
346  for (size_t tmp = beg; tmp < Size(); ++tmp)
347  {
348  if ((*this)[tmp] == '\\')
349  uri.Std() += '/';
350  else
351  uri.Std() += (*this)[tmp];
352  }
353  }
354  else if (IsUnix())
355  {
356  uri.Std() += this->Std(); // convert to char* to avoid multiple conversions
357  }
358  // we do not encode, this is too shitty
359  Swap(uri);
360  return *this;
361 }
362 
367 {
368  if (IsUnix() || IsEmpty())
369  return *this;
370  format = Format::UNIX;
371  Path uri(Format::UNIX);
372  if (IsWindows())
373  {
374  size_t beg = 0;
375  if (Size() >= 2)
376  {
377  if ((*this)[1] == ':')
378  {
379  uri.Std() += "/";
380  uri.Std() += (*this)[0];
381  uri.Std() += "/";
382  beg = 2; // C:toto
383  if (Size() >= 3)
384  {
385  if ((*this)[2] == '\\') // C:\toto
386  beg = 3;
387  }
388  }
389  }
390  for (size_t tmp = beg; tmp < Size(); ++tmp)
391  {
392  if ((*this)[tmp] == '\\')
393  uri.Std() += '/';
394  else
395  uri.Std() += (*this)[tmp];
396  }
397  }
398  else if (IsURI())
399  {
400  uri.Std() += SubString(Find("://") + 3).Std();
401  uri.Decode();
402  }
403  Swap(uri);
404  return *this;
405 }
406 
411 {
412  if (IsEmpty())
413  return *this;
414  if (IsWindows())
415  {
416  for (size_t tmp = 0; tmp < Size(); ++tmp)
417  {
418  if ((*this)[tmp] == '/')
419  (*this)[tmp] = '\\';
420  }
421  return *this;
422  }
423  format = Format::WINDOWS;
424  Path uri(Format::UNIX);
425  if (IsURI())
426  {
427  uri += SubString(Find("://") + 3);
428  uri.Decode();
429  }
430  else
431  {
432  uri = *this;
433  }
434  Path uri2(Format::WINDOWS);
435  size_t beg = 0;
436  if (Size() >= 2)
437  {
438  if ((uri[1] == '/') && (uri[0] != '.'))
439  {
440  uri2.Std() += uri[0];
441  uri2.Std() += ":\\";
442  beg = 2;
443  }
444  }
445  if ((Size() >= 3) && (beg == 0))
446  {
447  if ((uri[0] == '/') && (uri[2] == '/'))
448  {
449  uri2.Std() += uri[1];
450  uri2.Std() += ":\\";
451  beg = 3;
452  }
453  }
454  if ((Size() >= 4) && (beg == 0))
455  {
456  if ((uri[0] == '/') && (uri[2] == ':') && (uri[3] == '/'))
457  {
458  uri2.Std() += uri[1];
459  uri2.Std() += ":\\";
460  beg = 4;
461  }
462  }
463  for (size_t tmp = beg; tmp < uri.Size(); ++tmp)
464  {
465  if (uri[tmp] == '/')
466  uri2.Std() += '\\';
467  else
468  uri2.Std() += uri[tmp];
469  }
470  Swap(uri2);
471  return *this;
472 }
473 
477 char Path::GetDrive() const
478 {
479  if (IsWindows() && IsAbsolute())
480  {
481  return (*this)[0];
482  }
483  return NoDrive();
484 }
485 
491 {
492  Path dec;
493  for (size_t tmp = 0; tmp < Size(); ++tmp)
494  {
495  if ((*this)[tmp] == '%')
496  {
497  // read the hex code
498  int val = 0;
499  tmp += 1;
500  if (((*this)[tmp] >= '0') && ((*this)[tmp] <= '9'))
501  {
502  val = ((*this)[tmp] - '0') * 16;
503  }
504  else if (((*this)[tmp] >= 'a') && ((*this)[tmp] <= 'f'))
505  {
506  val = ((*this)[tmp] - 'a' + 10) * 16;
507  }
508  else if (((*this)[tmp] >= 'A') && ((*this)[tmp] <= 'F'))
509  {
510  val = ((*this)[tmp] - 'A' + 10) * 16;
511  }
512  tmp += 1;
513  if (((*this)[tmp] >= '0') && ((*this)[tmp] <= '9'))
514  {
515  val += (*this)[tmp] - '0';
516  }
517  else if (((*this)[tmp] >= 'a') && ((*this)[tmp] <= 'f'))
518  {
519  val += (*this)[tmp] - 'a' + 10;
520  }
521  else if (((*this)[tmp] >= 'A') && ((*this)[tmp] <= 'F'))
522  {
523  val += (*this)[tmp] - 'A' + 10;
524  }
525  dec.Std() += char(val);
526  }
527  else
528  dec.Std() += (*this)[tmp];
529  }
530  *this = dec;
531  return *this;
532 }
533 
535 {
536  return ConvertTo(Format::LOCAL);
537 }
538 
545 std::vector<Path> Path::Split(const StringUTF8 &sep) const
546 {
547  const std::vector<StringUTF8> w(StringUTF8::Split(sep));
548  std::vector<Path> words;
549  for (const StringUTF8 &s : w)
550  words.push_back(s);
551  return words;
552 }
553 
563 {
564  if (el.GetValue() != "Path")
565  {
566  throw ExceptionInvalidArgument(StringUTF8("void Path::deserialize(xml::Element &el): ") +
567  _("Wrong XML element."));
568  }
569  xml::Node c(el.GetFirstChild());
570  if (!c)
571  return; // no content
572  xml::Text t(c.AsText()); // may throw
573  *this = t.GetValue();
574  ShrinkToFit();
575 }
576 
584 {
585  xml::Element el(parent.PushBackElement("Path"));
586  el.PushBackText(*this);
587  return el;
588 }
589 
590 
593  Cloner::Register<Path>();
595 
const Path & operator+=(const Path &s)
Appends a string.
Definition: CRNPath.cpp:56
virtual StringUTF8 GetValue() const override
Gets the content of the node.
Definition: CRNXml.cpp:888
void Swap(Path &str) noexcept
Swaps two strings.
Definition: CRNPath.h:126
virtual void Deserialize(xml::Element &el) override
Initializes the object from an XML element.
Definition: CRNPath.cpp:562
XML element.
Definition: CRNXml.h:135
#define _(String)
Definition: CRNi18n.h:51
StringUTF8()=default
Default constructor (empty string)
Path & ToUnix()
Converts the path to Unix format.
Definition: CRNPath.cpp:366
std::vector< StringUTF8 > Split(const StringUTF8 &sep) const
Splits the string in multiple strings delimited by a set of separators.
StringUTF8 GetScheme() const
Returns the scheme of the URI.
Definition: CRNPath.cpp:303
bool IsNotEmpty() const noexcept
Checks if the string is not empty.
static char Separator() noexcept
Local directory separator.
Definition: CRNPath.cpp:42
#define CRN_END_CLASS_CONSTRUCTOR(classname)
Defines a class constructor.
Definition: CRNObject.h:198
A generic logic error.
Definition: CRNException.h:71
Path & Decode()
Replaces % codes with the corresponding character.
Definition: CRNPath.cpp:490
bool IsAbsolute() const
Is the path absolute?
Definition: CRNPath.cpp:173
char GetDrive() const
Gets the drive letter.
Definition: CRNPath.cpp:477
Text PushBackText(const StringUTF8 &text, bool cdata=false)
Adds a text at the end of the children list.
Definition: CRNXml.cpp:467
bool IsWindows() const
Is the path in Windows format?
Definition: CRNPath.cpp:135
const Path & operator/=(const Path &s)
Appends a string after adding directory separator if needed.
Definition: CRNPath.cpp:80
StringUTF8 SubString(size_t pos, size_t n=0) const
Extracts a part of the string.
const char * CStr() const noexcept
Conversion to UTF8 cstring.
bool IsEmpty() const noexcept
Checks if the string is empty.
A convenience class for file paths.
Definition: CRNPath.h:39
Path & ToWindows()
Converts the path to Windows format.
Definition: CRNPath.cpp:410
Path GetDirectory() const
Returns the full directory path.
Definition: CRNPath.cpp:267
XML text.
Definition: CRNXml.h:394
#define CRN_DATA_FACTORY_REGISTER(elemname, classname)
Registers a class to the data factory.
virtual StringUTF8 GetValue() const
Gets the content of the node.
Definition: CRNXml.cpp:171
static char NoDrive()
Invalid drive or no drive found.
Definition: CRNPath.cpp:34
static size_t NPos() noexcept
Last position in a string.
bool IsURI() const
Is the path a URI?
Definition: CRNPath.cpp:119
Path(Format fmt=Format::LOCAL)
Default constructor (empty string)
Definition: CRNPath.h:56
Path & ConvertTo(Format fmt)
Converts to a specific format.
Definition: CRNPath.cpp:146
Node GetFirstChild()
Gets the first child node.
Definition: CRNXml.cpp:303
Path & ToURI()
Converts the path to URI.
Definition: CRNPath.cpp:315
size_t BackwardFind(const StringUTF8 &s, size_t last_pos=NPos()) const
Finds the last occurrence of a string.
std::vector< Path > Split(const StringUTF8 &sep) const
Splits the string in multiple strings delimited by a set of separators.
Definition: CRNPath.cpp:545
Path GetBase() const
Returns the base of the filename.
Definition: CRNPath.cpp:235
void ShrinkToFit()
Optimizes the memory usage.
StringUTF8 & operator+=(const StringUTF8 &s)
Appends a string.
std::string & Std()&noexcept
Conversion to std string.
Path & ToLocal()
Converts the path to the local format.
Definition: CRNPath.cpp:534
virtual xml::Element Serialize(xml::Element &parent) const override
Dumps the object to an XML element.
Definition: CRNPath.cpp:583
size_t Size() const noexcept
Returns the number of bytes in the string.
A character string class.
Definition: CRNStringUTF8.h:49
size_t Find(const StringUTF8 &s, size_t from_pos=0) const
Finds the first occurrence of a string.
bool IsUnix() const
Is the path in Unix format?
Definition: CRNPath.cpp:127
Element PushBackElement(const StringUTF8 &name)
Adds an element at the end of the children list.
Definition: CRNXml.cpp:355
Path GetExtension() const
Returns the extension.
Definition: CRNPath.cpp:252
Format GetFormat() const
Returns the format.
Definition: CRNPath.cpp:105
XML node.
Definition: CRNXml.h:60
Invalid argument error (e.g.: nullptr pointer)
Definition: CRNException.h:107
#define CRN_BEGIN_CLASS_CONSTRUCTOR(classname)
Defines a class constructor.
Definition: CRNObject.h:185
Path GetFilename() const
Returns the filename.
Definition: CRNPath.cpp:205