Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Alternative to 'realpath' to resolve "../" and "./" in a path

realpath does what I need, but only works if the files in the path actually exist.

I need a function which returns a normalized path from a string (e.g. ../some/./directory/a/b/c/../d to some/directory/a/b/d), regardless of whether the directories/files actually exist

Essentially the equivalent of PathCanonicalize on Windows.

Does such a function already exist?

like image 779
Silverlan Avatar asked Feb 22 '15 15:02

Silverlan


3 Answers

Python source code has an implementation of os.path.normpath for several platforms. The POSIX one (in the Lib/posixpath.py, for Python 3, line 318, or for Python 2, line 308) is unfortunately in Python, but the general logic can be easily reimplemented in C (the function is quite compact). Tested by many years of use.

There are normpath implementations for other platforms in the Python interpreter and standard-library source code as well, so a portable solution can be a combination of those.

Probably other systems/libraries, written in C, do have implementations of the same, as the normpath function is critical in a security sense.

(And the main advantage of having Python code is to be able to test your function in C with whatever, even random, input in parallel - and this kind of testing is important to make the function secure)

like image 78
Roman Susi Avatar answered Oct 11 '22 11:10

Roman Susi


I do not think there is any standard library function available for this.

You can use the function ap_getparents() in Apache httpd source code file server/util.c. I believe it does exactly what you want: https://github.com/apache/httpd/blob/trunk/server/util.c#L500

/*
 * IS_SLASH(s) - non-zero when the character s is a path separator.
 *
 * When WIN32 is defined both '/' and '\\' count as separators; otherwise
 * only '/' does.  The argument is parenthesized so that an expression such
 * as a conditional can be passed safely.  The WIN32 variant evaluates s
 * twice, so do not pass an argument with side effects.
 */
#ifdef WIN32
#define IS_SLASH(s) (((s) == '/') || ((s) == '\\'))
#else
#define IS_SLASH(s) ((s) == '/')
#endif

/*
 * Return the index at which the path segment containing name[pos] starts:
 * one past the nearest separator at or before pos, or 0 when there is no
 * such separator or pos is negative.
 */
static int ap_segment_start(const char *name, int pos)
{
    if (pos < 0)
        return 0;

    while (pos >= 0 && !IS_SLASH(name[pos]))
        pos--;

    return pos + 1;
}

/*
 * ap_getparents - remove "." and ".." segments from a path, in place.
 *
 * name is a NUL-terminated, writable string.  It is rewritten in its own
 * buffer and never grows.  The work is done in four passes; nothing before
 * the first '.' character is ever modified.
 */
void ap_getparents(char *name)
{
    char *scan = name;
    int src, dst, dot_at;

    /* Four passes, as per RFC 1808 */

    /* Locate the first '.'; every pass below starts from that offset. */
    while (*scan != '\0' && *scan != '.')
        scan++;
    dot_at = scan - name;

    /* a) remove ./ path segments */
    src = dst = dot_at;
    while (name[src] != '\0') {
        if (name[src] == '.' && IS_SLASH(name[src + 1])
            && (src == 0 || IS_SLASH(name[src - 1]))) {
            /* A "./" at the start of a segment: skip it without copying. */
            src += 2;
            continue;
        }
        name[dst++] = name[src++];
    }

    /* b) remove trailing . path, segment */
    if ((dst == 1 && name[0] == '.')
        || (dst > 1 && name[dst - 1] == '.' && IS_SLASH(name[dst - 2]))) {
        dst--;
    }
    name[dst] = '\0';

    /* c) remove all xx/../ segments. (including leading ../ and /../) */
    src = dot_at;
    while (name[src] != '\0') {
        int from, to;

        if (!(name[src] == '.' && name[src + 1] == '.'
              && IS_SLASH(name[src + 2])
              && (src == 0 || IS_SLASH(name[src - 1])))) {
            ++src;
            continue;
        }

        /* Text after "../" is shifted down over the preceding segment. */
        from = src + 3;
        src = ap_segment_start(name, src - 2);

        /* Copy the tail, terminator included; scanning resumes at src. */
        for (to = src; (name[to] = name[from]) != '\0'; ++to, ++from) {
        }
    }

    /* d) remove trailing xx/.. segment. */
    if (src == 2 && name[0] == '.' && name[1] == '.') {
        name[0] = '\0';
    }
    else if (src > 2 && name[src - 1] == '.' && name[src - 2] == '.'
             && IS_SLASH(name[src - 3])) {
        /* Cut the string at the start of the segment preceding "/..". */
        name[ap_segment_start(name, src - 4)] = '\0';
    }
}

(This is assuming re-using Apache Licensed code in your project is acceptable.)

like image 36
snap Avatar answered Oct 11 '22 12:10

snap


According to your problem statement, the following does exactly what you ask. The bulk of the code was from path.c as provided in the link in the comment. The modification to remove the preceding ../ was added to comply with your problem statement:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

void pathCanonicalize (char *path);

/*
 * Command-line driver: canonicalizes the path given as the first argument
 * and prints the original next to the canonical form.
 *
 * Returns 0 on success, 1 when no path argument was supplied or when the
 * working copy of the argument could not be allocated.
 */
int main (int argc, char **argv)
{
    if (argc < 2) {
        fprintf (stderr, "error: insufficient input, usage: %s <path>\n",
                argv[0]);
        return 1;
    }

    //pathCanonicalize rewrites its argument, so work on a private copy and
    //keep argv[1] untouched for the report below. The copy is made with
    //malloc/memcpy instead of strdup because strdup is a POSIX extension
    //that strict ISO C (before C23) does not declare.
    size_t size = strlen (argv[1]) + 1;
    char *fullpath = malloc (size);
    if (!fullpath) {
        fprintf (stderr, "error: virtual memory exhausted.\n");
        return 1;
    }
    memcpy (fullpath, argv[1], size);

    pathCanonicalize (fullpath);

    printf ("\n original : %s\n canonical: %s\n\n", argv[1], fullpath);

    free (fullpath);

    return 0;
}

/*
 * pathCanonicalize - normalize a path string in place, purely textually
 * (the file system is never consulted).
 *
 *   - backslashes become forward slashes and runs of separators collapse
 *     into a single '/';
 *   - "." components are removed;
 *   - ".." removes the preceding component; a ".." with nothing left to
 *     remove (at the start of a relative path, or directly under the root)
 *     is dropped;
 *   - a leading '/' is preserved, and a trailing '/' is preserved whenever
 *     the input had one;
 *   - a non-empty relative path that reduces to nothing becomes "."
 *     ("./" when the input ended with a separator); an empty input stays
 *     empty.
 *
 * Example: "../some/./directory/a/b/c/../d" -> "some/directory/a/b/d".
 *
 * path must be a writable, NUL-terminated string; the result is never
 * longer than the input.  A NULL path is ignored.
 */
void pathCanonicalize (char *path)
{
    size_t i;           //Read index
    size_t k;           //Write index
    size_t len;         //Length after separator normalization
    size_t root;        //1 when the path is absolute, 0 otherwise
    int trailing;       //Non-zero when the input ends with a separator

    if (path == NULL)
        return;

    //Pass 1: replace backslashes with forward slashes and collapse runs
    i = 0;
    k = 0;
    while (path[i] != '\0') {
        if (path[i] == '/' || path[i] == '\\') {
            path[k++] = '/';
            while (path[i] == '/' || path[i] == '\\')
                i++;
        } else {
            path[k++] = path[i++];
        }
    }
    path[k] = '\0';
    len = k;

    //Nothing to do for an empty string (and no room to write "." either)
    if (len == 0)
        return;

    root = (path[0] == '/') ? 1 : 0;
    trailing = (len > root && path[len - 1] == '/');

    //Pass 2: walk the components. The output is built at the front of the
    //same buffer as components joined by single slashes; the write index
    //never passes the read index, so unread input is never clobbered.
    i = root;
    k = root;
    while (path[i] != '\0') {
        size_t start = i;
        size_t n;

        //Find the end of the current component
        while (path[i] != '\0' && path[i] != '/')
            i++;
        n = i - start;

        //Step over the (single, already collapsed) separator
        if (path[i] == '/')
            i++;

        //"." element: contributes nothing
        if (n == 1 && path[start] == '.')
            continue;

        //".." element: drop the last component written so far, if any
        if (n == 2 && path[start] == '.' && path[start + 1] == '.') {
            while (k > root && path[k - 1] != '/')
                k--;
            //Also drop the separator that preceded that component
            if (k > root)
                k--;
            continue;
        }

        //Ordinary name: separate it from the previous component and copy
        if (k > root)
            path[k++] = '/';
        memmove (path + k, path + start, n);
        k += n;
    }

    //A relative path that reduced to nothing is the current directory
    if (k == 0)
        path[k++] = '.';

    //Restore the trailing separator unless the result is just the root
    if (trailing && path[k - 1] != '/')
        path[k++] = '/';

    //Properly terminate the string with a NUL character
    path[k] = '\0';
}

Use/Output

$ ./bin/pathcanonical ../some/./directory/a/b/c/../d

 original : ../some/./directory/a/b/c/../d
 canonical: some/directory/a/b/d
like image 7
David C. Rankin Avatar answered Oct 11 '22 10:10

David C. Rankin