Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

What algorithm is used in Vim's `gq` paragraph fill?

Tags:

vim

What algorithm does Vim use to break paragraphs into lines for paragraph fill (gq)? It is a simple greedy algorithm like the one used in Emacs, or something more sophisticated like the one used in TeX? And most important, where would I look in the source code to confirm the algorithm that is used?

like image 655
Norman Ramsey Avatar asked Dec 11 '22 23:12

Norman Ramsey


2 Answers

You can get vim's source code here. Download that and the formatting function is at vim/src/ops.c line: 4710. You may also want to take a look at par.

format_lines(line_count, avoid_fex)
    linenr_T    line_count;
    int     avoid_fex;      /* don't use 'formatexpr' */
{
    int     max_len;
    int     is_not_par;     /* current line not part of parag. */
    int     next_is_not_par;    /* next line not part of paragraph */
    int     is_end_par;     /* at end of paragraph */
    int     prev_is_end_par = FALSE;/* prev. line not part of parag. */
    int     next_is_start_par = FALSE;
#ifdef FEAT_COMMENTS
    int     leader_len = 0;     /* leader len of current line */
    int     next_leader_len;    /* leader len of next line */
    char_u  *leader_flags = NULL;   /* flags for leader of current line */
    char_u  *next_leader_flags; /* flags for leader of next line */
    int     do_comments;        /* format comments */
    int     do_comments_list = 0;   /* format comments with 'n' or '2' */
#endif
    int     advance = TRUE;
    int     second_indent = -1; /* indent for second line (comment
                     * aware) */
    int     do_second_indent;
    int     do_number_indent;
    int     do_trail_white;
    int     first_par_line = TRUE;
    int     smd_save;
    long    count;
    int     need_set_indent = TRUE; /* set indent of next paragraph */
    int     force_format = FALSE;
    int     old_State = State;

    /* length of a line to force formatting: 3 * 'tw' */
    max_len = comp_textwidth(TRUE) * 3;

    /* check for 'q', '2' and '1' in 'formatoptions' */
#ifdef FEAT_COMMENTS
    do_comments = has_format_option(FO_Q_COMS);
#endif
    do_second_indent = has_format_option(FO_Q_SECOND);
    do_number_indent = has_format_option(FO_Q_NUMBER);
    do_trail_white = has_format_option(FO_WHITE_PAR);

    /*
     * Get info about the previous and current line.
     */
    if (curwin->w_cursor.lnum > 1)
    is_not_par = fmt_check_par(curwin->w_cursor.lnum - 1
#ifdef FEAT_COMMENTS
                , &leader_len, &leader_flags, do_comments
#endif
                );
    else
    is_not_par = TRUE;
    next_is_not_par = fmt_check_par(curwin->w_cursor.lnum
#ifdef FEAT_COMMENTS
               , &next_leader_len, &next_leader_flags, do_comments
#endif
                );
    is_end_par = (is_not_par || next_is_not_par);
    if (!is_end_par && do_trail_white)
    is_end_par = !ends_in_white(curwin->w_cursor.lnum - 1);

    curwin->w_cursor.lnum--;
    for (count = line_count; count != 0 && !got_int; --count)
    {
    /*
     * Advance to next paragraph.
     */
    if (advance)
    {
        curwin->w_cursor.lnum++;
        prev_is_end_par = is_end_par;
        is_not_par = next_is_not_par;
#ifdef FEAT_COMMENTS
        leader_len = next_leader_len;
        leader_flags = next_leader_flags;
#endif
    }

    /*
     * The last line to be formatted.
     */
    if (count == 1 || curwin->w_cursor.lnum == curbuf->b_ml.ml_line_count)
    {
        next_is_not_par = TRUE;
#ifdef FEAT_COMMENTS
        next_leader_len = 0;
        next_leader_flags = NULL;
#endif
    }
    else
    {
        next_is_not_par = fmt_check_par(curwin->w_cursor.lnum + 1
#ifdef FEAT_COMMENTS
               , &next_leader_len, &next_leader_flags, do_comments
#endif
                    );
        if (do_number_indent)
        next_is_start_par =
               (get_number_indent(curwin->w_cursor.lnum + 1) > 0);
    }
    advance = TRUE;
    is_end_par = (is_not_par || next_is_not_par || next_is_start_par);
    if (!is_end_par && do_trail_white)
        is_end_par = !ends_in_white(curwin->w_cursor.lnum);

    /*
     * Skip lines that are not in a paragraph.
     */
    if (is_not_par)
    {
        if (line_count < 0)
        break;
    }
    else
    {
        /*
         * For the first line of a paragraph, check indent of second line.
         * Don't do this for comments and empty lines.
         */
        if (first_par_line
            && (do_second_indent || do_number_indent)
            && prev_is_end_par
            && curwin->w_cursor.lnum < curbuf->b_ml.ml_line_count)
        {
        if (do_second_indent && !lineempty(curwin->w_cursor.lnum + 1))
        {
#ifdef FEAT_COMMENTS
            if (leader_len == 0 && next_leader_len == 0)
            {
            /* no comment found */
#endif
            second_indent =
                   get_indent_lnum(curwin->w_cursor.lnum + 1);
#ifdef FEAT_COMMENTS
            }
            else
            {
            second_indent = next_leader_len;
            do_comments_list = 1;
            }
#endif
        }
        else if (do_number_indent)
        {
#ifdef FEAT_COMMENTS
            if (leader_len == 0 && next_leader_len == 0)
            {
            /* no comment found */
#endif
            second_indent =
                     get_number_indent(curwin->w_cursor.lnum);
#ifdef FEAT_COMMENTS
            }
            else
            {
            /* get_number_indent() is now "comment aware"... */
            second_indent =
                     get_number_indent(curwin->w_cursor.lnum);
            do_comments_list = 1;
            }
#endif
        }
        }

        /*
         * When the comment leader changes, it's the end of the paragraph.
         */
        if (curwin->w_cursor.lnum >= curbuf->b_ml.ml_line_count
#ifdef FEAT_COMMENTS
            || !same_leader(curwin->w_cursor.lnum,
                    leader_len, leader_flags,
                      next_leader_len, next_leader_flags)
#endif
            )
        is_end_par = TRUE;

        /*
         * If we have got to the end of a paragraph, or the line is
         * getting long, format it.
         */
        if (is_end_par || force_format)
        {
        if (need_set_indent)
            /* replace indent in first line with minimal number of
             * tabs and spaces, according to current options */
            (void)set_indent(get_indent(), SIN_CHANGED);

        /* put cursor on last non-space */
        State = NORMAL; /* don't go past end-of-line */
        coladvance((colnr_T)MAXCOL);
        while (curwin->w_cursor.col && vim_isspace(gchar_cursor()))
            dec_cursor();

        /* do the formatting, without 'showmode' */
        State = INSERT; /* for open_line() */
        smd_save = p_smd;
        p_smd = FALSE;
        insertchar(NUL, INSCHAR_FORMAT
#ifdef FEAT_COMMENTS
            + (do_comments ? INSCHAR_DO_COM : 0)
            + (do_comments && do_comments_list
                               ? INSCHAR_COM_LIST : 0)
#endif
            + (avoid_fex ? INSCHAR_NO_FEX : 0), second_indent);
        State = old_State;
        p_smd = smd_save;
        second_indent = -1;
        /* at end of par.: need to set indent of next par. */
        need_set_indent = is_end_par;
        if (is_end_par)
        {
            /* When called with a negative line count, break at the
             * end of the paragraph. */
            if (line_count < 0)
            break;
            first_par_line = TRUE;
        }
        force_format = FALSE;
        }

        /*
         * When still in same paragraph, join the lines together.  But
         * first delete the comment leader from the second line.
         */
        if (!is_end_par)
        {
        advance = FALSE;
        curwin->w_cursor.lnum++;
        curwin->w_cursor.col = 0;
        if (line_count < 0 && u_save_cursor() == FAIL)
            break;
#ifdef FEAT_COMMENTS
        (void)del_bytes((long)next_leader_len, FALSE, FALSE);
        if (next_leader_len > 0)
            mark_col_adjust(curwin->w_cursor.lnum, (colnr_T)0, 0L,
                              (long)-next_leader_len);
#endif
        curwin->w_cursor.lnum--;
        if (do_join(2, TRUE, FALSE, FALSE) == FAIL)
        {
            beep_flush();
            break;
        }
        first_par_line = FALSE;
        /* If the line is getting long, format it next time */
        if (STRLEN(ml_get_curline()) > (size_t)max_len)
            force_format = TRUE;
        else
            force_format = FALSE;
        }
    }
    line_breakcheck();
    }
}
like image 159
Conner Avatar answered Jan 15 '23 11:01

Conner


As with many vim things, it's customisable. The default is a greedy algorithm ("A longer [than textwidth] line will be broken after whitespace to get this width"), but by setting formatexpr you can use vimscript to format your paragraphs, and by setting formatprg (for example, to fmt or par) you can cede control to an external program.

like image 33
Andrew Aylett Avatar answered Jan 15 '23 11:01

Andrew Aylett