admin管理员组

文章数量:1125573

I am trying to get the list of PDF files from a directory and generate an HTML-based report that groups and counts them by modification date.

The problem is that it is extremely slow, as the directory has over 200,000 objects and I am querying each one separately.

So is there a way to optimize the code? I tried using ls -ltr but couldn't get the loops and parsing to work (due to lack of experience).

Any leads would be appreciated.

Current Code : (try 1) (42 seconds runtime)

<pre>
<?php
/*
 * Try 1: count the PDF files in /pdf_files/dir per modification month and
 * print the result as an HTML table (one row per month plus a total row).
 *
 * Each file is stat'ed exactly once, and the months are tallied in a plain
 * array keyed by 'YYYY-MM' instead of dynamically named variables.
 */

/**
 * Tally Unix timestamps by calendar month, using date()'s timezone.
 *
 * @param int[] $timestamps Modification times in seconds since the epoch.
 * @return array<string,int> Map of 'YYYY-MM' => count, oldest month first.
 */
$countByMonth = static function (array $timestamps): array {
    $counts = [];
    foreach ($timestamps as $timestamp) {
        $month = date('Y-m', (int) $timestamp);
        $counts[$month] = ($counts[$month] ?? 0) + 1;
    }
    // 'YYYY-MM' keys sort chronologically when compared as strings.
    ksort($counts, SORT_STRING);

    return $counts;
};

/**
 * Render the per-month counts as an HTML table with a trailing total row.
 *
 * @param array<string,int> $monthlyCounts Map of 'YYYY-MM' => count.
 * @return string The complete <table> markup.
 */
$renderReport = static function (array $monthlyCounts): string {
    $cell = "<td style='text-align:center; vertical-align:middle; padding: 15px;'>";
    $html = '<table border=1>';
    foreach ($monthlyCounts as $month => $count) {
        list($year, $monthNumber) = explode('-', $month);
        // Day 1 of the month is enough to format the "Mon YYYY" label.
        $label = date('M Y', mktime(0, 0, 0, (int) $monthNumber, 1, (int) $year));
        $html .= "<tr>{$cell}{$label}</td>{$cell}{$count}</td></tr>";
    }
    // array_sum() yields 0 for an empty report, so the total row is never blank.
    $total = array_sum($monthlyCounts);

    return $html . "<tr>{$cell}Total</td>{$cell}{$total}</td></tr></table>";
};

// GLOB_NOSORT: the order of the paths is irrelevant for counting, so skip the sort.
$paths = glob('/pdf_files/dir/*.pdf', GLOB_NOSORT);
if ($paths === false) {
    // glob() signals an error with false; treat that like an empty directory.
    $paths = [];
}

$timestamps = [];
foreach ($paths as $path) {
    $modifiedAt = filemtime($path);
    // filemtime() returns false when the file cannot be stat'ed; skip such entries.
    if ($modifiedAt !== false) {
        $timestamps[] = $modifiedAt;
    }
}

echo $renderReport($countByMonth($timestamps));
?>
</pre>

try 2 (using stat) (37 seconds runtime)

<pre>
<?php
/*
 * Try 2: same per-month PDF report, but the modification times are read with
 * the external `stat` command, invoked once per leading digit 1-9.
 *
 * NOTE(review): the `N*.pdf` patterns only match file names that start with
 * 1-9. Presumably the split keeps each expanded argument list short; confirm
 * that no PDF name in the directory starts with any other character.
 */

/**
 * Extract the timestamps from command output that holds one integer per line.
 *
 * @param string|false|null $output Raw shell_exec() result; null/false mean "no output".
 * @return int[] The parsed timestamps; blank and non-numeric lines are dropped.
 */
$parseTimestamps = static function ($output): array {
    $timestamps = [];
    foreach (explode("\n", (string) $output) as $line) {
        $line = trim($line);
        if (preg_match('/^-?\d+$/', $line) === 1) {
            $timestamps[] = (int) $line;
        }
    }

    return $timestamps;
};

/**
 * Tally Unix timestamps by calendar month, using date()'s timezone.
 *
 * @param int[] $timestamps Modification times in seconds since the epoch.
 * @return array<string,int> Map of 'YYYY-MM' => count, oldest month first.
 */
$countByMonth = static function (array $timestamps): array {
    $counts = [];
    foreach ($timestamps as $timestamp) {
        $month = date('Y-m', (int) $timestamp);
        $counts[$month] = ($counts[$month] ?? 0) + 1;
    }
    // 'YYYY-MM' keys sort chronologically when compared as strings.
    ksort($counts, SORT_STRING);

    return $counts;
};

/**
 * Render the per-month counts as an HTML table with a trailing total row.
 *
 * @param array<string,int> $monthlyCounts Map of 'YYYY-MM' => count.
 * @return string The complete <table> markup.
 */
$renderReport = static function (array $monthlyCounts): string {
    $cell = "<td style='text-align:center; vertical-align:middle; padding: 15px;'>";
    $html = '<table border=1>';
    foreach ($monthlyCounts as $month => $count) {
        list($year, $monthNumber) = explode('-', $month);
        // Day 1 of the month is enough to format the "Mon YYYY" label.
        $label = date('M Y', mktime(0, 0, 0, (int) $monthNumber, 1, (int) $year));
        $html .= "<tr>{$cell}{$label}</td>{$cell}{$count}</td></tr>";
    }
    // array_sum() yields 0 for an empty report, so the total row is never blank.
    $total = array_sum($monthlyCounts);

    return $html . "<tr>{$cell}Total</td>{$cell}{$total}</td></tr></table>";
};

$timestamps = [];
for ($digit = 1; $digit < 10; $digit++) {
    // `&&` keeps stat from running in the current directory when the cd fails.
    // Only %Y (mtime) is requested: the file name was never used, and a name
    // containing a comma would have corrupted the old "name,mtime" parsing.
    $output = shell_exec("cd /pdf_files/dir && stat -c '%Y' {$digit}*.pdf");
    foreach ($parseTimestamps($output) as $timestamp) {
        $timestamps[] = $timestamp;
    }
}

echo $renderReport($countByMonth($timestamps));
?>
</pre>

try 3 ( using ls ) ( 33 seconds runtime )

<pre>
<?php
/*
 * Try 3: same per-month PDF report, but the modification times are taken from
 * the sixth column of `ls -l --time-style=+%s`, invoked once per leading
 * digit 1-9.
 *
 * NOTE(review): the `N*.pdf` patterns only match file names that start with
 * 1-9; confirm that no PDF name starts with any other character. Column-based
 * parsing of `ls -l` also assumes the owner and group fields contain no
 * spaces; lines whose sixth field is not an integer are ignored.
 */

/**
 * Extract the timestamps from command output that holds one integer per line.
 *
 * @param string|false|null $output Raw shell_exec() result; null/false mean "no output".
 * @return int[] The parsed timestamps; blank and non-numeric lines are dropped.
 */
$parseTimestamps = static function ($output): array {
    $timestamps = [];
    foreach (explode("\n", (string) $output) as $line) {
        $line = trim($line);
        if (preg_match('/^-?\d+$/', $line) === 1) {
            $timestamps[] = (int) $line;
        }
    }

    return $timestamps;
};

/**
 * Tally Unix timestamps by calendar month, using date()'s timezone.
 *
 * @param int[] $timestamps Modification times in seconds since the epoch.
 * @return array<string,int> Map of 'YYYY-MM' => count, oldest month first.
 */
$countByMonth = static function (array $timestamps): array {
    $counts = [];
    foreach ($timestamps as $timestamp) {
        $month = date('Y-m', (int) $timestamp);
        $counts[$month] = ($counts[$month] ?? 0) + 1;
    }
    // 'YYYY-MM' keys sort chronologically when compared as strings.
    ksort($counts, SORT_STRING);

    return $counts;
};

/**
 * Render the per-month counts as an HTML table with a trailing total row.
 *
 * @param array<string,int> $monthlyCounts Map of 'YYYY-MM' => count.
 * @return string The complete <table> markup.
 */
$renderReport = static function (array $monthlyCounts): string {
    $cell = "<td style='text-align:center; vertical-align:middle; padding: 15px;'>";
    $html = '<table border=1>';
    foreach ($monthlyCounts as $month => $count) {
        list($year, $monthNumber) = explode('-', $month);
        // Day 1 of the month is enough to format the "Mon YYYY" label.
        $label = date('M Y', mktime(0, 0, 0, (int) $monthNumber, 1, (int) $year));
        $html .= "<tr>{$cell}{$label}</td>{$cell}{$count}</td></tr>";
    }
    // array_sum() yields 0 for an empty report, so the total row is never blank.
    $total = array_sum($monthlyCounts);

    return $html . "<tr>{$cell}Total</td>{$cell}{$total}</td></tr></table>";
};

$timestamps = [];
for ($digit = 1; $digit < 10; $digit++) {
    // `&&` keeps ls from listing the current directory when the cd fails.
    // awk prints only the timestamp column; the file name was never used and
    // would be truncated for names containing spaces anyway.
    $output = shell_exec(
        "cd /pdf_files/dir && ls -l --time-style=\"+%s\" {$digit}*.pdf | awk '{print \$6}'"
    );
    foreach ($parseTimestamps($output) as $timestamp) {
        $timestamps[] = $timestamp;
    }
}

echo $renderReport($countByMonth($timestamps));
?>
</pre>

Output :

May 2024    8852
Jun 2024    14108
Jul 2024    15965
Aug 2024    18139
Sep 2024    13920
Oct 2024    14625
Nov 2024    17029
Dec 2024    39909
Total   142547

本文标签: linux - PHP - Scan a directory, group and count objects based on modification date - Stack Overflow